| author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-21 11:54:28 +0000 |
|---|---|---|
| committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-21 11:54:28 +0000 |
| commit | e6918187568dbd01842d8d1d2c808ce16a894239 (patch) | |
| tree | 64f88b554b444a49f656b6c656111a145cbbaa28 | /src/arrow/docs |
| parent | Initial commit. (diff) | |
| download | ceph-e6918187568dbd01842d8d1d2c808ce16a894239.tar.xz ceph-e6918187568dbd01842d8d1d2c808ce16a894239.zip | |
Adding upstream version 18.2.2. (tag: upstream/18.2.2)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/arrow/docs')
139 files changed, 22543 insertions, 0 deletions
diff --git a/src/arrow/docs/.gitignore b/src/arrow/docs/.gitignore new file mode 100644 index 000000000..d2e9f6ccc --- /dev/null +++ b/src/arrow/docs/.gitignore @@ -0,0 +1,19 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +_build +source/python/generated diff --git a/src/arrow/docs/Makefile b/src/arrow/docs/Makefile new file mode 100644 index 000000000..fdff066a3 --- /dev/null +++ b/src/arrow/docs/Makefile @@ -0,0 +1,248 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# +# Makefile for Sphinx documentation +# + +# You can set these variables from the command line. + +# Do not fail the build if there are warnings +# SPHINXOPTS = -j8 -W +SPHINXOPTS = -j8 + +SPHINXBUILD = sphinx-build +PAPER = +BUILDDIR = _build + +# Internal variables. 
+PAPEROPT_a4 = -D latex_paper_size=a4 +PAPEROPT_letter = -D latex_paper_size=letter +ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source +# the i18n builder cannot share the environment and doctrees with the others +I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source + +.PHONY: help +help: + @echo "Please use \`make <target>' where <target> is one of" + @echo " html to make standalone HTML files" + @echo " dirhtml to make HTML files named index.html in directories" + @echo " singlehtml to make a single large HTML file" + @echo " pickle to make pickle files" + @echo " json to make JSON files" + @echo " htmlhelp to make HTML files and a HTML help project" + @echo " qthelp to make HTML files and a qthelp project" + @echo " applehelp to make an Apple Help Book" + @echo " devhelp to make HTML files and a Devhelp project" + @echo " epub to make an epub" + @echo " epub3 to make an epub3" + @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" + @echo " latexpdf to make LaTeX files and run them through pdflatex" + @echo " latexpdfja to make LaTeX files and run them through platex/dvipdfmx" + @echo " text to make text files" + @echo " man to make manual pages" + @echo " texinfo to make Texinfo files" + @echo " info to make Texinfo files and run them through makeinfo" + @echo " gettext to make PO message catalogs" + @echo " changes to make an overview of all changed/added/deprecated items" + @echo " xml to make Docutils-native XML files" + @echo " pseudoxml to make pseudoxml-XML files for display purposes" + @echo " linkcheck to check all external links for integrity" + @echo " doctest to run all doctests embedded in the documentation (if enabled)" + @echo " coverage to run coverage check of the documentation (if enabled)" + @echo " dummy to check syntax errors of document sources" + +.PHONY: clean +clean: + rm -rf $(BUILDDIR)/* + rm -rf source/python/generated/* + +.PHONY: html +html: + $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html + @echo + @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." + +.PHONY: dirhtml +dirhtml: + $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml + @echo + @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml." + +.PHONY: singlehtml +singlehtml: + $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml + @echo + @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml." + +.PHONY: pickle +pickle: + $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle + @echo + @echo "Build finished; now you can process the pickle files." + +.PHONY: json +json: + $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json + @echo + @echo "Build finished; now you can process the JSON files." + +.PHONY: htmlhelp +htmlhelp: + $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp + @echo + @echo "Build finished; now you can run HTML Help Workshop with the" \ + ".hhp project file in $(BUILDDIR)/htmlhelp." + +.PHONY: qthelp +qthelp: + $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp + @echo + @echo "Build finished; now you can run "qcollectiongenerator" with the" \ + ".qhcp project file in $(BUILDDIR)/qthelp, like this:" + @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/pyarrow.qhcp" + @echo "To view the help file:" + @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/pyarrow.qhc" + +.PHONY: applehelp +applehelp: + $(SPHINXBUILD) -b applehelp $(ALLSPHINXOPTS) $(BUILDDIR)/applehelp + @echo + @echo "Build finished. 
The help book is in $(BUILDDIR)/applehelp." + @echo "N.B. You won't be able to view it unless you put it in" \ + "~/Library/Documentation/Help or install it in your application" \ + "bundle." + +.PHONY: devhelp +devhelp: + $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp + @echo + @echo "Build finished." + @echo "To view the help file:" + @echo "# mkdir -p $$HOME/.local/share/devhelp/pyarrow" + @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/pyarrow" + @echo "# devhelp" + +.PHONY: epub +epub: + $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub + @echo + @echo "Build finished. The epub file is in $(BUILDDIR)/epub." + +.PHONY: epub3 +epub3: + $(SPHINXBUILD) -b epub3 $(ALLSPHINXOPTS) $(BUILDDIR)/epub3 + @echo + @echo "Build finished. The epub3 file is in $(BUILDDIR)/epub3." + +.PHONY: latex +latex: + $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex + @echo + @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." + @echo "Run \`make' in that directory to run these through (pdf)latex" \ + "(use \`make latexpdf' here to do that automatically)." + +.PHONY: latexpdf +latexpdf: + $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex + @echo "Running LaTeX files through pdflatex..." + $(MAKE) -C $(BUILDDIR)/latex all-pdf + @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." + +.PHONY: latexpdfja +latexpdfja: + $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex + @echo "Running LaTeX files through platex and dvipdfmx..." + $(MAKE) -C $(BUILDDIR)/latex all-pdf-ja + @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." + +.PHONY: text +text: + $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text + @echo + @echo "Build finished. The text files are in $(BUILDDIR)/text." + +.PHONY: man +man: + $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man + @echo + @echo "Build finished. The manual pages are in $(BUILDDIR)/man." + +.PHONY: texinfo +texinfo: + $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo + @echo + @echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo." + @echo "Run \`make' in that directory to run these through makeinfo" \ + "(use \`make info' here to do that automatically)." + +.PHONY: info +info: + $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo + @echo "Running Texinfo files through makeinfo..." + make -C $(BUILDDIR)/texinfo info + @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo." + +.PHONY: gettext +gettext: + $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale + @echo + @echo "Build finished. The message catalogs are in $(BUILDDIR)/locale." + +.PHONY: changes +changes: + $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes + @echo + @echo "The overview file is in $(BUILDDIR)/changes." + +.PHONY: linkcheck +linkcheck: + $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck + @echo + @echo "Link check complete; look for any errors in the above output " \ + "or in $(BUILDDIR)/linkcheck/output.txt." + +.PHONY: doctest +doctest: + $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest + @echo "Testing of doctests in the sources finished, look at the " \ + "results in $(BUILDDIR)/doctest/output.txt." + +.PHONY: coverage +coverage: + $(SPHINXBUILD) -b coverage $(ALLSPHINXOPTS) $(BUILDDIR)/coverage + @echo "Testing of coverage in the sources finished, look at the " \ + "results in $(BUILDDIR)/coverage/python.txt." 
+ +.PHONY: xml +xml: + $(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml + @echo + @echo "Build finished. The XML files are in $(BUILDDIR)/xml." + +.PHONY: pseudoxml +pseudoxml: + $(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml + @echo + @echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml." + +.PHONY: dummy +dummy: + $(SPHINXBUILD) -b dummy $(ALLSPHINXOPTS) $(BUILDDIR)/dummy + @echo + @echo "Build finished. Dummy builder generates no files." diff --git a/src/arrow/docs/README.md b/src/arrow/docs/README.md new file mode 100644 index 000000000..213042641 --- /dev/null +++ b/src/arrow/docs/README.md @@ -0,0 +1,30 @@ +<!--- + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. +--> + +# Apache Arrow Documentation + +This directory contains source files for building the main project +documentation. This includes the [Arrow columnar format specification][2]. + +Instructions for building the documentation site are found in +[docs/source/developers/documentation.rst][1]. The build depends on the API +documentation for some of the project subcomponents. + +[1]: https://github.com/apache/arrow/blob/master/docs/source/developers/documentation.rst +[2]: https://github.com/apache/arrow/tree/master/docs/source/format diff --git a/src/arrow/docs/environment.yml b/src/arrow/docs/environment.yml new file mode 100644 index 000000000..8d1fe9bfb --- /dev/null +++ b/src/arrow/docs/environment.yml @@ -0,0 +1,25 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +channels: +- defaults +- conda-forge +dependencies: +- arrow-cpp +- parquet-cpp +- pyarrow +- numpydoc diff --git a/src/arrow/docs/make.bat b/src/arrow/docs/make.bat new file mode 100644 index 000000000..36f2086c2 --- /dev/null +++ b/src/arrow/docs/make.bat @@ -0,0 +1,52 @@ +@rem Licensed to the Apache Software Foundation (ASF) under one +@rem or more contributor license agreements. See the NOTICE file +@rem distributed with this work for additional information +@rem regarding copyright ownership. 
The ASF licenses this file +@rem to you under the Apache License, Version 2.0 (the +@rem "License"); you may not use this file except in compliance +@rem with the License. You may obtain a copy of the License at +@rem +@rem http://www.apache.org/licenses/LICENSE-2.0 +@rem +@rem Unless required by applicable law or agreed to in writing, +@rem software distributed under the License is distributed on an +@rem "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +@rem KIND, either express or implied. See the License for the +@rem specific language governing permissions and limitations +@rem under the License. + +@ECHO OFF + +pushd %~dp0 + +REM Command file for Sphinx documentation + +if "%SPHINXBUILD%" == "" ( + set SPHINXBUILD=sphinx-build +) +set SOURCEDIR=source +set BUILDDIR=_build + +if "%1" == "" goto help + +%SPHINXBUILD% >NUL 2>NUL +if errorlevel 9009 ( + echo. + echo.The 'sphinx-build' command was not found. Make sure you have Sphinx + echo.installed, then set the SPHINXBUILD environment variable to point + echo.to the full path of the 'sphinx-build' executable. Alternatively you + echo.may add the Sphinx directory to PATH. + echo. + echo.If you don't have Sphinx installed, grab it from + echo.http://sphinx-doc.org/ + exit /b 1 +) + +%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% +goto end + +:help +%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% + +:end +popd diff --git a/src/arrow/docs/requirements.txt b/src/arrow/docs/requirements.txt new file mode 100644 index 000000000..0dbca6922 --- /dev/null +++ b/src/arrow/docs/requirements.txt @@ -0,0 +1,5 @@ +breathe +ipython +numpydoc +sphinx==2.4.4 +pydata-sphinx-theme diff --git a/src/arrow/docs/source/_static/arrow.png b/src/arrow/docs/source/_static/arrow.png Binary files differnew file mode 100644 index 000000000..72104b075 --- /dev/null +++ b/src/arrow/docs/source/_static/arrow.png diff --git a/src/arrow/docs/source/_static/favicon.ico b/src/arrow/docs/source/_static/favicon.ico Binary files differnew file mode 100644 index 000000000..33a554a8a --- /dev/null +++ b/src/arrow/docs/source/_static/favicon.ico diff --git a/src/arrow/docs/source/_static/theme_overrides.css b/src/arrow/docs/source/_static/theme_overrides.css new file mode 100644 index 000000000..d7d0bdfdb --- /dev/null +++ b/src/arrow/docs/source/_static/theme_overrides.css @@ -0,0 +1,126 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + + +/* Customizing with theme CSS variables */ + +:root { + --pst-color-active-navigation: 215, 70, 51; + --pst-color-link-hover: 215, 70, 51; + --pst-color-headerlink: 215, 70, 51; + /* Use normal text color (like h3, ..) 
instead of primary color */ + --pst-color-h1: var(--color-text-base); + --pst-color-h2: var(--color-text-base); + /* Use softer blue from bootstrap's default info color */ + --pst-color-info: 23, 162, 184; + --pst-header-height: 0px; +} + +code { + color: rgb(215, 70, 51); +} + +.footer { + text-align: center; +} + +/* Ensure the logo is properly displayed */ + +.navbar-brand { + height: auto; + width: auto; +} + +a.navbar-brand img { + height: auto; + width: auto; + max-height: 15vh; + max-width: 100%; +} + + +/* This is the bootstrap CSS style for "table-striped". Since the theme does +not yet provide an easy way to configure this globaly, it easier to simply +include this snippet here than updating each table in all rst files to +add ":class: table-striped" */ + +.table tbody tr:nth-of-type(odd) { + background-color: rgba(0, 0, 0, 0.05); +} + +/* Iprove the vertical spacing in the C++ API docs +(ideally this should be upstreamed to the pydata-sphinx-theme */ + +dl.cpp dd p { + margin-bottom:.4rem; +} + +dl.cpp.enumerator { + margin-bottom: 0.2rem; +} + +p.breathe-sectiondef-title { + margin-top: 1rem; +} + +/* Limit the max height of the sidebar navigation section. Because in our +custimized template, there is more content above the navigation, i.e. +larger logo: if we don't decrease the max-height, it will overlap with +the footer. +Details: min(15vh, 110px) for the logo size, 8rem for search box etc*/ + +@media (min-width:720px) { + @supports (position:-webkit-sticky) or (position:sticky) { + .bd-links { + max-height: calc(100vh - min(15vh, 110px) - 8rem) + } + } +} + +/* Styling to get the version dropdown and search box side-by-side on wide screens */ + +#version-search-wrapper { + overflow: hidden; + width: inherit; + display: flex; + flex-wrap: wrap; + justify-content: left; + align-items: center; +} + +#version-button { + padding-left: 0.5rem; + padding-right: 1rem; +} + +#search-box { + flex: 1 0 12em; +} + +/* Fix table text wrapping in RTD theme, + * see https://rackerlabs.github.io/docs-rackspace/tools/rtd-tables.html + */ + +@media screen { + table.docutils td { + /* !important prevents the common CSS stylesheets from overriding + this as on RTD they are loaded after this stylesheet */ + white-space: normal !important; + } +} diff --git a/src/arrow/docs/source/_static/versions.json b/src/arrow/docs/source/_static/versions.json new file mode 100644 index 000000000..d364cfe27 --- /dev/null +++ b/src/arrow/docs/source/_static/versions.json @@ -0,0 +1,26 @@ +[ + { + "name": "6.0 (stable)", + "version": "" + }, + { + "name": "5.0", + "version": "5.0/" + }, + { + "name": "4.0", + "version": "4.0/" + }, + { + "name": "3.0", + "version": "3.0/" + }, + { + "name": "2.0", + "version": "2.0/" + }, + { + "name": "1.0", + "version": "1.0/" + } +]
\ No newline at end of file diff --git a/src/arrow/docs/source/_templates/docs-sidebar.html b/src/arrow/docs/source/_templates/docs-sidebar.html new file mode 100644 index 000000000..fde4435df --- /dev/null +++ b/src/arrow/docs/source/_templates/docs-sidebar.html @@ -0,0 +1,25 @@ + +<a class="navbar-brand" href="{{ pathto(master_doc) }}"> + <img src="{{ pathto('_static/' + logo, 1) }}" class="logo" alt="logo"> +</a> + +<div id="version-search-wrapper"> + +{% include "version-switcher.html" %} + +<form id="search-box" class="bd-search d-flex align-items-center" action="{{ pathto('search') }}" method="get"> + <i class="icon fas fa-search"></i> + <input type="search" class="form-control" name="q" id="search-input" placeholder="{{ theme_search_bar_text }}" aria-label="{{ theme_search_bar_text }}" autocomplete="off" > +</form> + +</div> + +<nav class="bd-links" id="bd-docs-nav" aria-label="Main navigation"> + <div class="bd-toc-item active"> + {% if "python/api" in pagename or "python/generated" in pagename %} + {{ generate_nav_html("sidebar", startdepth=0, maxdepth=3, collapse=False, includehidden=True, titles_only=True) }} + {% else %} + {{ generate_nav_html("sidebar", startdepth=0, maxdepth=4, collapse=False, includehidden=True, titles_only=True) }} + {% endif %} + </div> +</nav> diff --git a/src/arrow/docs/source/_templates/layout.html b/src/arrow/docs/source/_templates/layout.html new file mode 100644 index 000000000..a9d0f30bc --- /dev/null +++ b/src/arrow/docs/source/_templates/layout.html @@ -0,0 +1,5 @@ +{% extends "pydata_sphinx_theme/layout.html" %} + +{# Silence the navbar #} +{% block docs_navbar %} +{% endblock %} diff --git a/src/arrow/docs/source/_templates/version-switcher.html b/src/arrow/docs/source/_templates/version-switcher.html new file mode 100644 index 000000000..24a8c15ac --- /dev/null +++ b/src/arrow/docs/source/_templates/version-switcher.html @@ -0,0 +1,60 @@ +<div id="version-button" class="dropdown"> + <button type="button" class="btn btn-secondary btn-sm navbar-btn dropdown-toggle" id="version_switcher_button" data-toggle="dropdown"> + {{ release }} + <span class="caret"></span> + </button> + <div id="version_switcher" class="dropdown-menu list-group-flush py-0" aria-labelledby="version_switcher_button"> + <!-- dropdown will be populated by javascript on page load --> + </div> +</div> + +<script type="text/javascript"> +// Function to construct the target URL from the JSON components +function buildURL(entry) { + var template = "{{ switcher_template_url }}"; // supplied by jinja + template = template.replace("{version}", entry.version); + return template; +} + +// Function to check if corresponding page path exists in other version of docs +// and, if so, go there instead of the homepage of the other docs version +function checkPageExistsAndRedirect(event) { + const currentFilePath = "{{ pagename }}.html", + otherDocsHomepage = event.target.getAttribute("href"); + let tryUrl = `${otherDocsHomepage}${currentFilePath}`; + $.ajax({ + type: 'HEAD', + url: tryUrl, + // if the page exists, go there + success: function() { + location.href = tryUrl; + } + }).fail(function() { + location.href = otherDocsHomepage; + }); + return false; +} + +// Function to populate the version switcher +(function () { + // get JSON config + $.getJSON("{{ switcher_json_url }}", function(data, textStatus, jqXHR) { + // create the nodes first (before AJAX calls) to ensure the order is + // correct (for now, links will go to doc version homepage) + $.each(data, function(index, entry) { + // if 
no custom name specified (e.g., "latest"), use version string + if (!("name" in entry)) { + entry.name = entry.version; + } + // construct the appropriate URL, and add it to the dropdown + entry.url = buildURL(entry); + const node = document.createElement("a"); + node.setAttribute("class", "list-group-item list-group-item-action py-1"); + node.setAttribute("href", `${entry.url}`); + node.textContent = `${entry.name}`; + node.onclick = checkPageExistsAndRedirect; + $("#version_switcher").append(node); + }); + }); +})(); +</script> diff --git a/src/arrow/docs/source/c_glib/index.rst b/src/arrow/docs/source/c_glib/index.rst new file mode 100644 index 000000000..56db23f2a --- /dev/null +++ b/src/arrow/docs/source/c_glib/index.rst @@ -0,0 +1,21 @@ +.. Licensed to the Apache Software Foundation (ASF) under one +.. or more contributor license agreements. See the NOTICE file +.. distributed with this work for additional information +.. regarding copyright ownership. The ASF licenses this file +.. to you under the Apache License, Version 2.0 (the +.. "License"); you may not use this file except in compliance +.. with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, +.. software distributed under the License is distributed on an +.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +.. KIND, either express or implied. See the License for the +.. specific language governing permissions and limitations +.. under the License. + +C/GLib docs +=========== + +Stub page for the C/GLib docs; actual source is located in c_glib/doc/ sub-directory. diff --git a/src/arrow/docs/source/conf.py b/src/arrow/docs/source/conf.py new file mode 100644 index 000000000..150cd4181 --- /dev/null +++ b/src/arrow/docs/source/conf.py @@ -0,0 +1,464 @@ +# -*- coding: utf-8 -*- +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +# This file is execfile()d with the current directory set to its +# containing dir. +# +# Note that not all possible configuration values are present in this +# autogenerated file. +# +# All configuration values have a default; values that are commented out +# serve to show the default. + +# If extensions (or modules to document with autodoc) are in another directory, +# add these directories to sys.path here. If the directory is relative to the +# documentation root, use os.path.abspath to make it absolute, like shown here. +# + +import datetime +import os +import sys +import warnings +from unittest import mock + +import pyarrow + + +sys.path.extend([ + os.path.join(os.path.dirname(__file__), + '..', '../..') + +]) + +# Suppresses all warnings printed when sphinx is traversing the code (e.g. 
+# deprecation warnings) +warnings.filterwarnings("ignore", category=FutureWarning, message=".*pyarrow.*") + +# -- General configuration ------------------------------------------------ + +# If your documentation needs a minimal Sphinx version, state it here. +# +# needs_sphinx = '1.0' + +# Add any Sphinx extension module names here, as strings. They can be +# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom +# ones. +extensions = [ + 'sphinx.ext.autodoc', + 'sphinx.ext.autosummary', + 'sphinx.ext.doctest', + 'sphinx.ext.ifconfig', + 'sphinx.ext.mathjax', + 'sphinx.ext.viewcode', + 'sphinx.ext.napoleon', + 'IPython.sphinxext.ipython_directive', + 'IPython.sphinxext.ipython_console_highlighting', + 'breathe' +] + +# Show members for classes in .. autosummary +autodoc_default_options = { + 'members': None, + 'undoc-members': None, + 'show-inheritance': None, + 'inherited-members': None +} + +# Breathe configuration +breathe_projects = {"arrow_cpp": "../../cpp/apidoc/xml"} +breathe_default_project = "arrow_cpp" + +# Overriden conditionally below +autodoc_mock_imports = [] + +# ipython directive options +ipython_mplbackend = '' + +# numpydoc configuration +napoleon_use_rtype = False + +# Add any paths that contain templates here, relative to this directory. +templates_path = ['_templates'] + +# The suffix(es) of source filenames. +# You can specify multiple suffix as a list of string: +# + +source_suffix = ['.rst'] + +autosummary_generate = True + +# The encoding of source files. +# +# source_encoding = 'utf-8-sig' + +# The master toctree document. +master_doc = 'index' + +# General information about the project. +project = u'Apache Arrow' +copyright = f'2016-{datetime.datetime.now().year} Apache Software Foundation' +author = u'Apache Software Foundation' + +# The version info for the project you're documenting, acts as replacement for +# |version| and |release|, also used in various other places throughout the +# built documents. +# +# The short X.Y version. +version = os.environ.get('ARROW_DOCS_VERSION', + pyarrow.__version__) +# The full version, including alpha/beta/rc tags. +release = os.environ.get('ARROW_DOCS_VERSION', + pyarrow.__version__) + +if "+" in release: + release = release.split(".dev")[0] + " (dev)" + +# The language for content autogenerated by Sphinx. Refer to documentation +# for a list of supported languages. +# +# This is also used if you do content translation via gettext catalogs. +# Usually you set "language" from the command line for these cases. +language = None + +# There are two options for replacing |today|: either, you set today to some +# non-false value, then it is used: +# +# today = '' +# +# Else, today_fmt is used as the format for a strftime call. +# +# today_fmt = '%B %d, %Y' + +# List of patterns, relative to source directory, that match files and +# directories to ignore when looking for source files. +# This patterns also effect to html_static_path and html_extra_path +exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] + +# The reST default role (used for this markup: `text`) to use for all +# documents. +# +# default_role = None + +# If true, '()' will be appended to :func: etc. cross-reference text. +# +# add_function_parentheses = True + +# If true, the current module name will be prepended to all description +# unit titles (such as .. function::). +# +# add_module_names = True + +# If true, sectionauthor and moduleauthor directives will be shown in the +# output. They are ignored by default. 
+# +# show_authors = False + +# The name of the Pygments (syntax highlighting) style to use. +pygments_style = 'sphinx' + +# A list of ignored prefixes for module index sorting. +# modindex_common_prefix = [] + +# If true, keep warnings as "system message" paragraphs in the built documents. +# keep_warnings = False + +# If true, `todo` and `todoList` produce output, else they produce nothing. +todo_include_todos = False + + +# -- Options for HTML output ---------------------------------------------- + +# The theme to use for HTML and HTML Help pages. See the documentation for +# a list of builtin themes. +# +html_theme = 'pydata_sphinx_theme' + +# Theme options are theme-specific and customize the look and feel of a theme +# further. For a list of options available for each theme, see the +# documentation. +# +html_theme_options = { + "show_toc_level": 2, + "google_analytics_id": "UA-107500873-1", +} + +html_context = { + "switcher_json_url": "/docs/_static/versions.json", + "switcher_template_url": "https://arrow.apache.org/docs/{version}", + # for local testing + # "switcher_template_url": "http://0.0.0.0:8000/docs/{version}", +} + +# Add any paths that contain custom themes here, relative to this directory. +# html_theme_path = [] + +# The name for this set of Sphinx documents. +# "<project> v<release> documentation" by default. +# +html_title = u'Apache Arrow v{}'.format(version) + +# A shorter title for the navigation bar. Default is the same as html_title. +# +# html_short_title = None + +# The name of an image file (relative to this directory) to place at the top +# of the sidebar. +# +html_logo = "_static/arrow.png" + +# The name of an image file (relative to this directory) to use as a favicon of +# the docs. This file should be a Windows icon file (.ico) being 16x16 or +# 32x32 pixels large. +# +html_favicon = "_static/favicon.ico" + +# Add any paths that contain custom static files (such as style sheets) here, +# relative to this directory. They are copied after the builtin static files, +# so a file named "default.css" will overwrite the builtin "default.css". +html_static_path = ['_static'] + +# Custom fixes to the RTD theme +html_css_files = ['theme_overrides.css'] + +# Add any extra paths that contain custom files (such as robots.txt or +# .htaccess) here, relative to this directory. These files are copied +# directly to the root of the documentation. +# +# html_extra_path = [] + +# If not None, a 'Last updated on:' timestamp is inserted at every page +# bottom, using the given strftime format. +# The empty string is equivalent to '%b %d, %Y'. +# +# html_last_updated_fmt = None + +# If true, SmartyPants will be used to convert quotes and dashes to +# typographically correct entities. +# +# html_use_smartypants = True + +# Custom sidebar templates, maps document names to template names. +# +html_sidebars = { +# '**': ['sidebar-logo.html', 'sidebar-search-bs.html', 'sidebar-nav-bs.html'], + '**': ['docs-sidebar.html'], +} + +# The base URL which points to the root of the HTML documentation, +# used for canonical url +html_baseurl = "https://arrow.apache.org/docs/" + +# Additional templates that should be rendered to pages, maps page names to +# template names. +# +# html_additional_pages = {} + +# If false, no module index is generated. +# +# html_domain_indices = True + +# If false, no index is generated. +# +# html_use_index = True + +# If true, the index is split into individual pages for each letter. 
+# +# html_split_index = False + +# If true, links to the reST sources are added to the pages. +# +# html_show_sourcelink = True + +# If true, "Created using Sphinx" is shown in the HTML footer. Default is True. +# +# html_show_sphinx = True + +# If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. +# +# html_show_copyright = True + +# If true, an OpenSearch description file will be output, and all pages will +# contain a <link> tag referring to it. The value of this option must be the +# base URL from which the finished HTML is served. +# +# html_use_opensearch = '' + +# This is the file name suffix for HTML files (e.g. ".xhtml"). +# html_file_suffix = None + +# Language to be used for generating the HTML full-text search index. +# Sphinx supports the following languages: +# 'da', 'de', 'en', 'es', 'fi', 'fr', 'hu', 'it', 'ja' +# 'nl', 'no', 'pt', 'ro', 'ru', 'sv', 'tr', 'zh' +# +# html_search_language = 'en' + +# A dictionary with options for the search language support, empty by default. +# 'ja' uses this config value. +# 'zh' user can custom change `jieba` dictionary path. +# +# html_search_options = {'type': 'default'} + +# The name of a javascript file (relative to the configuration directory) that +# implements a search results scorer. If empty, the default will be used. +# +# html_search_scorer = 'scorer.js' + +# Output file base name for HTML help builder. +htmlhelp_basename = 'arrowdoc' + +# -- Options for LaTeX output --------------------------------------------- + +latex_elements = { + # The paper size ('letterpaper' or 'a4paper'). + # + # 'papersize': 'letterpaper', + + # The font size ('10pt', '11pt' or '12pt'). + # + # 'pointsize': '10pt', + + # Additional stuff for the LaTeX preamble. + # + # 'preamble': '', + + # Latex figure (float) alignment + # + # 'figure_align': 'htbp', +} + +# Grouping the document tree into LaTeX files. List of tuples +# (source start file, target name, title, +# author, documentclass [howto, manual, or own class]). +latex_documents = [ + (master_doc, 'arrow.tex', u'Apache Arrow Documentation', + u'Apache Arrow Team', 'manual'), +] + +# The name of an image file (relative to this directory) to place at the top of +# the title page. +# +# latex_logo = None + +# For "manual" documents, if this is true, then toplevel headings are parts, +# not chapters. +# +# latex_use_parts = False + +# If true, show page references after internal links. +# +# latex_show_pagerefs = False + +# If true, show URL addresses after external links. +# +# latex_show_urls = False + +# Documents to append as an appendix to all manuals. +# +# latex_appendices = [] + +# It false, will not define \strong, \code, itleref, \crossref ... but only +# \sphinxstrong, ..., \sphinxtitleref, ... To help avoid clash with user added +# packages. +# +# latex_keep_old_macro_names = True + +# If false, no module index is generated. +# +# latex_domain_indices = True + + +# -- Options for manual page output --------------------------------------- + +# One entry per manual page. List of tuples +# (source start file, name, description, authors, manual section). +man_pages = [ + (master_doc, 'arrow', u'Apache Arrow Documentation', + [author], 1) +] + +# If true, show URL addresses after external links. +# +# man_show_urls = False + + +# -- Options for Texinfo output ------------------------------------------- + +# Grouping the document tree into Texinfo files. 
List of tuples +# (source start file, target name, title, author, +# dir menu entry, description, category) +texinfo_documents = [ + (master_doc, 'arrow', u'Apache Arrow Documentation', + author, 'Apache Arrow', 'One line description of project.', + 'Miscellaneous'), +] + +# Documents to append as an appendix to all manuals. +# +# texinfo_appendices = [] + +# If false, no module index is generated. +# +# texinfo_domain_indices = True + +# How to display URL addresses: 'footnote', 'no', or 'inline'. +# +# texinfo_show_urls = 'footnote' + +# If true, do not generate a @detailmenu in the "Top" node's menu. +# +# texinfo_no_detailmenu = False + + +# -- Customization -------------------------------------------------------- + +# Conditional API doc generation + +# Sphinx has two features for conditional inclusion: +# - The "only" directive +# https://www.sphinx-doc.org/en/master/usage/restructuredtext/directives.html#including-content-based-on-tags +# - The "ifconfig" extension +# https://www.sphinx-doc.org/en/master/usage/extensions/ifconfig.html +# +# Both have issues, but "ifconfig" seems to work in this setting. + +try: + import pyarrow.cuda + cuda_enabled = True +except ImportError: + cuda_enabled = False + # Mock pyarrow.cuda to avoid autodoc warnings. + # XXX I can't get autodoc_mock_imports to work, so mock manually instead + # (https://github.com/sphinx-doc/sphinx/issues/2174#issuecomment-453177550) + pyarrow.cuda = sys.modules['pyarrow.cuda'] = mock.Mock() + +try: + import pyarrow.flight + flight_enabled = True +except ImportError: + flight_enabled = False + pyarrow.flight = sys.modules['pyarrow.flight'] = mock.Mock() + + +def setup(app): + # Use a config value to indicate whether CUDA API docs can be generated. + # This will also rebuild appropriately when the value changes. + app.add_config_value('cuda_enabled', cuda_enabled, 'env') + app.add_config_value('flight_enabled', flight_enabled, 'env') diff --git a/src/arrow/docs/source/cpp/api.rst b/src/arrow/docs/source/cpp/api.rst new file mode 100644 index 000000000..3df16a178 --- /dev/null +++ b/src/arrow/docs/source/cpp/api.rst @@ -0,0 +1,42 @@ +.. Licensed to the Apache Software Foundation (ASF) under one +.. or more contributor license agreements. See the NOTICE file +.. distributed with this work for additional information +.. regarding copyright ownership. The ASF licenses this file +.. to you under the Apache License, Version 2.0 (the +.. "License"); you may not use this file except in compliance +.. with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, +.. software distributed under the License is distributed on an +.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +.. KIND, either express or implied. See the License for the +.. specific language governing permissions and limitations +.. under the License. + +************* +API Reference +************* + +.. toctree:: + :maxdepth: 3 + + api/support + api/memory + api/datatype + api/array + api/scalar + api/builder + api/table + api/c_abi + api/compute + api/tensor + api/utilities + api/io + api/ipc + api/formats + api/cuda + api/flight + api/filesystem + api/dataset diff --git a/src/arrow/docs/source/cpp/api/array.rst b/src/arrow/docs/source/cpp/api/array.rst new file mode 100644 index 000000000..7f4e71158 --- /dev/null +++ b/src/arrow/docs/source/cpp/api/array.rst @@ -0,0 +1,80 @@ +.. Licensed to the Apache Software Foundation (ASF) under one +.. 
or more contributor license agreements. See the NOTICE file +.. distributed with this work for additional information +.. regarding copyright ownership. The ASF licenses this file +.. to you under the Apache License, Version 2.0 (the +.. "License"); you may not use this file except in compliance +.. with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, +.. software distributed under the License is distributed on an +.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +.. KIND, either express or implied. See the License for the +.. specific language governing permissions and limitations +.. under the License. + +====== +Arrays +====== + +.. doxygenclass:: arrow::ArrayData + :project: arrow_cpp + :members: + +.. doxygenclass:: arrow::Array + :project: arrow_cpp + :members: + +Concrete array subclasses +========================= + +Primitive and temporal +---------------------- + +.. doxygenclass:: arrow::NullArray + :project: arrow_cpp + :members: + +.. doxygenclass:: arrow::BooleanArray + :project: arrow_cpp + :members: + +.. doxygengroup:: numeric-arrays + :content-only: + :members: + +Binary-like +----------- + +.. doxygengroup:: binary-arrays + :content-only: + :members: + +Nested +------ + +.. doxygengroup:: nested-arrays + :content-only: + :members: + +Dictionary-encoded +------------------ + +.. doxygenclass:: arrow::DictionaryArray + :members: + +Extension arrays +---------------- + +.. doxygenclass:: arrow::ExtensionArray + :members: + + +Chunked Arrays +============== + +.. doxygenclass:: arrow::ChunkedArray + :project: arrow_cpp + :members: diff --git a/src/arrow/docs/source/cpp/api/builder.rst b/src/arrow/docs/source/cpp/api/builder.rst new file mode 100644 index 000000000..9e6540aa5 --- /dev/null +++ b/src/arrow/docs/source/cpp/api/builder.rst @@ -0,0 +1,56 @@ +.. Licensed to the Apache Software Foundation (ASF) under one +.. or more contributor license agreements. See the NOTICE file +.. distributed with this work for additional information +.. regarding copyright ownership. The ASF licenses this file +.. to you under the Apache License, Version 2.0 (the +.. "License"); you may not use this file except in compliance +.. with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, +.. software distributed under the License is distributed on an +.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +.. KIND, either express or implied. See the License for the +.. specific language governing permissions and limitations +.. under the License. + +============== +Array Builders +============== + +.. doxygenclass:: arrow::ArrayBuilder + :members: + +Concrete builder subclasses +=========================== + +.. doxygenclass:: arrow::NullBuilder + :members: + +.. doxygenclass:: arrow::BooleanBuilder + :members: + +.. doxygenclass:: arrow::NumericBuilder + :members: + +.. doxygenclass:: arrow::BinaryBuilder + :members: + +.. doxygenclass:: arrow::StringBuilder + :members: + +.. doxygenclass:: arrow::FixedSizeBinaryBuilder + :members: + +.. doxygenclass:: arrow::Decimal128Builder + :members: + +.. doxygenclass:: arrow::ListBuilder + :members: + +.. doxygenclass:: arrow::StructBuilder + :members: + +.. 
doxygenclass:: arrow::DictionaryBuilder + :members: diff --git a/src/arrow/docs/source/cpp/api/c_abi.rst b/src/arrow/docs/source/cpp/api/c_abi.rst new file mode 100644 index 000000000..4e451c3ec --- /dev/null +++ b/src/arrow/docs/source/cpp/api/c_abi.rst @@ -0,0 +1,48 @@ +.. Licensed to the Apache Software Foundation (ASF) under one +.. or more contributor license agreements. See the NOTICE file +.. distributed with this work for additional information +.. regarding copyright ownership. The ASF licenses this file +.. to you under the Apache License, Version 2.0 (the +.. "License"); you may not use this file except in compliance +.. with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, +.. software distributed under the License is distributed on an +.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +.. KIND, either express or implied. See the License for the +.. specific language governing permissions and limitations +.. under the License. + +============ +C Interfaces +============ + +.. seealso:: + The :ref:`C data interface <c-data-interface>` and + :ref:`C stream interface <c-stream-interface>` specifications. + +ABI Structures +============== + +.. doxygenstruct:: ArrowSchema + :project: arrow_cpp + +.. doxygenstruct:: ArrowArray + :project: arrow_cpp + +.. doxygenstruct:: ArrowArrayStream + :project: arrow_cpp + +C Data Interface +================ + +.. doxygengroup:: c-data-interface + :content-only: + +C Stream Interface +================== + +.. doxygengroup:: c-stream-interface + :content-only: diff --git a/src/arrow/docs/source/cpp/api/compute.rst b/src/arrow/docs/source/cpp/api/compute.rst new file mode 100644 index 000000000..3b0a89f83 --- /dev/null +++ b/src/arrow/docs/source/cpp/api/compute.rst @@ -0,0 +1,56 @@ +.. Licensed to the Apache Software Foundation (ASF) under one +.. or more contributor license agreements. See the NOTICE file +.. distributed with this work for additional information +.. regarding copyright ownership. The ASF licenses this file +.. to you under the Apache License, Version 2.0 (the +.. "License"); you may not use this file except in compliance +.. with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, +.. software distributed under the License is distributed on an +.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +.. KIND, either express or implied. See the License for the +.. specific language governing permissions and limitations +.. under the License. + +Compute Functions +================= + +Datum class +----------- + +.. doxygenclass:: arrow::Datum + :members: + +Abstract Function classes +------------------------- + +.. doxygengroup:: compute-functions + :content-only: + :members: + +Function registry +----------------- + +.. doxygenclass:: arrow::compute::FunctionRegistry + :members: + +.. doxygenfunction:: arrow::compute::GetFunctionRegistry + +Convenience functions +--------------------- + +.. doxygengroup:: compute-call-function + :content-only: + +Concrete options classes +------------------------ + +.. doxygengroup:: compute-concrete-options + :content-only: + :members: + :undoc-members: + +.. TODO: List concrete function invocation shortcuts? 
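
The compute.rst page added just above documents arrow::Datum, the compute FunctionRegistry, and the CallFunction convenience helpers. As a rough illustration of that API (not part of this patch, and assuming a standard Arrow C++ installation providing arrow/api.h and arrow/compute/api.h), a minimal sketch of invoking a registered kernel could look like this:

```cpp
#include <arrow/api.h>
#include <arrow/compute/api.h>

#include <iostream>

int main() {
  // Build a small Int32 array to feed into a compute kernel.
  arrow::Int32Builder builder;
  if (!builder.AppendValues({1, 2, 3, 4}).ok()) return 1;
  std::shared_ptr<arrow::Array> array;
  if (!builder.Finish(&array).ok()) return 1;

  // CallFunction resolves "sum" in the default FunctionRegistry and
  // returns a Result<Datum>; for this kernel the Datum wraps a scalar.
  auto maybe_sum = arrow::compute::CallFunction("sum", {array});
  if (maybe_sum.ok()) {
    std::cout << maybe_sum->scalar()->ToString() << std::endl;  // prints 10
  }
  return 0;
}
```

This mirrors the Datum, registry, and CallFunction pieces that the page lists via its doxygen directives; the "sum" kernel is simply one example of a registered compute function.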
diff --git a/src/arrow/docs/source/cpp/api/cuda.rst b/src/arrow/docs/source/cpp/api/cuda.rst new file mode 100644 index 000000000..caeb5be31 --- /dev/null +++ b/src/arrow/docs/source/cpp/api/cuda.rst @@ -0,0 +1,74 @@ +.. Licensed to the Apache Software Foundation (ASF) under one +.. or more contributor license agreements. See the NOTICE file +.. distributed with this work for additional information +.. regarding copyright ownership. The ASF licenses this file +.. to you under the Apache License, Version 2.0 (the +.. "License"); you may not use this file except in compliance +.. with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, +.. software distributed under the License is distributed on an +.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +.. KIND, either express or implied. See the License for the +.. specific language governing permissions and limitations +.. under the License. + +============ +CUDA support +============ + +Contexts +======== + +.. doxygenclass:: arrow::cuda::CudaDeviceManager + :project: arrow_cpp + :members: + +.. doxygenclass:: arrow::cuda::CudaContext + :project: arrow_cpp + :members: + +Devices +======= + +.. doxygenclass:: arrow::cuda::CudaDevice + :project: arrow_cpp + :members: + +.. doxygenclass:: arrow::cuda::CudaMemoryManager + :project: arrow_cpp + :members: + +Buffers +======= + +.. doxygenclass:: arrow::cuda::CudaBuffer + :project: arrow_cpp + :members: + +.. doxygenclass:: arrow::cuda::CudaHostBuffer + :project: arrow_cpp + :members: + +Memory Input / Output +===================== + +.. doxygenclass:: arrow::cuda::CudaBufferReader + :project: arrow_cpp + :members: + +.. doxygenclass:: arrow::cuda::CudaBufferWriter + :project: arrow_cpp + :members: + +IPC +=== + +.. doxygenclass:: arrow::cuda::CudaIpcMemHandle + :project: arrow_cpp + :members: + +.. doxygengroup:: cuda-ipc-functions + :content-only: diff --git a/src/arrow/docs/source/cpp/api/dataset.rst b/src/arrow/docs/source/cpp/api/dataset.rst new file mode 100644 index 000000000..3f0df8a45 --- /dev/null +++ b/src/arrow/docs/source/cpp/api/dataset.rst @@ -0,0 +1,71 @@ +.. Licensed to the Apache Software Foundation (ASF) under one +.. or more contributor license agreements. See the NOTICE file +.. distributed with this work for additional information +.. regarding copyright ownership. The ASF licenses this file +.. to you under the Apache License, Version 2.0 (the +.. "License"); you may not use this file except in compliance +.. with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, +.. software distributed under the License is distributed on an +.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +.. KIND, either express or implied. See the License for the +.. specific language governing permissions and limitations +.. under the License. + +======= +Dataset +======= + +Interface +========= + +.. doxygenclass:: arrow::dataset::Fragment + :members: + +.. doxygenclass:: arrow::dataset::Dataset + :members: + +Partitioning +============ + +.. doxygengroup:: dataset-partitioning + :content-only: + :members: + +Dataset discovery/factories +=========================== + +.. doxygengroup:: dataset-discovery + :content-only: + :members: + +Scanning +======== + +.. 
doxygengroup:: dataset-scanning + :content-only: + :members: + +Concrete implementations +======================== + +.. doxygengroup:: dataset-implementations + :content-only: + :members: + +File System Datasets +-------------------- + +.. doxygengroup:: dataset-filesystem + :content-only: + :members: + +File Formats +------------ + +.. doxygengroup:: dataset-file-formats + :content-only: + :members: diff --git a/src/arrow/docs/source/cpp/api/datatype.rst b/src/arrow/docs/source/cpp/api/datatype.rst new file mode 100644 index 000000000..2cbe1cf4d --- /dev/null +++ b/src/arrow/docs/source/cpp/api/datatype.rst @@ -0,0 +1,102 @@ +.. Licensed to the Apache Software Foundation (ASF) under one +.. or more contributor license agreements. See the NOTICE file +.. distributed with this work for additional information +.. regarding copyright ownership. The ASF licenses this file +.. to you under the Apache License, Version 2.0 (the +.. "License"); you may not use this file except in compliance +.. with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, +.. software distributed under the License is distributed on an +.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +.. KIND, either express or implied. See the License for the +.. specific language governing permissions and limitations +.. under the License. + +========== +Data Types +========== + +.. doxygenenum:: arrow::Type::type + +.. doxygenclass:: arrow::DataType + :members: + +.. _api-type-factories: + +Factory functions +================= + +These functions are recommended for creating data types. They may return +new objects or existing singletons, depending on the type requested. + +.. doxygengroup:: type-factories + :project: arrow_cpp + :content-only: + +Concrete type subclasses +======================== + +Primitive +--------- + +.. doxygenclass:: arrow::NullType + :members: + +.. doxygenclass:: arrow::BooleanType + :members: + +.. doxygengroup:: numeric-datatypes + :content-only: + :members: + +Temporal +-------- + +.. doxygenenum:: arrow::TimeUnit::type + +.. doxygengroup:: temporal-datatypes + :content-only: + :members: + +Binary-like +----------- + +.. doxygengroup:: binary-datatypes + :content-only: + :members: + +Nested +------ + +.. doxygengroup:: nested-datatypes + :content-only: + :members: + +Dictionary-encoded +------------------ + +.. doxygenclass:: arrow::DictionaryType + :members: + +Extension types +--------------- + +.. doxygenclass:: arrow::ExtensionType + :members: + + +Fields and Schemas +================== + +.. doxygengroup:: schema-factories + :project: arrow_cpp + :content-only: + +.. doxygenclass:: arrow::Field + :members: + +.. doxygenclass:: arrow::Schema + :members: diff --git a/src/arrow/docs/source/cpp/api/filesystem.rst b/src/arrow/docs/source/cpp/api/filesystem.rst new file mode 100644 index 000000000..02fff9a6c --- /dev/null +++ b/src/arrow/docs/source/cpp/api/filesystem.rst @@ -0,0 +1,64 @@ +.. Licensed to the Apache Software Foundation (ASF) under one +.. or more contributor license agreements. See the NOTICE file +.. distributed with this work for additional information +.. regarding copyright ownership. The ASF licenses this file +.. to you under the Apache License, Version 2.0 (the +.. "License"); you may not use this file except in compliance +.. with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. 
Unless required by applicable law or agreed to in writing, +.. software distributed under the License is distributed on an +.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +.. KIND, either express or implied. See the License for the +.. specific language governing permissions and limitations +.. under the License. + +=========== +Filesystems +=========== + +Interface +========= + +.. doxygenenum:: arrow::fs::FileType + +.. doxygenstruct:: arrow::fs::FileInfo + :members: + +.. doxygenstruct:: arrow::fs::FileSelector + :members: + +.. doxygenclass:: arrow::fs::FileSystem + :members: + +High-level factory function +=========================== + +.. doxygengroup:: filesystem-factories + :content-only: + +Concrete implementations +======================== + +.. doxygenclass:: arrow::fs::SubTreeFileSystem + :members: + +.. doxygenstruct:: arrow::fs::LocalFileSystemOptions + :members: + +.. doxygenclass:: arrow::fs::LocalFileSystem + :members: + +.. doxygenstruct:: arrow::fs::S3Options + :members: + +.. doxygenclass:: arrow::fs::S3FileSystem + :members: + +.. doxygenstruct:: arrow::fs::HdfsOptions + :members: + +.. doxygenclass:: arrow::fs::HadoopFileSystem + :members: diff --git a/src/arrow/docs/source/cpp/api/flight.rst b/src/arrow/docs/source/cpp/api/flight.rst new file mode 100644 index 000000000..7cefd66ef --- /dev/null +++ b/src/arrow/docs/source/cpp/api/flight.rst @@ -0,0 +1,202 @@ +.. Licensed to the Apache Software Foundation (ASF) under one +.. or more contributor license agreements. See the NOTICE file +.. distributed with this work for additional information +.. regarding copyright ownership. The ASF licenses this file +.. to you under the Apache License, Version 2.0 (the +.. "License"); you may not use this file except in compliance +.. with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, +.. software distributed under the License is distributed on an +.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +.. KIND, either express or implied. See the License for the +.. specific language governing permissions and limitations +.. under the License. + +.. default-domain:: cpp +.. highlight:: cpp + +================ +Arrow Flight RPC +================ + +.. note:: Flight is currently unstable. APIs are subject to change, + though we don't expect drastic changes. + +Common Types +============ + +.. doxygenstruct:: arrow::flight::Action + :project: arrow_cpp + :members: + +.. doxygenstruct:: arrow::flight::ActionType + :project: arrow_cpp + :members: + +.. doxygenclass:: arrow::flight::AddCallHeaders + :project: arrow_cpp + :members: + +.. doxygenstruct:: arrow::flight::CallInfo + :project: arrow_cpp + :members: + +.. doxygenstruct:: arrow::flight::Criteria + :project: arrow_cpp + :members: + +.. doxygenstruct:: arrow::flight::FlightDescriptor + :project: arrow_cpp + :members: + +.. doxygenstruct:: arrow::flight::FlightEndpoint + :project: arrow_cpp + :members: + +.. doxygenclass:: arrow::flight::FlightInfo + :project: arrow_cpp + :members: + +.. doxygenstruct:: arrow::flight::FlightPayload + :project: arrow_cpp + :members: + +.. doxygenclass:: arrow::flight::FlightListing + :project: arrow_cpp + :members: + +.. doxygenenum:: arrow::flight::FlightMethod + :project: arrow_cpp + +.. doxygenstruct:: arrow::flight::Location + :project: arrow_cpp + :members: + +.. doxygenclass:: arrow::flight::MetadataRecordBatchReader + :project: arrow_cpp + :members: + +.. 
doxygenstruct:: arrow::flight::Result + :project: arrow_cpp + :members: + +.. doxygenclass:: arrow::flight::ResultStream + :project: arrow_cpp + :members: + +.. doxygenstruct:: arrow::flight::Ticket + :project: arrow_cpp + :members: + +Clients +======= + +.. doxygenclass:: arrow::flight::FlightClient + :project: arrow_cpp + :members: + +.. doxygenstruct:: arrow::flight::FlightClientOptions + :project: arrow_cpp + :members: + +.. doxygenclass:: arrow::flight::FlightCallOptions + :project: arrow_cpp + :members: + +.. doxygenclass:: arrow::flight::ClientAuthHandler + :project: arrow_cpp + :members: + +.. doxygenclass:: arrow::flight::ClientMiddleware + :project: arrow_cpp + :members: + +.. doxygenclass:: arrow::flight::ClientMiddlewareFactory + :project: arrow_cpp + :members: + +.. doxygentypedef:: arrow::flight::TimeoutDuration + :project: arrow_cpp + +.. doxygenclass:: arrow::flight::FlightStreamReader + :project: arrow_cpp + :members: + +.. doxygenclass:: arrow::flight::FlightStreamWriter + :project: arrow_cpp + :members: + +Servers +======= + +.. doxygenclass:: arrow::flight::FlightServerBase + :project: arrow_cpp + :members: + +.. doxygenclass:: arrow::flight::FlightServerOptions + :project: arrow_cpp + :members: + +.. doxygenstruct:: arrow::flight::CertKeyPair + :project: arrow_cpp + :members: + +.. doxygenclass:: arrow::flight::FlightDataStream + :project: arrow_cpp + :members: + +.. doxygenclass:: arrow::flight::FlightMessageReader + :project: arrow_cpp + :members: + +.. doxygenclass:: arrow::flight::FlightMetadataWriter + :project: arrow_cpp + :members: + +.. doxygenclass:: arrow::flight::RecordBatchStream + :project: arrow_cpp + :members: + +.. doxygenclass:: arrow::flight::ServerAuthHandler + :project: arrow_cpp + :members: + +.. doxygenclass:: arrow::flight::ServerCallContext + :project: arrow_cpp + :members: + +.. doxygenclass:: arrow::flight::ServerMiddleware + :project: arrow_cpp + :members: + +.. doxygenclass:: arrow::flight::ServerMiddlewareFactory + :project: arrow_cpp + :members: + +.. doxygenclass:: arrow::flight::SimpleFlightListing + :project: arrow_cpp + :members: + +.. doxygenclass:: arrow::flight::SimpleResultStream + :project: arrow_cpp + :members: + +Error Handling +============== + +Error handling uses the normal :class:`arrow::Status` class, combined +with a custom :class:`arrow::StatusDetail` object for Flight-specific +error codes. + +.. doxygenenum:: arrow::flight::FlightStatusCode + :project: arrow_cpp + +.. doxygenclass:: arrow::flight::FlightStatusDetail + :project: arrow_cpp + :members: + +.. doxygenfunction:: arrow::flight::MakeFlightError + :project: arrow_cpp diff --git a/src/arrow/docs/source/cpp/api/formats.rst b/src/arrow/docs/source/cpp/api/formats.rst new file mode 100644 index 000000000..2f6b24802 --- /dev/null +++ b/src/arrow/docs/source/cpp/api/formats.rst @@ -0,0 +1,109 @@ +.. Licensed to the Apache Software Foundation (ASF) under one +.. or more contributor license agreements. See the NOTICE file +.. distributed with this work for additional information +.. regarding copyright ownership. The ASF licenses this file +.. to you under the Apache License, Version 2.0 (the +.. "License"); you may not use this file except in compliance +.. with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, +.. software distributed under the License is distributed on an +.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +.. 
KIND, either express or implied. See the License for the +.. specific language governing permissions and limitations +.. under the License. + +============ +File Formats +============ + +.. _cpp-api-csv: + +CSV +=== + +.. doxygenstruct:: arrow::csv::ConvertOptions + :members: + +.. doxygenstruct:: arrow::csv::ParseOptions + :members: + +.. doxygenstruct:: arrow::csv::ReadOptions + :members: + +.. doxygenstruct:: arrow::csv::WriteOptions + :members: + +.. doxygenclass:: arrow::csv::TableReader + :members: + +.. doxygenfunction:: arrow::csv::MakeCSVWriter(io::OutputStream *, const std::shared_ptr<Schema>&, const WriteOptions&) + +.. doxygenfunction:: arrow::csv::MakeCSVWriter(std::shared_ptr<io::OutputStream>, const std::shared_ptr<Schema>&, const WriteOptions&) + +.. doxygenfunction:: arrow::csv::WriteCSV(const RecordBatch&, const WriteOptions&, arrow::io::OutputStream *) + +.. doxygenfunction:: arrow::csv::WriteCSV(const Table&, const WriteOptions&, arrow::io::OutputStream *) + +.. _cpp-api-json: + +Line-separated JSON +=================== + +.. doxygenenum:: arrow::json::UnexpectedFieldBehavior + +.. doxygenstruct:: arrow::json::ReadOptions + :members: + +.. doxygenstruct:: arrow::json::ParseOptions + :members: + +.. doxygenclass:: arrow::json::TableReader + :members: + +.. _cpp-api-parquet: + +Parquet reader +============== + +.. doxygenclass:: parquet::ReaderProperties + :members: + +.. doxygenclass:: parquet::ArrowReaderProperties + :members: + +.. doxygenclass:: parquet::ParquetFileReader + :members: + +.. doxygenclass:: parquet::arrow::FileReader + :members: + +.. doxygenclass:: parquet::arrow::FileReaderBuilder + :members: + +.. doxygengroup:: parquet-arrow-reader-factories + :content-only: + +.. doxygenclass:: parquet::StreamReader + :members: + +Parquet writer +============== + +.. doxygenclass:: parquet::WriterProperties + :members: + +.. doxygenclass:: parquet::ArrowWriterProperties + :members: + +.. doxygenclass:: parquet::arrow::FileWriter + :members: + +.. doxygenfunction:: parquet::arrow::WriteTable + +.. doxygenclass:: parquet::StreamWriter + :members: + +.. TODO ORC diff --git a/src/arrow/docs/source/cpp/api/io.rst b/src/arrow/docs/source/cpp/api/io.rst new file mode 100644 index 000000000..735136a0d --- /dev/null +++ b/src/arrow/docs/source/cpp/api/io.rst @@ -0,0 +1,95 @@ +.. Licensed to the Apache Software Foundation (ASF) under one +.. or more contributor license agreements. See the NOTICE file +.. distributed with this work for additional information +.. regarding copyright ownership. The ASF licenses this file +.. to you under the Apache License, Version 2.0 (the +.. "License"); you may not use this file except in compliance +.. with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, +.. software distributed under the License is distributed on an +.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +.. KIND, either express or implied. See the License for the +.. specific language governing permissions and limitations +.. under the License. + +============== +Input / output +============== + +Interfaces +========== + +.. doxygenclass:: arrow::io::FileInterface + :members: + +.. doxygenclass:: arrow::io::Readable + :members: + +.. doxygenclass:: arrow::io::Seekable + :members: + +.. doxygenclass:: arrow::io::Writable + :members: + +.. doxygenclass:: arrow::io::InputStream + :members: + +.. doxygenclass:: arrow::io::RandomAccessFile + :members: + +.. 
doxygenclass:: arrow::io::OutputStream + :members: + +.. doxygenclass:: arrow::io::ReadWriteFileInterface + :members: + +Concrete implementations +======================== + +In-memory streams +----------------- + +.. doxygenclass:: arrow::io::BufferReader + :members: + +.. doxygenclass:: arrow::io::MockOutputStream + :members: + +.. doxygenclass:: arrow::io::BufferOutputStream + :members: + +.. doxygenclass:: arrow::io::FixedSizeBufferWriter + :members: + +Local files +----------- + +.. doxygenclass:: arrow::io::ReadableFile + :members: + +.. doxygenclass:: arrow::io::FileOutputStream + :members: + +.. doxygenclass:: arrow::io::MemoryMappedFile + :members: + +Buffering input / output wrappers +--------------------------------- + +.. doxygenclass:: arrow::io::BufferedInputStream + :members: + +.. doxygenclass:: arrow::io::BufferedOutputStream + :members: + +Compressed input / output wrappers +---------------------------------- + +.. doxygenclass:: arrow::io::CompressedInputStream + :members: + +.. doxygenclass:: arrow::io::CompressedOutputStream + :members: diff --git a/src/arrow/docs/source/cpp/api/ipc.rst b/src/arrow/docs/source/cpp/api/ipc.rst new file mode 100644 index 000000000..6822b986a --- /dev/null +++ b/src/arrow/docs/source/cpp/api/ipc.rst @@ -0,0 +1,90 @@ +.. Licensed to the Apache Software Foundation (ASF) under one +.. or more contributor license agreements. See the NOTICE file +.. distributed with this work for additional information +.. regarding copyright ownership. The ASF licenses this file +.. to you under the Apache License, Version 2.0 (the +.. "License"); you may not use this file except in compliance +.. with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, +.. software distributed under the License is distributed on an +.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +.. KIND, either express or implied. See the License for the +.. specific language governing permissions and limitations +.. under the License. + +.. default-domain:: cpp +.. highlight:: cpp + +========= +Arrow IPC +========= + +IPC options +=========== + +.. doxygenstruct:: arrow::ipc::IpcReadOptions + :members: + +.. doxygenstruct:: arrow::ipc::IpcWriteOptions + :members: + +Reading IPC streams and files +============================= + +Blocking API +------------ + +Use either of these two classes, depending on which IPC format you want +to read. The file format requires a random-access file, while the stream +format only requires a sequential input stream. + +.. doxygenclass:: arrow::ipc::RecordBatchStreamReader + :members: + +.. doxygenclass:: arrow::ipc::RecordBatchFileReader + :members: + +Event-driven API +---------------- + +To read an IPC stream in event-driven fashion, you must implement a +:class:`~arrow::ipc::Listener` subclass that you will pass to +:class:`~arrow::ipc::StreamDecoder`. + +.. doxygenclass:: arrow::ipc::Listener + :members: + +.. doxygenclass:: arrow::ipc::StreamDecoder + :members: + +Statistics +---------- + +.. doxygenstruct:: arrow::ipc::ReadStats + :members: + +Writing IPC streams and files +============================= + +Blocking API +------------ + +The IPC stream format is only optionally terminated, whereas the IPC file format +must include a terminating footer. Thus a writer of the IPC file format must be +explicitly finalized with :func:`~arrow::ipc::RecordBatchWriter::Close()` or the resulting +file will be corrupt. + +.. 
doxygengroup:: record-batch-writer-factories + :content-only: + +.. doxygenclass:: arrow::ipc::RecordBatchWriter + :members: + +Statistics +---------- + +.. doxygenstruct:: arrow::ipc::WriteStats + :members: diff --git a/src/arrow/docs/source/cpp/api/memory.rst b/src/arrow/docs/source/cpp/api/memory.rst new file mode 100644 index 000000000..807a4e2f7 --- /dev/null +++ b/src/arrow/docs/source/cpp/api/memory.rst @@ -0,0 +1,124 @@ +.. Licensed to the Apache Software Foundation (ASF) under one +.. or more contributor license agreements. See the NOTICE file +.. distributed with this work for additional information +.. regarding copyright ownership. The ASF licenses this file +.. to you under the Apache License, Version 2.0 (the +.. "License"); you may not use this file except in compliance +.. with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, +.. software distributed under the License is distributed on an +.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +.. KIND, either express or implied. See the License for the +.. specific language governing permissions and limitations +.. under the License. + +Memory (management) +=================== + +Devices +------- + +.. doxygenclass:: arrow::Device + :project: arrow_cpp + :members: + +.. doxygenclass:: arrow::CPUDevice + :project: arrow_cpp + :members: + +.. doxygenfunction:: arrow::default_cpu_memory_manager + :project: arrow_cpp + +Memory Managers +--------------- + +.. doxygenclass:: arrow::MemoryManager + :project: arrow_cpp + :members: + +.. doxygenclass:: arrow::CPUMemoryManager + :project: arrow_cpp + :members: + +Buffers +------- + +.. doxygenclass:: arrow::Buffer + :project: arrow_cpp + :members: + +.. doxygenclass:: arrow::MutableBuffer + :project: arrow_cpp + :members: + +.. doxygenclass:: arrow::ResizableBuffer + :project: arrow_cpp + :members: + +Memory Pools +------------ + +.. doxygenfunction:: arrow::default_memory_pool + :project: arrow_cpp + +.. doxygenfunction:: arrow::jemalloc_memory_pool + :project: arrow_cpp + +.. doxygenfunction:: arrow::mimalloc_memory_pool + :project: arrow_cpp + +.. doxygenfunction:: arrow::system_memory_pool + :project: arrow_cpp + +.. doxygenclass:: arrow::MemoryPool + :project: arrow_cpp + :members: + +.. doxygenclass:: arrow::LoggingMemoryPool + :project: arrow_cpp + :members: + +.. doxygenclass:: arrow::ProxyMemoryPool + :project: arrow_cpp + :members: + +Allocation Functions +-------------------- + +These functions allocate a buffer from a particular memory pool. + +.. doxygengroup:: buffer-allocation-functions + :project: arrow_cpp + :content-only: + +Slicing +------- + +.. doxygengroup:: buffer-slicing-functions + :project: arrow_cpp + :content-only: + +Buffer Builders +--------------- + +.. doxygenclass:: arrow::BufferBuilder + :project: arrow_cpp + :members: + +.. doxygenclass:: arrow::TypedBufferBuilder + :project: arrow_cpp + :members: + +STL Integration +--------------- + +.. doxygenclass:: arrow::stl::allocator + :project: arrow_cpp + :members: + +.. doxygenclass:: arrow::stl::STLMemoryPool + :project: arrow_cpp + :members: diff --git a/src/arrow/docs/source/cpp/api/scalar.rst b/src/arrow/docs/source/cpp/api/scalar.rst new file mode 100644 index 000000000..391c9d57b --- /dev/null +++ b/src/arrow/docs/source/cpp/api/scalar.rst @@ -0,0 +1,38 @@ +.. Licensed to the Apache Software Foundation (ASF) under one +.. or more contributor license agreements. 
See the NOTICE file +.. distributed with this work for additional information +.. regarding copyright ownership. The ASF licenses this file +.. to you under the Apache License, Version 2.0 (the +.. "License"); you may not use this file except in compliance +.. with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, +.. software distributed under the License is distributed on an +.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +.. KIND, either express or implied. See the License for the +.. specific language governing permissions and limitations +.. under the License. + +======= +Scalars +======= + +.. doxygenstruct:: arrow::Scalar + :project: arrow_cpp + :members: + +Factory functions +================= + +.. doxygengroup:: scalar-factories + :content-only: + +Concrete scalar subclasses +========================== + +.. doxygengroup:: concrete-scalar-classes + :content-only: + :members: + :undoc-members: diff --git a/src/arrow/docs/source/cpp/api/support.rst b/src/arrow/docs/source/cpp/api/support.rst new file mode 100644 index 000000000..c3310e5d8 --- /dev/null +++ b/src/arrow/docs/source/cpp/api/support.rst @@ -0,0 +1,57 @@ +.. Licensed to the Apache Software Foundation (ASF) under one +.. or more contributor license agreements. See the NOTICE file +.. distributed with this work for additional information +.. regarding copyright ownership. The ASF licenses this file +.. to you under the Apache License, Version 2.0 (the +.. "License"); you may not use this file except in compliance +.. with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, +.. software distributed under the License is distributed on an +.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +.. KIND, either express or implied. See the License for the +.. specific language governing permissions and limitations +.. under the License. + +=================== +Programming Support +=================== + +General information +------------------- + +.. doxygenfunction:: arrow::GetBuildInfo + :project: arrow_cpp + +.. doxygenstruct:: arrow::BuildInfo + :project: arrow_cpp + :members: + +Error return and reporting +-------------------------- + +.. doxygenclass:: arrow::Status + :project: arrow_cpp + :members: + +.. doxygenclass:: arrow::StatusDetail + :project: arrow_cpp + :members: + +.. doxygenclass:: arrow::Result + :project: arrow_cpp + :members: + +.. doxygenclass:: parquet::ParquetException + :project: arrow_cpp + :members: + +.. doxygendefine:: ARROW_RETURN_NOT_OK + +.. doxygendefine:: ARROW_ASSIGN_OR_RAISE + +.. doxygendefine:: PARQUET_THROW_NOT_OK + +.. doxygendefine:: PARQUET_ASSIGN_OR_THROW diff --git a/src/arrow/docs/source/cpp/api/table.rst b/src/arrow/docs/source/cpp/api/table.rst new file mode 100644 index 000000000..53e2d72e6 --- /dev/null +++ b/src/arrow/docs/source/cpp/api/table.rst @@ -0,0 +1,45 @@ +.. Licensed to the Apache Software Foundation (ASF) under one +.. or more contributor license agreements. See the NOTICE file +.. distributed with this work for additional information +.. regarding copyright ownership. The ASF licenses this file +.. to you under the Apache License, Version 2.0 (the +.. "License"); you may not use this file except in compliance +.. with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. 
Unless required by applicable law or agreed to in writing, +.. software distributed under the License is distributed on an +.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +.. KIND, either express or implied. See the License for the +.. specific language governing permissions and limitations +.. under the License. + +======================== +Two-dimensional Datasets +======================== + +Record Batches +============== + +.. doxygenclass:: arrow::RecordBatch + :project: arrow_cpp + :members: + +.. doxygenclass:: arrow::RecordBatchReader + :project: arrow_cpp + :members: + +.. doxygenclass:: arrow::TableBatchReader + :project: arrow_cpp + :members: + +Tables +====== + +.. doxygenclass:: arrow::Table + :project: arrow_cpp + :members: + +.. doxygenfunction:: arrow::ConcatenateTables + :project: arrow_cpp diff --git a/src/arrow/docs/source/cpp/api/tensor.rst b/src/arrow/docs/source/cpp/api/tensor.rst new file mode 100644 index 000000000..1d51786db --- /dev/null +++ b/src/arrow/docs/source/cpp/api/tensor.rst @@ -0,0 +1,57 @@ +.. Licensed to the Apache Software Foundation (ASF) under one +.. or more contributor license agreements. See the NOTICE file +.. distributed with this work for additional information +.. regarding copyright ownership. The ASF licenses this file +.. to you under the Apache License, Version 2.0 (the +.. "License"); you may not use this file except in compliance +.. with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, +.. software distributed under the License is distributed on an +.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +.. KIND, either express or implied. See the License for the +.. specific language governing permissions and limitations +.. under the License. + +======= +Tensors +======= + +Dense Tensors +============= + +.. doxygenclass:: arrow::Tensor + :members: + +.. doxygenclass:: arrow::NumericTensor + :members: + +Sparse Tensors +============== + +.. doxygenenum:: arrow::SparseTensorFormat::type + +.. doxygenclass:: arrow::SparseIndex + :members: + +.. doxygenclass:: arrow::SparseCOOIndex + :members: + +.. doxygenclass:: arrow::SparseCSRIndex + :members: + +.. doxygenclass:: arrow::SparseTensor + :members: + +.. doxygenclass:: arrow::SparseTensorImpl + :members: + +.. doxygentypedef:: arrow::SparseCOOTensor + +.. doxygentypedef:: arrow::SparseCSCMatrix + +.. doxygentypedef:: arrow::SparseCSFTensor + +.. doxygentypedef:: arrow::SparseCSRMatrix diff --git a/src/arrow/docs/source/cpp/api/utilities.rst b/src/arrow/docs/source/cpp/api/utilities.rst new file mode 100644 index 000000000..87c5a3bbe --- /dev/null +++ b/src/arrow/docs/source/cpp/api/utilities.rst @@ -0,0 +1,52 @@ +.. Licensed to the Apache Software Foundation (ASF) under one +.. or more contributor license agreements. See the NOTICE file +.. distributed with this work for additional information +.. regarding copyright ownership. The ASF licenses this file +.. to you under the Apache License, Version 2.0 (the +.. "License"); you may not use this file except in compliance +.. with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, +.. software distributed under the License is distributed on an +.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +.. KIND, either express or implied. See the License for the +.. 
specific language governing permissions and limitations
+.. under the License.
+
+=========
+Utilities
+=========
+
+Decimal Numbers
+===============
+
+.. doxygenclass:: arrow::Decimal128
+   :project: arrow_cpp
+   :members:
+
+Abstract Sequences
+==================
+
+.. doxygenclass:: arrow::Iterator
+   :project: arrow_cpp
+   :members:
+
+.. doxygenclass:: arrow::VectorIterator
+   :project: arrow_cpp
+   :members:
+
+Compression
+===========
+
+.. doxygenenum:: arrow::Compression::type
+
+.. doxygenclass:: arrow::util::Codec
+   :members:
+
+.. doxygenclass:: arrow::util::Compressor
+   :members:
+
+.. doxygenclass:: arrow::util::Decompressor
+   :members:
diff --git a/src/arrow/docs/source/cpp/arrays.rst b/src/arrow/docs/source/cpp/arrays.rst
new file mode 100644
index 000000000..ff76e9d02
--- /dev/null
+++ b/src/arrow/docs/source/cpp/arrays.rst
@@ -0,0 +1,225 @@
+.. Licensed to the Apache Software Foundation (ASF) under one
+.. or more contributor license agreements. See the NOTICE file
+.. distributed with this work for additional information
+.. regarding copyright ownership. The ASF licenses this file
+.. to you under the Apache License, Version 2.0 (the
+.. "License"); you may not use this file except in compliance
+.. with the License. You may obtain a copy of the License at
+
+.. http://www.apache.org/licenses/LICENSE-2.0
+
+.. Unless required by applicable law or agreed to in writing,
+.. software distributed under the License is distributed on an
+.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+.. KIND, either express or implied. See the License for the
+.. specific language governing permissions and limitations
+.. under the License.
+
+.. default-domain:: cpp
+.. highlight:: cpp
+
+======
+Arrays
+======
+
+.. seealso::
+   :doc:`Array API reference <api/array>`
+
+The central type in Arrow is the class :class:`arrow::Array`. An array
+represents a known-length sequence of values all having the same type.
+Internally, those values are represented by one or several buffers, the
+number and meaning of which depend on the array's data type, as documented
+in :ref:`the Arrow data layout specification <format_layout>`.
+
+Those buffers consist of the value data itself and an optional bitmap buffer
+that indicates which array entries are null values. The bitmap buffer
+can be entirely omitted if the array is known to have zero null values.
+
+There are concrete subclasses of :class:`arrow::Array` for each data type
+that help you access individual values of the array.
+
+Building an array
+=================
+
+Available strategies
+--------------------
+
+As Arrow objects are immutable, they cannot be populated directly like for
+example a ``std::vector``. Instead, several strategies can be used:
+
+* if the data already exists in memory with the right layout, you can wrap
+  said memory inside :class:`arrow::Buffer` instances and then construct
+  a :class:`arrow::ArrayData` describing the array;
+
+  .. seealso:: :ref:`cpp_memory_management`
+
+* otherwise, the :class:`arrow::ArrayBuilder` base class and its concrete
+  subclasses help building up array data incrementally, without having to
+  deal with details of the Arrow format yourself.
+
+Using ArrayBuilder and its subclasses
+-------------------------------------
+
+To build an ``Int64`` Arrow array, we can use the :class:`arrow::Int64Builder`
+class. In the following example, we build an array of the range 1 to 8 where
+the element that should hold the value 4 is nulled::
+
+   arrow::Int64Builder builder;
+   builder.Append(1);
+   builder.Append(2);
+   builder.Append(3);
+   builder.AppendNull();
+   builder.Append(5);
+   builder.Append(6);
+   builder.Append(7);
+   builder.Append(8);
+
+   auto maybe_array = builder.Finish();
+   if (!maybe_array.ok()) {
+     // ... do something on array building failure
+   }
+   std::shared_ptr<arrow::Array> array = *maybe_array;
+
+The resulting Array (which can be cast to the concrete :class:`arrow::Int64Array`
+subclass if you want to access its values) then consists of two
+:class:`arrow::Buffer`\s.
+The first buffer holds the null bitmap, which consists here of a single byte with
+the bits ``1|1|1|1|0|1|1|1``. As we use `least-significant bit (LSB) numbering`_,
+this indicates that the fourth entry in the array is null. The second
+buffer is simply an ``int64_t`` array containing all the above values.
+As the fourth entry is null, the value at that position in the buffer is
+undefined.
+
+Here is how you could access the concrete array's contents::
+
+   // Cast the Array to its actual type to access its data
+   auto int64_array = std::static_pointer_cast<arrow::Int64Array>(array);
+
+   // Get the pointer to the null bitmap.
+   const uint8_t* null_bitmap = int64_array->null_bitmap_data();
+
+   // Get the pointer to the actual data
+   const int64_t* data = int64_array->raw_values();
+
+   // Alternatively, given an array index, query its null bit and value directly
+   int64_t index = 2;
+   if (!int64_array->IsNull(index)) {
+     int64_t value = int64_array->Value(index);
+   }
+
+.. note::
+   :class:`arrow::Int64Array` (respectively :class:`arrow::Int64Builder`) is
+   just a ``typedef``, provided for convenience, of ``arrow::NumericArray<Int64Type>``
+   (respectively ``arrow::NumericBuilder<Int64Type>``).
+
+.. _least-significant bit (LSB) numbering: https://en.wikipedia.org/wiki/Bit_numbering
+
+Performance
+-----------
+
+While it is possible to build an array value-by-value as in the example above,
+to attain highest performance it is recommended to use the bulk appending
+methods (usually named ``AppendValues``) in the concrete :class:`arrow::ArrayBuilder`
+subclasses.
+
+If you know the number of elements in advance, it is also recommended to
+presize the working area by calling the :func:`~arrow::ArrayBuilder::Resize`
+or :func:`~arrow::ArrayBuilder::Reserve` methods.
+ +Here is how one could rewrite the above example to take advantage of those +APIs:: + + arrow::Int64Builder builder; + // Make place for 8 values in total + builder.Reserve(8); + // Bulk append the given values (with a null in 4th place as indicated by the + // validity vector) + std::vector<bool> validity = {true, true, true, false, true, true, true, true}; + std::vector<int64_t> values = {1, 2, 3, 0, 5, 6, 7, 8}; + builder.AppendValues(values, validity); + + auto maybe_array = builder.Finish(); + +If you still must append values one by one, some concrete builder subclasses +have methods marked "Unsafe" that assume the working area has been correctly +presized, and offer higher performance in exchange:: + + arrow::Int64Builder builder; + // Make place for 8 values in total + builder.Reserve(8); + builder.UnsafeAppend(1); + builder.UnsafeAppend(2); + builder.UnsafeAppend(3); + builder.UnsafeAppendNull(); + builder.UnsafeAppend(5); + builder.UnsafeAppend(6); + builder.UnsafeAppend(7); + builder.UnsafeAppend(8); + + auto maybe_array = builder.Finish(); + +Size Limitations and Recommendations +==================================== + +Some array types are structurally limited to 32-bit sizes. This is the case +for list arrays (which can hold up to 2^31 elements), string arrays and binary +arrays (which can hold up to 2GB of binary data), at least. Some other array +types can hold up to 2^63 elements in the C++ implementation, but other Arrow +implementations can have a 32-bit size limitation for those array types as well. + +For these reasons, it is recommended that huge data be chunked in subsets of +more reasonable size. + +Chunked Arrays +============== + +A :class:`arrow::ChunkedArray` is, like an array, a logical sequence of values; +but unlike a simple array, a chunked array does not require the entire sequence +to be physically contiguous in memory. Also, the constituents of a chunked array +need not have the same size, but they must all have the same data type. + +A chunked array is constructed by aggregating any number of arrays. Here we'll +build a chunked array with the same logical values as in the example above, +but in two separate chunks:: + + std::vector<std::shared_ptr<arrow::Array>> chunks; + std::shared_ptr<arrow::Array> array; + + // Build first chunk + arrow::Int64Builder builder; + builder.Append(1); + builder.Append(2); + builder.Append(3); + if (!builder.Finish(&array).ok()) { + // ... do something on array building failure + } + chunks.push_back(std::move(array)); + + // Build second chunk + builder.Reset(); + builder.AppendNull(); + builder.Append(5); + builder.Append(6); + builder.Append(7); + builder.Append(8); + if (!builder.Finish(&array).ok()) { + // ... do something on array building failure + } + chunks.push_back(std::move(array)); + + auto chunked_array = std::make_shared<arrow::ChunkedArray>(std::move(chunks)); + + assert(chunked_array->num_chunks() == 2); + // Logical length in number of values + assert(chunked_array->length() == 8); + assert(chunked_array->null_count() == 1); + +Slicing +======= + +Like for physical memory buffers, it is possible to make zero-copy slices +of arrays and chunked arrays, to obtain an array or chunked array referring +to some logical subsequence of the data. This is done by calling the +:func:`arrow::Array::Slice` and :func:`arrow::ChunkedArray::Slice` methods, +respectively. 
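+
+For instance, here is a minimal sketch of slicing, assuming ``array`` and
+``chunked_array`` are the eight-value array and chunked array built in the
+examples above::
+
+   // Zero-copy view of three values starting at offset 2
+   std::shared_ptr<arrow::Array> array_slice = array->Slice(2, 3);
+
+   // A slice of a chunked array may span chunk boundaries
+   std::shared_ptr<arrow::ChunkedArray> chunked_slice = chunked_array->Slice(2, 5);
+
+   assert(array_slice->length() == 3);
+   assert(chunked_slice->length() == 5);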
+ diff --git a/src/arrow/docs/source/cpp/build_system.rst b/src/arrow/docs/source/cpp/build_system.rst new file mode 100644 index 000000000..c0d05e9da --- /dev/null +++ b/src/arrow/docs/source/cpp/build_system.rst @@ -0,0 +1,136 @@ +.. Licensed to the Apache Software Foundation (ASF) under one +.. or more contributor license agreements. See the NOTICE file +.. distributed with this work for additional information +.. regarding copyright ownership. The ASF licenses this file +.. to you under the Apache License, Version 2.0 (the +.. "License"); you may not use this file except in compliance +.. with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, +.. software distributed under the License is distributed on an +.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +.. KIND, either express or implied. See the License for the +.. specific language governing permissions and limitations +.. under the License. + +.. default-domain:: cpp +.. highlight:: cpp + +=================================== +Using Arrow C++ in your own project +=================================== + +This section assumes you already have the Arrow C++ libraries on your +system, either after installing them using a package manager or after +:ref:`building them yourself <building-arrow-cpp>`. + +The recommended way to integrate the Arrow C++ libraries in your own +C++ project is to use CMake's `find_package +<https://cmake.org/cmake/help/latest/command/find_package.html>`_ +function for locating and integrating dependencies. If you don't use +CMake as a build system, you can use `pkg-config +<https://www.freedesktop.org/wiki/Software/pkg-config/>`_ to find +installed the Arrow C++ libraries. + +CMake +===== + +Basic usage +----------- + +This minimal ``CMakeLists.txt`` file compiles a ``my_example.cc`` source +file into an executable linked with the Arrow C++ shared library: + +.. code-block:: cmake + + project(MyExample) + + find_package(Arrow REQUIRED) + + add_executable(my_example my_example.cc) + target_link_libraries(my_example PRIVATE arrow_shared) + +Available variables and targets +------------------------------- + +The directive ``find_package(Arrow REQUIRED)`` asks CMake to find an Arrow +C++ installation on your system. When it returns, it will have set a few +CMake variables: + +* ``${Arrow_FOUND}`` is true if the Arrow C++ libraries have been found +* ``${ARROW_VERSION}`` contains the Arrow version string +* ``${ARROW_FULL_SO_VERSION}`` contains the Arrow DLL version string + +In addition, it will have created some targets that you can link against +(note these are plain strings, not variables): + +* ``arrow_shared`` links to the Arrow shared libraries +* ``arrow_static`` links to the Arrow static libraries + +In most cases, it is recommended to use the Arrow shared libraries. + +.. note:: + CMake is case-sensitive. The names and variables listed above have to be + spelt exactly that way! + +.. seealso:: + A Docker-based :doc:`minimal build example <examples/cmake_minimal_build>`. + +pkg-config +========== + +Basic usage +----------- + +You can get suitable build flags by the following command line: + +.. code-block:: shell + + pkg-config --cflags --libs arrow + +If you want to link the Arrow C++ static library, you need to add +``--static`` option: + +.. 
code-block:: shell + + pkg-config --cflags --libs --static arrow + +This minimal ``Makefile`` file compiles a ``my_example.cc`` source +file into an executable linked with the Arrow C++ shared library: + +.. code-block:: makefile + + my_example: my_example.cc + $(CXX) -o $@ $(CXXFLAGS) $< $$(pkg-config --cflags --libs arrow) + +Many build systems support pkg-config. For example: + + * `GNU Autotools <https://people.freedesktop.org/~dbn/pkg-config-guide.html#using>`_ + * `CMake <https://cmake.org/cmake/help/latest/module/FindPkgConfig.html>`_ + (But you should use ``find_package(Arrow)`` instead.) + * `Meson <https://mesonbuild.com/Reference-manual.html#dependency>`_ + +Available packages +------------------ + +The Arrow C++ provides a pkg-config package for each module. Here are +all available packages: + + * ``arrow-csv`` + * ``arrow-cuda`` + * ``arrow-dataset`` + * ``arrow-filesystem`` + * ``arrow-flight-testing`` + * ``arrow-flight`` + * ``arrow-json`` + * ``arrow-orc`` + * ``arrow-python-flight`` + * ``arrow-python`` + * ``arrow-tensorflow`` + * ``arrow-testing`` + * ``arrow`` + * ``gandiva`` + * ``parquet`` + * ``plasma`` diff --git a/src/arrow/docs/source/cpp/compute.rst b/src/arrow/docs/source/cpp/compute.rst new file mode 100644 index 000000000..dd5696020 --- /dev/null +++ b/src/arrow/docs/source/cpp/compute.rst @@ -0,0 +1,1606 @@ +.. Licensed to the Apache Software Foundation (ASF) under one +.. or more contributor license agreements. See the NOTICE file +.. distributed with this work for additional information +.. regarding copyright ownership. The ASF licenses this file +.. to you under the Apache License, Version 2.0 (the +.. "License"); you may not use this file except in compliance +.. with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, +.. software distributed under the License is distributed on an +.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +.. KIND, either express or implied. See the License for the +.. specific language governing permissions and limitations +.. under the License. + +.. default-domain:: cpp +.. highlight:: cpp +.. cpp:namespace:: arrow::compute + +================= +Compute Functions +================= + +The generic Compute API +======================= + +.. TODO: describe API and how to invoke compute functions + +Functions and function registry +------------------------------- + +Functions represent compute operations over inputs of possibly varying +types. Internally, a function is implemented by one or several +"kernels", depending on the concrete input types (for example, a function +adding values from two inputs can have different kernels depending on +whether the inputs are integral or floating-point). + +Functions are stored in a global :class:`FunctionRegistry` where +they can be looked up by name. + +Input shapes +------------ + +Computation inputs are represented as a general :class:`Datum` class, +which is a tagged union of several shapes of data such as :class:`Scalar`, +:class:`Array` and :class:`ChunkedArray`. Many compute functions support +both array (chunked or not) and scalar inputs, however some will mandate +either. For example, while ``sort_indices`` requires its first and only +input to be an array. + +.. 
_invoking-compute-functions: + +Invoking functions +------------------ + +Compute functions can be invoked by name using +:func:`arrow::compute::CallFunction`:: + + std::shared_ptr<arrow::Array> numbers_array = ...; + std::shared_ptr<arrow::Scalar> increment = ...; + arrow::Datum incremented_datum; + + ARROW_ASSIGN_OR_RAISE(incremented_datum, + arrow::compute::CallFunction("add", {numbers_array, increment})); + std::shared_ptr<Array> incremented_array = std::move(incremented_datum).array(); + +(note this example uses implicit conversion from ``std::shared_ptr<Array>`` +to ``Datum``) + +Many compute functions are also available directly as concrete APIs, here +:func:`arrow::compute::Add`:: + + std::shared_ptr<arrow::Array> numbers_array = ...; + std::shared_ptr<arrow::Scalar> increment = ...; + arrow::Datum incremented_datum; + + ARROW_ASSIGN_OR_RAISE(incremented_datum, + arrow::compute::Add(numbers_array, increment)); + std::shared_ptr<Array> incremented_array = std::move(incremented_datum).array(); + +Some functions accept or require an options structure that determines the +exact semantics of the function:: + + ScalarAggregateOptions scalar_aggregate_options; + scalar_aggregate_options.skip_nulls = false; + + std::shared_ptr<arrow::Array> array = ...; + arrow::Datum min_max; + + ARROW_ASSIGN_OR_RAISE(min_max, + arrow::compute::CallFunction("min_max", {array}, + &scalar_aggregate_options)); + + // Unpack struct scalar result (a two-field {"min", "max"} scalar) + std::shared_ptr<arrow::Scalar> min_value, max_value; + min_value = min_max.scalar_as<arrow::StructScalar>().value[0]; + max_value = min_max.scalar_as<arrow::StructScalar>().value[1]; + +.. seealso:: + :doc:`Compute API reference <api/compute>` + +Implicit casts +============== + +Functions may require conversion of their arguments before execution if a +kernel does not match the argument types precisely. For example comparison +of dictionary encoded arrays is not directly supported by any kernel, but an +implicit cast can be made allowing comparison against the decoded array. + +Each function may define implicit cast behaviour as appropriate. For example +comparison and arithmetic kernels require identically typed arguments, and +support execution against differing numeric types by promoting their arguments +to numeric type which can accommodate any value from either input. + +.. _common-numeric-type: + +Common numeric type +------------------- + +The common numeric type of a set of input numeric types is the smallest numeric +type which can accommodate any value of any input. If any input is a floating +point type the common numeric type is the widest floating point type among the +inputs. Otherwise the common numeric type is integral and is signed if any input +is signed. 
For example: + ++-------------------+----------------------+------------------------------------------------+ +| Input types | Common numeric type | Notes | ++===================+======================+================================================+ +| int32, int32 | int32 | | ++-------------------+----------------------+------------------------------------------------+ +| int16, int32 | int32 | Max width is 32, promote LHS to int32 | ++-------------------+----------------------+------------------------------------------------+ +| uint16, int32 | int32 | One input signed, override unsigned | ++-------------------+----------------------+------------------------------------------------+ +| uint32, int32 | int64 | Widen to accommodate range of uint32 | ++-------------------+----------------------+------------------------------------------------+ +| uint16, uint32 | uint32 | All inputs unsigned, maintain unsigned | ++-------------------+----------------------+------------------------------------------------+ +| int16, uint32 | int64 | | ++-------------------+----------------------+------------------------------------------------+ +| uint64, int16 | int64 | int64 cannot accommodate all uint64 values | ++-------------------+----------------------+------------------------------------------------+ +| float32, int32 | float32 | Promote RHS to float32 | ++-------------------+----------------------+------------------------------------------------+ +| float32, float64 | float64 | | ++-------------------+----------------------+------------------------------------------------+ +| float32, int64 | float32 | int64 is wider, still promotes to float32 | ++-------------------+----------------------+------------------------------------------------+ + +In particulary, note that comparing a ``uint64`` column to an ``int16`` column +may emit an error if one of the ``uint64`` values cannot be expressed as the +common type ``int64`` (for example, ``2 ** 63``). + +.. _compute-function-list: + +Available functions +=================== + +Type categories +--------------- + +To avoid exhaustively listing supported types, the tables below use a number +of general type categories: + +* "Numeric": Integer types (Int8, etc.) and Floating-point types (Float32, + Float64, sometimes Float16). Some functions also accept Decimal128 and + Decimal256 input. + +* "Temporal": Date types (Date32, Date64), Time types (Time32, Time64), + Timestamp, Duration, Interval. + +* "Binary-like": Binary, LargeBinary, sometimes also FixedSizeBinary. + +* "String-like": String, LargeString. + +* "List-like": List, LargeList, sometimes also FixedSizeList. + +* "Nested": List-likes (including FixedSizeList), Struct, Union, and + related types like Map. + +If you are unsure whether a function supports a concrete input type, we +recommend you try it out. Unsupported input types return a ``TypeError`` +:class:`Status`. + +Aggregations +------------ + +Scalar aggregations operate on a (chunked) array or scalar value and reduce +the input to a single output value. 
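+
+For instance, a minimal sketch of invoking one of the aggregations listed in
+the table below, through the generic :func:`arrow::compute::CallFunction` API
+shown earlier (the ``array`` variable is assumed to be an existing numeric
+array)::
+
+   std::shared_ptr<arrow::Array> array = ...;
+   arrow::Datum sum_datum;
+
+   ARROW_ASSIGN_OR_RAISE(sum_datum,
+                         arrow::compute::CallFunction("sum", {array}));
+   // The aggregation reduces the whole input to a single scalar value
+   std::shared_ptr<arrow::Scalar> sum_value = sum_datum.scalar();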
+ ++--------------------+-------+------------------+------------------------+----------------------------------+-------+ +| Function name | Arity | Input types | Output type | Options class | Notes | ++====================+=======+==================+========================+==================================+=======+ +| all | Unary | Boolean | Scalar Boolean | :struct:`ScalarAggregateOptions` | \(1) | ++--------------------+-------+------------------+------------------------+----------------------------------+-------+ +| any | Unary | Boolean | Scalar Boolean | :struct:`ScalarAggregateOptions` | \(1) | ++--------------------+-------+------------------+------------------------+----------------------------------+-------+ +| approximate_median | Unary | Numeric | Scalar Float64 | :struct:`ScalarAggregateOptions` | | ++--------------------+-------+------------------+------------------------+----------------------------------+-------+ +| count | Unary | Any | Scalar Int64 | :struct:`CountOptions` | \(2) | ++--------------------+-------+------------------+------------------------+----------------------------------+-------+ +| count_distinct | Unary | Non-nested types | Scalar Int64 | :struct:`CountOptions` | \(2) | ++--------------------+-------+------------------+------------------------+----------------------------------+-------+ +| index | Unary | Any | Scalar Int64 | :struct:`IndexOptions` | | ++--------------------+-------+------------------+------------------------+----------------------------------+-------+ +| max | Unary | Non-nested types | Scalar Input type | :struct:`ScalarAggregateOptions` | | ++--------------------+-------+------------------+------------------------+----------------------------------+-------+ +| mean | Unary | Numeric | Scalar Decimal/Float64 | :struct:`ScalarAggregateOptions` | | ++--------------------+-------+------------------+------------------------+----------------------------------+-------+ +| min | Unary | Non-nested types | Scalar Input type | :struct:`ScalarAggregateOptions` | | ++--------------------+-------+------------------+------------------------+----------------------------------+-------+ +| min_max | Unary | Non-nested types | Scalar Struct | :struct:`ScalarAggregateOptions` | \(3) | ++--------------------+-------+------------------+------------------------+----------------------------------+-------+ +| mode | Unary | Numeric | Struct | :struct:`ModeOptions` | \(4) | ++--------------------+-------+------------------+------------------------+----------------------------------+-------+ +| product | Unary | Numeric | Scalar Numeric | :struct:`ScalarAggregateOptions` | \(5) | ++--------------------+-------+------------------+------------------------+----------------------------------+-------+ +| quantile | Unary | Numeric | Scalar Numeric | :struct:`QuantileOptions` | \(6) | ++--------------------+-------+------------------+------------------------+----------------------------------+-------+ +| stddev | Unary | Numeric | Scalar Float64 | :struct:`VarianceOptions` | | ++--------------------+-------+------------------+------------------------+----------------------------------+-------+ +| sum | Unary | Numeric | Scalar Numeric | :struct:`ScalarAggregateOptions` | \(5) | ++--------------------+-------+------------------+------------------------+----------------------------------+-------+ +| tdigest | Unary | Numeric | Float64 | :struct:`TDigestOptions` | \(7) | 
++--------------------+-------+------------------+------------------------+----------------------------------+-------+ +| variance | Unary | Numeric | Scalar Float64 | :struct:`VarianceOptions` | | ++--------------------+-------+------------------+------------------------+----------------------------------+-------+ + +* \(1) If null values are taken into account, by setting the + ScalarAggregateOptions parameter skip_nulls = false, then `Kleene logic`_ + logic is applied. The min_count option is not respected. + +* \(2) CountMode controls whether only non-null values are counted (the + default), only null values are counted, or all values are counted. + +* \(3) Output is a ``{"min": input type, "max": input type}`` Struct. + + Of the interval types, only the month interval is supported, as the day-time + and month-day-nano types are not sortable. + +* \(4) Output is an array of ``{"mode": input type, "count": Int64}`` Struct. + It contains the *N* most common elements in the input, in descending + order, where *N* is given in :member:`ModeOptions::n`. + If two values have the same count, the smallest one comes first. + Note that the output can have less than *N* elements if the input has + less than *N* distinct values. + +* \(5) Output is Int64, UInt64, Float64, or Decimal128/256, depending on the + input type. + +* \(6) Output is Float64 or input type, depending on QuantileOptions. + +* \(7) tdigest/t-digest computes approximate quantiles, and so only needs a + fixed amount of memory. See the `reference implementation + <https://github.com/tdunning/t-digest>`_ for details. + +Grouped Aggregations ("group by") +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Grouped aggregations are not directly invokable, but are used as part of a +SQL-style "group by" operation. Like scalar aggregations, grouped aggregations +reduce multiple input values to a single output value. Instead of aggregating +all values of the input, however, grouped aggregations partition the input +values on some set of "key" columns, then aggregate each group individually, +emitting one output value per input group. + +As an example, for the following table: + ++------------------+-----------------+ +| Column ``key`` | Column ``x`` | ++==================+=================+ +| "a" | 2 | ++------------------+-----------------+ +| "a" | 5 | ++------------------+-----------------+ +| "b" | null | ++------------------+-----------------+ +| "b" | null | ++------------------+-----------------+ +| null | null | ++------------------+-----------------+ +| null | 9 | ++------------------+-----------------+ + +we can compute a sum of the column ``x``, grouped on the column ``key``. +This gives us three groups, with the following results. Note that null is +treated as a distinct key value. + ++------------------+-----------------------+ +| Column ``key`` | Column ``sum(x)`` | ++==================+=======================+ +| "a" | 7 | ++------------------+-----------------------+ +| "b" | null | ++------------------+-----------------------+ +| null | 9 | ++------------------+-----------------------+ + +The supported aggregation functions are as follows. All function names are +prefixed with ``hash_``, which differentiates them from their scalar +equivalents above and reflects how they are implemented internally. 
+ ++-------------------------+-------+------------------------------------+------------------------+----------------------------------+-------+ +| Function name | Arity | Input types | Output type | Options class | Notes | ++=========================+=======+====================================+========================+==================================+=======+ +| hash_all | Unary | Boolean | Boolean | :struct:`ScalarAggregateOptions` | \(1) | ++-------------------------+-------+------------------------------------+------------------------+----------------------------------+-------+ +| hash_any | Unary | Boolean | Boolean | :struct:`ScalarAggregateOptions` | \(1) | ++-------------------------+-------+------------------------------------+------------------------+----------------------------------+-------+ +| hash_approximate_median | Unary | Numeric | Float64 | :struct:`ScalarAggregateOptions` | | ++-------------------------+-------+------------------------------------+------------------------+----------------------------------+-------+ +| hash_count | Unary | Any | Int64 | :struct:`CountOptions` | \(2) | ++-------------------------+-------+------------------------------------+------------------------+----------------------------------+-------+ +| hash_count_distinct | Unary | Any | Int64 | :struct:`CountOptions` | \(2) | ++-------------------------+-------+------------------------------------+------------------------+----------------------------------+-------+ +| hash_distinct | Unary | Any | Input type | :struct:`CountOptions` | \(2) | ++-------------------------+-------+------------------------------------+------------------------+----------------------------------+-------+ +| hash_max | Unary | Non-nested, non-binary/string-like | Input type | :struct:`ScalarAggregateOptions` | | ++-------------------------+-------+------------------------------------+------------------------+----------------------------------+-------+ +| hash_mean | Unary | Numeric | Decimal/Float64 | :struct:`ScalarAggregateOptions` | | ++-------------------------+-------+------------------------------------+------------------------+----------------------------------+-------+ +| hash_min | Unary | Non-nested, non-binary/string-like | Input type | :struct:`ScalarAggregateOptions` | | ++-------------------------+-------+------------------------------------+------------------------+----------------------------------+-------+ +| hash_min_max | Unary | Non-nested, non-binary/string-like | Struct | :struct:`ScalarAggregateOptions` | \(3) | ++-------------------------+-------+------------------------------------+------------------------+----------------------------------+-------+ +| hash_product | Unary | Numeric | Numeric | :struct:`ScalarAggregateOptions` | \(4) | ++-------------------------+-------+------------------------------------+------------------------+----------------------------------+-------+ +| hash_stddev | Unary | Numeric | Float64 | :struct:`VarianceOptions` | | ++-------------------------+-------+------------------------------------+------------------------+----------------------------------+-------+ +| hash_sum | Unary | Numeric | Numeric | :struct:`ScalarAggregateOptions` | \(4) | ++-------------------------+-------+------------------------------------+------------------------+----------------------------------+-------+ +| hash_tdigest | Unary | Numeric | FixedSizeList[Float64] | :struct:`TDigestOptions` | \(5) | 
++-------------------------+-------+------------------------------------+------------------------+----------------------------------+-------+ +| hash_variance | Unary | Numeric | Float64 | :struct:`VarianceOptions` | | ++-------------------------+-------+------------------------------------+------------------------+----------------------------------+-------+ + +* \(1) If null values are taken into account, by setting the + :member:`ScalarAggregateOptions::skip_nulls` to false, then `Kleene logic`_ + logic is applied. The min_count option is not respected. + +* \(2) CountMode controls whether only non-null values are counted + (the default), only null values are counted, or all values are + counted. For hash_distinct, it instead controls whether null values + are emitted. This never affects the grouping keys, only group values + (i.e. you may get a group where the key is null). + +* \(3) Output is a ``{"min": input type, "max": input type}`` Struct array. + + Of the interval types, only the month interval is supported, as the day-time + and month-day-nano types are not sortable. + +* \(4) Output is Int64, UInt64, Float64, or Decimal128/256, depending on the + input type. + +* \(5) T-digest computes approximate quantiles, and so only needs a + fixed amount of memory. See the `reference implementation + <https://github.com/tdunning/t-digest>`_ for details. + +Element-wise ("scalar") functions +--------------------------------- + +All element-wise functions accept both arrays and scalars as input. The +semantics for unary functions are as follow: + +* scalar inputs produce a scalar output +* array inputs produce an array output + +Binary functions have the following semantics (which is sometimes called +"broadcasting" in other systems such as NumPy): + +* ``(scalar, scalar)`` inputs produce a scalar output +* ``(array, array)`` inputs produce an array output (and both inputs must + be of the same length) +* ``(scalar, array)`` and ``(array, scalar)`` produce an array output. + The scalar input is handled as if it were an array of the same length N + as the other input, with the same value repeated N times. + +Arithmetic functions +~~~~~~~~~~~~~~~~~~~~ + +These functions expect inputs of numeric type and apply a given arithmetic +operation to each element(s) gathered from the input(s). If any of the +input element(s) is null, the corresponding output element is null. +For binary functions, input(s) will be cast to the +:ref:`common numeric type <common-numeric-type>` +(and dictionary decoded, if applicable) before the operation is applied. + +The default variant of these functions does not detect overflow (the result +then typically wraps around). Most functions are also available in an +overflow-checking variant, suffixed ``_checked``, which returns +an ``Invalid`` :class:`Status` when overflow is detected. + +For functions which support decimal inputs (currently ``add``, ``subtract``, +``multiply``, and ``divide`` and their checked variants), decimals of different +precisions/scales will be promoted appropriately. Mixed decimal and +floating-point arguments will cast all arguments to floating-point, while mixed +decimal and integer arguments will cast all arguments to decimals. 
+ ++------------------+--------+----------------+----------------------+-------+ +| Function name | Arity | Input types | Output type | Notes | ++==================+========+================+======================+=======+ +| abs | Unary | Numeric | Numeric | | ++------------------+--------+----------------+----------------------+-------+ +| abs_checked | Unary | Numeric | Numeric | | ++------------------+--------+----------------+----------------------+-------+ +| add | Binary | Numeric | Numeric | \(1) | ++------------------+--------+----------------+----------------------+-------+ +| add_checked | Binary | Numeric | Numeric | \(1) | ++------------------+--------+----------------+----------------------+-------+ +| divide | Binary | Numeric | Numeric | \(1) | ++------------------+--------+----------------+----------------------+-------+ +| divide_checked | Binary | Numeric | Numeric | \(1) | ++------------------+--------+----------------+----------------------+-------+ +| multiply | Binary | Numeric | Numeric | \(1) | ++------------------+--------+----------------+----------------------+-------+ +| multiply_checked | Binary | Numeric | Numeric | \(1) | ++------------------+--------+----------------+----------------------+-------+ +| negate | Unary | Numeric | Numeric | | ++------------------+--------+----------------+----------------------+-------+ +| negate_checked | Unary | Signed Numeric | Signed Numeric | | ++------------------+--------+----------------+----------------------+-------+ +| power | Binary | Numeric | Numeric | | ++------------------+--------+----------------+----------------------+-------+ +| power_checked | Binary | Numeric | Numeric | | ++------------------+--------+----------------+----------------------+-------+ +| sign | Unary | Numeric | Int8/Float32/Float64 | \(2) | ++------------------+--------+----------------+----------------------+-------+ +| subtract | Binary | Numeric | Numeric | \(1) | ++------------------+--------+----------------+----------------------+-------+ +| subtract_checked | Binary | Numeric | Numeric | \(1) | ++------------------+--------+----------------+----------------------+-------+ + +* \(1) Precision and scale of computed DECIMAL results + + +------------+---------------------------------------------+ + | Operation | Result precision and scale | + +============+=============================================+ + | | add | | scale = max(s1, s2) | + | | subtract | | precision = max(p1-s1, p2-s2) + 1 + scale | + +------------+---------------------------------------------+ + | multiply | | scale = s1 + s2 | + | | | precision = p1 + p2 + 1 | + +------------+---------------------------------------------+ + | divide | | scale = max(4, s1 + p2 - s2 + 1) | + | | | precision = p1 - s1 + s2 + scale | + +------------+---------------------------------------------+ + + It's compatible with Redshift's decimal promotion rules. All decimal digits + are preserved for `add`, `subtract` and `multiply` operations. The result + precision of `divide` is at least the sum of precisions of both operands with + enough scale kept. Error is returned if the result precision is beyond the + decimal value range. + +* \(2) Output is any of (-1,1) for nonzero inputs and 0 for zero input. + NaN values return NaN. Integral values return signedness as Int8 and + floating-point values return it with the same type as the input values. 
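+
+As a quick illustration of the ``_checked`` variants described above, here is
+a minimal sketch, assuming ``a`` and ``b`` are existing Int8 arrays whose
+element-wise sum overflows the Int8 range::
+
+   std::shared_ptr<arrow::Array> a = ...;  // e.g. contains 127
+   std::shared_ptr<arrow::Array> b = ...;  // e.g. contains 1
+
+   // "add" silently wraps around on overflow...
+   arrow::Result<arrow::Datum> wrapped =
+       arrow::compute::CallFunction("add", {a, b});
+
+   // ...while "add_checked" reports the overflow as an Invalid Status
+   arrow::Result<arrow::Datum> checked =
+       arrow::compute::CallFunction("add_checked", {a, b});
+   if (!checked.ok()) {
+     // ... handle the overflow error
+   }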
+
+Bit-wise functions
+~~~~~~~~~~~~~~~~~~
+
++--------------------------+------------+--------------------+---------------------+
+| Function name | Arity | Input types | Output type |
++==========================+============+====================+=====================+
+| bit_wise_and | Binary | Numeric | Numeric |
++--------------------------+------------+--------------------+---------------------+
+| bit_wise_not | Unary | Numeric | Numeric |
++--------------------------+------------+--------------------+---------------------+
+| bit_wise_or | Binary | Numeric | Numeric |
++--------------------------+------------+--------------------+---------------------+
+| bit_wise_xor | Binary | Numeric | Numeric |
++--------------------------+------------+--------------------+---------------------+
+| shift_left | Binary | Numeric | Numeric |
++--------------------------+------------+--------------------+---------------------+
+| shift_left_checked | Binary | Numeric | Numeric (1) |
++--------------------------+------------+--------------------+---------------------+
+| shift_right | Binary | Numeric | Numeric |
++--------------------------+------------+--------------------+---------------------+
+| shift_right_checked | Binary | Numeric | Numeric (1) |
++--------------------------+------------+--------------------+---------------------+
+
+* \(1) An error is emitted if the shift amount (i.e. the second input) is
+ out of bounds for the data type. However, an overflow when shifting the
+ first input is not an error (truncated bits are silently discarded).
+
+Rounding functions
+~~~~~~~~~~~~~~~~~~
+
+Rounding functions displace numeric inputs to an approximate value with a simpler
+representation based on the rounding criterion.
+
++-------------------+------------+-------------+-------------------------+----------------------------------+--------+
+| Function name | Arity | Input types | Output type | Options class | Notes |
++===================+============+=============+=========================+==================================+========+
+| ceil | Unary | Numeric | Float32/Float64/Decimal | | |
++-------------------+------------+-------------+-------------------------+----------------------------------+--------+
+| floor | Unary | Numeric | Float32/Float64/Decimal | | |
++-------------------+------------+-------------+-------------------------+----------------------------------+--------+
+| round | Unary | Numeric | Float32/Float64/Decimal | :struct:`RoundOptions` | (1)(2) |
++-------------------+------------+-------------+-------------------------+----------------------------------+--------+
+| round_to_multiple | Unary | Numeric | Float32/Float64/Decimal | :struct:`RoundToMultipleOptions` | (1)(3) |
++-------------------+------------+-------------+-------------------------+----------------------------------+--------+
+| trunc | Unary | Numeric | Float32/Float64/Decimal | | |
++-------------------+------------+-------------+-------------------------+----------------------------------+--------+
+
+* \(1) Output value is a 64-bit floating-point number for integral inputs and
+ retains the same type for floating-point and decimal inputs. By default,
+ rounding functions displace a value to the nearest integer using
+ HALF_TO_EVEN to resolve ties. Options are available to control the rounding
+ criterion. Both ``round`` and ``round_to_multiple`` have the ``round_mode``
+ option to set the rounding mode.
+* \(2) Round to a number of digits where the ``ndigits`` option of + :struct:`RoundOptions` specifies the rounding precision in terms of number + of digits. A negative value corresponds to digits in the non-fractional + part. For example, -2 corresponds to rounding to the nearest multiple of + 100 (zeroing the ones and tens digits). Default value of ``ndigits`` is 0 + which rounds to the nearest integer. +* \(3) Round to a multiple where the ``multiple`` option of + :struct:`RoundToMultipleOptions` specifies the rounding scale. The rounding + multiple has to be a positive value. For example, 100 corresponds to + rounding to the nearest multiple of 100 (zeroing the ones and tens digits). + Default value of ``multiple`` is 1 which rounds to the nearest integer. + +For ``round`` and ``round_to_multiple``, the following rounding modes are available. +Tie-breaking modes are prefixed with HALF and round non-ties to the nearest integer. +The example values are given for default values of ``ndigits`` and ``multiple``. + ++-----------------------+--------------------------------------------------------------+---------------------------+ +| ``round_mode`` | Operation performed | Example values | ++=======================+==============================================================+===========================+ +| DOWN | Round to nearest integer less than or equal in magnitude; | 3.2 -> 3, 3.7 -> 3, | +| | also known as ``floor(x)`` | -3.2 -> -4, -3.7 -> -4 | ++-----------------------+--------------------------------------------------------------+---------------------------+ +| UP | Round to nearest integer greater than or equal in magnitude; | 3.2 -> 4, 3.7 -> 4, | +| | also known as ``ceil(x)`` | -3.2 -> -3, -3.7 -> -3 | ++-----------------------+--------------------------------------------------------------+---------------------------+ +| TOWARDS_ZERO | Get the integral part without fractional digits; | 3.2 -> 3, 3.7 -> 3, | +| | also known as ``trunc(x)`` | -3.2 -> -3, -3.7 -> -3 | ++-----------------------+--------------------------------------------------------------+---------------------------+ +| TOWARDS_INFINITY | Round negative values with ``DOWN`` rule, | 3.2 -> 4, 3.7 -> 4, | +| | round positive values with ``UP`` rule | -3.2 -> -4, -3.7 -> -4 | ++-----------------------+--------------------------------------------------------------+---------------------------+ +| HALF_DOWN | Round ties with ``DOWN`` rule | 3.5 -> 3, 4.5 -> 4, | +| | | -3.5 -> -4, -4.5 -> -5 | ++-----------------------+--------------------------------------------------------------+---------------------------+ +| HALF_UP | Round ties with ``UP`` rule | 3.5 -> 4, 4.5 -> 5, | +| | | -3.5 -> -3, -4.5 -> -4 | ++-----------------------+--------------------------------------------------------------+---------------------------+ +| HALF_TOWARDS_ZERO | Round ties with ``TOWARDS_ZERO`` rule | 3.5 -> 3, 4.5 -> 4, | +| | | -3.5 -> -3, -4.5 -> -4 | ++-----------------------+--------------------------------------------------------------+---------------------------+ +| HALF_TOWARDS_INFINITY | Round ties with ``TOWARDS_INFINITY`` rule | 3.5 -> 4, 4.5 -> 5, | +| | | -3.5 -> -4, -4.5 -> -5 | ++-----------------------+--------------------------------------------------------------+---------------------------+ +| HALF_TO_EVEN | Round ties to nearest even integer | 3.5 -> 4, 4.5 -> 4, | +| | | -3.5 -> -4, -4.5 -> -4 | ++-----------------------+--------------------------------------------------------------+---------------------------+ +| 
HALF_TO_ODD | Round ties to nearest odd integer | 3.5 -> 3, 4.5 -> 5, |
+| | | -3.5 -> -3, -4.5 -> -5 |
++-----------------------+--------------------------------------------------------------+---------------------------+
+
+The following table gives examples of how ``ndigits`` (for the ``round``
+function) and ``multiple`` (for ``round_to_multiple``) influence the operation
+performed, respectively.
+
++--------------------+-------------------+---------------------------+
+| Round ``multiple`` | Round ``ndigits`` | Operation performed |
++====================+===================+===========================+
+| 1 | 0 | Round to integer |
++--------------------+-------------------+---------------------------+
+| 0.001 | 3 | Round to 3 decimal places |
++--------------------+-------------------+---------------------------+
+| 10 | -1 | Round to multiple of 10 |
++--------------------+-------------------+---------------------------+
+| 2 | NA | Round to multiple of 2 |
++--------------------+-------------------+---------------------------+
+
+Logarithmic functions
+~~~~~~~~~~~~~~~~~~~~~
+
+Logarithmic functions are also supported, and offer ``_checked``
+variants that check for domain errors if needed.
+
++--------------------------+------------+--------------------+---------------------+
+| Function name | Arity | Input types | Output type |
++==========================+============+====================+=====================+
+| ln | Unary | Float32/Float64 | Float32/Float64 |
++--------------------------+------------+--------------------+---------------------+
+| ln_checked | Unary | Float32/Float64 | Float32/Float64 |
++--------------------------+------------+--------------------+---------------------+
+| log10 | Unary | Float32/Float64 | Float32/Float64 |
++--------------------------+------------+--------------------+---------------------+
+| log10_checked | Unary | Float32/Float64 | Float32/Float64 |
++--------------------------+------------+--------------------+---------------------+
+| log1p | Unary | Float32/Float64 | Float32/Float64 |
++--------------------------+------------+--------------------+---------------------+
+| log1p_checked | Unary | Float32/Float64 | Float32/Float64 |
++--------------------------+------------+--------------------+---------------------+
+| log2 | Unary | Float32/Float64 | Float32/Float64 |
++--------------------------+------------+--------------------+---------------------+
+| log2_checked | Unary | Float32/Float64 | Float32/Float64 |
++--------------------------+------------+--------------------+---------------------+
+| logb | Binary | Float32/Float64 | Float32/Float64 |
++--------------------------+------------+--------------------+---------------------+
+| logb_checked | Binary | Float32/Float64 | Float32/Float64 |
++--------------------------+------------+--------------------+---------------------+
+
+Trigonometric functions
+~~~~~~~~~~~~~~~~~~~~~~~
+
+Trigonometric functions are also supported, and offer ``_checked``
+variants that check for domain errors if needed.
+ ++--------------------------+------------+--------------------+---------------------+ +| Function name | Arity | Input types | Output type | ++==========================+============+====================+=====================+ +| acos | Unary | Float32/Float64 | Float32/Float64 | ++--------------------------+------------+--------------------+---------------------+ +| acos_checked | Unary | Float32/Float64 | Float32/Float64 | ++--------------------------+------------+--------------------+---------------------+ +| asin | Unary | Float32/Float64 | Float32/Float64 | ++--------------------------+------------+--------------------+---------------------+ +| asin_checked | Unary | Float32/Float64 | Float32/Float64 | ++--------------------------+------------+--------------------+---------------------+ +| atan | Unary | Float32/Float64 | Float32/Float64 | ++--------------------------+------------+--------------------+---------------------+ +| atan2 | Binary | Float32/Float64 | Float32/Float64 | ++--------------------------+------------+--------------------+---------------------+ +| cos | Unary | Float32/Float64 | Float32/Float64 | ++--------------------------+------------+--------------------+---------------------+ +| cos_checked | Unary | Float32/Float64 | Float32/Float64 | ++--------------------------+------------+--------------------+---------------------+ +| sin | Unary | Float32/Float64 | Float32/Float64 | ++--------------------------+------------+--------------------+---------------------+ +| sin_checked | Unary | Float32/Float64 | Float32/Float64 | ++--------------------------+------------+--------------------+---------------------+ +| tan | Unary | Float32/Float64 | Float32/Float64 | ++--------------------------+------------+--------------------+---------------------+ +| tan_checked | Unary | Float32/Float64 | Float32/Float64 | ++--------------------------+------------+--------------------+---------------------+ + +Comparisons +~~~~~~~~~~~ + +These functions expect two inputs of numeric type (in which case they will be +cast to the :ref:`common numeric type <common-numeric-type>` before comparison), +or two inputs of Binary- or String-like types, or two inputs of Temporal types. +If any input is dictionary encoded it will be expanded for the purposes of +comparison. If any of the input elements in a pair is null, the corresponding +output element is null. Decimal arguments will be promoted in the same way as +for ``add`` and ``subtract``. 
+ ++----------------+------------+---------------------------------------------+---------------------+ +| Function names | Arity | Input types | Output type | ++================+============+=============================================+=====================+ +| equal | Binary | Numeric, Temporal, Binary- and String-like | Boolean | ++----------------+------------+---------------------------------------------+---------------------+ +| greater | Binary | Numeric, Temporal, Binary- and String-like | Boolean | ++----------------+------------+---------------------------------------------+---------------------+ +| greater_equal | Binary | Numeric, Temporal, Binary- and String-like | Boolean | ++----------------+------------+---------------------------------------------+---------------------+ +| less | Binary | Numeric, Temporal, Binary- and String-like | Boolean | ++----------------+------------+---------------------------------------------+---------------------+ +| less_equal | Binary | Numeric, Temporal, Binary- and String-like | Boolean | ++----------------+------------+---------------------------------------------+---------------------+ +| not_equal | Binary | Numeric, Temporal, Binary- and String-like | Boolean | ++----------------+------------+---------------------------------------------+---------------------+ + +These functions take any number of inputs of numeric type (in which case they +will be cast to the :ref:`common numeric type <common-numeric-type>` before +comparison) or of temporal types. If any input is dictionary encoded it will be +expanded for the purposes of comparison. + ++------------------+------------+---------------------------------------------+---------------------+---------------------------------------+-------+ +| Function names | Arity | Input types | Output type | Options class | Notes | ++==================+============+=============================================+=====================+=======================================+=======+ +| max_element_wise | Varargs | Numeric and Temporal | Numeric or Temporal | :struct:`ElementWiseAggregateOptions` | \(1) | ++------------------+------------+---------------------------------------------+---------------------+---------------------------------------+-------+ +| min_element_wise | Varargs | Numeric and Temporal | Numeric or Temporal | :struct:`ElementWiseAggregateOptions` | \(1) | ++------------------+------------+---------------------------------------------+---------------------+---------------------------------------+-------+ + +* \(1) By default, nulls are skipped (but the kernel can be configured to propagate nulls). + For floating point values, NaN will be taken over null but not over any other value. + +Logical functions +~~~~~~~~~~~~~~~~~~ + +The normal behaviour for these functions is to emit a null if any of the +inputs is null (similar to the semantics of ``NaN`` in floating-point +computations). + +Some of them are also available in a `Kleene logic`_ variant (suffixed +``_kleene``) where null is taken to mean "undefined". This is the +interpretation of null used in SQL systems as well as R and Julia, +for example. 
+ +For the Kleene logic variants, therefore: + +* "true AND null", "null AND true" give "null" (the result is undefined) +* "true OR null", "null OR true" give "true" +* "false AND null", "null AND false" give "false" +* "false OR null", "null OR false" give "null" (the result is undefined) + ++--------------------------+------------+--------------------+---------------------+ +| Function name | Arity | Input types | Output type | ++==========================+============+====================+=====================+ +| and | Binary | Boolean | Boolean | ++--------------------------+------------+--------------------+---------------------+ +| and_kleene | Binary | Boolean | Boolean | ++--------------------------+------------+--------------------+---------------------+ +| and_not | Binary | Boolean | Boolean | ++--------------------------+------------+--------------------+---------------------+ +| and_not_kleene | Binary | Boolean | Boolean | ++--------------------------+------------+--------------------+---------------------+ +| invert | Unary | Boolean | Boolean | ++--------------------------+------------+--------------------+---------------------+ +| or | Binary | Boolean | Boolean | ++--------------------------+------------+--------------------+---------------------+ +| or_kleene | Binary | Boolean | Boolean | ++--------------------------+------------+--------------------+---------------------+ +| xor | Binary | Boolean | Boolean | ++--------------------------+------------+--------------------+---------------------+ + +.. _Kleene logic: https://en.wikipedia.org/wiki/Three-valued_logic#Kleene_and_Priest_logics + +String predicates +~~~~~~~~~~~~~~~~~ + +These functions classify the input string elements according to their character +contents. An empty string element emits false in the output. For ASCII +variants of the functions (prefixed ``ascii_``), a string element with non-ASCII +characters emits false in the output. 
+ +The first set of functions operates on a character-per-character basis, +and emit true in the output if the input contains only characters of a +given class: + ++--------------------+-------+-------------+-------------+-------------------------+-------+ +| Function name | Arity | Input types | Output type | Matched character class | Notes | ++====================+=======+=============+=============+=========================+=======+ +| ascii_is_alnum | Unary | String-like | Boolean | Alphanumeric ASCII | | ++--------------------+-------+-------------+-------------+-------------------------+-------+ +| ascii_is_alpha | Unary | String-like | Boolean | Alphabetic ASCII | | ++--------------------+-------+-------------+-------------+-------------------------+-------+ +| ascii_is_decimal | Unary | String-like | Boolean | Decimal ASCII | \(1) | ++--------------------+-------+-------------+-------------+-------------------------+-------+ +| ascii_is_lower | Unary | String-like | Boolean | Lowercase ASCII | \(2) | ++--------------------+-------+-------------+-------------+-------------------------+-------+ +| ascii_is_printable | Unary | String-like | Boolean | Printable ASCII | | ++--------------------+-------+-------------+-------------+-------------------------+-------+ +| ascii_is_space | Unary | String-like | Boolean | Whitespace ASCII | | ++--------------------+-------+-------------+-------------+-------------------------+-------+ +| ascii_is_upper | Unary | String-like | Boolean | Uppercase ASCII | \(2) | ++--------------------+-------+-------------+-------------+-------------------------+-------+ +| utf8_is_alnum | Unary | String-like | Boolean | Alphanumeric Unicode | | ++--------------------+-------+-------------+-------------+-------------------------+-------+ +| utf8_is_alpha | Unary | String-like | Boolean | Alphabetic Unicode | | ++--------------------+-------+-------------+-------------+-------------------------+-------+ +| utf8_is_decimal | Unary | String-like | Boolean | Decimal Unicode | | ++--------------------+-------+-------------+-------------+-------------------------+-------+ +| utf8_is_digit | Unary | String-like | Boolean | Unicode digit | \(3) | ++--------------------+-------+-------------+-------------+-------------------------+-------+ +| utf8_is_lower | Unary | String-like | Boolean | Lowercase Unicode | \(2) | ++--------------------+-------+-------------+-------------+-------------------------+-------+ +| utf8_is_numeric | Unary | String-like | Boolean | Numeric Unicode | \(4) | ++--------------------+-------+-------------+-------------+-------------------------+-------+ +| utf8_is_printable | Unary | String-like | Boolean | Printable Unicode | | ++--------------------+-------+-------------+-------------+-------------------------+-------+ +| utf8_is_space | Unary | String-like | Boolean | Whitespace Unicode | | ++--------------------+-------+-------------+-------------+-------------------------+-------+ +| utf8_is_upper | Unary | String-like | Boolean | Uppercase Unicode | \(2) | ++--------------------+-------+-------------+-------------+-------------------------+-------+ + +* \(1) Also matches all numeric ASCII characters and all ASCII digits. + +* \(2) Non-cased characters, such as punctuation, do not match. + +* \(3) This is currently the same as ``utf8_is_decimal``. + +* \(4) Unlike ``utf8_is_decimal``, non-decimal numeric characters also match. 
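+
+As a brief sketch of these predicates (assuming an installed Arrow C++ library
+and the conventional ``arrow/api.h`` and ``arrow/compute/api.h`` headers; the
+helper name ``PredicateExample`` is arbitrary), the ASCII and Unicode variants
+can disagree on non-ASCII input, while the empty string emits false for both:
+
+.. code-block:: cpp
+
+   #include <iostream>
+   #include <memory>
+
+   #include <arrow/api.h>
+   #include <arrow/compute/api.h>
+
+   arrow::Status PredicateExample() {
+     // "Árvore" contains a non-ASCII character, "" is the empty string.
+     arrow::StringBuilder builder;
+     ARROW_RETURN_NOT_OK(builder.AppendValues({"arrow", "Árvore", ""}));
+     std::shared_ptr<arrow::Array> strings;
+     ARROW_RETURN_NOT_OK(builder.Finish(&strings));
+
+     // ASCII variant: false for "Árvore" (non-ASCII character) and for "".
+     ARROW_ASSIGN_OR_RAISE(
+         arrow::Datum ascii_result,
+         arrow::compute::CallFunction("ascii_is_alpha", {strings}));
+     // Unicode-aware variant: true for "Árvore", still false for "".
+     ARROW_ASSIGN_OR_RAISE(
+         arrow::Datum utf8_result,
+         arrow::compute::CallFunction("utf8_is_alpha", {strings}));
+
+     std::cout << ascii_result.make_array()->ToString() << std::endl;
+     std::cout << utf8_result.make_array()->ToString() << std::endl;
+     return arrow::Status::OK();
+   }
+
+   int main() { return PredicateExample().ok() ? 0 : 1; }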
+ +The second set of functions also consider the character order in a string +element: + ++--------------------------+------------+--------------------+---------------------+---------+ +| Function name | Arity | Input types | Output type | Notes | ++==========================+============+====================+=====================+=========+ +| ascii_is_title | Unary | String-like | Boolean | \(1) | ++--------------------------+------------+--------------------+---------------------+---------+ +| utf8_is_title | Unary | String-like | Boolean | \(1) | ++--------------------------+------------+--------------------+---------------------+---------+ + +* \(1) Output is true iff the input string element is title-cased, i.e. any + word starts with an uppercase character, followed by lowercase characters. + Word boundaries are defined by non-cased characters. + +The third set of functions examines string elements on a byte-per-byte basis: + ++--------------------------+------------+--------------------+---------------------+---------+ +| Function name | Arity | Input types | Output type | Notes | ++==========================+============+====================+=====================+=========+ +| string_is_ascii | Unary | String-like | Boolean | \(1) | ++--------------------------+------------+--------------------+---------------------+---------+ + +* \(1) Output is true iff the input string element contains only ASCII characters, + i.e. only bytes in [0, 127]. + +String transforms +~~~~~~~~~~~~~~~~~ + ++-------------------------+-------+------------------------+------------------------+-----------------------------------+-------+ +| Function name | Arity | Input types | Output type | Options class | Notes | ++=========================+=======+========================+========================+===================================+=======+ +| ascii_capitalize | Unary | String-like | String-like | | \(1) | ++-------------------------+-------+------------------------+------------------------+-----------------------------------+-------+ +| ascii_lower | Unary | String-like | String-like | | \(1) | ++-------------------------+-------+------------------------+------------------------+-----------------------------------+-------+ +| ascii_reverse | Unary | String-like | String-like | | \(2) | ++-------------------------+-------+------------------------+------------------------+-----------------------------------+-------+ +| ascii_swapcase | Unary | String-like | String-like | | \(1) | ++-------------------------+-------+------------------------+------------------------+-----------------------------------+-------+ +| ascii_title | Unary | String-like | String-like | | \(1) | ++-------------------------+-------+------------------------+------------------------+-----------------------------------+-------+ +| ascii_upper | Unary | String-like | String-like | | \(1) | ++-------------------------+-------+------------------------+------------------------+-----------------------------------+-------+ +| binary_length | Unary | Binary- or String-like | Int32 or Int64 | | \(3) | ++-------------------------+-------+------------------------+------------------------+-----------------------------------+-------+ +| binary_replace_slice | Unary | String-like | Binary- or String-like | :struct:`ReplaceSliceOptions` | \(4) | ++-------------------------+-------+------------------------+------------------------+-----------------------------------+-------+ +| replace_substring | Unary | String-like | String-like | 
:struct:`ReplaceSubstringOptions` | \(5) |
++-------------------------+-------+------------------------+------------------------+-----------------------------------+-------+
+| replace_substring_regex | Unary | String-like | String-like | :struct:`ReplaceSubstringOptions` | \(6) |
++-------------------------+-------+------------------------+------------------------+-----------------------------------+-------+
+| utf8_capitalize | Unary | String-like | String-like | | \(8) |
++-------------------------+-------+------------------------+------------------------+-----------------------------------+-------+
+| utf8_length | Unary | String-like | Int32 or Int64 | | \(7) |
++-------------------------+-------+------------------------+------------------------+-----------------------------------+-------+
+| utf8_lower | Unary | String-like | String-like | | \(8) |
++-------------------------+-------+------------------------+------------------------+-----------------------------------+-------+
+| utf8_replace_slice | Unary | String-like | String-like | :struct:`ReplaceSliceOptions` | \(4) |
++-------------------------+-------+------------------------+------------------------+-----------------------------------+-------+
+| utf8_reverse | Unary | String-like | String-like | | \(9) |
++-------------------------+-------+------------------------+------------------------+-----------------------------------+-------+
+| utf8_swapcase | Unary | String-like | String-like | | \(8) |
++-------------------------+-------+------------------------+------------------------+-----------------------------------+-------+
+| utf8_title | Unary | String-like | String-like | | \(8) |
++-------------------------+-------+------------------------+------------------------+-----------------------------------+-------+
+| utf8_upper | Unary | String-like | String-like | | \(8) |
++-------------------------+-------+------------------------+------------------------+-----------------------------------+-------+
+
+* \(1) Each ASCII character in the input is converted to lowercase or
+ uppercase. Non-ASCII characters are left untouched.
+
+* \(2) ASCII input is reversed to the output. If non-ASCII characters
+ are present, ``Invalid`` :class:`Status` will be returned.
+
+* \(3) Output is the physical length in bytes of each input element. Output
+ type is Int32 for Binary / String, Int64 for LargeBinary / LargeString.
+
+* \(4) Replace the slice of the string from :member:`ReplaceSliceOptions::start`
+ (inclusive) to :member:`ReplaceSliceOptions::stop` (exclusive) by
+ :member:`ReplaceSliceOptions::replacement`. The binary kernel measures the
+ slice in bytes, while the UTF8 kernel measures the slice in codeunits.
+
+* \(5) Replace non-overlapping substrings that match
+ :member:`ReplaceSubstringOptions::pattern` by
+ :member:`ReplaceSubstringOptions::replacement`. If
+ :member:`ReplaceSubstringOptions::max_replacements` != -1, it determines the
+ maximum number of replacements made, counting from the left.
+
+* \(6) Replace non-overlapping substrings that match the regular expression
+ :member:`ReplaceSubstringOptions::pattern` by
+ :member:`ReplaceSubstringOptions::replacement`, using the Google RE2 library. If
+ :member:`ReplaceSubstringOptions::max_replacements` != -1, it determines the
+ maximum number of replacements made, counting from the left. Note that if the
+ pattern contains groups, backreferencing can be used.
+
+* \(7) Output is the number of characters (not bytes) of each input element.
+ Output type is Int32 for String, Int64 for LargeString. + +* \(8) Each UTF8-encoded character in the input is converted to lowercase or + uppercase. + +* \(9) Each UTF8-encoded code unit is written in reverse order to the output. + If the input is not valid UTF8, then the output is undefined (but the size of output + buffers will be preserved). + +String padding +~~~~~~~~~~~~~~ + +These functions append/prepend a given padding byte (ASCII) or codepoint (UTF8) in +order to center (center), right-align (lpad), or left-align (rpad) a string. + ++--------------------------+------------+-------------------------+---------------------+----------------------------------------+ +| Function name | Arity | Input types | Output type | Options class | ++==========================+============+=========================+=====================+========================================+ +| ascii_center | Unary | String-like | String-like | :struct:`PadOptions` | ++--------------------------+------------+-------------------------+---------------------+----------------------------------------+ +| ascii_lpad | Unary | String-like | String-like | :struct:`PadOptions` | ++--------------------------+------------+-------------------------+---------------------+----------------------------------------+ +| ascii_rpad | Unary | String-like | String-like | :struct:`PadOptions` | ++--------------------------+------------+-------------------------+---------------------+----------------------------------------+ +| utf8_center | Unary | String-like | String-like | :struct:`PadOptions` | ++--------------------------+------------+-------------------------+---------------------+----------------------------------------+ +| utf8_lpad | Unary | String-like | String-like | :struct:`PadOptions` | ++--------------------------+------------+-------------------------+---------------------+----------------------------------------+ +| utf8_rpad | Unary | String-like | String-like | :struct:`PadOptions` | ++--------------------------+------------+-------------------------+---------------------+----------------------------------------+ + +String trimming +~~~~~~~~~~~~~~~ + +These functions trim off characters on both sides (trim), or the left (ltrim) or right side (rtrim). 
+ ++--------------------------+------------+-------------------------+---------------------+----------------------------------------+---------+ +| Function name | Arity | Input types | Output type | Options class | Notes | ++==========================+============+=========================+=====================+========================================+=========+ +| ascii_ltrim | Unary | String-like | String-like | :struct:`TrimOptions` | \(1) | ++--------------------------+------------+-------------------------+---------------------+----------------------------------------+---------+ +| ascii_ltrim_whitespace | Unary | String-like | String-like | | \(2) | ++--------------------------+------------+-------------------------+---------------------+----------------------------------------+---------+ +| ascii_rtrim | Unary | String-like | String-like | :struct:`TrimOptions` | \(1) | ++--------------------------+------------+-------------------------+---------------------+----------------------------------------+---------+ +| ascii_rtrim_whitespace | Unary | String-like | String-like | | \(2) | ++--------------------------+------------+-------------------------+---------------------+----------------------------------------+---------+ +| ascii_trim | Unary | String-like | String-like | :struct:`TrimOptions` | \(1) | ++--------------------------+------------+-------------------------+---------------------+----------------------------------------+---------+ +| ascii_trim_whitespace | Unary | String-like | String-like | | \(2) | ++--------------------------+------------+-------------------------+---------------------+----------------------------------------+---------+ +| utf8_ltrim | Unary | String-like | String-like | :struct:`TrimOptions` | \(3) | ++--------------------------+------------+-------------------------+---------------------+----------------------------------------+---------+ +| utf8_ltrim_whitespace | Unary | String-like | String-like | | \(4) | ++--------------------------+------------+-------------------------+---------------------+----------------------------------------+---------+ +| utf8_rtrim | Unary | String-like | String-like | :struct:`TrimOptions` | \(3) | ++--------------------------+------------+-------------------------+---------------------+----------------------------------------+---------+ +| utf8_rtrim_whitespace | Unary | String-like | String-like | | \(4) | ++--------------------------+------------+-------------------------+---------------------+----------------------------------------+---------+ +| utf8_trim | Unary | String-like | String-like | :struct:`TrimOptions` | \(3) | ++--------------------------+------------+-------------------------+---------------------+----------------------------------------+---------+ +| utf8_trim_whitespace | Unary | String-like | String-like | | \(4) | ++--------------------------+------------+-------------------------+---------------------+----------------------------------------+---------+ + +* \(1) Only characters specified in :member:`TrimOptions::characters` will be + trimmed off. Both the input string and the `characters` argument are + interpreted as ASCII characters. + +* \(2) Only trim off ASCII whitespace characters (``'\t'``, ``'\n'``, ``'\v'``, + ``'\f'``, ``'\r'`` and ``' '``). + +* \(3) Only characters specified in :member:`TrimOptions::characters` will be + trimmed off. + +* \(4) Only trim off Unicode whitespace characters. + +String splitting +~~~~~~~~~~~~~~~~ + +These functions split strings into lists of strings. 
All kernels can optionally +be configured with a ``max_splits`` and a ``reverse`` parameter, where +``max_splits == -1`` means no limit (the default). When ``reverse`` is true, +the splitting is done starting from the end of the string; this is only relevant +when a positive ``max_splits`` is given. + ++--------------------------+------------+-------------------------+-------------------+----------------------------------+---------+ +| Function name | Arity | Input types | Output type | Options class | Notes | ++==========================+============+=========================+===================+==================================+=========+ +| ascii_split_whitespace | Unary | String-like | List-like | :struct:`SplitOptions` | \(1) | ++--------------------------+------------+-------------------------+-------------------+----------------------------------+---------+ +| split_pattern | Unary | String-like | List-like | :struct:`SplitPatternOptions` | \(2) | ++--------------------------+------------+-------------------------+-------------------+----------------------------------+---------+ +| split_pattern_regex | Unary | String-like | List-like | :struct:`SplitPatternOptions` | \(3) | ++--------------------------+------------+-------------------------+-------------------+----------------------------------+---------+ +| utf8_split_whitespace | Unary | String-like | List-like | :struct:`SplitOptions` | \(4) | ++--------------------------+------------+-------------------------+-------------------+----------------------------------+---------+ + +* \(1) A non-zero length sequence of ASCII defined whitespace bytes + (``'\t'``, ``'\n'``, ``'\v'``, ``'\f'``, ``'\r'`` and ``' '``) is seen + as separator. + +* \(2) The string is split when an exact pattern is found (the pattern itself + is not included in the output). + +* \(3) The string is split when a regex match is found (the matched + substring itself is not included in the output). + +* \(4) A non-zero length sequence of Unicode defined whitespace codepoints + is seen as separator. + +String component extraction +~~~~~~~~~~~~~~~~~~~~~~~~~~~ + ++---------------+-------+-------------+-------------+-------------------------------+-------+ +| Function name | Arity | Input types | Output type | Options class | Notes | ++===============+=======+=============+=============+===============================+=======+ +| extract_regex | Unary | String-like | Struct | :struct:`ExtractRegexOptions` | \(1) | ++---------------+-------+-------------+-------------+-------------------------------+-------+ + +* \(1) Extract substrings defined by a regular expression using the Google RE2 + library. The output struct field names refer to the named capture groups, + e.g. 'letter' and 'digit' for the regular expression + ``(?P<letter>[ab])(?P<digit>\\d)``. + +String joining +~~~~~~~~~~~~~~ + +These functions do the inverse of string splitting. 
+ ++--------------------------+-----------+-----------------------+----------------+-------------------+-----------------------+---------+ +| Function name | Arity | Input type 1 | Input type 2 | Output type | Options class | Notes | ++==========================+===========+=======================+================+===================+=======================+=========+ +| binary_join | Binary | List of string-like | String-like | String-like | | \(1) | ++--------------------------+-----------+-----------------------+----------------+-------------------+-----------------------+---------+ +| binary_join_element_wise | Varargs | String-like (varargs) | String-like | String-like | :struct:`JoinOptions` | \(2) | ++--------------------------+-----------+-----------------------+----------------+-------------------+-----------------------+---------+ + +* \(1) The first input must be an array, while the second can be a scalar or array. + Each list of values in the first input is joined using each second input + as separator. If any input list is null or contains a null, the corresponding + output will be null. + +* \(2) All arguments are concatenated element-wise, with the last argument treated + as the separator (scalars are recycled in either case). Null separators emit + null. If any other argument is null, by default the corresponding output will be + null, but it can instead either be skipped or replaced with a given string. + +String Slicing +~~~~~~~~~~~~~~ + +This function transforms each sequence of the array to a subsequence, according +to start and stop indices, and a non-zero step (defaulting to 1). Slicing +semantics follow Python slicing semantics: the start index is inclusive, +the stop index exclusive; if the step is negative, the sequence is followed +in reverse order. + ++--------------------------+------------+----------------+-----------------+--------------------------+---------+ +| Function name | Arity | Input types | Output type | Options class | Notes | ++==========================+============+================+=================+==========================+=========+ +| utf8_slice_codeunits | Unary | String-like | String-like | :struct:`SliceOptions` | \(1) | ++--------------------------+------------+----------------+-----------------+--------------------------+---------+ + +* \(1) Slice string into a substring defined by (``start``, ``stop``, ``step``) + as given by :struct:`SliceOptions` where ``start`` and ``stop`` are measured + in codeunits. Null inputs emit null. 
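+
+A minimal sketch of codeunit slicing (assuming an installed Arrow C++ library,
+the conventional ``arrow/api.h`` and ``arrow/compute/api.h`` headers, and the
+:struct:`SliceOptions` constructor taking ``start`` and ``stop``; the helper
+name ``SliceExample`` is arbitrary):
+
+.. code-block:: cpp
+
+   #include <iostream>
+   #include <memory>
+
+   #include <arrow/api.h>
+   #include <arrow/compute/api.h>
+
+   arrow::Status SliceExample() {
+     arrow::StringBuilder builder;
+     ARROW_RETURN_NOT_OK(builder.AppendValues({"hello", "world", "no"}));
+     std::shared_ptr<arrow::Array> strings;
+     ARROW_RETURN_NOT_OK(builder.Finish(&strings));
+
+     // Keep codeunits [0, 3) of each element: "hel", "wor", "no".
+     arrow::compute::SliceOptions options(/*start=*/0, /*stop=*/3);
+     ARROW_ASSIGN_OR_RAISE(
+         arrow::Datum sliced,
+         arrow::compute::CallFunction("utf8_slice_codeunits", {strings}, &options));
+     std::cout << sliced.make_array()->ToString() << std::endl;
+     return arrow::Status::OK();
+   }
+
+   int main() { return SliceExample().ok() ? 0 : 1; }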
+ +Containment tests +~~~~~~~~~~~~~~~~~ + ++-----------------------+-------+-----------------------------------+----------------+---------------------------------+-------+ +| Function name | Arity | Input types | Output type | Options class | Notes | ++=======================+=======+===================================+================+=================================+=======+ +| count_substring | Unary | String-like | Int32 or Int64 | :struct:`MatchSubstringOptions` | \(1) | ++-----------------------+-------+-----------------------------------+----------------+---------------------------------+-------+ +| count_substring_regex | Unary | String-like | Int32 or Int64 | :struct:`MatchSubstringOptions` | \(1) | ++-----------------------+-------+-----------------------------------+----------------+---------------------------------+-------+ +| ends_with | Unary | String-like | Boolean | :struct:`MatchSubstringOptions` | \(2) | ++-----------------------+-------+-----------------------------------+----------------+---------------------------------+-------+ +| find_substring | Unary | Binary- and String-like | Int32 or Int64 | :struct:`MatchSubstringOptions` | \(3) | ++-----------------------+-------+-----------------------------------+----------------+---------------------------------+-------+ +| find_substring_regex | Unary | Binary- and String-like | Int32 or Int64 | :struct:`MatchSubstringOptions` | \(3) | ++-----------------------+-------+-----------------------------------+----------------+---------------------------------+-------+ +| index_in | Unary | Boolean, Null, Numeric, Temporal, | Int32 | :struct:`SetLookupOptions` | \(4) | +| | | Binary- and String-like | | | | ++-----------------------+-------+-----------------------------------+----------------+---------------------------------+-------+ +| is_in | Unary | Boolean, Null, Numeric, Temporal, | Boolean | :struct:`SetLookupOptions` | \(5) | +| | | Binary- and String-like | | | | ++-----------------------+-------+-----------------------------------+----------------+---------------------------------+-------+ +| match_like | Unary | String-like | Boolean | :struct:`MatchSubstringOptions` | \(6) | ++-----------------------+-------+-----------------------------------+----------------+---------------------------------+-------+ +| match_substring | Unary | String-like | Boolean | :struct:`MatchSubstringOptions` | \(7) | ++-----------------------+-------+-----------------------------------+----------------+---------------------------------+-------+ +| match_substring_regex | Unary | String-like | Boolean | :struct:`MatchSubstringOptions` | \(8) | ++-----------------------+-------+-----------------------------------+----------------+---------------------------------+-------+ +| starts_with | Unary | String-like | Boolean | :struct:`MatchSubstringOptions` | \(2) | ++-----------------------+-------+-----------------------------------+----------------+---------------------------------+-------+ + +* \(1) Output is the number of occurrences of + :member:`MatchSubstringOptions::pattern` in the corresponding input + string. Output type is Int32 for Binary/String, Int64 + for LargeBinary/LargeString. + +* \(2) Output is true iff :member:`MatchSubstringOptions::pattern` + is a suffix/prefix of the corresponding input. + +* \(3) Output is the index of the first occurrence of + :member:`MatchSubstringOptions::pattern` in the corresponding input + string, otherwise -1. Output type is Int32 for Binary/String, Int64 + for LargeBinary/LargeString. 
+ +* \(4) Output is the index of the corresponding input element in + :member:`SetLookupOptions::value_set`, if found there. Otherwise, + output is null. + +* \(5) Output is true iff the corresponding input element is equal to one + of the elements in :member:`SetLookupOptions::value_set`. + +* \(6) Output is true iff the SQL-style LIKE pattern + :member:`MatchSubstringOptions::pattern` fully matches the + corresponding input element. That is, ``%`` will match any number of + characters, ``_`` will match exactly one character, and any other + character matches itself. To match a literal percent sign or + underscore, precede the character with a backslash. + +* \(7) Output is true iff :member:`MatchSubstringOptions::pattern` + is a substring of the corresponding input element. + +* \(8) Output is true iff :member:`MatchSubstringOptions::pattern` + matches the corresponding input element at any position. + +Categorizations +~~~~~~~~~~~~~~~ + ++-------------------+------------+---------------------+---------------------+------------------------+---------+ +| Function name | Arity | Input types | Output type | Options class | Notes | ++===================+============+=====================+=====================+========================+=========+ +| is_finite | Unary | Float, Double | Boolean | | \(1) | ++-------------------+------------+---------------------+---------------------+------------------------+---------+ +| is_inf | Unary | Float, Double | Boolean | | \(2) | ++-------------------+------------+---------------------+---------------------+------------------------+---------+ +| is_nan | Unary | Float, Double | Boolean | | \(3) | ++-------------------+------------+---------------------+---------------------+------------------------+---------+ +| is_null | Unary | Any | Boolean | :struct:`NullOptions` | \(4) | ++-------------------+------------+---------------------+---------------------+------------------------+---------+ +| is_valid | Unary | Any | Boolean | | \(5) | ++-------------------+------------+---------------------+---------------------+------------------------+---------+ + +* \(1) Output is true iff the corresponding input element is finite (neither Infinity, + -Infinity, nor NaN). + +* \(2) Output is true iff the corresponding input element is Infinity/-Infinity. + +* \(3) Output is true iff the corresponding input element is NaN. + +* \(4) Output is true iff the corresponding input element is null. NaN values + can also be considered null by setting :member:`NullOptions::nan_is_null`. + +* \(5) Output is true iff the corresponding input element is non-null. + +.. _cpp-compute-scalar-selections: + +Selecting / multiplexing +~~~~~~~~~~~~~~~~~~~~~~~~ + +For each "row" of input values, these functions emit one of the input values, +depending on a condition. 
+
++------------------+------------+---------------------------------------------------+---------------------+---------+
+| Function name | Arity | Input types | Output type | Notes |
++==================+============+===================================================+=====================+=========+
+| case_when | Varargs | Struct of Boolean (Arg 0), Any (rest) | Input type | \(1) |
++------------------+------------+---------------------------------------------------+---------------------+---------+
+| choose | Varargs | Integral (Arg 0), Fixed-width/Binary-like (rest) | Input type | \(2) |
++------------------+------------+---------------------------------------------------+---------------------+---------+
+| coalesce | Varargs | Any | Input type | \(3) |
++------------------+------------+---------------------------------------------------+---------------------+---------+
+| if_else | Ternary | Boolean (Arg 0), Any (rest) | Input type | \(4) |
++------------------+------------+---------------------------------------------------+---------------------+---------+
+
+* \(1) This function acts like a SQL "case when" statement or switch-case. The
+ input is a "condition" value, which is a struct of Booleans, followed by the
+ values for each "branch". There must be either exactly one value argument for
+ each child of the condition struct, or one more value argument than children
+ (in which case we have an "else" or "default" value). The output is of the
+ same type as the value inputs; each row will be the corresponding value from
+ the first value datum for which the corresponding Boolean is true, or the
+ corresponding value from the "default" input, or null otherwise.
+
+ Note that currently, while all types are supported, dictionaries will be
+ unpacked.
+
+* \(2) The first input must be an integral type. The rest of the arguments can be
+ any type, but must all be the same type or promotable to a common type. Each
+ value of the first input (the 'index') is used as a zero-based index into the
+ remaining arguments (i.e. index 0 is the second argument, index 1 is the third
+ argument, etc.), and the value of the output for that row will be the
+ corresponding value of the selected input at that row. If the index is null,
+ then the output will also be null.
+
+* \(3) Each row of the output will be the corresponding value of the first
+ input which is non-null for that row, otherwise null.
+
+* \(4) The first input must be a Boolean scalar or array. The second and third
+ inputs can be scalars or arrays and must be of the same type. Output is an
+ array (or scalar if all inputs are scalar) of the same type as the
+ second/third input. If nulls are present in the first input, they are
+ propagated to the output; otherwise, output nulls are determined by the
+ selected input values. A usage sketch is given below.
+
+ Also see: :ref:`replace_with_mask <cpp-compute-vector-structural-transforms>`.
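+
+For example, a minimal ``if_else`` sketch (assuming an installed Arrow C++
+library and the conventional ``arrow/api.h`` and ``arrow/compute/api.h``
+headers; the helper name ``IfElseExample`` is arbitrary), where the third
+input is a scalar and gets broadcast against the arrays:
+
+.. code-block:: cpp
+
+   #include <iostream>
+   #include <memory>
+   #include <vector>
+
+   #include <arrow/api.h>
+   #include <arrow/compute/api.h>
+
+   arrow::Status IfElseExample() {
+     // Condition: take the second input where true, the third where false.
+     arrow::BooleanBuilder cond_builder;
+     ARROW_RETURN_NOT_OK(cond_builder.AppendValues(std::vector<bool>{true, false, true}));
+     std::shared_ptr<arrow::Array> cond;
+     ARROW_RETURN_NOT_OK(cond_builder.Finish(&cond));
+
+     arrow::Int32Builder left_builder;
+     ARROW_RETURN_NOT_OK(left_builder.AppendValues({1, 2, 3}));
+     std::shared_ptr<arrow::Array> left;
+     ARROW_RETURN_NOT_OK(left_builder.Finish(&left));
+
+     // Scalar fallback, broadcast to the length of the arrays.
+     std::shared_ptr<arrow::Scalar> fallback = arrow::MakeScalar(int32_t(0));
+
+     // Expected output: [1, 0, 3]
+     ARROW_ASSIGN_OR_RAISE(
+         arrow::Datum out,
+         arrow::compute::CallFunction("if_else", {cond, left, fallback}));
+     std::cout << out.make_array()->ToString() << std::endl;
+     return arrow::Status::OK();
+   }
+
+   int main() { return IfElseExample().ok() ? 0 : 1; }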
+ +Structural transforms +~~~~~~~~~~~~~~~~~~~~~ + ++---------------------+------------+-------------+------------------+------------------------------+--------+ +| Function name | Arity | Input types | Output type | Options class | Notes | ++=====================+============+=============+==================+==============================+========+ +| list_value_length | Unary | List-like | Int32 or Int64 | | \(1) | ++---------------------+------------+-------------+------------------+------------------------------+--------+ +| make_struct | Varargs | Any | Struct | :struct:`MakeStructOptions` | \(2) | ++---------------------+------------+-------------+------------------+------------------------------+--------+ + +* \(1) Each output element is the length of the corresponding input element + (null if input is null). Output type is Int32 for List and FixedSizeList, + Int64 for LargeList. + +* \(2) The output struct's field types are the types of its arguments. The + field names are specified using an instance of :struct:`MakeStructOptions`. + The output shape will be scalar if all inputs are scalar, otherwise any + scalars will be broadcast to arrays. + +Conversions +~~~~~~~~~~~ + +A general conversion function named ``cast`` is provided which accepts a large +number of input and output types. The type to cast to can be passed in a +:struct:`CastOptions` instance. As an alternative, the same service is +provided by a concrete function :func:`~arrow::compute::Cast`. + ++-----------------+------------+--------------------+------------------+------------------------------+-------+ +| Function name | Arity | Input types | Output type | Options class | Notes | ++=================+============+====================+==================+==============================+=======+ +| cast | Unary | Many | Variable | :struct:`CastOptions` | | ++-----------------+------------+--------------------+------------------+------------------------------+-------+ +| strftime | Unary | Temporal | String | :struct:`StrftimeOptions` | \(1) | ++-----------------+------------+--------------------+------------------+------------------------------+-------+ +| strptime | Unary | String-like | Timestamp | :struct:`StrptimeOptions` | | ++-----------------+------------+--------------------+------------------+------------------------------+-------+ + +The conversions available with ``cast`` are listed below. In all cases, a +null input value is converted into a null output value. + +* \(1) Output precision of ``%S`` (seconds) flag depends on the input timestamp + precision. Timestamps with second precision are represented as integers while + milliseconds, microsecond and nanoseconds are represented as fixed floating + point numbers with 3, 6 and 9 decimal places respectively. To obtain integer + seconds, cast to timestamp with second resolution. + The character for the decimal point is localized according to the locale. + See `detailed formatting documentation`_ for descriptions of other flags. + +.. 
_detailed formatting documentation: https://howardhinnant.github.io/date/date.html#to_stream_formatting + +**Truth value extraction** + ++-----------------------------+------------------------------------+--------------+ +| Input type | Output type | Notes | ++=============================+====================================+==============+ +| Binary- and String-like | Boolean | \(1) | ++-----------------------------+------------------------------------+--------------+ +| Numeric | Boolean | \(2) | ++-----------------------------+------------------------------------+--------------+ + +* \(1) Output is true iff the corresponding input value has non-zero length. + +* \(2) Output is true iff the corresponding input value is non-zero. + +**Same-kind conversion** + ++-----------------------------+------------------------------------+--------------+ +| Input type | Output type | Notes | ++=============================+====================================+==============+ +| Int32 | 32-bit Temporal | \(1) | ++-----------------------------+------------------------------------+--------------+ +| Int64 | 64-bit Temporal | \(1) | ++-----------------------------+------------------------------------+--------------+ +| (Large)Binary | (Large)String | \(2) | ++-----------------------------+------------------------------------+--------------+ +| (Large)String | (Large)Binary | \(3) | ++-----------------------------+------------------------------------+--------------+ +| Numeric | Numeric | \(4) \(5) | ++-----------------------------+------------------------------------+--------------+ +| 32-bit Temporal | Int32 | \(1) | ++-----------------------------+------------------------------------+--------------+ +| 64-bit Temporal | Int64 | \(1) | ++-----------------------------+------------------------------------+--------------+ +| Temporal | Temporal | \(4) \(5) | ++-----------------------------+------------------------------------+--------------+ + +* \(1) No-operation cast: the raw values are kept identical, only + the type is changed. + +* \(2) Validates the contents if :member:`CastOptions::allow_invalid_utf8` + is false. + +* \(3) No-operation cast: only the type is changed. + +* \(4) Overflow and truncation checks are enabled depending on + the given :struct:`CastOptions`. + +* \(5) Not all such casts have been implemented. 
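+
+As an illustration of note (4), a minimal sketch (assuming an installed Arrow
+C++ library, the conventional ``arrow/api.h`` and ``arrow/compute/api.h``
+headers, and the ``CastOptions::Safe``/``CastOptions::Unsafe`` factory helpers;
+the helper name ``CastExample`` is arbitrary):
+
+.. code-block:: cpp
+
+   #include <iostream>
+   #include <memory>
+
+   #include <arrow/api.h>
+   #include <arrow/compute/api.h>
+
+   arrow::Status CastExample() {
+     arrow::DoubleBuilder builder;
+     ARROW_RETURN_NOT_OK(builder.AppendValues({1.0, 2.5, 3.0}));
+     std::shared_ptr<arrow::Array> doubles;
+     ARROW_RETURN_NOT_OK(builder.Finish(&doubles));
+
+     // Safe options (the default): truncating 2.5 to an integer is an error.
+     auto safe = arrow::compute::Cast(doubles, arrow::int32(),
+                                      arrow::compute::CastOptions::Safe());
+     std::cout << safe.status().ToString() << std::endl;
+
+     // Unsafe options: truncation checks are disabled, 2.5 becomes 2.
+     ARROW_ASSIGN_OR_RAISE(
+         arrow::Datum truncated,
+         arrow::compute::Cast(doubles, arrow::int32(),
+                              arrow::compute::CastOptions::Unsafe()));
+     std::cout << truncated.make_array()->ToString() << std::endl;
+     return arrow::Status::OK();
+   }
+
+   int main() { return CastExample().ok() ? 0 : 1; }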
+ +**String representations** + ++-----------------------------+------------------------------------+---------+ +| Input type | Output type | Notes | ++=============================+====================================+=========+ +| Boolean | String-like | | ++-----------------------------+------------------------------------+---------+ +| Numeric | String-like | | ++-----------------------------+------------------------------------+---------+ + +**Generic conversions** + ++-----------------------------+------------------------------------+---------+ +| Input type | Output type | Notes | ++=============================+====================================+=========+ +| Dictionary | Dictionary value type | \(1) | ++-----------------------------+------------------------------------+---------+ +| Extension | Extension storage type | | ++-----------------------------+------------------------------------+---------+ +| List-like | List-like | \(2) | ++-----------------------------+------------------------------------+---------+ +| Null | Any | | ++-----------------------------+------------------------------------+---------+ + +* \(1) The dictionary indices are unchanged, the dictionary values are + cast from the input value type to the output value type (if a conversion + is available). + +* \(2) The list offsets are unchanged, the list values are cast from the + input value type to the output value type (if a conversion is + available). + + +Temporal component extraction +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +These functions extract datetime components (year, month, day, etc) from temporal types. +For timestamps inputs with non-empty timezone, localized timestamp components will be returned. + ++--------------------+------------+-------------------+---------------+----------------------------+-------+ +| Function name | Arity | Input types | Output type | Options class | Notes | ++====================+============+===================+===============+============================+=======+ +| day | Unary | Temporal | Int64 | | | ++--------------------+------------+-------------------+---------------+----------------------------+-------+ +| day_of_week | Unary | Temporal | Int64 | :struct:`DayOfWeekOptions` | \(1) | ++--------------------+------------+-------------------+---------------+----------------------------+-------+ +| day_of_year | Unary | Temporal | Int64 | | | ++--------------------+------------+-------------------+---------------+----------------------------+-------+ +| hour | Unary | Timestamp, Time | Int64 | | | ++--------------------+------------+-------------------+---------------+----------------------------+-------+ +| iso_week | Unary | Temporal | Int64 | | \(2) | ++--------------------+------------+-------------------+---------------+----------------------------+-------+ +| iso_year | Unary | Temporal | Int64 | | \(2) | ++--------------------+------------+-------------------+---------------+----------------------------+-------+ +| iso_calendar | Unary | Temporal | Struct | | \(3) | ++--------------------+------------+-------------------+---------------+----------------------------+-------+ +| microsecond | Unary | Timestamp, Time | Int64 | | | ++--------------------+------------+-------------------+---------------+----------------------------+-------+ +| millisecond | Unary | Timestamp, Time | Int64 | | | ++--------------------+------------+-------------------+---------------+----------------------------+-------+ +| minute | Unary | Timestamp, Time | Int64 | | | 
++--------------------+------------+-------------------+---------------+----------------------------+-------+ +| month | Unary | Temporal | Int64 | | | ++--------------------+------------+-------------------+---------------+----------------------------+-------+ +| nanosecond | Unary | Timestamp, Time | Int64 | | | ++--------------------+------------+-------------------+---------------+----------------------------+-------+ +| quarter | Unary | Temporal | Int64 | | | ++--------------------+------------+-------------------+---------------+----------------------------+-------+ +| second | Unary | Timestamp, Time | Int64 | | | ++--------------------+------------+-------------------+---------------+----------------------------+-------+ +| subsecond | Unary | Timestamp, Time | Double | | | ++--------------------+------------+-------------------+---------------+----------------------------+-------+ +| us_week | Unary | Temporal | Int64 | | \(4) | ++--------------------+------------+-------------------+---------------+----------------------------+-------+ +| week | Unary | Timestamp | Int64 | :struct:`WeekOptions` | \(5) | ++--------------------+------------+-------------------+---------------+----------------------------+-------+ +| year | Unary | Temporal | Int64 | | | ++--------------------+------------+-------------------+---------------+----------------------------+-------+ + +* \(1) Outputs the number of the day of the week. By default, the week begins on Monday + (represented by 0) and ends on Sunday (represented by 6). Whether day numbering starts at 0 or 1 is controlled by the + :member:`DayOfWeekOptions::count_from_zero` parameter. :member:`DayOfWeekOptions::week_start` can be + used to set the starting day of the week using the ISO convention (Monday=1, Sunday=7). + The :member:`DayOfWeekOptions::week_start` parameter is not affected by :member:`DayOfWeekOptions::count_from_zero`. + +* \(2) The first ISO week has the majority (4 or more) of its days in January. The ISO year + starts with the first ISO week. ISO weeks start on Monday. + See `ISO 8601 week date definition`_ for more details. + +* \(3) Output is a ``{"iso_year": output type, "iso_week": output type, "iso_day_of_week": output type}`` Struct. + +* \(4) The first US week has the majority (4 or more) of its days in January. The US year + starts with the first US week. US weeks start on Sunday. + +* \(5) Returns the week number, with behaviour controlled by several parameters. + If :member:`WeekOptions::week_starts_monday` is true, the week starts on Monday; otherwise it starts on Sunday. + If :member:`WeekOptions::count_from_zero` is true, dates from the current year that fall into the last ISO week + of the previous year are numbered as week 0; otherwise they are numbered as week 52 or 53. + If :member:`WeekOptions::first_week_is_fully_in_year` is true, the first week (week 1) must lie entirely in January; + otherwise, a week that begins on December 29, 30, or 31 is considered the first week of the new year. + +.. _ISO 8601 week date definition: https://en.wikipedia.org/wiki/ISO_week_date#First_week + +Temporal difference +~~~~~~~~~~~~~~~~~~~ + +These functions compute the difference between two timestamps in the +specified unit. The difference is determined by the number of +boundaries crossed, not the span of time. For example, the difference +in days between 23:59:59 on one day and 00:00:01 on the next day is +one day (since midnight was crossed), not zero days (even though less +than 24 hours elapsed).
Additionally, if the timestamp has a defined +timezone, the difference is calculated in the local timezone. For +instance, the difference in years between "2019-12-31 18:00:00-0500" +and "2019-12-31 23:00:00-0500" is zero years, because the local year +is the same, even though the UTC years would be different. + ++---------------------------------+------------+-------------------+-----------------------+----------------------------+ +| Function name | Arity | Input types | Output type | Options class | ++=================================+============+===================+=======================+============================+ +| day_time_interval_between | Binary | Temporal | DayTime interval | | ++---------------------------------+------------+-------------------+-----------------------+----------------------------+ +| days_between | Binary | Timestamp, Date | Int64 | | ++---------------------------------+------------+-------------------+-----------------------+----------------------------+ +| hours_between | Binary | Temporal | Int64 | | ++---------------------------------+------------+-------------------+-----------------------+----------------------------+ +| microseconds_between | Binary | Temporal | Int64 | | ++---------------------------------+------------+-------------------+-----------------------+----------------------------+ +| milliseconds_between | Binary | Temporal | Int64 | | ++---------------------------------+------------+-------------------+-----------------------+----------------------------+ +| minutes_between | Binary | Temporal | Int64 | | ++---------------------------------+------------+-------------------+-----------------------+----------------------------+ +| month_day_nano_interval_between | Binary | Temporal | MonthDayNano interval | | ++---------------------------------+------------+-------------------+-----------------------+----------------------------+ +| month_interval_between | Binary | Timestamp, Date | Month interval | | ++---------------------------------+------------+-------------------+-----------------------+----------------------------+ +| nanoseconds_between | Binary | Temporal | Int64 | | ++---------------------------------+------------+-------------------+-----------------------+----------------------------+ +| quarters_between | Binary | Timestamp, Date | Int64 | | ++---------------------------------+------------+-------------------+-----------------------+----------------------------+ +| seconds_between | Binary | Temporal | Int64 | | ++---------------------------------+------------+-------------------+-----------------------+----------------------------+ +| weeks_between | Binary | Timestamp, Date | Int64 | :struct:`DayOfWeekOptions` | ++---------------------------------+------------+-------------------+-----------------------+----------------------------+ +| years_between | Binary | Timestamp, Date | Int64 | | ++---------------------------------+------------+-------------------+-----------------------+----------------------------+ + +Timezone handling +~~~~~~~~~~~~~~~~~ + +This function is meant to be used when an external system produces +"timezone-naive" timestamps which need to be converted to "timezone-aware" +timestamps (see for example the `definition +<https://docs.python.org/3/library/datetime.html#aware-and-naive-objects>`__ +in the Python documentation). + +Input timestamps are assumed to be relative to the timezone given in +:member:`AssumeTimezoneOptions::timezone`. 
They are converted to +UTC-relative timestamps with the timezone metadata set to the above value. +An error is returned if the timestamps already have the timezone metadata set. + ++--------------------+------------+-------------------+---------------+----------------------------------+-------+ +| Function name | Arity | Input types | Output type | Options class | Notes | ++====================+============+===================+===============+==================================+=======+ +| assume_timezone | Unary | Timestamp | Timestamp | :struct:`AssumeTimezoneOptions` | \(1) | ++--------------------+------------+-------------------+---------------+----------------------------------+-------+ + +* \(1) In addition to the timezone value, :struct:`AssumeTimezoneOptions` + allows choosing the behaviour when a timestamp is ambiguous or nonexistent + in the given timezone (because of DST shifts). + + +Array-wise ("vector") functions +------------------------------- + +Associative transforms +~~~~~~~~~~~~~~~~~~~~~~ + ++-------------------+-------+-----------------------------------+-------------+-------+ +| Function name | Arity | Input types | Output type | Notes | ++===================+=======+===================================+=============+=======+ +| dictionary_encode | Unary | Boolean, Null, Numeric, | Dictionary | \(1) | +| | | Temporal, Binary- and String-like | | | ++-------------------+-------+-----------------------------------+-------------+-------+ +| unique | Unary | Boolean, Null, Numeric, | Input type | \(2) | +| | | Temporal, Binary- and String-like | | | ++-------------------+-------+-----------------------------------+-------------+-------+ +| value_counts | Unary | Boolean, Null, Numeric, | Input type | \(3) | +| | | Temporal, Binary- and String-like | | | ++-------------------+-------+-----------------------------------+-------------+-------+ + +* \(1) Output is ``Dictionary(Int32, input type)``. + +* \(2) Duplicates are removed from the output while the original order is + maintained. + +* \(3) Output is a ``{"values": input type, "counts": Int64}`` Struct. + Each output element corresponds to a unique value in the input, along + with the number of times this value has appeared. + +Selections +~~~~~~~~~~ + +These functions select and return a subset of their input. 
+ ++---------------+--------+--------------+--------------+--------------+-------------------------+-----------+ +| Function name | Arity | Input type 1 | Input type 2 | Output type | Options class | Notes | ++===============+========+==============+==============+==============+=========================+===========+ +| array_filter | Binary | Any | Boolean | Input type 1 | :struct:`FilterOptions` | \(1) \(3) | ++---------------+--------+--------------+--------------+--------------+-------------------------+-----------+ +| array_take | Binary | Any | Integer | Input type 1 | :struct:`TakeOptions` | \(1) \(4) | ++---------------+--------+--------------+--------------+--------------+-------------------------+-----------+ +| drop_null | Unary | Any | - | Input type 1 | | \(1) \(2) | ++---------------+--------+--------------+--------------+--------------+-------------------------+-----------+ +| filter | Binary | Any | Boolean | Input type 1 | :struct:`FilterOptions` | \(1) \(3) | ++---------------+--------+--------------+--------------+--------------+-------------------------+-----------+ +| take | Binary | Any | Integer | Input type 1 | :struct:`TakeOptions` | \(1) \(4) | ++---------------+--------+--------------+--------------+--------------+-------------------------+-----------+ + +* \(1) Sparse unions are unsupported. + +* \(2) Each element in the input is appended to the output iff it is non-null. + If the input is a record batch or table, any null value in a column drops + the entire row. + +* \(3) Each element in input 1 (the values) is appended to the output iff + the corresponding element in input 2 (the filter) is true. How + nulls in the filter are handled can be configured using FilterOptions. + +* \(4) For each element *i* in input 2 (the indices), the *i*'th element + in input 1 (the values) is appended to the output. + +Sorts and partitions +~~~~~~~~~~~~~~~~~~~~ + +By default, in these functions, nulls are considered greater than any other value +(they will be sorted or partitioned at the end of the array). Floating-point +NaN values are considered greater than any other non-null value, but smaller +than nulls. This behaviour can be changed using the ``null_placement`` setting +in the respective option classes. + +.. note:: + Binary- and String-like inputs are ordered lexicographically as bytestrings, + even for String types.
+ ++-----------------------+------------+---------------------------------------------------------+-------------------+--------------------------------+----------------+ +| Function name | Arity | Input types | Output type | Options class | Notes | ++=======================+============+=========================================================+===================+================================+================+ +| array_sort_indices | Unary | Boolean, Numeric, Temporal, Binary- and String-like | UInt64 | :struct:`ArraySortOptions` | \(1) \(2) | ++-----------------------+------------+---------------------------------------------------------+-------------------+--------------------------------+----------------+ +| partition_nth_indices | Unary | Boolean, Numeric, Temporal, Binary- and String-like | UInt64 | :struct:`PartitionNthOptions` | \(3) | ++-----------------------+------------+---------------------------------------------------------+-------------------+--------------------------------+----------------+ +| select_k_unstable | Unary | Boolean, Numeric, Temporal, Binary- and String-like | UInt64 | :struct:`SelectKOptions` | \(4) \(5) | ++-----------------------+------------+---------------------------------------------------------+-------------------+--------------------------------+----------------+ +| sort_indices | Unary | Boolean, Numeric, Temporal, Binary- and String-like | UInt64 | :struct:`SortOptions` | \(1) \(4) | ++-----------------------+------------+---------------------------------------------------------+-------------------+--------------------------------+----------------+ + +* \(1) The output is an array of indices into the input, that define a + stable sort of the input. + +* \(2) The input must be an array. The default order is ascending. + +* \(3) The output is an array of indices into the input array, that define + a partial non-stable sort such that the *N*'th index points to the *N*'th + element in sorted order, and all indices before the *N*'th point to + elements less or equal to elements at or after the *N*'th (similar to + :func:`std::nth_element`). *N* is given in + :member:`PartitionNthOptions::pivot`. + +* \(4) The input can be an array, chunked array, record batch or + table. If the input is a record batch or table, one or more sort + keys must be specified. + +* \(5) The output is an array of indices into the input, that define a + non-stable sort of the input. + +.. _cpp-compute-vector-structural-transforms: + +Structural transforms +~~~~~~~~~~~~~~~~~~~~~ + ++---------------------+------------+-------------------------------------+------------------+--------+ +| Function name | Arity | Input types | Output type | Notes | ++=====================+============+=====================================+==================+========+ +| list_element | Binary | List-like (Arg 0), Integral (Arg 1) | List value type | \(1) | ++---------------------+------------+-------------------------------------+------------------+--------+ +| list_flatten | Unary | List-like | List value type | \(2) | ++---------------------+------------+-------------------------------------+------------------+--------+ +| list_parent_indices | Unary | List-like | Int32 or Int64 | \(3) | ++---------------------+------------+-------------------------------------+------------------+--------+ + +* \(1) Output is an array of the same length as the input list array. The + output values are the values at the specified index of each child list. 
+ +* \(2) The top level of nesting is removed: all values in the list child array, + including nulls, are appended to the output. However, nulls in the parent + list array are discarded. + +* \(3) For each value in the list child array, the index at which it is found + in the list array is appended to the output. Nulls in the parent list array + are discarded. Output type is Int32 for List and FixedSizeList, Int64 for + LargeList. + +These functions create a copy of the first input with some elements +replaced, based on the remaining inputs. + ++--------------------------+------------+-----------------------+--------------+--------------+--------------+-------+ +| Function name | Arity | Input type 1 | Input type 2 | Input type 3 | Output type | Notes | ++==========================+============+=======================+==============+==============+==============+=======+ +| replace_with_mask | Ternary | Fixed-width or binary | Boolean | Input type 1 | Input type 1 | \(1) | ++--------------------------+------------+-----------------------+--------------+--------------+--------------+-------+ + +* \(1) Each element in input 1 for which the corresponding Boolean in input 2 + is true is replaced with the next value from input 3. A null in input 2 + results in a corresponding null in the output. + + Also see: :ref:`if_else <cpp-compute-scalar-selections>`. diff --git a/src/arrow/docs/source/cpp/conventions.rst b/src/arrow/docs/source/cpp/conventions.rst new file mode 100644 index 000000000..218d028ee --- /dev/null +++ b/src/arrow/docs/source/cpp/conventions.rst @@ -0,0 +1,107 @@ +.. Licensed to the Apache Software Foundation (ASF) under one +.. or more contributor license agreements. See the NOTICE file +.. distributed with this work for additional information +.. regarding copyright ownership. The ASF licenses this file +.. to you under the Apache License, Version 2.0 (the +.. "License"); you may not use this file except in compliance +.. with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, +.. software distributed under the License is distributed on an +.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +.. KIND, either express or implied. See the License for the +.. specific language governing permissions and limitations +.. under the License. + +.. default-domain:: cpp +.. highlight:: cpp + +.. cpp:namespace:: arrow + +Conventions +=========== + +The Arrow C++ API follows a few simple guidelines. As with many rules, +there may be exceptions. + +Language version +---------------- + +Arrow is C++11-compatible. A few backports are used for newer functionality, +for example the :class:`std::string_view` class. + +Namespacing +----------- + +All the Arrow API (except macros) is namespaced inside a ``arrow`` namespace, +and nested namespaces thereof. + +Safe pointers +------------- + +Arrow objects are usually passed and stored using safe pointers -- most of +the time :class:`std::shared_ptr` but sometimes also :class:`std::unique_ptr`. + +Immutability +------------ + +Many Arrow objects are immutable: once constructed, their logical properties +cannot change anymore. This makes it possible to use them in multi-threaded +scenarios without requiring tedious and error-prone synchronization. + +There are obvious exceptions to this, such as IO objects or mutable data buffers. 
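+
+As a brief illustration of the two points above, the following sketch builds an
+array once and then shares the immutable result (error handling uses the macros
+described in the next section)::
+
+   #include "arrow/api.h"
+
+   arrow::Status ShareImmutableArray() {
+     arrow::Int32Builder builder;
+     ARROW_RETURN_NOT_OK(builder.AppendValues({1, 2, 3}));
+     std::shared_ptr<arrow::Array> array;
+     ARROW_RETURN_NOT_OK(builder.Finish(&array));
+     // The finished array is immutable: copies of this shared_ptr can be
+     // handed to other threads and read concurrently without locking.
+     std::shared_ptr<arrow::Array> alias = array;  // shares, does not copy data
+     return arrow::Status::OK();
+   }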
+ +Error reporting +--------------- + +Most APIs indicate a successful or erroneous outcome by returning a +:class:`arrow::Status` instance. Arrow doesn't throw exceptions of its +own, but third-party exceptions might propagate through, especially +:class:`std::bad_alloc` (but Arrow doesn't use the standard allocators for +large data). + +When an API can return either an error code or a successful value, it usually +does so by returning the template class +:class:`arrow::Result <template\<class T\> arrow::Result>`. However, +some APIs (usually deprecated) return :class:`arrow::Status` and pass the +result value as an out-pointer parameter. + +Here is an example of checking the outcome of an operation:: + + const int64_t buffer_size = 4096; + + auto maybe_buffer = arrow::AllocateBuffer(buffer_size); + if (!maybe_buffer.ok()) { + // ... handle error + } else { + std::shared_ptr<arrow::Buffer> buffer = std::move(*maybe_buffer); + // ... use allocated buffer + } + +If the caller function itself returns a :class:`arrow::Result` or +:class:`arrow::Status` and wants to propagate any non-successful outcome, two +convenience macros are available: + +* :c:macro:`ARROW_RETURN_NOT_OK` takes a :class:`arrow::Status` parameter + and returns it if not successful. + +* :c:macro:`ARROW_ASSIGN_OR_RAISE` takes a :class:`arrow::Result` parameter, + assigns its result to an *lvalue* if successful, or returns the corresponding + :class:`arrow::Status` on error. + +For example:: + + arrow::Status DoSomething() { + const int64_t buffer_size = 4096; + std::shared_ptr<arrow::Buffer> buffer; + ARROW_ASSIGN_OR_RAISE(buffer, arrow::AllocateBuffer(buffer_size)); + // ... allocation successful, do something with buffer below + + // return success at the end + return arrow::Status::OK(); + } + +.. seealso:: + :doc:`API reference for error reporting <api/support>` diff --git a/src/arrow/docs/source/cpp/csv.rst b/src/arrow/docs/source/cpp/csv.rst new file mode 100644 index 000000000..42b5af67d --- /dev/null +++ b/src/arrow/docs/source/cpp/csv.rst @@ -0,0 +1,220 @@ +.. Licensed to the Apache Software Foundation (ASF) under one +.. or more contributor license agreements. See the NOTICE file +.. distributed with this work for additional information +.. regarding copyright ownership. The ASF licenses this file +.. to you under the Apache License, Version 2.0 (the +.. "License"); you may not use this file except in compliance +.. with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, +.. software distributed under the License is distributed on an +.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +.. KIND, either express or implied. See the License for the +.. specific language governing permissions and limitations +.. under the License. + +.. default-domain:: cpp +.. highlight:: cpp + +.. cpp:namespace:: arrow::csv + +============================= +Reading and Writing CSV files +============================= + +Arrow provides a fast CSV reader allowing ingestion of external data +as Arrow tables. + +.. seealso:: + :ref:`CSV reader/writer API reference <cpp-api-csv>`. + +Basic usage +=========== + +A CSV file is read from a :class:`~arrow::io::InputStream`. + +.. code-block:: cpp + + #include "arrow/csv/api.h" + + { + // ...
+ arrow::io::IOContext io_context = arrow::io::default_io_context(); + std::shared_ptr<arrow::io::InputStream> input = ...; + + auto read_options = arrow::csv::ReadOptions::Defaults(); + auto parse_options = arrow::csv::ParseOptions::Defaults(); + auto convert_options = arrow::csv::ConvertOptions::Defaults(); + + // Instantiate TableReader from input stream and options + auto maybe_reader = + arrow::csv::TableReader::Make(io_context, + input, + read_options, + parse_options, + convert_options); + if (!maybe_reader.ok()) { + // Handle TableReader instantiation error... + } + std::shared_ptr<arrow::csv::TableReader> reader = *maybe_reader; + + // Read table from CSV file + auto maybe_table = reader->Read(); + if (!maybe_table.ok()) { + // Handle CSV read error + // (for example a CSV syntax error or failed type conversion) + } + std::shared_ptr<arrow::Table> table = *maybe_table; + } + +A CSV file is written to a :class:`~arrow::io::OutputStream`. + +.. code-block:: cpp + + #include <arrow/csv/api.h> + { + // Oneshot write + // ... + std::shared_ptr<arrow::io::OutputStream> output = ...; + auto write_options = arrow::csv::WriteOptions::Defaults(); + if (!WriteCSV(table, write_options, output.get()).ok()) { + // Handle writer error... + } + } + { + // Write incrementally + // ... + std::shared_ptr<arrow::io::OutputStream> output = ...; + auto write_options = arrow::csv::WriteOptions::Defaults(); + auto maybe_writer = arrow::csv::MakeCSVWriter(output, schema, write_options); + if (!maybe_writer.ok()) { + // Handle writer instantiation error... + } + std::shared_ptr<arrow::ipc::RecordBatchWriter> writer = *maybe_writer; + + // Write batches... + if (!writer->WriteRecordBatch(*batch).ok()) { + // Handle write error... + } + + if (!writer->Close().ok()) { + // Handle close error... + } + if (!output->Close().ok()) { + // Handle file close error... + } + } + +.. note:: The writer does not yet support all Arrow types. + +Column names +============ + +There are three possible ways to infer column names from the CSV file: + +* By default, the column names are read from the first row in the CSV file +* If :member:`ReadOptions::column_names` is set, it forces the column + names in the table to these values (the first row in the CSV file is + read as data) +* If :member:`ReadOptions::autogenerate_column_names` is true, column names + will be autogenerated with the pattern "f0", "f1"... (the first row in the + CSV file is read as data) + +Column selection +================ + +By default, Arrow reads all columns in the CSV file. You can narrow the +selection of columns with the :member:`ConvertOptions::include_columns` +option. If some columns in :member:`ConvertOptions::include_columns` +are missing from the CSV file, an error will be emitted unless +:member:`ConvertOptions::include_missing_columns` is true, in which case +the missing columns are assumed to contain all-null values. + +Interaction with column names +----------------------------- + +If both :member:`ReadOptions::column_names` and +:member:`ConvertOptions::include_columns` are specified, +the :member:`ReadOptions::column_names` are assumed to map to CSV columns, +and :member:`ConvertOptions::include_columns` is a subset of those column +names that will be part of the Arrow Table. + +Data types +========== + +By default, the CSV reader infers the most appropriate data type for each +column.
Type inference considers the following data types, in order: + +* Null +* Int64 +* Boolean +* Date32 +* Time32 (with seconds unit) +* Timestamp (with seconds unit) +* Timestamp (with nanoseconds unit) +* Float64 +* Dictionary<String> (if :member:`ConvertOptions::auto_dict_encode` is true) +* Dictionary<Binary> (if :member:`ConvertOptions::auto_dict_encode` is true) +* String +* Binary + +It is possible to override type inference for select columns by setting +the :member:`ConvertOptions::column_types` option. Explicit data types +can be chosen from the following list: + +* Null +* All Integer types +* Float32 and Float64 +* Decimal128 +* Boolean +* Date32 and Date64 +* Time32 and Time64 +* Timestamp +* Binary and Large Binary +* String and Large String (with optional UTF8 input validation) +* Fixed-Size Binary +* Dictionary with index type Int32 and value type one of the following: + Binary, String, LargeBinary, LargeString, Int32, UInt32, Int64, UInt64, + Float32, Float64, Decimal128 + +Other data types do not support conversion from CSV values and will error out. + +Dictionary inference +-------------------- + +If type inference is enabled and :member:`ConvertOptions::auto_dict_encode` +is true, the CSV reader first tries to convert string-like columns to a +dictionary-encoded string-like array. It switches to a plain string-like +array when the threshold in :member:`ConvertOptions::auto_dict_max_cardinality` +is reached. + +Nulls +----- + +Null values are recognized from the spellings stored in +:member:`ConvertOptions::null_values`. The :func:`ConvertOptions::Defaults` +factory method will initialize a number of conventional null spellings such +as ``N/A``. + +Character encoding +------------------ + +CSV files are expected to be encoded in UTF8. However, non-UTF8 data +is accepted for Binary columns. + +Write Options +============= + +The format of written CSV files can be customized via :class:`~arrow::csv::WriteOptions`. +Currently few options are available; more will be added in future releases. + +Performance +=========== + +By default, the CSV reader will parallelize reads in order to exploit all +CPU cores on your machine. You can change this setting in +:member:`ReadOptions::use_threads`. A reasonable expectation is at least +100 MB/s per core on a performant desktop or laptop computer (measured in +source CSV bytes, not target Arrow data bytes). diff --git a/src/arrow/docs/source/cpp/dataset.rst b/src/arrow/docs/source/cpp/dataset.rst new file mode 100644 index 000000000..e7161a458 --- /dev/null +++ b/src/arrow/docs/source/cpp/dataset.rst @@ -0,0 +1,417 @@ +.. Licensed to the Apache Software Foundation (ASF) under one +.. or more contributor license agreements. See the NOTICE file +.. distributed with this work for additional information +.. regarding copyright ownership. The ASF licenses this file +.. to you under the Apache License, Version 2.0 (the +.. "License"); you may not use this file except in compliance +.. with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, +.. software distributed under the License is distributed on an +.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +.. KIND, either express or implied. See the License for the +.. specific language governing permissions and limitations +.. under the License. + +.. default-domain:: cpp +.. highlight:: cpp + +================ +Tabular Datasets +================ + +.. 
seealso:: + :doc:`Dataset API reference <api/dataset>` + +.. warning:: + + The ``arrow::dataset`` namespace is experimental, and a stable API + is not yet guaranteed. + +The Arrow Datasets library provides functionality to efficiently work with +tabular, potentially larger than memory, and multi-file datasets. This includes: + +* A unified interface that supports different sources and file formats + (currently, Parquet, ORC, Feather / Arrow IPC, and CSV files) and different + file systems (local, cloud). +* Discovery of sources (crawling directories, handling partitioned datasets with + various partitioning schemes, basic schema normalization, ...) +* Optimized reading with predicate pushdown (filtering rows), projection + (selecting and deriving columns), and optionally parallel reading. + +The goal is to expand support to other file formats and data sources +(e.g. database connections) in the future. + +Reading Datasets +---------------- + +For the examples below, let's create a small dataset consisting +of a directory with two parquet files: + +.. literalinclude:: ../../../cpp/examples/arrow/dataset_documentation_example.cc + :language: cpp + :start-after: (Doc section: Reading Datasets) + :end-before: (Doc section: Reading Datasets) + :linenos: + :lineno-match: + +(See the full example at bottom: :ref:`cpp-dataset-full-example`.) + +Dataset discovery +~~~~~~~~~~~~~~~~~ + +A :class:`arrow::dataset::Dataset` object can be created using the various +:class:`arrow::dataset::DatasetFactory` objects. Here, we'll use the +:class:`arrow::dataset::FileSystemDatasetFactory`, which can create a dataset +given a base directory path: + +.. literalinclude:: ../../../cpp/examples/arrow/dataset_documentation_example.cc + :language: cpp + :start-after: (Doc section: Dataset discovery) + :end-before: (Doc section: Dataset discovery) + :emphasize-lines: 6-11 + :linenos: + :lineno-match: + +We're also passing the filesystem to use and the file format to use for reading. +This lets us choose between (for example) reading local files or files in Amazon +S3, or between Parquet and CSV. + +In addition to searching a base directory, we can list file paths manually. + +Creating a :class:`arrow::dataset::Dataset` does not begin reading the data +itself. It only crawls the directory to find all the files (if needed), which can +be retrieved with :func:`arrow::dataset::FileSystemDataset::files`: + +.. code-block:: cpp + + // Print out the files crawled (only for FileSystemDataset) + for (const auto& filename : dataset->files()) { + std::cout << filename << std::endl; + } + +…and infers the dataset's schema (by default from the first file): + +.. code-block:: cpp + + std::cout << dataset->schema()->ToString() << std::endl; + +Using the :func:`arrow::dataset::Dataset::NewScan` method, we can build a +:class:`arrow::dataset::Scanner` and read the dataset (or a portion of it) into +a :class:`arrow::Table` with the :func:`arrow::dataset::Scanner::ToTable` +method: + +.. literalinclude:: ../../../cpp/examples/arrow/dataset_documentation_example.cc + :language: cpp + :start-after: (Doc section: Dataset discovery) + :end-before: (Doc section: Dataset discovery) + :emphasize-lines: 16-19 + :linenos: + :lineno-match: + +.. TODO: iterative loading not documented pending API changes +.. note:: Depending on the size of your dataset, this can require a lot of + memory; see :ref:`cpp-dataset-filtering-data` below on + filtering/projecting. 
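+
+As mentioned above, instead of crawling a base directory we can also build a
+dataset from an explicit list of file paths. A sketch (the paths are
+hypothetical placeholders, ``filesystem`` is the one created above, and the
+overload of ``FileSystemDatasetFactory::Make`` taking a vector of paths is
+assumed):
+
+.. code-block:: cpp
+
+   std::vector<std::string> paths = {"/tmp/parquet_dataset/data1.parquet",
+                                     "/tmp/parquet_dataset/data2.parquet"};
+   auto format = std::make_shared<ds::ParquetFileFormat>();
+   ds::FileSystemFactoryOptions options;
+   auto factory =
+       ds::FileSystemDatasetFactory::Make(filesystem, paths, format, options)
+           .ValueOrDie();
+   auto dataset = factory->Finish().ValueOrDie();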
+ +Reading different file formats +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The above examples use Parquet files on local disk, but the Dataset API +provides a consistent interface across multiple file formats and filesystems. +(See :ref:`cpp-dataset-cloud-storage` for more information on the latter.) +Currently, Parquet, ORC, Feather / Arrow IPC, and CSV file formats are +supported; more formats are planned in the future. + +If we save the table as Feather files instead of Parquet files: + +.. literalinclude:: ../../../cpp/examples/arrow/dataset_documentation_example.cc + :language: cpp + :start-after: (Doc section: Reading different file formats) + :end-before: (Doc section: Reading different file formats) + :linenos: + :lineno-match: + +…then we can read the Feather file by passing an :class:`arrow::dataset::IpcFileFormat`: + +.. code-block:: cpp + + auto format = std::make_shared<ds::IpcFileFormat>(); + // ... + auto factory = ds::FileSystemDatasetFactory::Make(filesystem, selector, format, options) + .ValueOrDie(); + +Customizing file formats +~~~~~~~~~~~~~~~~~~~~~~~~ + +:class:`arrow::dataset::FileFormat` objects have properties that control how +files are read. For example:: + + auto format = std::make_shared<ds::ParquetFileFormat>(); + format->reader_options.dict_columns.insert("a"); + +This will configure column ``"a"`` to be dictionary-encoded when read. Similarly, +setting :member:`arrow::dataset::CsvFileFormat::parse_options` lets us change +things like reading comma-separated or tab-separated data. + +Additionally, passing an :class:`arrow::dataset::FragmentScanOptions` to +:func:`arrow::dataset::ScannerBuilder::FragmentScanOptions` offers fine-grained +control over data scanning. For example, for CSV files, we can change what values +are converted into Boolean true and false at scan time. + +.. _cpp-dataset-filtering-data: + +Filtering data +-------------- + +So far, we've been reading the entire dataset, but if we need only a subset of the +data, this can waste time or memory reading data we don't need. The +:class:`arrow::dataset::Scanner` offers control over what data to read. + +In this snippet, we use :func:`arrow::dataset::ScannerBuilder::Project` to select +which columns to read: + +.. literalinclude:: ../../../cpp/examples/arrow/dataset_documentation_example.cc + :language: cpp + :start-after: (Doc section: Filtering data) + :end-before: (Doc section: Filtering data) + :emphasize-lines: 16 + :linenos: + :lineno-match: + +Some formats, such as Parquet, can reduce I/O costs here by reading only the +specified columns from the filesystem. + +A filter can be provided with :func:`arrow::dataset::ScannerBuilder::Filter`, so +that rows which do not match the filter predicate will not be included in the +returned table. Again, some formats, such as Parquet, can use this filter to +reduce the amount of I/O needed. + +.. literalinclude:: ../../../cpp/examples/arrow/dataset_documentation_example.cc + :language: cpp + :start-after: (Doc section: Filtering data) + :end-before: (Doc section: Filtering data) + :emphasize-lines: 17 + :linenos: + :lineno-match: + +.. TODO Expressions not documented pending renamespacing + +Projecting columns +------------------ + +In addition to selecting columns, :func:`arrow::dataset::ScannerBuilder::Project` +can also be used for more complex projections, such as renaming columns, casting +them to other types, and even deriving new columns based on evaluating +expressions.
+ +In this case, we pass a vector of expressions used to construct column values +and a vector of names for the columns: + +.. literalinclude:: ../../../cpp/examples/arrow/dataset_documentation_example.cc + :language: cpp + :start-after: (Doc section: Projecting columns) + :end-before: (Doc section: Projecting columns) + :emphasize-lines: 18-28 + :linenos: + :lineno-match: + +This also determines the column selection; only the given columns will be +present in the resulting table. If you want to include a derived column in +*addition* to the existing columns, you can build up the expressions from the +dataset schema: + +.. literalinclude:: ../../../cpp/examples/arrow/dataset_documentation_example.cc + :language: cpp + :start-after: (Doc section: Projecting columns #2) + :end-before: (Doc section: Projecting columns #2) + :emphasize-lines: 17-27 + :linenos: + :lineno-match: + +.. note:: When combining filters and projections, Arrow will determine all + necessary columns to read. For instance, if you filter on a column that + isn't ultimately selected, Arrow will still read the column to evaluate + the filter. + +Reading and writing partitioned data +------------------------------------ + +So far, we've been working with datasets consisting of flat directories with +files. Oftentimes, a dataset will have one or more columns that are frequently +filtered on. Instead of having to read and then filter the data, by organizing the +files into a nested directory structure, we can define a partitioned dataset, +where sub-directory names hold information about which subset of the data is +stored in that directory. Then, we can more efficiently filter data by using that +information to avoid loading files that don't match the filter. + +For example, a dataset partitioned by year and month may have the following layout: + +.. code-block:: text + + dataset_name/ + year=2007/ + month=01/ + data0.parquet + data1.parquet + ... + month=02/ + data0.parquet + data1.parquet + ... + month=03/ + ... + year=2008/ + month=01/ + ... + ... + +The above partitioning scheme is using "/key=value/" directory names, as found in +Apache Hive. Under this convention, the file at +``dataset_name/year=2007/month=01/data0.parquet`` contains only data for which +``year == 2007`` and ``month == 01``. + +Let's create a small partitioned dataset. For this, we'll use Arrow's dataset +writing functionality. + +.. literalinclude:: ../../../cpp/examples/arrow/dataset_documentation_example.cc + :language: cpp + :start-after: (Doc section: Reading and writing partitioned data) + :end-before: (Doc section: Reading and writing partitioned data) + :emphasize-lines: 25-42 + :linenos: + :lineno-match: + +The above created a directory with two subdirectories ("part=a" and "part=b"), +and the Parquet files written in those directories no longer include the "part" +column. + +Reading this dataset, we now specify that the dataset should use a Hive-like +partitioning scheme: + +.. literalinclude:: ../../../cpp/examples/arrow/dataset_documentation_example.cc + :language: cpp + :start-after: (Doc section: Reading and writing partitioned data #2) + :end-before: (Doc section: Reading and writing partitioned data #2) + :emphasize-lines: 7,9-11 + :linenos: + :lineno-match: + +Although the partition fields are not included in the actual Parquet files, +they will be added back to the resulting table when scanning this dataset: + +.. 
code-block:: text + + $ ./debug/dataset_documentation_example file:///tmp parquet_hive partitioned + Found fragment: /tmp/parquet_dataset/part=a/part0.parquet + Partition expression: (part == "a") + Found fragment: /tmp/parquet_dataset/part=b/part1.parquet + Partition expression: (part == "b") + Read 20 rows + a: int64 + -- field metadata -- + PARQUET:field_id: '1' + b: double + -- field metadata -- + PARQUET:field_id: '2' + c: int64 + -- field metadata -- + PARQUET:field_id: '3' + part: string + ---- + # snip... + +We can now filter on the partition keys, which avoids loading files +altogether if they do not match the filter: + +.. literalinclude:: ../../../cpp/examples/arrow/dataset_documentation_example.cc + :language: cpp + :start-after: (Doc section: Reading and writing partitioned data #3) + :end-before: (Doc section: Reading and writing partitioned data #3) + :emphasize-lines: 15-18 + :linenos: + :lineno-match: + +Different partitioning schemes +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The above example uses a Hive-like directory scheme, such as "/year=2009/month=11/day=15". +We specified this by passing the Hive partitioning factory. In this case, the types of +the partition keys are inferred from the file paths. + +It is also possible to directly construct the partitioning and explicitly define +the schema of the partition keys. For example: + +.. code-block:: cpp + + auto part = std::make_shared<ds::HivePartitioning>(arrow::schema({ + arrow::field("year", arrow::int16()), + arrow::field("month", arrow::int8()), + arrow::field("day", arrow::int32()) + })); + +Arrow supports another partitioning scheme, "directory partitioning", where the +segments in the file path represent the values of the partition keys without +including the name (the field names are implicit in the segment's index). For +example, given field names "year", "month", and "day", one path might be +"/2019/11/15". + +Since the names are not included in the file paths, these must be specified +when constructing a directory partitioning: + +.. code-block:: cpp + + auto part = ds::DirectoryPartitioning::MakeFactory({"year", "month", "day"}); + +Directory partitioning also supports providing a full schema rather than inferring +types from file paths. + +Reading from other data sources +------------------------------- + +Reading in-memory data +~~~~~~~~~~~~~~~~~~~~~~ + +If you already have data in memory that you'd like to use with the Datasets API +(e.g. to filter/project data, or to write it out to a filesystem), you can wrap it +in an :class:`arrow::dataset::InMemoryDataset`: + +.. code-block:: cpp + + auto table = arrow::Table::FromRecordBatches(...); + auto dataset = std::make_shared<arrow::dataset::InMemoryDataset>(std::move(table)); + // Scan the dataset, filter, it, etc. + auto scanner_builder = dataset->NewScan(); + +In the example, we used the InMemoryDataset to write our example data to local +disk which was used in the rest of the example: + +.. literalinclude:: ../../../cpp/examples/arrow/dataset_documentation_example.cc + :language: cpp + :start-after: Reading and writing partitioned data + :end-before: Reading and writing partitioned data + :emphasize-lines: 24-28 + :linenos: + :lineno-match: + +.. _cpp-dataset-cloud-storage: + +Reading from cloud storage +~~~~~~~~~~~~~~~~~~~~~~~~~~ + +In addition to local files, Arrow Datasets also support reading from cloud +storage systems, such as Amazon S3, by passing a different filesystem. 
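+
+For example, a rough sketch of discovering the same kind of dataset from an S3
+bucket (the bucket name and region are hypothetical placeholders, Arrow must be
+built with S3 support, and ``format`` and ``options`` are as in the earlier
+examples):
+
+.. code-block:: cpp
+
+   #include "arrow/filesystem/api.h"
+
+   // Initialize S3 support once per process
+   if (!arrow::fs::InitializeS3(arrow::fs::S3GlobalOptions{}).ok()) {
+     // Handle S3 initialization error...
+   }
+
+   auto s3_options = arrow::fs::S3Options::Defaults();
+   s3_options.region = "us-east-1";
+   std::shared_ptr<arrow::fs::FileSystem> filesystem =
+       arrow::fs::S3FileSystem::Make(s3_options).ValueOrDie();
+
+   arrow::fs::FileSelector selector;
+   selector.base_dir = "my-bucket/parquet_dataset";
+   selector.recursive = true;
+
+   auto factory =
+       ds::FileSystemDatasetFactory::Make(filesystem, selector, format, options)
+           .ValueOrDie();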
+ +See the :ref:`filesystem <cpp-filesystems>` docs for more details on the available +filesystems. + +.. _cpp-dataset-full-example: + +Full Example +------------ + +.. literalinclude:: ../../../cpp/examples/arrow/dataset_documentation_example.cc + :language: cpp + :linenos: diff --git a/src/arrow/docs/source/cpp/datatypes.rst b/src/arrow/docs/source/cpp/datatypes.rst new file mode 100644 index 000000000..9149420a4 --- /dev/null +++ b/src/arrow/docs/source/cpp/datatypes.rst @@ -0,0 +1,68 @@ +.. Licensed to the Apache Software Foundation (ASF) under one +.. or more contributor license agreements. See the NOTICE file +.. distributed with this work for additional information +.. regarding copyright ownership. The ASF licenses this file +.. to you under the Apache License, Version 2.0 (the +.. "License"); you may not use this file except in compliance +.. with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, +.. software distributed under the License is distributed on an +.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +.. KIND, either express or implied. See the License for the +.. specific language governing permissions and limitations +.. under the License. + +.. default-domain:: cpp +.. highlight:: cpp + +Data Types +========== + +.. seealso:: + :doc:`Datatype API reference <api/datatype>`. + +Data types govern how physical data is interpreted. Their :ref:`specification +<format_columnar>` allows binary interoperability between different Arrow +implementations, including from different programming languages and runtimes +(for example it is possible to access the same data, without copying, from +both Python and Java using the :py:mod:`pyarrow.jvm` bridge module). + +Information about a data type in C++ can be represented in three ways: + +1. Using a :class:`arrow::DataType` instance (e.g. as a function argument) +2. Using a :class:`arrow::DataType` concrete subclass (e.g. as a template + parameter) +3. Using a :type:`arrow::Type::type` enum value (e.g. as the condition of + a switch statement) + +The first form (using a :class:`arrow::DataType` instance) is the most idiomatic +and flexible. Runtime-parametric types can only be fully represented with +a DataType instance. For example, a :class:`arrow::TimestampType` needs to be +constructed at runtime with a :type:`arrow::TimeUnit::type` parameter; a +:class:`arrow::Decimal128Type` with *scale* and *precision* parameters; +a :class:`arrow::ListType` with a full child type (itself a +:class:`arrow::DataType` instance). + +The two other forms can be used where performance is critical, in order to +avoid paying the price of dynamic typing and polymorphism. However, some +amount of runtime switching can still be required for parametric types. +It is not possible to reify all possible types at compile time, since Arrow +data types allows arbitrary nesting. 
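+
+As an illustration, a small sketch of dispatching on the :type:`arrow::Type::type`
+enum at runtime (the helper function and its return strings are hypothetical)::
+
+   #include <string>
+
+   #include "arrow/api.h"
+
+   std::string DescribeType(const arrow::DataType& type) {
+     switch (type.id()) {
+       case arrow::Type::INT32:
+         return "32-bit integer";
+       case arrow::Type::TIMESTAMP: {
+         // Parametric types still need the DataType instance for their details
+         const auto& ts_type = static_cast<const arrow::TimestampType&>(type);
+         return "timestamp with unit " +
+                std::to_string(static_cast<int>(ts_type.unit()));
+       }
+       default:
+         return type.ToString();
+     }
+   }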
+ +Creating data types +------------------- + +To instantiate data types, it is recommended to call the provided +:ref:`factory functions <api-type-factories>`:: + + std::shared_ptr<arrow::DataType> type; + + // A 16-bit integer type + type = arrow::int16(); + // A 64-bit timestamp type (with microsecond granularity) + type = arrow::timestamp(arrow::TimeUnit::MICRO); + // A list type of single-precision floating-point values + type = arrow::list(arrow::float32()); diff --git a/src/arrow/docs/source/cpp/examples/cmake_minimal_build.rst b/src/arrow/docs/source/cpp/examples/cmake_minimal_build.rst new file mode 100644 index 000000000..f135de830 --- /dev/null +++ b/src/arrow/docs/source/cpp/examples/cmake_minimal_build.rst @@ -0,0 +1,28 @@ +.. Licensed to the Apache Software Foundation (ASF) under one +.. or more contributor license agreements. See the NOTICE file +.. distributed with this work for additional information +.. regarding copyright ownership. The ASF licenses this file +.. to you under the Apache License, Version 2.0 (the +.. "License"); you may not use this file except in compliance +.. with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, +.. software distributed under the License is distributed on an +.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +.. KIND, either express or implied. See the License for the +.. specific language governing permissions and limitations +.. under the License. + +.. default-domain:: cpp +.. highlight:: cpp + +Minimal build using CMake +========================== + +The folder ``cpp/examples/minimal_build/`` located inside the source tree +contains a Docker-based example of building and using Arrow from a +third-party project, using CMake. The +`README <https://github.com/apache/arrow/tree/master/cpp/examples/minimal_build/README.md>`_ +file in that folder has more information. diff --git a/src/arrow/docs/source/cpp/examples/compute_and_write_example.rst b/src/arrow/docs/source/cpp/examples/compute_and_write_example.rst new file mode 100644 index 000000000..096b97b83 --- /dev/null +++ b/src/arrow/docs/source/cpp/examples/compute_and_write_example.rst @@ -0,0 +1,28 @@ +.. Licensed to the Apache Software Foundation (ASF) under one +.. or more contributor license agreements. See the NOTICE file +.. distributed with this work for additional information +.. regarding copyright ownership. The ASF licenses this file +.. to you under the Apache License, Version 2.0 (the +.. "License"); you may not use this file except in compliance +.. with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, +.. software distributed under the License is distributed on an +.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +.. KIND, either express or implied. See the License for the +.. specific language governing permissions and limitations +.. under the License. + +.. default-domain:: cpp +.. highlight:: cpp + +Compute and Write CSV Example +============================= + +The file ``cpp/examples/arrow/compute_and_write_csv_example.cc`` located inside +the source tree contains an example of creating a table of two numerical columns +and then comparing the magnitudes of the entries in the columns and writing out to +a CSV file with the column entries and their comparisons. The code in the example +is documented.
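+
+As a rough, hypothetical sketch of the same idea (not the example file itself;
+the column names ``a`` and ``b``, the ``"greater"`` compute function, and the
+output path are illustrative assumptions)::
+
+   #include "arrow/api.h"
+   #include "arrow/compute/api.h"
+   #include "arrow/csv/api.h"
+   #include "arrow/io/api.h"
+
+   arrow::Status CompareAndWrite(const std::shared_ptr<arrow::Table>& table) {
+     namespace cp = arrow::compute;
+     // Element-wise comparison of the two numeric columns
+     ARROW_ASSIGN_OR_RAISE(arrow::Datum a_gt_b,
+                           cp::CallFunction("greater", {table->GetColumnByName("a"),
+                                                        table->GetColumnByName("b")}));
+     // Append the comparison result as a new Boolean column
+     ARROW_ASSIGN_OR_RAISE(auto augmented,
+                           table->AddColumn(table->num_columns(),
+                                            arrow::field("a_gt_b", arrow::boolean()),
+                                            a_gt_b.chunked_array()));
+     // Write the augmented table out as CSV
+     ARROW_ASSIGN_OR_RAISE(auto output,
+                           arrow::io::FileOutputStream::Open("comparison.csv"));
+     return arrow::csv::WriteCSV(*augmented, arrow::csv::WriteOptions::Defaults(),
+                                 output.get());
+   }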
diff --git a/src/arrow/docs/source/cpp/examples/dataset_documentation_example.rst b/src/arrow/docs/source/cpp/examples/dataset_documentation_example.rst new file mode 100644 index 000000000..2bc993f24 --- /dev/null +++ b/src/arrow/docs/source/cpp/examples/dataset_documentation_example.rst @@ -0,0 +1,27 @@ +.. Licensed to the Apache Software Foundation (ASF) under one +.. or more contributor license agreements. See the NOTICE file +.. distributed with this work for additional information +.. regarding copyright ownership. The ASF licenses this file +.. to you under the Apache License, Version 2.0 (the +.. "License"); you may not use this file except in compliance +.. with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, +.. software distributed under the License is distributed on an +.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +.. KIND, either express or implied. See the License for the +.. specific language governing permissions and limitations +.. under the License. + +.. default-domain:: cpp +.. highlight:: cpp + +Arrow Datasets example +========================= + +The file ``cpp/examples/arrow/dataset_documentation_example.cc`` +located inside the source tree contains an example of using Arrow +Datasets to read, write, select, and filter data. :doc:`../dataset` +has a full walkthrough of the example. diff --git a/src/arrow/docs/source/cpp/examples/index.rst b/src/arrow/docs/source/cpp/examples/index.rst new file mode 100644 index 000000000..bc5bd497c --- /dev/null +++ b/src/arrow/docs/source/cpp/examples/index.rst @@ -0,0 +1,28 @@ +.. Licensed to the Apache Software Foundation (ASF) under one +.. or more contributor license agreements. See the NOTICE file +.. distributed with this work for additional information +.. regarding copyright ownership. The ASF licenses this file +.. to you under the Apache License, Version 2.0 (the +.. "License"); you may not use this file except in compliance +.. with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, +.. software distributed under the License is distributed on an +.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +.. KIND, either express or implied. See the License for the +.. specific language governing permissions and limitations +.. under the License. + +Examples +======== + +.. toctree:: + :maxdepth: 1 + + cmake_minimal_build + compute_and_write_example + dataset_documentation_example + row_columnar_conversion + std::tuple-like ranges to Arrow <tuple_range_conversion> diff --git a/src/arrow/docs/source/cpp/examples/row_columnar_conversion.rst b/src/arrow/docs/source/cpp/examples/row_columnar_conversion.rst new file mode 100644 index 000000000..3f45864c2 --- /dev/null +++ b/src/arrow/docs/source/cpp/examples/row_columnar_conversion.rst @@ -0,0 +1,27 @@ +.. Licensed to the Apache Software Foundation (ASF) under one +.. or more contributor license agreements. See the NOTICE file +.. distributed with this work for additional information +.. regarding copyright ownership. The ASF licenses this file +.. to you under the Apache License, Version 2.0 (the +.. "License"); you may not use this file except in compliance +.. with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, +.. 
software distributed under the License is distributed on an +.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +.. KIND, either express or implied. See the License for the +.. specific language governing permissions and limitations +.. under the License. + +.. default-domain:: cpp +.. highlight:: cpp + +Row to columnar conversion +========================== + +The following example converts an array of structs to a :class:`arrow::Table` +instance, and then converts it back to the original array of structs. + +.. literalinclude:: ../../../../cpp/examples/arrow/row_wise_conversion_example.cc diff --git a/src/arrow/docs/source/cpp/examples/tuple_range_conversion.rst b/src/arrow/docs/source/cpp/examples/tuple_range_conversion.rst new file mode 100644 index 000000000..64ba23782 --- /dev/null +++ b/src/arrow/docs/source/cpp/examples/tuple_range_conversion.rst @@ -0,0 +1,106 @@ +.. Licensed to the Apache Software Foundation (ASF) under one +.. or more contributor license agreements. See the NOTICE file +.. distributed with this work for additional information +.. regarding copyright ownership. The ASF licenses this file +.. to you under the Apache License, Version 2.0 (the +.. "License"); you may not use this file except in compliance +.. with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, +.. software distributed under the License is distributed on an +.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +.. KIND, either express or implied. See the License for the +.. specific language governing permissions and limitations +.. under the License. + +.. default-domain:: cpp +.. highlight:: cpp + +Conversion of range of ``std::tuple``-like to ``Table`` instances +================================================================= + +While the above example shows a quite manual approach of a row to columnar +conversion, Arrow also provides some template logic to convert ranges of +``std::tuple<..>``-like objects to tables. + +In the most simple case, you only need to provide the input data and the +type conversion is then inferred at compile time. + +.. code:: + + std::vector<std::tuple<double, std::string>> rows = .. + std::shared_ptr<Table> table; + + if (!arrow::stl::TableFromTupleRange( + arrow::default_memory_pool(), + rows, names, &table).ok() + ) { + // Error handling code should go here. + } + +In reverse, you can use ``TupleRangeFromTable`` to fill an already +pre-allocated range with the data from a ``Table`` instance. + +.. code:: + + // An important aspect here is that the table columns need to be in the + // same order as the columns will later appear in the tuple. As the tuple + // is unnamed, matching is done on positions. + std::shared_ptr<Table> table = .. + + // The range needs to be pre-allocated to the respective amount of rows. + // This allows us to pass in an arbitrary range object, not only + // `std::vector`. + std::vector<std::tuple<double, std::string>> rows(2); + if (!arrow::stl::TupleRangeFromTable(*table, &rows).ok()) { + // Error handling code should go here. + } + +Arrow itself already supports some C(++) data types for this conversion. If you +want to support additional data types, you need to implement a specialization +of ``arrow::stl::ConversionTraits<T>`` and the more general +``arrow::CTypeTraits<T>``. + + +.. 
code:: + + namespace arrow { + + template<> + struct CTypeTraits<boost::posix_time::ptime> { + using ArrowType = ::arrow::TimestampType; + + static std::shared_ptr<::arrow::DataType> type_singleton() { + return ::arrow::timestamp(::arrow::TimeUnit::MICRO); + } + }; + + } + + namespace arrow { namespace stl { + + template <> + struct ConversionTraits<boost::posix_time::ptime> : public CTypeTraits<boost::posix_time::ptime> { + constexpr static bool nullable = false; + + // This is the specialization to load a scalar value into an Arrow builder. + static Status AppendRow( + typename TypeTraits<TimestampType>::BuilderType& builder, + boost::posix_time::ptime cell) { + boost::posix_time::ptime const epoch({1970, 1, 1}, {0, 0, 0, 0}); + return builder.Append((cell - epoch).total_microseconds()); + } + + // Specify how we can fill the tuple from the values stored in the Arrow + // array. + static boost::posix_time::ptime GetEntry( + const TimestampArray& array, size_t j) { + return psapp::arrow::internal::timestamp_epoch + + boost::posix_time::time_duration(0, 0, 0, array.Value(j)); + } + }; + + }} + diff --git a/src/arrow/docs/source/cpp/flight.rst b/src/arrow/docs/source/cpp/flight.rst new file mode 100644 index 000000000..c1d2e43b9 --- /dev/null +++ b/src/arrow/docs/source/cpp/flight.rst @@ -0,0 +1,119 @@ +.. Licensed to the Apache Software Foundation (ASF) under one +.. or more contributor license agreements. See the NOTICE file +.. distributed with this work for additional information +.. regarding copyright ownership. The ASF licenses this file +.. to you under the Apache License, Version 2.0 (the +.. "License"); you may not use this file except in compliance +.. with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, +.. software distributed under the License is distributed on an +.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +.. KIND, either express or implied. See the License for the +.. specific language governing permissions and limitations +.. under the License. + +.. default-domain:: cpp +.. highlight:: cpp + +================ +Arrow Flight RPC +================ + +Arrow Flight is an RPC framework for efficient transfer of Flight data +over the network. See :doc:`../format/Flight` for full details on +the protocol, or :doc:`./api/flight` for API docs. + +Writing a Flight Service +======================== + +Servers are subclasses of :class:`arrow::flight::FlightServerBase`. To +implement individual RPCs, override the RPC methods on this class. + +.. code-block:: cpp + + class MyFlightServer : public FlightServerBase { + Status ListFlights(const ServerCallContext& context, const Criteria* criteria, + std::unique_ptr<FlightListing>* listings) override { + std::vector<FlightInfo> flights = ...; + *listings = std::unique_ptr<FlightListing>(new SimpleFlightListing(flights)); + return Status::OK(); + } + }; + +Each RPC method always takes a +:class:`arrow::flight::ServerCallContext` for common parameters and +returns a :class:`arrow::Status` to indicate success or +failure. Flight-specific error codes can be returned via +:func:`arrow::flight::MakeFlightError`. + +RPC methods that return a value in addition to a status will use an +out parameter, as shown above. Often, there are helper classes +providing basic implementations of these out parameters. 
For instance, +above, :class:`arrow::flight::SimpleFlightListing` uses a vector of +:class:`arrow::flight::FlightInfo` objects as the result of a +``ListFlights`` RPC. + +To start a server, create a :class:`arrow::flight::Location` to +specify where to listen, and call +:func:`arrow::flight::FlightServerBase::Init`. This will start the +server, but won't block the rest of the program. Use +:func:`arrow::flight::FlightServerBase::SetShutdownOnSignals` to +enable stopping the server if an interrupt signal is received, then +call :func:`arrow::flight::FlightServerBase::Serve` to block until the +server stops. + +.. code-block:: cpp + + std::unique_ptr<arrow::flight::FlightServerBase> server; + // Initialize server + arrow::flight::Location location; + // Listen to all interfaces on a free port + ARROW_CHECK_OK(arrow::flight::Location::ForGrpcTcp("0.0.0.0", 0, &location)); + arrow::flight::FlightServerOptions options(location); + + // Start the server + ARROW_CHECK_OK(server->Init(options)); + // Exit with a clean error code (0) on SIGTERM + ARROW_CHECK_OK(server->SetShutdownOnSignals({SIGTERM})); + + std::cout << "Server listening on localhost:" << server->port() << std::endl; + ARROW_CHECK_OK(server->Serve()); + + +Enabling TLS and Authentication +------------------------------- + +TLS can be enabled by providing a certificate and key pair to +:func:`FlightServerBase::Init +<arrow::flight::FlightServerBase::Init>`. Additionally, use +:func:`Location::ForGrpcTls <arrow::flight::Location::ForGrpcTls>` to +construct the :class:`arrow::flight::Location` to listen on. + +Similarly, authentication can be enabled by providing an +implementation of :class:`ServerAuthHandler +<arrow::flight::ServerAuthHandler>`. Authentication consists of two +parts: on initial client connection, the server and client +authentication implementations can perform any negotiation needed; +then, on each RPC thereafter, the client provides a token. The server +authentication handler validates the token and provides the identity +of the client. This identity can be obtained from the +:class:`arrow::flight::ServerCallContext`. + +Using the Flight Client +======================= + +To connect to a Flight service, create an instance of +:class:`arrow::flight::FlightClient` by calling :func:`Connect +<arrow::flight::FlightClient::Connect>`. This takes a Location and +returns the client through an out parameter. To authenticate, call +:func:`Authenticate <arrow::flight::FlightClient::Authenticate>` with +the desired client authentication implementation. + +Each RPC method returns :class:`arrow::Status` to indicate the +success/failure of the request. Any other return values are specified +through out parameters. They also take an optional :class:`options +<arrow::flight::FlightCallOptions>` parameter that allows specifying a +timeout for the call. diff --git a/src/arrow/docs/source/cpp/getting_started.rst b/src/arrow/docs/source/cpp/getting_started.rst new file mode 100644 index 000000000..36ea4803f --- /dev/null +++ b/src/arrow/docs/source/cpp/getting_started.rst @@ -0,0 +1,41 @@ +.. Licensed to the Apache Software Foundation (ASF) under one +.. or more contributor license agreements. See the NOTICE file +.. distributed with this work for additional information +.. regarding copyright ownership. The ASF licenses this file +.. to you under the Apache License, Version 2.0 (the +.. "License"); you may not use this file except in compliance +.. with the License. You may obtain a copy of the License at + +.. 
http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, +.. software distributed under the License is distributed on an +.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +.. KIND, either express or implied. See the License for the +.. specific language governing permissions and limitations +.. under the License. + +.. default-domain:: cpp +.. highlight:: cpp + +User Guide +========== + +.. toctree:: + + overview + conventions + build_system + memory + arrays + datatypes + tables + compute + streaming_execution + io + ipc + parquet + csv + json + dataset + flight diff --git a/src/arrow/docs/source/cpp/index.rst b/src/arrow/docs/source/cpp/index.rst new file mode 100644 index 000000000..b3f6e4c82 --- /dev/null +++ b/src/arrow/docs/source/cpp/index.rst @@ -0,0 +1,32 @@ +.. Licensed to the Apache Software Foundation (ASF) under one +.. or more contributor license agreements. See the NOTICE file +.. distributed with this work for additional information +.. regarding copyright ownership. The ASF licenses this file +.. to you under the Apache License, Version 2.0 (the +.. "License"); you may not use this file except in compliance +.. with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, +.. software distributed under the License is distributed on an +.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +.. KIND, either express or implied. See the License for the +.. specific language governing permissions and limitations +.. under the License. + +C++ Implementation +================== + +.. toctree:: + :maxdepth: 2 + + getting_started + Examples <examples/index> + api + +.. TODO add "topics" chapter +.. - nested arrays +.. - dictionary encoding + +.. TODO add "building" or "development" chapter diff --git a/src/arrow/docs/source/cpp/io.rst b/src/arrow/docs/source/cpp/io.rst new file mode 100644 index 000000000..6e1d261c0 --- /dev/null +++ b/src/arrow/docs/source/cpp/io.rst @@ -0,0 +1,87 @@ +.. Licensed to the Apache Software Foundation (ASF) under one +.. or more contributor license agreements. See the NOTICE file +.. distributed with this work for additional information +.. regarding copyright ownership. The ASF licenses this file +.. to you under the Apache License, Version 2.0 (the +.. "License"); you may not use this file except in compliance +.. with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, +.. software distributed under the License is distributed on an +.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +.. KIND, either express or implied. See the License for the +.. specific language governing permissions and limitations +.. under the License. + +.. default-domain:: cpp +.. highlight:: cpp +.. cpp:namespace:: arrow::io + +============================== +Input / output and filesystems +============================== + +Arrow provides a range of C++ interfaces abstracting the concrete details +of input / output operations. They operate on streams of untyped binary data. +Those abstractions are used for various purposes such as reading CSV or +Parquet data, transmitting IPC streams, and more. + +.. seealso:: + :doc:`API reference for input/output facilities <api/io>`. 
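+
+As a brief illustration of these abstractions, the sketch below opens a local
+file and reads from it both sequentially and at an arbitrary offset. This is a
+minimal, hypothetical example (the file name ``data.bin`` and the helper
+function are made up for illustration)::
+
+   #include "arrow/buffer.h"
+   #include "arrow/io/file.h"
+   #include "arrow/result.h"
+   #include "arrow/status.h"
+
+   arrow::Status ReadSomeBytes() {
+     // Open the file as a RandomAccessFile (which is also an InputStream).
+     ARROW_ASSIGN_OR_RAISE(auto file, arrow::io::ReadableFile::Open("data.bin"));
+
+     // Sequential read: up to 64 bytes are returned in a freshly allocated Buffer.
+     ARROW_ASSIGN_OR_RAISE(std::shared_ptr<arrow::Buffer> head, file->Read(64));
+
+     // Random access read: 16 bytes starting at offset 128; ReadAt calls
+     // can safely be issued from multiple threads.
+     ARROW_ASSIGN_OR_RAISE(std::shared_ptr<arrow::Buffer> chunk, file->ReadAt(128, 16));
+
+     return file->Close();
+   }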
+ +Reading binary data +=================== + +Interfaces for reading binary data come in two flavours: + +* Sequential reading: the :class:`InputStream` interface provides + ``Read`` methods; it is recommended to ``Read`` to a ``Buffer`` as it + may in some cases avoid a memory copy. + +* Random access reading: the :class:`RandomAccessFile` interface + provides additional facilities for positioning and, most importantly, + the ``ReadAt`` methods which allow parallel reading from multiple threads. + +Concrete implementations are available for :class:`in-memory reads <BufferReader>`, +:class:`unbuffered file reads <ReadableFile>`, +:class:`memory-mapped file reads <MemoryMappedFile>`, +:class:`buffered reads <BufferedInputStream>`, +:class:`compressed reads <CompressedInputStream>`. + +Writing binary data +=================== + +Writing binary data is mostly done through the :class:`OutputStream` +interface. + +Concrete implementations are available for :class:`in-memory writes <BufferOutputStream>`, +:class:`unbuffered file writes <FileOutputStream>`, +:class:`memory-mapped file writes <MemoryMappedFile>`, +:class:`buffered writes <BufferedOutputStream>`, +:class:`compressed writes <CompressedOutputStream>`. + +.. cpp:namespace:: arrow::fs + +.. _cpp-filesystems: + +Filesystems +=========== + +The :class:`filesystem interface <FileSystem>` allows abstracted access over +various data storage backends such as the local filesystem or a S3 bucket. +It provides input and output streams as well as directory operations. + +The filesystem interface exposes a simplified view of the underlying data +storage. Data paths are represented as *abstract paths*, which are +``/``-separated, even on Windows, and shouldn't include special path +components such as ``.`` and ``..``. Symbolic links, if supported by the +underlying storage, are automatically dereferenced. Only basic +:class:`metadata <FileStats>` about file entries, such as the file size +and modification time, is made available. + +Concrete implementations are available for +:class:`local filesystem access <LocalFileSystem>`, +:class:`HDFS <HadoopFileSystem>` and +:class:`Amazon S3-compatible storage <S3FileSystem>`. diff --git a/src/arrow/docs/source/cpp/ipc.rst b/src/arrow/docs/source/cpp/ipc.rst new file mode 100644 index 000000000..ce4175bca --- /dev/null +++ b/src/arrow/docs/source/cpp/ipc.rst @@ -0,0 +1,75 @@ +.. Licensed to the Apache Software Foundation (ASF) under one +.. or more contributor license agreements. See the NOTICE file +.. distributed with this work for additional information +.. regarding copyright ownership. The ASF licenses this file +.. to you under the Apache License, Version 2.0 (the +.. "License"); you may not use this file except in compliance +.. with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, +.. software distributed under the License is distributed on an +.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +.. KIND, either express or implied. See the License for the +.. specific language governing permissions and limitations +.. under the License. + +.. default-domain:: cpp +.. highlight:: cpp +.. cpp:namespace:: arrow::ipc + +======================================== +Reading and writing the Arrow IPC format +======================================== + +.. seealso:: + :ref:`Arrow IPC format specification <format-ipc>`. + + :doc:`API reference for IPC readers and writers <api/ipc>`. 
+ +Arrow C++ provides readers and writers for the Arrow IPC format which wrap +lower level input/output, handled through the :doc:`IO interfaces <io>`. +For reading, there is also an event-driven API that enables feeding +arbitrary data into the IPC decoding layer asynchronously. + +Reading IPC streams and files +============================= + +Synchronous reading +------------------- + +For most cases, it is most convenient to use the :class:`RecordBatchStreamReader` +or :class:`RecordBatchFileReader` class, depending on which variant of the IPC +format you want to read. The former requires a :class:`~arrow::io::InputStream` +source, while the latter requires a :class:`~arrow::io::RandomAccessFile`. + +Reading Arrow IPC data is inherently zero-copy if the source allows it. +For example, a :class:`~arrow::io::BufferReader` or :class:`~arrow::io::MemoryMappedFile` +can typically be zero-copy. Exceptions are when the data must be transformed +on the fly, e.g. when buffer compression has been enabled on the IPC stream +or file. + +Event-driven reading +-------------------- + +When it is necessary to process the IPC format without blocking (for example +to integrate Arrow with an event loop), or if data is coming from an unusual +source, use the event-driven :class:`StreamDecoder`. You will need to define +a subclass of :class:`Listener` and implement the virtual methods for the +desired events (for example, implement :func:`Listener::OnRecordBatchDecoded` +to be notified of each incoming :class:`RecordBatch`). + +Writing IPC streams and files +============================= + +Use one of the factory functions, :func:`MakeStreamWriter` or +:func:`MakeFileWriter`, to obtain a :class:`RecordBatchWriter` instance for +the given IPC format variant. + +Configuring +=========== + +Various aspects of reading and writing the IPC format can be configured +using the :class:`IpcReadOptions` and :class:`IpcWriteOptions` classes, +respectively. diff --git a/src/arrow/docs/source/cpp/json.rst b/src/arrow/docs/source/cpp/json.rst new file mode 100644 index 000000000..cdb742e6c --- /dev/null +++ b/src/arrow/docs/source/cpp/json.rst @@ -0,0 +1,128 @@ +.. Licensed to the Apache Software Foundation (ASF) under one +.. or more contributor license agreements. See the NOTICE file +.. distributed with this work for additional information +.. regarding copyright ownership. The ASF licenses this file +.. to you under the Apache License, Version 2.0 (the +.. "License"); you may not use this file except in compliance +.. with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, +.. software distributed under the License is distributed on an +.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +.. KIND, either express or implied. See the License for the +.. specific language governing permissions and limitations +.. under the License. + +.. default-domain:: cpp +.. highlight:: cpp + +.. cpp:namespace:: arrow::json + +================== +Reading JSON files +================== + +Arrow allows reading line-separated JSON files as Arrow tables. Each +independent JSON object in the input file is converted to a row in +the target Arrow table. + +.. seealso:: + :ref:`JSON reader API reference <cpp-api-json>`. + +Basic usage +=========== + +A JSON file is read from a :class:`~arrow::io::InputStream`. + +.. code-block:: cpp + + #include "arrow/json/api.h" + + { + // ... 
+ arrow::Status st; + arrow::MemoryPool* pool = default_memory_pool(); + std::shared_ptr<arrow::io::InputStream> input = ...; + + auto read_options = arrow::json::ReadOptions::Defaults(); + auto parse_options = arrow::json::ParseOptions::Defaults(); + + // Instantiate TableReader from input stream and options + std::shared_ptr<arrow::json::TableReader> reader; + st = arrow::json::TableReader::Make(pool, input, read_options, + parse_options, &reader); + if (!st.ok()) { + // Handle TableReader instantiation error... + } + + std::shared_ptr<arrow::Table> table; + // Read table from JSON file + st = reader->Read(&table); + if (!st.ok()) { + // Handle JSON read error + // (for example a JSON syntax error or failed type conversion) + } + } + +Data types +========== + +Since JSON values are typed, the possible Arrow data types on output +depend on the input value types. Top-level JSON values should always be +objects. The fields of top-level objects are taken to represent columns +in the Arrow data. For each name/value pair in a JSON object, there are +two possible modes of deciding the output data type: + +* if the name is in :class:`ConvertOptions::explicit_schema`, + conversion of the JSON value to the corresponding Arrow data type is + attempted; + +* otherwise, the Arrow data type is determined via type inference on + the JSON value, trying out a number of Arrow data types in order. + +The following tables show the possible combinations for each of those +two modes. + +.. table:: Explicit conversions from JSON to Arrow + :align: center + + +-----------------+----------------------------------------------------+ + | JSON value type | Allowed Arrow data types | + +=================+====================================================+ + | Null | Any (including Null) | + +-----------------+----------------------------------------------------+ + | Number | All Integer types, Float32, Float64, | + | | Date32, Date64, Time32, Time64 | + +-----------------+----------------------------------------------------+ + | Boolean | Boolean | + +-----------------+----------------------------------------------------+ + | String | Binary, LargeBinary, String, LargeString, | + | | Timestamp | + +-----------------+----------------------------------------------------+ + | Array | List | + +-----------------+----------------------------------------------------+ + | Object (nested) | Struct | + +-----------------+----------------------------------------------------+ + +.. 
table:: Implicit type inference from JSON to Arrow + :align: center + + +-----------------+----------------------------------------------------+ + | JSON value type | Inferred Arrow data types (in order) | + +=================+====================================================+ + | Null | Null, any other | + +-----------------+----------------------------------------------------+ + | Number | Int64, Float64 | + | | | + +-----------------+----------------------------------------------------+ + | Boolean | Boolean | + +-----------------+----------------------------------------------------+ + | String | Timestamp (with seconds unit), String | + | | | + +-----------------+----------------------------------------------------+ + | Array | List | + +-----------------+----------------------------------------------------+ + | Object (nested) | Struct | + +-----------------+----------------------------------------------------+ diff --git a/src/arrow/docs/source/cpp/memory.rst b/src/arrow/docs/source/cpp/memory.rst new file mode 100644 index 000000000..ff8ffb044 --- /dev/null +++ b/src/arrow/docs/source/cpp/memory.rst @@ -0,0 +1,203 @@ +.. Licensed to the Apache Software Foundation (ASF) under one +.. or more contributor license agreements. See the NOTICE file +.. distributed with this work for additional information +.. regarding copyright ownership. The ASF licenses this file +.. to you under the Apache License, Version 2.0 (the +.. "License"); you may not use this file except in compliance +.. with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, +.. software distributed under the License is distributed on an +.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +.. KIND, either express or implied. See the License for the +.. specific language governing permissions and limitations +.. under the License. + +.. default-domain:: cpp +.. highlight:: cpp + +.. _cpp_memory_management: + +================= +Memory Management +================= + +.. seealso:: + :doc:`Memory management API reference <api/memory>` + +Buffers +======= + +To avoid passing around raw data pointers with varying and non-obvious +lifetime rules, Arrow provides a generic abstraction called :class:`arrow::Buffer`. +A Buffer encapsulates a pointer and data size, and generally also ties its +lifetime to that of an underlying provider (in other words, a Buffer should +*always* point to valid memory till its destruction). Buffers are untyped: +they simply denote a physical memory area regardless of its intended meaning +or interpretation. + +Buffers may be allocated by Arrow itself , or by third-party routines. +For example, it is possible to pass the data of a Python bytestring as a Arrow +buffer, keeping the Python object alive as necessary. + +In addition, buffers come in various flavours: mutable or not, resizable or +not. Generally, you will hold a mutable buffer when building up a piece +of data, then it will be frozen as an immutable container such as an +:doc:`array <arrays>`. + +.. note:: + Some buffers may point to non-CPU memory, such as GPU-backed memory + provided by a CUDA context. If you're writing a GPU-aware application, + you will need to be careful not to interpret a GPU memory pointer as + a CPU-reachable pointer, or vice-versa. 
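+
+As a brief sketch of the above (the variable names are illustrative), existing
+memory can be wrapped in a non-owning Buffer, in which case the caller must
+keep the wrapped data alive for as long as the buffer is in use::
+
+   #include <vector>
+
+   #include "arrow/buffer.h"
+
+   std::vector<int32_t> values = {1, 2, 3, 4};
+
+   // Wrap the vector's data: no copy is made and the buffer does not
+   // own the memory.
+   std::shared_ptr<arrow::Buffer> view = arrow::Buffer::Wrap(values);
+
+   // Alternatively, create an owning buffer from a std::string.
+   std::shared_ptr<arrow::Buffer> owned = arrow::Buffer::FromString("hello world");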
+ +Accessing Buffer Memory +----------------------- + +Buffers provide fast access to the underlying memory using the +:func:`~arrow::Buffer::size` and :func:`~arrow::Buffer::data` accessors +(or :func:`~arrow::Buffer::mutable_data` for writable access to a mutable +buffer). + +Slicing +------- + +It is possible to make zero-copy slices of buffers, to obtain a buffer +referring to some contiguous subset of the underlying data. This is done +by calling the :func:`arrow::SliceBuffer` and :func:`arrow::SliceMutableBuffer` +functions. + +Allocating a Buffer +------------------- + +You can allocate a buffer yourself by calling one of the +:func:`arrow::AllocateBuffer` or :func:`arrow::AllocateResizableBuffer` +overloads:: + + arrow::Result<std::unique_ptr<Buffer>> maybe_buffer = arrow::AllocateBuffer(4096); + if (!maybe_buffer.ok()) { + // ... handle allocation error + } + + std::shared_ptr<arrow::Buffer> buffer = *std::move(maybe_buffer); + uint8_t* buffer_data = buffer->mutable_data(); + memcpy(buffer_data, "hello world", 11); + +Allocating a buffer this way ensures it is 64-bytes aligned and padded +as recommended by the :doc:`Arrow memory specification <../format/Layout>`. + +Building a Buffer +----------------- + +You can also allocate *and* build a Buffer incrementally, using the +:class:`arrow::BufferBuilder` API:: + + BufferBuilder builder; + builder.Resize(11); // reserve enough space for 11 bytes + builder.Append("hello ", 6); + builder.Append("world", 5); + + auto maybe_buffer = builder.Finish(); + if (!maybe_buffer.ok()) { + // ... handle buffer allocation error + } + std::shared_ptr<arrow::Buffer> buffer = *maybe_buffer; + +If a Buffer is meant to contain values of a given fixed-width type (for +example the 32-bit offsets of a List array), it can be more convenient to +use the template :class:`arrow::TypedBufferBuilder` API:: + + TypedBufferBuilder<int32_t> builder; + builder.Reserve(2); // reserve enough space for two int32_t values + builder.Append(0x12345678); + builder.Append(-0x765643210); + + auto maybe_buffer = builder.Finish(); + if (!maybe_buffer.ok()) { + // ... handle buffer allocation error + } + std::shared_ptr<arrow::Buffer> buffer = *maybe_buffer; + +Memory Pools +============ + +When allocating a Buffer using the Arrow C++ API, the buffer's underlying +memory is allocated by a :class:`arrow::MemoryPool` instance. Usually this +will be the process-wide *default memory pool*, but many Arrow APIs allow +you to pass another MemoryPool instance for their internal allocations. + +Memory pools are used for large long-lived data such as array buffers. +Other data, such as small C++ objects and temporary workspaces, usually +goes through the regular C++ allocators. + +Default Memory Pool +------------------- + +The default memory pool depends on how Arrow C++ was compiled: + +- if enabled at compile time, a `jemalloc <http://jemalloc.net/>`_ heap; +- otherwise, if enabled at compile time, a + `mimalloc <https://github.com/microsoft/mimalloc>`_ heap; +- otherwise, the C library ``malloc`` heap. + +Overriding the Default Memory Pool +---------------------------------- + +One can override the above selection algorithm by setting the +``ARROW_DEFAULT_MEMORY_POOL`` environment variable to one of the following +values: ``jemalloc``, ``mimalloc`` or ``system``. This variable is inspected +once when Arrow C++ is loaded in memory (for example when the Arrow C++ DLL +is loaded). 
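+
+Whichever pool ends up being selected, its usage can be inspected at runtime.
+The following is a small sketch that allocates from the default pool and prints
+basic statistics (the exact numbers depend on the backend in use)::
+
+   #include <iostream>
+
+   #include "arrow/buffer.h"
+   #include "arrow/memory_pool.h"
+
+   arrow::MemoryPool* pool = arrow::default_memory_pool();
+   std::cout << "backend: " << pool->backend_name() << std::endl;
+
+   // Most allocation APIs accept an explicit pool argument.
+   auto maybe_buffer = arrow::AllocateBuffer(4096, pool);
+
+   std::cout << "allocated: " << pool->bytes_allocated() << " bytes, "
+             << "peak: " << pool->max_memory() << " bytes" << std::endl;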
+
+STL Integration
+---------------
+
+If you wish to use an Arrow memory pool to allocate the data of STL containers,
+you can do so using the :class:`arrow::stl::allocator` wrapper.
+
+Conversely, you can also use an STL allocator to allocate Arrow memory,
+using the :class:`arrow::stl::STLMemoryPool` class. However, this may be less
+performant, as STL allocators don't provide a resizing operation.
+
+Devices
+=======
+
+Many Arrow applications only access host (CPU) memory. However, in some cases
+it is desirable to handle on-device memory (such as on-board memory on a GPU)
+as well as host memory.
+
+Arrow represents the CPU and other devices using the
+:class:`arrow::Device` abstraction. The associated class
+:class:`arrow::MemoryManager` specifies how to allocate memory on a given
+device. Each device has a default memory manager, but additional instances
+may be constructed (for example, wrapping a custom
+:class:`arrow::MemoryPool` on the CPU).
+
+Device-Agnostic Programming
+---------------------------
+
+If you receive a Buffer from third-party code, you can query whether it is
+CPU-readable by calling its :func:`~arrow::Buffer::is_cpu` method.
+
+You can also view the Buffer on a given device, in a generic way, by calling
+:func:`arrow::Buffer::View` or :func:`arrow::Buffer::ViewOrCopy`. This will
+be a no-op if the source and destination devices are identical.
+Otherwise, a device-dependent mechanism will attempt to construct a memory
+address for the destination device that gives access to the buffer contents.
+Actual device-to-device transfer may happen lazily, when reading the buffer
+contents.
+
+Similarly, if you want to do I/O on a buffer without assuming a CPU-readable
+buffer, you can call :func:`arrow::Buffer::GetReader` and
+:func:`arrow::Buffer::GetWriter`.
+
+For example, to get an on-CPU view or copy of an arbitrary buffer, you can
+simply do::
+
+   std::shared_ptr<arrow::Buffer> arbitrary_buffer = ... ;
+   std::shared_ptr<arrow::Buffer> cpu_buffer = arrow::Buffer::ViewOrCopy(
+       arbitrary_buffer, arrow::default_cpu_memory_manager());
diff --git a/src/arrow/docs/source/cpp/overview.rst b/src/arrow/docs/source/cpp/overview.rst
new file mode 100644
index 000000000..ccebdba45
--- /dev/null
+++ b/src/arrow/docs/source/cpp/overview.rst
@@ -0,0 +1,97 @@
+.. Licensed to the Apache Software Foundation (ASF) under one
+.. or more contributor license agreements. See the NOTICE file
+.. distributed with this work for additional information
+.. regarding copyright ownership. The ASF licenses this file
+.. to you under the Apache License, Version 2.0 (the
+.. "License"); you may not use this file except in compliance
+.. with the License. You may obtain a copy of the License at
+
+.. http://www.apache.org/licenses/LICENSE-2.0
+
+.. Unless required by applicable law or agreed to in writing,
+.. software distributed under the License is distributed on an
+.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+.. KIND, either express or implied. See the License for the
+.. specific language governing permissions and limitations
+.. under the License.
+
+.. default-domain:: cpp
+.. highlight:: cpp
+
+High-Level Overview
+===================
+
+The Arrow C++ library is made up of several parts, each of which serves
+a specific purpose.
+
+The physical layer
+------------------
+
+**Memory management** abstractions provide a uniform API over memory that
+may be allocated through various means, such as heap allocation, the memory
+mapping of a file or a static memory area. In particular, the **buffer**
+abstraction represents a contiguous area of physical data.
+
+The one-dimensional layer
+-------------------------
+
+**Data types** govern the *logical* interpretation of *physical* data.
+Many operations in Arrow are parameterized, at compile time or at runtime,
+by a data type.
+
+**Arrays** assemble one or several buffers with a data type, allowing them
+to be viewed as a logical contiguous sequence of values (possibly nested).
+
+**Chunked arrays** are a generalization of arrays, combining several
+same-type arrays into a longer logical sequence of values.
+
+The two-dimensional layer
+-------------------------
+
+**Schemas** describe a logical collection of several pieces of data,
+each with a distinct name and type, and optional metadata.
+
+**Tables** are collections of chunked arrays conforming to a schema. They
+are the most capable dataset-providing abstraction in Arrow.
+
+**Record batches** are collections of contiguous arrays, described
+by a schema. They allow incremental construction or serialization of tables.
+
+The compute layer
+-----------------
+
+**Datums** are flexible dataset references, able to hold, for example, an
+array or table reference.
+
+**Kernels** are specialized computation functions running in a loop over a
+given set of datums representing input and output parameters to the functions.
+
+The IO layer
+------------
+
+**Streams** allow untyped sequential or seekable access over external data
+of various kinds (for example compressed or memory-mapped).
+
+The Inter-Process Communication (IPC) layer
+-------------------------------------------
+
+A **messaging format** allows interchange of Arrow data between processes, using
+as few copies as possible.
+
+The file formats layer
+----------------------
+
+Reading and writing Arrow data from/to various file formats is possible, for
+example **Parquet**, **CSV**, **ORC** or the Arrow-specific **Feather** format.
+
+The devices layer
+-----------------
+
+Basic **CUDA** integration is provided, allowing Arrow data backed by
+GPU-allocated memory to be described.
+
+The filesystem layer
+--------------------
+
+A filesystem abstraction allows reading and writing data from different storage
+backends, such as the local filesystem or an S3 bucket.
diff --git a/src/arrow/docs/source/cpp/parquet.rst b/src/arrow/docs/source/cpp/parquet.rst
new file mode 100644
index 000000000..88ea4e5b6
--- /dev/null
+++ b/src/arrow/docs/source/cpp/parquet.rst
@@ -0,0 +1,432 @@
+.. Licensed to the Apache Software Foundation (ASF) under one
+.. or more contributor license agreements. See the NOTICE file
+.. distributed with this work for additional information
+.. regarding copyright ownership. The ASF licenses this file
+.. to you under the Apache License, Version 2.0 (the
+.. "License"); you may not use this file except in compliance
+.. with the License. You may obtain a copy of the License at
+
+.. http://www.apache.org/licenses/LICENSE-2.0
+
+.. Unless required by applicable law or agreed to in writing,
+.. software distributed under the License is distributed on an
+.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+.. KIND, either express or implied. See the License for the
+.. specific language governing permissions and limitations
+.. under the License.
+
+..
default-domain:: cpp +.. highlight:: cpp + +.. cpp:namespace:: parquet + +================================= +Reading and writing Parquet files +================================= + +.. seealso:: + :ref:`Parquet reader and writer API reference <cpp-api-parquet>`. + +The `Parquet format <https://parquet.apache.org/documentation/latest/>`__ +is a space-efficient columnar storage format for complex data. The Parquet +C++ implementation is part of the Apache Arrow project and benefits +from tight integration with the Arrow C++ classes and facilities. + +Supported Parquet features +========================== + +The Parquet format has many features, and Parquet C++ supports a subset of them. + +Page types +---------- + ++-------------------+---------+ +| Page type | Notes | ++===================+=========+ +| DATA_PAGE | | ++-------------------+---------+ +| DATA_PAGE_V2 | | ++-------------------+---------+ +| DICTIONARY_PAGE | | ++-------------------+---------+ + +*Unsupported page type:* INDEX_PAGE. When reading a Parquet file, pages of +this type are ignored. + +Compression +----------- + ++-------------------+---------+ +| Compression codec | Notes | ++===================+=========+ +| SNAPPY | | ++-------------------+---------+ +| GZIP | | ++-------------------+---------+ +| BROTLI | | ++-------------------+---------+ +| LZ4 | \(1) | ++-------------------+---------+ +| ZSTD | | ++-------------------+---------+ + +* \(1) On the read side, Parquet C++ is able to decompress both the regular + LZ4 block format and the ad-hoc Hadoop LZ4 format used by the + `reference Parquet implementation <https://github.com/apache/parquet-mr>`__. + On the write side, Parquet C++ always generates the ad-hoc Hadoop LZ4 format. + +*Unsupported compression codec:* LZO. + +Encodings +--------- + ++--------------------------+---------+ +| Encoding | Notes | ++==========================+=========+ +| PLAIN | | ++--------------------------+---------+ +| PLAIN_DICTIONARY | | ++--------------------------+---------+ +| BIT_PACKED | | ++--------------------------+---------+ +| RLE | \(1) | ++--------------------------+---------+ +| RLE_DICTIONARY | \(2) | ++--------------------------+---------+ +| BYTE_STREAM_SPLIT | | ++--------------------------+---------+ + +* \(1) Only supported for encoding definition and repetition levels, not values. + +* \(2) On the write path, RLE_DICTIONARY is only enabled if Parquet format version + 2.4 or greater is selected in :func:`WriterProperties::version`. + +*Unsupported encodings:* DELTA_BINARY_PACKED, DELTA_LENGTH_BYTE_ARRAY, +DELTA_BYTE_ARRAY. 
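+
+The compression codec and the encodings used on the write path are chosen
+through :class:`WriterProperties`. The following is a brief sketch; the
+specific choices of ZSTD compression and format version 2.4, as well as the
+exact enumerator names, are only illustrative:
+
+.. code-block:: cpp
+
+   #include "parquet/properties.h"
+
+   parquet::WriterProperties::Builder builder;
+   builder.compression(parquet::Compression::ZSTD)
+       ->version(parquet::ParquetVersion::PARQUET_2_4)  // enables RLE_DICTIONARY
+       ->enable_dictionary();
+   std::shared_ptr<parquet::WriterProperties> props = builder.build();
+
+The resulting properties object is then passed to the writer APIs described
+below.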
+ +Types +----- + +Physical types +~~~~~~~~~~~~~~ + ++--------------------------+-------------------------+------------+ +| Physical type | Mapped Arrow type | Notes | ++==========================+=========================+============+ +| BOOLEAN | Boolean | | ++--------------------------+-------------------------+------------+ +| INT32 | Int32 / other | \(1) | ++--------------------------+-------------------------+------------+ +| INT64 | Int64 / other | \(1) | ++--------------------------+-------------------------+------------+ +| INT96 | Timestamp (nanoseconds) | \(2) | ++--------------------------+-------------------------+------------+ +| FLOAT | Float32 | | ++--------------------------+-------------------------+------------+ +| DOUBLE | Float64 | | ++--------------------------+-------------------------+------------+ +| BYTE_ARRAY | Binary / other | \(1) \(3) | ++--------------------------+-------------------------+------------+ +| FIXED_LENGTH_BYTE_ARRAY | FixedSizeBinary / other | \(1) | ++--------------------------+-------------------------+------------+ + +* \(1) Can be mapped to other Arrow types, depending on the logical type + (see below). + +* \(2) On the write side, :func:`ArrowWriterProperties::support_deprecated_int96_timestamps` + must be enabled. + +* \(3) On the write side, an Arrow LargeBinary can also mapped to BYTE_ARRAY. + +Logical types +~~~~~~~~~~~~~ + +Specific logical types can override the default Arrow type mapping for a given +physical type. + ++-------------------+-----------------------------+----------------------------+---------+ +| Logical type | Physical type | Mapped Arrow type | Notes | ++===================+=============================+============================+=========+ +| NULL | Any | Null | \(1) | ++-------------------+-----------------------------+----------------------------+---------+ +| INT | INT32 | Int8 / UInt8 / Int16 / | | +| | | UInt16 / Int32 / UInt32 | | ++-------------------+-----------------------------+----------------------------+---------+ +| INT | INT64 | Int64 / UInt64 | | ++-------------------+-----------------------------+----------------------------+---------+ +| DECIMAL | INT32 / INT64 / BYTE_ARRAY | Decimal128 / Decimal256 | \(2) | +| | / FIXED_LENGTH_BYTE_ARRAY | | | ++-------------------+-----------------------------+----------------------------+---------+ +| DATE | INT32 | Date32 | \(3) | ++-------------------+-----------------------------+----------------------------+---------+ +| TIME | INT32 | Time32 (milliseconds) | | ++-------------------+-----------------------------+----------------------------+---------+ +| TIME | INT64 | Time64 (micro- or | | +| | | nanoseconds) | | ++-------------------+-----------------------------+----------------------------+---------+ +| TIMESTAMP | INT64 | Timestamp (milli-, micro- | | +| | | or nanoseconds) | | ++-------------------+-----------------------------+----------------------------+---------+ +| STRING | BYTE_ARRAY | Utf8 | \(4) | ++-------------------+-----------------------------+----------------------------+---------+ +| LIST | Any | List | \(5) | ++-------------------+-----------------------------+----------------------------+---------+ +| MAP | Any | Map | \(6) | ++-------------------+-----------------------------+----------------------------+---------+ + +* \(1) On the write side, the Parquet physical type INT32 is generated. + +* \(2) On the write side, a FIXED_LENGTH_BYTE_ARRAY is always emitted. 
+ +* \(3) On the write side, an Arrow Date64 is also mapped to a Parquet DATE INT32. + +* \(4) On the write side, an Arrow LargeUtf8 is also mapped to a Parquet STRING. + +* \(5) On the write side, an Arrow LargeList or FixedSizedList is also mapped to + a Parquet LIST. + +* \(6) On the read side, a key with multiple values does not get deduplicated, + in contradiction with the + `Parquet specification <https://github.com/apache/parquet-format/blob/master/LogicalTypes.md#maps>`__. + +*Unsupported logical types:* JSON, BSON, UUID. If such a type is encountered +when reading a Parquet file, the default physical type mapping is used (for +example, a Parquet JSON column may be read as Arrow Binary or FixedSizeBinary). + +Converted types +~~~~~~~~~~~~~~~ + +While converted types are deprecated in the Parquet format (they are superceded +by logical types), they are recognized and emitted by the Parquet C++ +implementation so as to maximize compatibility with other Parquet +implementations. + +Special cases +~~~~~~~~~~~~~ + +An Arrow Extension type is written out as its storage type. It can still +be recreated at read time using Parquet metadata (see "Roundtripping Arrow +types" below). + +An Arrow Dictionary type is written out as its value type. It can still +be recreated at read time using Parquet metadata (see "Roundtripping Arrow +types" below). + +Roundtripping Arrow types +~~~~~~~~~~~~~~~~~~~~~~~~~ + +While there is no bijection between Arrow types and Parquet types, it is +possible to serialize the Arrow schema as part of the Parquet file metadata. +This is enabled using :func:`ArrowWriterProperties::store_schema`. + +On the read path, the serialized schema will be automatically recognized +and will recreate the original Arrow data, converting the Parquet data as +required (for example, a LargeList will be recreated from the Parquet LIST +type). + +As an example, when serializing an Arrow LargeList to Parquet: + +* The data is written out as a Parquet LIST + +* When read back, the Parquet LIST data is decoded as an Arrow LargeList if + :func:`ArrowWriterProperties::store_schema` was enabled when writing the file; + otherwise, it is decoded as an Arrow List. + +Serialization details +""""""""""""""""""""" + +The Arrow schema is serialized as a :ref:`Arrow IPC <format-ipc>` schema message, +then base64-encoded and stored under the ``ARROW:schema`` metadata key in +the Parquet file metadata. + +Limitations +~~~~~~~~~~~ + +Writing or reading back FixedSizedList data with null entries is not supported. + +Encryption +---------- + +Parquet C++ implements all features specified in the +`encryption specification <https://github.com/apache/parquet-format/blob/master/Encryption.md>`__, +except for encryption of column index and bloom filter modules. + +More specifically, Parquet C++ supports: + +* AES_GCM_V1 and AES_GCM_CTR_V1 encryption algorithms. +* AAD suffix for Footer, ColumnMetaData, Data Page, Dictionary Page, + Data PageHeader, Dictionary PageHeader module types. Other module types + (ColumnIndex, OffsetIndex, BloomFilter Header, BloomFilter Bitset) are not + supported. +* EncryptionWithFooterKey and EncryptionWithColumnKey modes. +* Encrypted Footer and Plaintext Footer modes. + + +Reading Parquet files +===================== + +The :class:`arrow::FileReader` class reads data for an entire +file or row group into an :class:`::arrow::Table`. 
+ +The :class:`StreamReader` and :class:`StreamWriter` classes allow for +data to be written using a C++ input/output streams approach to +read/write fields column by column and row by row. This approach is +offered for ease of use and type-safety. It is of course also useful +when data must be streamed as files are read and written +incrementally. + +Please note that the performance of the :class:`StreamReader` and +:class:`StreamWriter` classes will not be as good due to the type +checking and the fact that column values are processed one at a time. + +FileReader +---------- + +The Parquet :class:`arrow::FileReader` requires a +:class:`::arrow::io::RandomAccessFile` instance representing the input +file. + +.. code-block:: cpp + + #include "arrow/parquet/arrow/reader.h" + + { + // ... + arrow::Status st; + arrow::MemoryPool* pool = default_memory_pool(); + std::shared_ptr<arrow::io::RandomAccessFile> input = ...; + + // Open Parquet file reader + std::unique_ptr<parquet::arrow::FileReader> arrow_reader; + st = parquet::arrow::OpenFile(input, pool, &arrow_reader); + if (!st.ok()) { + // Handle error instantiating file reader... + } + + // Read entire file as a single Arrow table + std::shared_ptr<arrow::Table> table; + st = arrow_reader->ReadTable(&table); + if (!st.ok()) { + // Handle error reading Parquet data... + } + } + +Finer-grained options are available through the +:class:`arrow::FileReaderBuilder` helper class. + +.. TODO write section about performance and memory efficiency + +StreamReader +------------ + +The :class:`StreamReader` allows for Parquet files to be read using +standard C++ input operators which ensures type-safety. + +Please note that types must match the schema exactly i.e. if the +schema field is an unsigned 16-bit integer then you must supply a +uint16_t type. + +Exceptions are used to signal errors. A :class:`ParquetException` is +thrown in the following circumstances: + +* Attempt to read field by supplying the incorrect type. + +* Attempt to read beyond end of row. + +* Attempt to read beyond end of file. + +.. code-block:: cpp + + #include "arrow/io/file.h" + #include "parquet/stream_reader.h" + + { + std::shared_ptr<arrow::io::ReadableFile> infile; + + PARQUET_ASSIGN_OR_THROW( + infile, + arrow::io::ReadableFile::Open("test.parquet")); + + parquet::StreamReader os{parquet::ParquetFileReader::Open(infile)}; + + std::string article; + float price; + uint32_t quantity; + + while ( !os.eof() ) + { + os >> article >> price >> quantity >> parquet::EndRow; + // ... + } + } + +Writing Parquet files +===================== + +WriteTable +---------- + +The :func:`arrow::WriteTable` function writes an entire +:class:`::arrow::Table` to an output file. + +.. code-block:: cpp + + #include "parquet/arrow/writer.h" + + { + std::shared_ptr<arrow::io::FileOutputStream> outfile; + PARQUET_ASSIGN_OR_THROW( + outfile, + arrow::io::FileOutputStream::Open("test.parquet")); + + PARQUET_THROW_NOT_OK( + parquet::arrow::WriteTable(table, arrow::default_memory_pool(), outfile, 3)); + } + +StreamWriter +------------ + +The :class:`StreamWriter` allows for Parquet files to be written using +standard C++ output operators. This type-safe approach also ensures +that rows are written without omitting fields and allows for new row +groups to be created automatically (after certain volume of data) or +explicitly by using the :type:`EndRowGroup` stream modifier. + +Exceptions are used to signal errors. 
A :class:`ParquetException` is +thrown in the following circumstances: + +* Attempt to write a field using an incorrect type. + +* Attempt to write too many fields in a row. + +* Attempt to skip a required field. + +.. code-block:: cpp + + #include "arrow/io/file.h" + #include "parquet/stream_writer.h" + + { + std::shared_ptr<arrow::io::FileOutputStream> outfile; + + PARQUET_ASSIGN_OR_THROW( + outfile, + arrow::io::FileOutputStream::Open("test.parquet")); + + parquet::WriterProperties::Builder builder; + std::shared_ptr<parquet::schema::GroupNode> schema; + + // Set up builder with required compression type etc. + // Define schema. + // ... + + parquet::StreamWriter os{ + parquet::ParquetFileWriter::Open(outfile, schema, builder.build())}; + + // Loop over some data structure which provides the required + // fields to be written and write each row. + for (const auto& a : getArticles()) + { + os << a.name() << a.price() << a.quantity() << parquet::EndRow; + } + } diff --git a/src/arrow/docs/source/cpp/simple_graph.svg b/src/arrow/docs/source/cpp/simple_graph.svg new file mode 100644 index 000000000..d87507224 --- /dev/null +++ b/src/arrow/docs/source/cpp/simple_graph.svg @@ -0,0 +1,139 @@ +<!--- + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. 
+--> +<svg width="320pt" height="404pt" + viewBox="0.00 0.00 388.02 404.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink"> +<g id="graph0" class="graph" transform="scale(1 1) rotate(0) translate(4 400)"> +<title>G</title> +<polygon fill="#ffffff" stroke="transparent" points="-4,4 -4,-400 384.0173,-400 384.0173,4 -4,4"/> +<!-- scan lineitem --> +<g id="node1" class="node"> +<title>scan lineitem</title> +<ellipse fill="none" stroke="#000000" cx="62.2569" cy="-378" rx="62.0148" ry="18"/> +<text text-anchor="middle" x="62.2569" y="-373.8" font-family="Times,serif" font-size="14.00" fill="#000000">scan lineitem</text> +</g> +<!-- filter --> +<g id="node2" class="node"> +<title>filter</title> +<ellipse fill="none" stroke="#000000" cx="86.2569" cy="-306" rx="29.6089" ry="18"/> +<text text-anchor="middle" x="86.2569" y="-301.8" font-family="Times,serif" font-size="14.00" fill="#000000">filter</text> +</g> +<!-- scan lineitem->filter --> +<g id="edge1" class="edge"> +<title>scan lineitem->filter</title> +<path fill="none" stroke="#000000" d="M68.3132,-359.8314C70.9767,-351.8406 74.163,-342.2819 77.1065,-333.4514"/> +<polygon fill="#000000" stroke="#000000" points="80.4439,-334.5071 80.2858,-323.9134 73.8031,-332.2934 80.4439,-334.5071"/> +</g> +<!-- join --> +<g id="node3" class="node"> +<title>join</title> +<ellipse fill="none" stroke="#000000" cx="184.2569" cy="-234" rx="27" ry="18"/> +<text text-anchor="middle" x="184.2569" y="-229.8" font-family="Times,serif" font-size="14.00" fill="#000000">join</text> +</g> +<!-- filter->join --> +<g id="edge2" class="edge"> +<title>filter->join</title> +<path fill="none" stroke="#000000" d="M105.6186,-291.7751C120.5341,-280.8168 141.3184,-265.5467 157.7735,-253.4572"/> +<polygon fill="#000000" stroke="#000000" points="159.9433,-256.2062 165.9299,-247.4648 155.7988,-250.565 159.9433,-256.2062"/> +</g> +<!-- join again --> +<g id="node4" class="node"> +<title>join again</title> +<ellipse fill="none" stroke="#000000" cx="231.2569" cy="-162" rx="49.2784" ry="18"/> +<text text-anchor="middle" x="231.2569" y="-157.8" font-family="Times,serif" font-size="14.00" fill="#000000">join again</text> +</g> +<!-- join->join again --> +<g id="edge3" class="edge"> +<title>join->join again</title> +<path fill="none" stroke="#000000" d="M195.1578,-217.3008C200.8051,-208.6496 207.8305,-197.8873 214.1788,-188.1623"/> +<polygon fill="#000000" stroke="#000000" points="217.224,-189.9002 219.7594,-179.6132 211.3623,-186.0738 217.224,-189.9002"/> +</g> +<!-- filter again --> +<g id="node9" class="node"> +<title>filter again</title> +<ellipse fill="none" stroke="#000000" cx="231.2569" cy="-90" rx="53.2645" ry="18"/> +<text text-anchor="middle" x="231.2569" y="-85.8" font-family="Times,serif" font-size="14.00" fill="#000000">filter again</text> +</g> +<!-- join again->filter again --> +<g id="edge8" class="edge"> +<title>join again->filter again</title> +<path fill="none" stroke="#000000" d="M231.2569,-143.8314C231.2569,-136.131 231.2569,-126.9743 231.2569,-118.4166"/> +<polygon fill="#000000" stroke="#000000" points="234.757,-118.4132 231.2569,-108.4133 227.757,-118.4133 234.757,-118.4132"/> +</g> +<!-- scan orders --> +<g id="node5" class="node"> +<title>scan orders</title> +<ellipse fill="none" stroke="#000000" cx="197.2569" cy="-378" rx="54.9752" ry="18"/> +<text text-anchor="middle" x="197.2569" y="-373.8" font-family="Times,serif" font-size="14.00" fill="#000000">scan orders</text> +</g> +<!-- project --> +<g id="node6" class="node"> 
+<title>project</title> +<ellipse fill="none" stroke="#000000" cx="184.2569" cy="-306" rx="37.6986" ry="18"/> +<text text-anchor="middle" x="184.2569" y="-301.8" font-family="Times,serif" font-size="14.00" fill="#000000">project</text> +</g> +<!-- scan orders->project --> +<g id="edge4" class="edge"> +<title>scan orders->project</title> +<path fill="none" stroke="#000000" d="M193.9765,-359.8314C192.5861,-352.131 190.9329,-342.9743 189.3877,-334.4166"/> +<polygon fill="#000000" stroke="#000000" points="192.8028,-333.6322 187.5816,-324.4133 185.9142,-334.8761 192.8028,-333.6322"/> +</g> +<!-- project->join --> +<g id="edge5" class="edge"> +<title>project->join</title> +<path fill="none" stroke="#000000" d="M184.2569,-287.8314C184.2569,-280.131 184.2569,-270.9743 184.2569,-262.4166"/> +<polygon fill="#000000" stroke="#000000" points="187.757,-262.4132 184.2569,-252.4133 180.757,-262.4133 187.757,-262.4132"/> +</g> +<!-- scan customers --> +<g id="node7" class="node"> +<title>scan customers</title> +<ellipse fill="none" stroke="#000000" cx="310.2569" cy="-306" rx="69.5216" ry="18"/> +<text text-anchor="middle" x="310.2569" y="-301.8" font-family="Times,serif" font-size="14.00" fill="#000000">scan customers</text> +</g> +<!-- aggregate --> +<g id="node8" class="node"> +<title>aggregate</title> +<ellipse fill="none" stroke="#000000" cx="294.2569" cy="-234" rx="48.6346" ry="18"/> +<text text-anchor="middle" x="294.2569" y="-229.8" font-family="Times,serif" font-size="14.00" fill="#000000">aggregate</text> +</g> +<!-- scan customers->aggregate --> +<g id="edge6" class="edge"> +<title>scan customers->aggregate</title> +<path fill="none" stroke="#000000" d="M306.2195,-287.8314C304.5083,-280.131 302.4735,-270.9743 300.5717,-262.4166"/> +<polygon fill="#000000" stroke="#000000" points="303.9348,-261.4159 298.3488,-252.4133 297.1015,-262.9344 303.9348,-261.4159"/> +</g> +<!-- aggregate->join again --> +<g id="edge7" class="edge"> +<title>aggregate->join again</title> +<path fill="none" stroke="#000000" d="M279.0064,-216.5708C271.1906,-207.6385 261.5369,-196.6056 252.9595,-186.8029"/> +<polygon fill="#000000" stroke="#000000" points="255.5861,-184.4897 246.367,-179.2687 250.3181,-189.0993 255.5861,-184.4897"/> +</g> +<!-- write to disk --> +<g id="node10" class="node"> +<title>write to disk</title> +<ellipse fill="none" stroke="#000000" cx="231.2569" cy="-18" rx="59.1276" ry="18"/> +<text text-anchor="middle" x="231.2569" y="-13.8" font-family="Times,serif" font-size="14.00" fill="#000000">write to disk</text> +</g> +<!-- filter again->write to disk --> +<g id="edge9" class="edge"> +<title>filter again->write to disk</title> +<path fill="none" stroke="#000000" d="M231.2569,-71.8314C231.2569,-64.131 231.2569,-54.9743 231.2569,-46.4166"/> +<polygon fill="#000000" stroke="#000000" points="234.757,-46.4132 231.2569,-36.4133 227.757,-46.4133 234.757,-46.4132"/> +</g> +</g> +</svg> diff --git a/src/arrow/docs/source/cpp/streaming_execution.rst b/src/arrow/docs/source/cpp/streaming_execution.rst new file mode 100644 index 000000000..a3406265b --- /dev/null +++ b/src/arrow/docs/source/cpp/streaming_execution.rst @@ -0,0 +1,307 @@ +.. Licensed to the Apache Software Foundation (ASF) under one +.. or more contributor license agreements. See the NOTICE file +.. distributed with this work for additional information +.. regarding copyright ownership. The ASF licenses this file +.. to you under the Apache License, Version 2.0 (the +.. "License"); you may not use this file except in compliance +.. with the License. 
You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, +.. software distributed under the License is distributed on an +.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +.. KIND, either express or implied. See the License for the +.. specific language governing permissions and limitations +.. under the License. + +.. default-domain:: cpp +.. highlight:: cpp +.. cpp:namespace:: arrow::compute + +========================== +Streaming execution engine +========================== + +.. warning:: + + The streaming execution engine is experimental, and a stable API + is not yet guaranteed. + +Motivation +---------- + +For many complex computations, successive direct :ref:`invocation of +compute functions <invoking-compute-functions>` is not feasible +in either memory or computation time. Doing so causes all intermediate +data to be fully materialized. To facilitate arbitrarily large inputs +and more efficient resource usage, Arrow also provides a streaming query +engine with which computations can be formulated and executed. + +.. image:: simple_graph.svg + :alt: An example graph of a streaming execution workflow. + +:class:`ExecNode` is provided to reify the graph of operations in a query. +Batches of data (:struct:`ExecBatch`) flow along edges of the graph from +node to node. Structuring the API around streams of batches allows the +working set for each node to be tuned for optimal performance independent +of any other nodes in the graph. Each :class:`ExecNode` processes batches +as they are pushed to it along an edge of the graph by upstream nodes +(its inputs), and pushes batches along an edge of the graph to downstream +nodes (its outputs) as they are finalized. + +..seealso:: + + `SHAIKHHA, A., DASHTI, M., & KOCH, C. + (2018). Push versus pull-based loop fusion in query engines. + Journal of Functional Programming, 28. + <https://doi.org/10.1017/s0956796818000102>`_ + +Overview +-------- + +:class:`ExecNode` + Each node in the graph is an implementation of the :class:`ExecNode` interface. + +:class:`ExecPlan` + A set of :class:`ExecNode` is contained and (to an extent) coordinated by an + :class:`ExecPlan`. + +:class:`ExecFactoryRegistry` + Instances of :class:`ExecNode` are constructed by factory functions held + in a :class:`ExecFactoryRegistry`. + +:class:`ExecNodeOptions` + Heterogenous parameters for factories of :class:`ExecNode` are bundled in an + :class:`ExecNodeOptions`. + +:struct:`Declaration` + ``dplyr``-inspired helper for efficient construction of an :class:`ExecPlan`. + +:struct:`ExecBatch` + A lightweight container for a single chunk of data in the Arrow format. In + contrast to :class:`RecordBatch`, :struct:`ExecBatch` is intended for use + exclusively in a streaming execution context (for example, it doesn't have a + corresponding Python binding). Furthermore columns which happen to have a + constant value may be represented by a :class:`Scalar` instead of an + :class:`Array`. In addition, :struct:`ExecBatch` may carry + execution-relevant properties including a guaranteed-true-filter + for :class:`Expression` simplification. + + +An example :class:`ExecNode` implementation which simply passes all input batches +through unchanged:: + + class PassthruNode : public ExecNode { + public: + // InputReceived is the main entry point for ExecNodes. It is invoked + // by an input of this node to push a batch here for processing. 
+ void InputReceived(ExecNode* input, ExecBatch batch) override { + // Since this is a passthru node we simply push the batch to our + // only output here. + outputs_[0]->InputReceived(this, batch); + } + + // ErrorReceived is called by an input of this node to report an error. + // ExecNodes should always forward errors to their outputs unless they + // are able to fully handle the error (this is rare). + void ErrorReceived(ExecNode* input, Status error) override { + outputs_[0]->ErrorReceived(this, error); + } + + // InputFinished is used to signal how many batches will ultimately arrive. + // It may be called with any ordering relative to InputReceived/ErrorReceived. + void InputFinished(ExecNode* input, int total_batches) override { + outputs_[0]->InputFinished(this, total_batches); + } + + // ExecNodes may request that their inputs throttle production of batches + // until they are ready for more, or stop production if no further batches + // are required. These signals should typically be forwarded to the inputs + // of the ExecNode. + void ResumeProducing(ExecNode* output) override { inputs_[0]->ResumeProducing(this); } + void PauseProducing(ExecNode* output) override { inputs_[0]->PauseProducing(this); } + void StopProducing(ExecNode* output) override { inputs_[0]->StopProducing(this); } + + // An ExecNode has a single output schema to which all its batches conform. + using ExecNode::output_schema; + + // ExecNodes carry basic introspection for debugging purposes + const char* kind_name() const override { return "PassthruNode"; } + using ExecNode::label; + using ExecNode::SetLabel; + using ExecNode::ToString; + + // An ExecNode holds references to its inputs and outputs, so it is possible + // to walk the graph of execution if necessary. + using ExecNode::inputs; + using ExecNode::outputs; + + // StartProducing() and StopProducing() are invoked by an ExecPlan to + // coordinate the graph-wide execution state. These do not need to be + // forwarded to inputs or outputs. + Status StartProducing() override { return Status::OK(); } + void StopProducing() override {} + Future<> finished() override { return inputs_[0]->finished(); } + }; + +Note that each method which is associated with an edge of the graph must be invoked +with an ``ExecNode*`` to identify the node which invoked it. For example, in an +:class:`ExecNode` which implements ``JOIN`` this tagging might be used to differentiate +between batches from the left or right inputs. +``InputReceived``, ``ErrorReceived``, ``InputFinished`` may only be invoked by +the inputs of a node, while ``ResumeProducing``, ``PauseProducing``, ``StopProducing`` +may only be invoked by outputs of a node. + +:class:`ExecPlan` contains the associated instances of :class:`ExecNode` +and is used to start and stop execution of all nodes and for querying/awaiting +their completion:: + + // construct an ExecPlan first to hold your nodes + ARROW_ASSIGN_OR_RAISE(auto plan, ExecPlan::Make(default_exec_context())); + + // ... add nodes to your ExecPlan + + // start all nodes in the graph + ARROW_RETURN_NOT_OK(plan->StartProducing()); + + SetUserCancellationCallback([plan] { + // stop all nodes in the graph + plan->StopProducing(); + }); + + // Complete will be marked finished when all nodes have run to completion + // or acknowledged a StopProducing() signal. The ExecPlan should be kept + // alive until this future is marked finished. + Future<> complete = plan->finished(); + + +Constructing ``ExecPlan`` objects +--------------------------------- + +.. 
warning::
+
+    The following will be superseded by construction from Compute IR, see ARROW-14074.
+
+None of the concrete implementations of :class:`ExecNode` are exposed
+in headers, so they can't be constructed directly outside the
+translation unit where they are defined. Instead, factories to
+create them are provided in an extensible registry. This structure
+provides a number of benefits:
+
+- It enforces consistent construction.
+- It decouples implementations from consumers of the interface
+  (for example, scalar and grouped aggregation are implemented by two
+  different classes; the single factory can choose which to construct by
+  checking whether grouping keys are provided).
+- It expedites integration with out-of-library extensions. For example,
+  "scan" nodes are implemented in the separate ``libarrow_dataset.so`` library.
+- Since the class is not referenceable outside the translation unit in which it
+  is defined, compilers can optimize more aggressively.
+
+Factories of :class:`ExecNode` can be retrieved by name from the registry.
+The default registry is available through
+:func:`arrow::compute::default_exec_factory_registry()`
+and can be queried for the built-in factories::
+
+    // get the factory for "filter" nodes:
+    ARROW_ASSIGN_OR_RAISE(auto make_filter,
+                          default_exec_factory_registry()->GetFactory("filter"));
+
+    // factories take three arguments:
+    ARROW_ASSIGN_OR_RAISE(ExecNode* filter_node, *make_filter(
+        // the ExecPlan which should own this node
+        plan.get(),
+
+        // nodes which will send batches to this node (inputs)
+        {scan_node},
+
+        // parameters unique to "filter" nodes
+        FilterNodeOptions{filter_expression}));
+
+    // alternative shorthand:
+    ARROW_ASSIGN_OR_RAISE(filter_node, MakeExecNode("filter",
+        plan.get(), {scan_node}, FilterNodeOptions{filter_expression}));
+
+Factories can also be added to the default registry as long as they are
+convertible to ``std::function<Result<ExecNode*>(
+ExecPlan*, std::vector<ExecNode*>, const ExecNodeOptions&)>``.
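+
+For instance, a factory for the ``PassthruNode`` shown earlier could be
+registered roughly as follows. This is only a sketch: the ``AddFactory`` and
+``EmplaceNode`` calls and the ``PassthruNode`` constructor arguments are
+assumptions here; consult the ``ExecFactoryRegistry`` and ``ExecPlan`` headers
+for the exact interfaces::
+
+    // Hypothetical registration of a "passthru" factory. The factory name,
+    // AddFactory, EmplaceNode, and the node's constructor signature are
+    // assumptions for illustration only.
+    Status RegisterPassthruNode() {
+      return default_exec_factory_registry()->AddFactory(
+          "passthru",
+          [](ExecPlan* plan, std::vector<ExecNode*> inputs,
+             const ExecNodeOptions&) -> Result<ExecNode*> {
+            // A passthru node forwards batches from exactly one input.
+            if (inputs.size() != 1) {
+              return Status::Invalid("passthru requires exactly one input");
+            }
+            // Hand ownership of the new node to the plan.
+            return plan->EmplaceNode<PassthruNode>(plan, std::move(inputs));
+          });
+    }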
+ +To build an :class:`ExecPlan` representing a simple pipeline which +reads from a :class:`RecordBatchReader` then filters, projects, and +writes to disk:: + + std::shared_ptr<RecordBatchReader> reader = GetStreamOfBatches(); + ExecNode* source_node = *MakeExecNode("source", plan.get(), {}, + SourceNodeOptions::FromReader( + reader, + GetCpuThreadPool())); + + ExecNode* filter_node = *MakeExecNode("filter", plan.get(), {source_node}, + FilterNodeOptions{ + greater(field_ref("score"), literal(3)) + }); + + ExecNode* project_node = *MakeExecNode("project", plan.get(), {filter_node}, + ProjectNodeOptions{ + {add(field_ref("score"), literal(1))}, + {"score + 1"} + }); + + arrow::dataset::internal::Initialize(); + MakeExecNode("write", plan.get(), {project_node}, + WriteNodeOptions{/*base_dir=*/"/dat", /*...*/}); + +:struct:`Declaration` is a `dplyr <https://dplyr.tidyverse.org>`_-inspired +helper which further decreases the boilerplate associated with populating +an :class:`ExecPlan` from C++:: + + arrow::dataset::internal::Initialize(); + + std::shared_ptr<RecordBatchReader> reader = GetStreamOfBatches(); + ASSERT_OK(Declaration::Sequence( + { + {"source", SourceNodeOptions::FromReader( + reader, + GetCpuThreadPool())}, + {"filter", FilterNodeOptions{ + greater(field_ref("score"), literal(3))}}, + {"project", ProjectNodeOptions{ + {add(field_ref("score"), literal(1))}, + {"score + 1"}}}, + {"write", WriteNodeOptions{/*base_dir=*/"/dat", /*...*/}}, + }) + .AddToPlan(plan.get())); + +Note that a source node can wrap anything which resembles a stream of batches. +For example, `PR#11032 <https://github.com/apache/arrow/pull/11032>`_ adds +support for use of a `DuckDB <https://duckdb.org>`_ query as a source node. +Similarly, a sink node can wrap anything which absorbs a stream of batches. +In the example above we're writing completed +batches to disk. However we can also collect these in memory into a :class:`Table` +or forward them to a :class:`RecordBatchReader` as an out-of-graph stream. +This flexibility allows an :class:`ExecPlan` to be used as streaming middleware +between any endpoints which support Arrow formatted batches. + +An :class:`arrow::dataset::Dataset` can also be wrapped as a source node which +pushes all the dataset's batches into an :class:`ExecPlan`. This factory is added +to the default registry with the name ``"scan"`` by calling +``arrow::dataset::internal::Initialize()``:: + + arrow::dataset::internal::Initialize(); + + std::shared_ptr<Dataset> dataset = GetDataset(); + + ASSERT_OK(Declaration::Sequence( + { + {"scan", ScanNodeOptions{dataset, + /* push down predicate, projection, ... */}}, + {"filter", FilterNodeOptions{/* ... */}}, + // ... + }) + .AddToPlan(plan.get())); + +Datasets may be scanned multiple times; just make multiple scan +nodes from that dataset. (Useful for a self-join, for example.) +Note that producing two scan nodes like this will perform all +reads and decodes twice. diff --git a/src/arrow/docs/source/cpp/tables.rst b/src/arrow/docs/source/cpp/tables.rst new file mode 100644 index 000000000..ea9198771 --- /dev/null +++ b/src/arrow/docs/source/cpp/tables.rst @@ -0,0 +1,83 @@ +.. Licensed to the Apache Software Foundation (ASF) under one +.. or more contributor license agreements. See the NOTICE file +.. distributed with this work for additional information +.. regarding copyright ownership. The ASF licenses this file +.. to you under the Apache License, Version 2.0 (the +.. "License"); you may not use this file except in compliance +.. 
with the License. You may obtain a copy of the License at
+
+.. http://www.apache.org/licenses/LICENSE-2.0
+
+.. Unless required by applicable law or agreed to in writing,
+.. software distributed under the License is distributed on an
+.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+.. KIND, either express or implied. See the License for the
+.. specific language governing permissions and limitations
+.. under the License.
+
+.. default-domain:: cpp
+.. highlight:: cpp
+
+============
+Tabular Data
+============
+
+.. seealso::
+   :doc:`Table and RecordBatch API reference <api/table>`.
+
+While arrays and chunked arrays represent a one-dimensional sequence of
+homogeneous values, data often comes in the form of two-dimensional sets of
+heterogeneous data (such as database tables, CSV files...). Arrow provides
+several abstractions to handle such data conveniently and efficiently.
+
+Fields
+======
+
+Fields are used to denote the particular columns of a table (and also
+the particular members of a nested data type such as :class:`arrow::StructType`).
+A field, i.e. an instance of :class:`arrow::Field`, holds together a data
+type, a field name and some optional metadata.
+
+The recommended way to create a field is to call the :func:`arrow::field`
+factory function.
+
+Schemas
+=======
+
+A schema describes the overall structure of a two-dimensional dataset such
+as a table. It holds a sequence of fields together with some optional
+schema-wide metadata (in addition to per-field metadata). The recommended
+way to create a schema is to call one of the :func:`arrow::schema` factory
+function overloads::
+
+    // Create a schema describing datasets with two columns:
+    // an int32 column "A" and a utf8-encoded string column "B"
+    std::shared_ptr<arrow::Field> field_a, field_b;
+    std::shared_ptr<arrow::Schema> schema;
+
+    field_a = arrow::field("A", arrow::int32());
+    field_b = arrow::field("B", arrow::utf8());
+    schema = arrow::schema({field_a, field_b});
+
+Tables
+======
+
+A :class:`arrow::Table` is a two-dimensional dataset with chunked arrays for
+columns, together with a schema providing field names. Also, each chunked
+column must have the same logical length in number of elements (although each
+column can be chunked in a different way).
+
+Record Batches
+==============
+
+A :class:`arrow::RecordBatch` is a two-dimensional dataset of a number of
+contiguous arrays, each the same length. Like a table, a record batch also
+has a schema which must match its arrays' datatypes.
+
+Record batches are a convenient unit of work for various serialization
+and computation functions, possibly incremental.
+
+A table can be streamed as an arbitrary number of record batches using
+a :class:`arrow::TableBatchReader`. Conversely, a logical sequence of
+record batches can be assembled to form a table using one of the
+:func:`arrow::Table::FromRecordBatches` factory function overloads.
diff --git a/src/arrow/docs/source/developers/archery.rst b/src/arrow/docs/source/developers/archery.rst
new file mode 100644
index 000000000..a587975d6
--- /dev/null
+++ b/src/arrow/docs/source/developers/archery.rst
@@ -0,0 +1,87 @@
+.. Licensed to the Apache Software Foundation (ASF) under one
+.. or more contributor license agreements. See the NOTICE file
+.. distributed with this work for additional information
+.. regarding copyright ownership. The ASF licenses this file
+.. to you under the Apache License, Version 2.0 (the
+.. "License"); you may not use this file except in compliance
+.. with the License.
You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, +.. software distributed under the License is distributed on an +.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +.. KIND, either express or implied. See the License for the +.. specific language governing permissions and limitations +.. under the License. + +.. _archery: + +Daily Development using Archery +=============================== + +To ease some of the daily development tasks, we developed a Python-written +utility called Archery. + +Installation +------------ + +Archery requires Python 3.6 or later. It is recommended to install archery in +*editable* mode with the ``-e`` flag to automatically update the installation +when pulling the Arrow repository. After cloning the Arrow repository, from +the top level directory install Archery by using the command + +.. code:: bash + + pip install -e dev/archery[all] + +Usage +----- + +You can inspect Archery usage by passing the ``--help`` flag: + +.. code:: bash + + $ archery --help + Usage: archery [OPTIONS] COMMAND [ARGS]... + + Apache Arrow developer utilities. + + See sub-commands help with `archery <cmd> --help`. + + Options: + --debug Increase logging with debugging output. + --pdb Invoke pdb on uncaught exception. + -q, --quiet Silence executed commands. + --help Show this message and exit. + + Commands: + benchmark Arrow benchmarking. + build Initialize an Arrow C++ build + crossbow Schedule packaging tasks or nightly builds on CI services. + docker Interact with docker-compose based builds. + integration Execute protocol and Flight integration tests + linking Quick and dirty utilities for checking library linkage. + lint Check Arrow source tree for errors + numpydoc Lint python docstring with NumpyDoc + release Release releated commands. + trigger-bot + +Archery exposes independent subcommands, each of which provides dedicated +help output, for example: + +.. code:: bash + + $ archery docker --help + Usage: archery docker [OPTIONS] COMMAND [ARGS]... + + Interact with docker-compose based builds. + + Options: + --src <arrow_src> Specify Arrow source directory. + --help Show this message and exit. + + Commands: + images List the available docker-compose images. + push Push the generated docker-compose image. + run Execute docker-compose builds. diff --git a/src/arrow/docs/source/developers/benchmarks.rst b/src/arrow/docs/source/developers/benchmarks.rst new file mode 100644 index 000000000..22eb5159d --- /dev/null +++ b/src/arrow/docs/source/developers/benchmarks.rst @@ -0,0 +1,179 @@ +.. Licensed to the Apache Software Foundation (ASF) under one +.. or more contributor license agreements. See the NOTICE file +.. distributed with this work for additional information +.. regarding copyright ownership. The ASF licenses this file +.. to you under the Apache License, Version 2.0 (the +.. "License"); you may not use this file except in compliance +.. with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, +.. software distributed under the License is distributed on an +.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +.. KIND, either express or implied. See the License for the +.. specific language governing permissions and limitations +.. under the License. + +.. 
_benchmarks:
+
+==========
+Benchmarks
+==========
+
+Setup
+=====
+
+First install the :ref:`Archery <archery>` utility to run the benchmark suite.
+
+Running the benchmark suite
+===========================
+
+The benchmark suites can be run with the ``benchmark run`` sub-command.
+
+.. code-block:: shell
+
+    # Run benchmarks in the current git workspace
+    archery benchmark run
+    # Storing the results in a file
+    archery benchmark run --output=run.json
+
+Sometimes, it is required to pass custom CMake flags, e.g.
+
+.. code-block:: shell
+
+    export CC=clang-8 CXX=clang++8
+    archery benchmark run --cmake-extras="-DARROW_SIMD_LEVEL=NONE"
+
+Additionally, a full CMake build directory may be specified.
+
+.. code-block:: shell
+
+    archery benchmark run $HOME/arrow/cpp/release-build
+
+Comparison
+==========
+
+One goal with benchmarking is to detect performance regressions. To this end,
+``archery`` implements a benchmark comparison facility via the ``benchmark
+diff`` sub-command.
+
+In the default invocation, it will compare the current source (known as the
+current workspace in git) with the local master branch:
+
+.. code-block:: shell
+
+    archery --quiet benchmark diff --benchmark-filter=FloatParsing
+    -----------------------------------------------------------------------------------
+    Non-regressions: (1)
+    -----------------------------------------------------------------------------------
+                  benchmark            baseline           contender  change %  counters
+    FloatParsing<FloatType>  105.983M items/sec  105.983M items/sec       0.0        {}
+
+    ------------------------------------------------------------------------------------
+    Regressions: (1)
+    ------------------------------------------------------------------------------------
+                   benchmark            baseline           contender  change %  counters
+    FloatParsing<DoubleType>  209.941M items/sec  109.941M items/sec   -47.632        {}
+
+For more information and multiple examples of invocation, see
+``archery benchmark diff --help``.
+
+Iterating efficiently
+~~~~~~~~~~~~~~~~~~~~~
+
+Iterating during benchmark development can be a tedious process due to long
+build and run times. Multiple tricks can be used with
+``archery benchmark diff`` to reduce this overhead.
+
+First, the benchmark command supports comparing existing
+build directories. This can be paired with the ``--preserve`` flag to
+avoid rebuilding sources from scratch.
+
+.. code-block:: shell
+
+    # The first invocation clones and checks out in a temporary directory. The
+    # directory is preserved with --preserve
+    archery benchmark diff --preserve
+
+    # Modify C++ sources
+
+    # Re-run benchmark in the previously created build directory.
+    archery benchmark diff /tmp/arrow-bench*/{WORKSPACE,master}/build
+
+Second, a benchmark run's result can be saved in a JSON file. This avoids not
+only rebuilding the sources but also executing the (sometimes) heavy
+benchmarks. This technique can be used as a poor man's cache.
+
+.. code-block:: shell
+
+    # Run the benchmarks on a given commit and save the result
+    archery benchmark run --output=run-head-1.json HEAD~1
+    # Compare the previous captured result with HEAD
+    archery benchmark diff HEAD run-head-1.json
+
+Third, the benchmark command supports filtering suites (``--suite-filter``)
+and benchmarks (``--benchmark-filter``); both options support regular
+expressions.
+
+.. code-block:: shell
+
+    # Taking over a previous run, but only filtering for benchmarks matching
+    # `Kernel` and suite matching `compute-aggregate`.
+    archery benchmark diff \
+        --suite-filter=compute-aggregate --benchmark-filter=Kernel \
+        /tmp/arrow-bench*/{WORKSPACE,master}/build
+
+Instead of rerunning benchmarks on comparison, a JSON file (generated by
+``archery benchmark run``) may be specified for the contender and/or the
+baseline.
+
+.. code-block:: shell
+
+    archery benchmark run --output=baseline.json $HOME/arrow/cpp/release-build
+    git checkout some-feature
+    archery benchmark run --output=contender.json $HOME/arrow/cpp/release-build
+    archery benchmark diff contender.json baseline.json
+
+Regression detection
+====================
+
+Writing a benchmark
+~~~~~~~~~~~~~~~~~~~
+
+1. The benchmark command will filter (by default) benchmarks with the regular
+   expression ``^Regression``. This way, not all benchmarks are run by default.
+   Thus, if you want your benchmark to be verified for regressions
+   automatically, the name must match.
+
+2. The benchmark command will run with the ``--benchmark_repetitions=K``
+   option for statistical significance. Thus, a benchmark should not override
+   the repetitions in the (C++) benchmark's arguments definition.
+
+3. Due to #2, a benchmark should run sufficiently fast. Often, when the input
+   does not fit in memory (L2/L3), the benchmark will be memory bound instead
+   of CPU bound. In this case, the input can be downsized.
+
+4. By default, Google's benchmark library uses the cputime metric, which is
+   the sum of the runtime spent on the CPU by all threads of the process, as
+   opposed to realtime, which is the wall clock time (the difference between
+   end time and start time). In a single-threaded model, cputime is preferable
+   since it is less affected by context switching. In a multi-threaded
+   scenario, cputime gives misleading results since it is inflated by the
+   number of threads and can be far off from realtime. Thus, if the benchmark
+   is multi-threaded, it might be better to use ``UseRealTime()``, see this
+   `example <https://github.com/apache/arrow/blob/a9582ea6ab2db055656809a2c579165fe6a811ba/cpp/src/arrow/io/memory-benchmark.cc#L223-L227>`_.
+
+Scripting
+=========
+
+``archery`` is written as a Python library with a command line frontend. The
+library can be imported to automate some tasks.
+
+Some invocations of the command line interface can be quite verbose due to
+build output. This can be controlled or avoided with the ``--quiet`` option,
+or the ``--output=<file>`` option can be used, e.g.
+
+.. code-block:: shell
+
+    archery benchmark diff --benchmark-filter=Kernel --output=compare.json ...
diff --git a/src/arrow/docs/source/developers/computeir.rst b/src/arrow/docs/source/developers/computeir.rst
new file mode 100644
index 000000000..9ebe1d5af
--- /dev/null
+++ b/src/arrow/docs/source/developers/computeir.rst
@@ -0,0 +1,59 @@
+.. Licensed to the Apache Software Foundation (ASF) under one
+.. or more contributor license agreements. See the NOTICE file
+.. distributed with this work for additional information
+.. regarding copyright ownership. The ASF licenses this file
+.. to you under the Apache License, Version 2.0 (the
+.. "License"); you may not use this file except in compliance
+.. with the License. You may obtain a copy of the License at
+
+.. http://www.apache.org/licenses/LICENSE-2.0
+
+.. Unless required by applicable law or agreed to in writing,
+.. software distributed under the License is distributed on an
+.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+.. KIND, either express or implied. See the License for the
+..
specific language governing permissions and limitations +.. under the License. + +********************************************** +Arrow Compute IR (Intermediate Representation) +********************************************** + +In the same way that the Arrow format provides a powerful tool +for communicating data, Compute IR is intended to provide a +consistent format for representing analytical operations against +that data. As an arrow-native expression of computation it includes +information such as explicit types and schemas and arrow formatted +literal data. It is also optimized for low runtime overhead in both +serialization and deserialization. + +Built-in definitions are included to enable representation of +relational algebraic operations- the contents of a "logical query plan". +Compute IR also has first class support for representing operations +which are not members of a minimal relational algebra, including +implementation and optimization details- the contents of a "physical +query plan". This approach is taken in emulation of `MLIR`_ (Multi-Level +Intermediate Representation), a system which has had strong successes in +spaces of comparable complexity to representation of analytic operations. +To borrow terms from that project, there are two mutations of interest: + +* Replacement of representations with semantically equivalent representations + which will yield better performance for consumers- an optimization pass. +* Replacement of abstract or generic representations with more specific + and potentially consumer-specific representations- a lowering pass. + This modification corresponds to the translation of a logical plan + to a physical plan. + +Allowing representation of physical plans (and plans which are between +logical and physical) in Compute IR enables systems to define incremental +optimization and lowering passes which operate on and produce valid +Compute IR. This in turn enables communication, manipulation, and inspection +at every stage of lowering/optimization by the same tools +used for logical-plan-equivalent-IR. This is especially useful for systems +where such passes may depend on information only available on every node +of a distributed consumer (for example statistics unique to that node's +local data) or may not be universal to all backends in a heterogeneous +consumer (for example which optimizations nodes are capable of for +non equi joins). + +.. _MLIR: https://mlir.llvm.org diff --git a/src/arrow/docs/source/developers/contributing.rst b/src/arrow/docs/source/developers/contributing.rst new file mode 100644 index 000000000..9b81a6ff1 --- /dev/null +++ b/src/arrow/docs/source/developers/contributing.rst @@ -0,0 +1,362 @@ +.. Licensed to the Apache Software Foundation (ASF) under one +.. or more contributor license agreements. See the NOTICE file +.. distributed with this work for additional information +.. regarding copyright ownership. The ASF licenses this file +.. to you under the Apache License, Version 2.0 (the +.. "License"); you may not use this file except in compliance +.. with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, +.. software distributed under the License is distributed on an +.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +.. KIND, either express or implied. See the License for the +.. specific language governing permissions and limitations +.. under the License. + +.. 
_contributing: + +**************************** +Contributing to Apache Arrow +**************************** + +Thanks for your interest in the Apache Arrow project. Arrow is a large project +and may seem overwhelming when you're first getting involved. +Contributing code is great, but that's probably not the first place to start. +There are lots of ways to make valuable contributions to the project and +community. + +This page provides some orientation for how to get involved. It also offers +some recommendations on how to get best results when engaging with the +community. + +Code of Conduct +=============== + +All participation in the Apache Arrow project is governed by the ASF's +`Code of Conduct <https://www.apache.org/foundation/policies/conduct.html>`_. + +Join the mailing lists +====================== + +A good first step to getting involved in the Arrow project is to join the +mailing lists and participate in discussions where you can. +Projects in The Apache Software Foundation ("the ASF") use public, archived +mailing lists to create a public record of each project's development +activities and decision-making process. +While lacking the immediacy of chat or other forms of communication, +the mailing lists give participants the opportunity to slow down and be +thoughtful in their responses, and they help developers who are spread across +many timezones to participate more equally. + +See `the community page <https://arrow.apache.org/community/>`_ for links to +subscribe to the mailing lists and to view archives. + +Report bugs and propose features +================================ + +Using the software and sharing your experience is a very helpful contribution +itself. Those who actively develop Arrow need feedback from users on what +works and what doesn't. Alerting us to unexpected behavior and missing features, +even if you can't solve the problems yourself, help us understand and prioritize +work to improve the libraries. + +We use `JIRA <https://issues.apache.org/jira/projects/ARROW/issues>`_ +to manage our development "todo" list and to maintain changelogs for releases. +In addition, the project's `Confluence site <https://cwiki.apache.org/confluence/display/ARROW>`_ +has some useful higher-level views of the JIRA issues. + +To create a JIRA issue, you'll need to have an account on the ASF JIRA, which +you can `sign yourself up for <https://issues.apache.org/jira/secure/Signup!default.jspa>`_. +The JIRA server hosts bugs and issues for multiple Apache projects. The JIRA +project name for Arrow is "ARROW". + +You don't need any special permissions on JIRA to be able to create issues. +Once you are more involved in the project and want to do more on JIRA, such as +assign yourself an issue, you will need "Contributor" permissions on the +Apache Arrow JIRA. To get this role, ask on the mailing list for a project +maintainer's help. + +Tips for using JIRA ++++++++++++++++++++ + +Before you create a new issue, we recommend you first +`search <https://issues.apache.org/jira/issues/?jql=project%20%3D%20ARROW%20AND%20resolution%20%3D%20Unresolved>`_ +among existing Arrow issues. + +When reporting a new issue, follow these conventions to help make sure the +right people see it: + +* Use the **Component** field to indicate the area of the project that your + issue pertains to (for example "Python" or "C++"). 
+* Also prefix the issue title with the component name in brackets, for example + ``[Python] issue name`` ; this helps when navigating lists of open issues, + and it also makes our changelogs more readable. Most prefixes are exactly the + same as the **Component** name, with the following exceptions: + + * **Component:** Continuous Integration — **Summary prefix:** [CI] + * **Component:** Developer Tools — **Summary prefix:** [Dev] + * **Component:** Documentation — **Summary prefix:** [Docs] + +* If you're reporting something that used to work in a previous version + but doesn't work in the current release, you can add the "Affects version" + field. For feature requests and other proposals, "Affects version" isn't + appropriate. + +Project maintainers may later tweak formatting and labels to help improve their +visibility. They may add a "Fix version" to indicate that they're considering +it for inclusion in the next release, though adding that tag is not a +commitment that it will be done in the next release. + +Tips for successful bug reports ++++++++++++++++++++++++++++++++ + +No one likes having bugs in their software, and in an ideal world, all bugs +would get fixed as soon as they were reported. However, time and attention are +finite, especially in an open-source project where most contributors are +participating in their spare time. All contributors in Apache projects are +volunteers and act as individuals, even if they are contributing to the project +as part of their job responsibilities. + +In order for your bug to get prompt +attention, there are things you can do to make it easier for contributors to +reproduce and fix it. +When you're reporting a bug, please help us understand the issue by providing, +to the best of your ability, + +* Clear, minimal steps to reproduce the issue, with as few non-Arrow + dependencies as possible. If there's a problem on reading a file, try to + provide as small of an example file as possible, or code to create one. + If your bug report says "it crashes trying to read my file, but I can't + share it with you," it's really hard for us to debug. +* Any relevant operating system, language, and library version information +* If it isn't obvious, clearly state the expected behavior and what actually + happened. + +If a developer can't get a failing unit test, they won't be able to know that +the issue has been identified, and they won't know when it has been fixed. +Try to anticipate the questions you might be asked by someone working to +understand the issue and provide those supporting details up front. + +Other resources: + +* `Mozilla's bug-reporting guidelines <https://developer.mozilla.org/en-US/docs/Mozilla/QA/Bug_writing_guidelines>`_ +* `Reprex do's and don'ts <https://reprex.tidyverse.org/articles/reprex-dos-and-donts.html>`_ + +Improve documentation +===================== + +A great way to contribute to the project is to improve documentation. If you +found some docs to be incomplete or inaccurate, share your hard-earned knowledge +with the rest of the community. + +Documentation improvements are also a great way to gain some experience with +our submission and review process, discussed below, without requiring a lot +of local development environment setup. In fact, many documentation-only changes +can be made directly in the GitHub web interface by clicking the "edit" button. +This will handle making a fork and a pull request for you. 
+ +Contribute code +=============== + +Code contributions, or "patches", are delivered in the form of GitHub pull +requests against the `github.com/apache/arrow +<https://github.com/apache/arrow>`_ repository. + +Before starting ++++++++++++++++ + +You'll first need to select a JIRA issue to work on. Perhaps you're working on +one you reported yourself. Otherwise, if you're looking for something, +`search <https://issues.apache.org/jira/issues/?jql=project%20%3D%20ARROW%20AND%20resolution%20%3D%20Unresolved>`_ +the open issues. Anything that's not in the "In Progress" state is fair game, +even if it is "Assigned" to someone, particularly if it has not been +recently updated. When in doubt, comment on the issue asking if they mind +if you try to put together a pull request; interpret no response to mean that +you're free to proceed. + +Please do ask questions, either on the JIRA itself or on the dev mailing list, +if you have doubts about where to begin or what approach to take. +This is particularly a good idea if this is your first code contribution, +so you can get some sense of what the core developers in this part of the +project think a good solution looks like. For best results, ask specific, +direct questions, such as: + +* Do you think $PROPOSED_APPROACH is the right one? +* In which file(s) should I be looking to make changes? +* Is there anything related in the codebase I can look at to learn? + +If you ask these questions and do not get an answer, it is OK to ask again. + +Pull request and review ++++++++++++++++++++++++ + +To contribute a patch: + +* Submit the patch as a GitHub pull request against the master branch. For a + tutorial, see the GitHub guides on `forking a repo <https://help.github.com/en/articles/fork-a-repo>`_ + and `sending a pull request <https://help.github.com/en/articles/creating-a-pull-request-from-a-fork>`_. + So that your pull request syncs with the JIRA issue, prefix your pull request + name with the JIRA issue id (ex: + `ARROW-767: [C++] Filesystem abstraction <https://github.com/apache/arrow/pull/4225>`_). +* Give the pull request a clear, brief description: when the pull request is + merged, this will be retained in the extended commit message. +* Make sure that your code passes the unit tests. You can find instructions how + to run the unit tests for each Arrow component in its respective README file. + +Core developers and others with a stake in the part of the project your change +affects will review, request changes, and hopefully indicate their approval +in the end. To make the review process smooth for everyone, try to + +* Break your work into small, single-purpose patches if possible. It’s much + harder to merge in a large change with a lot of disjoint features, and + particularly if you're new to the project, smaller changes are much easier + for maintainers to accept. +* Add new unit tests for your code. +* Follow the style guides for the part(s) of the project you're modifying. + Some languages (C++ and Python, for example) run a lint check in + continuous integration. For all languages, see their respective developer + documentation and READMEs for style guidance. In general, try to make it look + as if the codebase has a single author, and emulate any conventions you see, + whether or not they are officially documented or checked. + +When tests are passing and the pull request has been approved by the interested +parties, a `committer <https://arrow.apache.org/committers/>`_ +will merge the pull request. 
This is done with a +command-line utility that does a squash merge, so all of your commits will be +registered as a single commit to the master branch; this simplifies the +connection between JIRA issues and commits, makes it easier to bisect +history to identify where changes were introduced, and helps us be able to +cherry-pick individual patches onto a maintenance branch. + +A side effect of this way of +merging is that your pull request will appear in the GitHub interface to have +been "closed without merge". Do not be alarmed: if you look at the bottom, you +will see a message that says ``@user closed this in $COMMIT``. In the commit +message of that commit, the merge tool adds the pull request description, a +link back to the pull request, and attribution to the contributor and any +co-authors. + +Local git conventions ++++++++++++++++++++++ + +If you are tracking the Arrow source repository locally, here are some tips +for using ``git``. + +All Arrow contributors work off of their personal fork of ``apache/arrow`` +and submit pull requests "upstream". Once you've cloned your fork of Arrow, +be sure to:: + + $ git remote add upstream https://github.com/apache/arrow + +to set the "upstream" repository. + +You are encouraged to develop on branches, rather than your own "master" branch, +and it helps to keep your fork's master branch synced with ``upstream/master``. + +To start a new branch, pull the latest from upstream first:: + + $ git fetch upstream + $ git checkout master + $ git pull --ff-only upstream master + $ git checkout -b $BRANCH + +It does not matter what you call your branch. Some people like to use the JIRA +number as branch name, others use descriptive names. + +Once you have a branch going, you should sync with ``upstream/master`` +regularly, as many commits are merged to master every day. +It is recommended to use ``git rebase`` rather than ``git merge``. +To sync your local copy of a branch, you may do the following:: + + $ git pull upstream $BRANCH --rebase + +This will rebase your local commits on top of the tip of ``upstream/$BRANCH``. In case +there are conflicts, and your local commit history has multiple commits, you may +simplify the conflict resolution process by squashing your local commits into a single +commit. Preserving the commit history isn't as important because when your +feature branch is merged upstream, a squash happens automatically. If you choose this +route, you can abort the rebase with:: + + $ git rebase --abort + +Following which, the local commits can be squashed interactively by running:: + + $ git rebase --interactive ORIG_HEAD~n + +Where ``n`` is the number of commits you have in your local branch. After the squash, +you can try the merge again, and this time conflict resolution should be relatively +straightforward. + +If you set the following in your repo's ``.git/config``, the ``--rebase`` option can be +omitted from the ``git pull`` command, as it is implied by default. :: + + [pull] + rebase = true + +Once you have an updated local copy, you can push to your remote repo. Note, since your +remote repo still holds the old history, you would need to do a force push. :: + + $ git push --force origin branch + +*Note about force pushing to a branch that is being reviewed:* if you want reviewers to +look at your updates, please ensure you comment on the PR on GitHub as simply force +pushing does not trigger a notification in the GitHub user interface. 
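+
+For instance, refreshing a branch that already has an open pull request might
+look like the following sketch (substitute your own branch name; this simply
+combines the steps described above)::
+
+    $ git checkout $BRANCH
+    $ git pull --rebase origin $BRANCH     # pick up commits added to the PR on GitHub
+    $ git pull --rebase upstream master    # rebase onto the latest upstream master
+    $ git push --force origin $BRANCH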
+ +Also, once you have a pull request up, be sure you pull from ``origin`` +before rebasing and force-pushing. Arrow maintainers can push commits directly +to your branch, which they sometimes do to help move a pull request along. +In addition, the GitHub PR "suggestion" feature can also add commits to +your branch, so it is possible that your local copy of your branch is missing +some additions. + +.. include:: experimental_repos.rst + +Guidance for specific features +============================== + +From time to time the community has discussions on specific types of features +and improvements that they expect to support. This section outlines decisions +that have been made in this regard. + +Endianness +++++++++++ + +The Arrow format allows setting endianness. Due to the popularity of +little endian architectures most of implementation assume little endian by +default. There has been some effort to support big endian platforms as well. +Based on a `mailing-list discussion +<https://mail-archives.apache.org/mod_mbox/arrow-dev/202009.mbox/%3cCAK7Z5T--HHhr9Dy43PYhD6m-XoU4qoGwQVLwZsG-kOxXjPTyZA@mail.gmail.com%3e>`__, +the requirements for a new platform are: + +1. A robust (non-flaky, returning results in a reasonable time) Continuous + Integration setup. +2. Benchmarks for performance critical parts of the code to demonstrate + no regression. + +Furthermore, for big-endian support, there are two levels that an +implementation can support: + +1. Native endianness (all Arrow communication happens with processes of the + same endianness). This includes ancillary functionality such as reading + and writing various file formats, such as Parquet. +2. Cross endian support (implementations will do byte reordering when + appropriate for :ref:`IPC <format-ipc>` and :ref:`Flight <flight-rpc>` + messages). + +The decision on what level to support is based on maintainers' preferences for +complexity and technical risk. In general all implementations should be open +to native endianness support (provided the CI and performance requirements +are met). Cross endianness support is a question for individual maintainers. + +The current implementations aiming for cross endian support are: + +1. C++ + +Implementations that do not intend to implement cross endian support: + +1. Java + +For other libraries, a discussion to gather consensus on the mailing-list +should be had before submitting PRs. diff --git a/src/arrow/docs/source/developers/cpp/building.rst b/src/arrow/docs/source/developers/cpp/building.rst new file mode 100644 index 000000000..6b18c7312 --- /dev/null +++ b/src/arrow/docs/source/developers/cpp/building.rst @@ -0,0 +1,510 @@ +.. Licensed to the Apache Software Foundation (ASF) under one +.. or more contributor license agreements. See the NOTICE file +.. distributed with this work for additional information +.. regarding copyright ownership. The ASF licenses this file +.. to you under the Apache License, Version 2.0 (the +.. "License"); you may not use this file except in compliance +.. with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, +.. software distributed under the License is distributed on an +.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +.. KIND, either express or implied. See the License for the +.. specific language governing permissions and limitations +.. under the License. + +.. 
_building-arrow-cpp: + +================== +Building Arrow C++ +================== + +System setup +============ + +Arrow uses CMake as a build configuration system. We recommend building +out-of-source. If you are not familiar with this terminology: + +* **In-source build**: ``cmake`` is invoked directly from the ``cpp`` + directory. This can be inflexible when you wish to maintain multiple build + environments (e.g. one for debug builds and another for release builds) +* **Out-of-source build**: ``cmake`` is invoked from another directory, + creating an isolated build environment that does not interact with any other + build environment. For example, you could create ``cpp/build-debug`` and + invoke ``cmake $CMAKE_ARGS ..`` from this directory + +Building requires: + +* A C++11-enabled compiler. On Linux, gcc 4.8 and higher should be + sufficient. For Windows, at least Visual Studio 2017 is required. +* CMake 3.5 or higher +* On Linux and macOS, either ``make`` or ``ninja`` build utilities + +On Ubuntu/Debian you can install the requirements with: + +.. code-block:: shell + + sudo apt-get install \ + build-essential \ + cmake + +On Alpine Linux: + +.. code-block:: shell + + apk add autoconf \ + bash \ + cmake \ + g++ \ + gcc \ + make + +On Fedora Linux: + +.. code-block:: shell + + sudo dnf install \ + cmake \ + gcc \ + gcc-c++ \ + make + +On macOS, you can use `Homebrew <https://brew.sh/>`_: + +.. code-block:: shell + + git clone https://github.com/apache/arrow.git + cd arrow + brew update && brew bundle --file=cpp/Brewfile + +With `vcpkg <https://github.com/Microsoft/vcpkg>`_: + +.. code-block:: shell + + git clone https://github.com/apache/arrow.git + cd arrow + vcpkg install \ + --x-manifest-root cpp \ + --feature-flags=versions \ + --clean-after-build + +On MSYS2: + +.. code-block:: shell + + pacman --sync --refresh --noconfirm \ + ccache \ + git \ + mingw-w64-${MSYSTEM_CARCH}-boost \ + mingw-w64-${MSYSTEM_CARCH}-brotli \ + mingw-w64-${MSYSTEM_CARCH}-cmake \ + mingw-w64-${MSYSTEM_CARCH}-gcc \ + mingw-w64-${MSYSTEM_CARCH}-gflags \ + mingw-w64-${MSYSTEM_CARCH}-glog \ + mingw-w64-${MSYSTEM_CARCH}-gtest \ + mingw-w64-${MSYSTEM_CARCH}-lz4 \ + mingw-w64-${MSYSTEM_CARCH}-protobuf \ + mingw-w64-${MSYSTEM_CARCH}-python3-numpy \ + mingw-w64-${MSYSTEM_CARCH}-rapidjson \ + mingw-w64-${MSYSTEM_CARCH}-snappy \ + mingw-w64-${MSYSTEM_CARCH}-thrift \ + mingw-w64-${MSYSTEM_CARCH}-zlib \ + mingw-w64-${MSYSTEM_CARCH}-zstd + +Building +======== + +The build system uses ``CMAKE_BUILD_TYPE=release`` by default, so if this +argument is omitted then a release build will be produced. + +.. note:: + + You need to more options to build on Windows. See + :ref:`developers-cpp-windows` for details. + +Minimal release build: + +.. code-block:: shell + + git clone https://github.com/apache/arrow.git + cd arrow/cpp + mkdir release + cd release + cmake .. + make + +Minimal debug build with unit tests: + +.. code-block:: shell + + git clone https://github.com/apache/arrow.git + cd arrow + git submodule update --init --recursive + export ARROW_TEST_DATA=$PWD/testing/data + cd cpp + mkdir debug + cd debug + cmake -DCMAKE_BUILD_TYPE=Debug -DARROW_BUILD_TESTS=ON .. + make unittest + +The unit tests are not built by default. After building, one can also invoke +the unit tests using the ``ctest`` tool provided by CMake (note that ``test`` +depends on ``python`` being available). + +On some Linux distributions, running the test suite might require setting an +explicit locale. 
If you see any locale-related errors, try setting the +environment variable (which requires the `locales` package or equivalent): + +.. code-block:: shell + + export LC_ALL="en_US.UTF-8" + +Faster builds with Ninja +~~~~~~~~~~~~~~~~~~~~~~~~ + +Many contributors use the `Ninja build system <https://ninja-build.org/>`_ to +get faster builds. It especially speeds up incremental builds. To use +``ninja``, pass ``-GNinja`` when calling ``cmake`` and then use the ``ninja`` +command instead of ``make``. + +Optional Components +~~~~~~~~~~~~~~~~~~~ + +By default, the C++ build system creates a fairly minimal build. We have +several optional system components which you can opt into building by passing +boolean flags to ``cmake``. + +* ``-DARROW_BUILD_UTILITIES=ON`` : Build Arrow commandline utilities +* ``-DARROW_COMPUTE=ON``: Computational kernel functions and other support +* ``-DARROW_CSV=ON``: CSV reader module +* ``-DARROW_CUDA=ON``: CUDA integration for GPU development. Depends on NVIDIA + CUDA toolkit. The CUDA toolchain used to build the library can be customized + by using the ``$CUDA_HOME`` environment variable. +* ``-DARROW_DATASET=ON``: Dataset API, implies the Filesystem API +* ``-DARROW_FILESYSTEM=ON``: Filesystem API for accessing local and remote + filesystems +* ``-DARROW_FLIGHT=ON``: Arrow Flight RPC system, which depends at least on + gRPC +* ``-DARROW_GANDIVA=ON``: Gandiva expression compiler, depends on LLVM, + Protocol Buffers, and re2 +* ``-DARROW_GANDIVA_JAVA=ON``: Gandiva JNI bindings for Java +* ``-DARROW_GCS=ON``: Build Arrow with GCS support (requires the GCloud SDK for C++) +* ``-DARROW_HDFS=ON``: Arrow integration with libhdfs for accessing the Hadoop + Filesystem +* ``-DARROW_HIVESERVER2=ON``: Client library for HiveServer2 database protocol +* ``-DARROW_JEMALLOC=ON``: Build the Arrow jemalloc-based allocator, on by default +* ``-DARROW_JSON=ON``: JSON reader module +* ``-DARROW_MIMALLOC=ON``: Build the Arrow mimalloc-based allocator +* ``-DARROW_ORC=ON``: Arrow integration with Apache ORC +* ``-DARROW_PARQUET=ON``: Apache Parquet libraries and Arrow integration +* ``-DARROW_PLASMA=ON``: Plasma Shared Memory Object Store +* ``-DARROW_PLASMA_JAVA_CLIENT=ON``: Build Java client for Plasma +* ``-DARROW_PYTHON=ON``: Arrow Python C++ integration library (required for + building pyarrow). This library must be built against the same Python version + for which you are building pyarrow. NumPy must also be installed. Enabling + this option also enables ``ARROW_COMPUTE``, ``ARROW_CSV``, ``ARROW_DATASET``, + ``ARROW_FILESYSTEM``, ``ARROW_HDFS``, and ``ARROW_JSON``. 
+* ``-DARROW_S3=ON``: Support for Amazon S3-compatible filesystems
+* ``-DARROW_WITH_RE2=ON``: Build with support for regular expressions using the re2
+  library, on by default and used when ``ARROW_COMPUTE`` or ``ARROW_GANDIVA`` is ``ON``
+* ``-DARROW_WITH_UTF8PROC=ON``: Build with support for Unicode properties using
+  the utf8proc library, on by default and used when ``ARROW_COMPUTE`` or ``ARROW_GANDIVA``
+  is ``ON``
+* ``-DARROW_TENSORFLOW=ON``: Build Arrow with TensorFlow support enabled
+
+Compression options available in Arrow are:
+
+* ``-DARROW_WITH_BROTLI=ON``: Build support for Brotli compression
+* ``-DARROW_WITH_BZ2=ON``: Build support for BZ2 compression
+* ``-DARROW_WITH_LZ4=ON``: Build support for lz4 compression
+* ``-DARROW_WITH_SNAPPY=ON``: Build support for Snappy compression
+* ``-DARROW_WITH_ZLIB=ON``: Build support for zlib (gzip) compression
+* ``-DARROW_WITH_ZSTD=ON``: Build support for ZSTD compression
+
+Some features of the core Arrow shared library can be switched off for improved
+build times if they are not required for your application:
+
+* ``-DARROW_IPC=ON``: build the IPC extensions
+
+Optional Targets
+~~~~~~~~~~~~~~~~
+
+For development builds, you will often want to enable additional targets in
+order to exercise your changes, using the following ``cmake`` options:
+
+* ``-DARROW_BUILD_BENCHMARKS=ON``: Build executable benchmarks.
+* ``-DARROW_BUILD_EXAMPLES=ON``: Build examples of using the Arrow C++ API.
+* ``-DARROW_BUILD_INTEGRATION=ON``: Build additional executables that are
+  used to exercise protocol interoperability between the different Arrow
+  implementations.
+* ``-DARROW_BUILD_UTILITIES=ON``: Build executable utilities.
+* ``-DARROW_BUILD_TESTS=ON``: Build executable unit tests.
+* ``-DARROW_ENABLE_TIMING_TESTS=ON``: If building unit tests, enable those
+  unit tests that rely on wall-clock timing (this flag is disabled on CI
+  because it can make test results flaky).
+* ``-DARROW_FUZZING=ON``: Build fuzz targets and related executables.
+
+Optional Checks
+~~~~~~~~~~~~~~~
+
+The following special checks are available as well. They instrument the
+generated code in various ways so as to detect select classes of problems
+at runtime (for example when executing unit tests).
+
+* ``-DARROW_USE_ASAN=ON``: Enable Address Sanitizer to check for memory leaks,
+  buffer overflows or other kinds of memory management issues.
+* ``-DARROW_USE_TSAN=ON``: Enable Thread Sanitizer to check for races in
+  multi-threaded code.
+* ``-DARROW_USE_UBSAN=ON``: Enable Undefined Behavior Sanitizer to check for
+  situations which trigger C++ undefined behavior.
+
+Some of those options are mutually incompatible, so you may have to build
+several times with different options if you want to exercise all of them.
+
+CMake version requirements
+~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+While we support CMake 3.5 and higher, some features require a newer version of
+CMake:
+
+* Building the benchmarks requires 3.6 or higher
+* Building zstd from source requires 3.7 or higher
+* Building Gandiva JNI bindings requires 3.11 or higher
+
+LLVM and Clang Tools
+~~~~~~~~~~~~~~~~~~~~
+
+We are currently using LLVM 8 for library builds and for other developer tools
+such as code formatting with ``clang-format``. LLVM can be installed via most
+modern package managers (apt, yum, conda, Homebrew, vcpkg, chocolatey).
+
+..
_cpp-build-dependency-management: + +Build Dependency Management +=========================== + +The build system supports a number of third-party dependencies + + * ``AWSSDK``: for S3 support, requires system cURL and can use the + ``BUNDLED`` method described below + * ``benchmark``: Google benchmark, for testing + * ``Boost``: for cross-platform support + * ``Brotli``: for data compression + * ``BZip2``: for data compression + * ``c-ares``: a dependency of gRPC + * ``gflags``: for command line utilities (formerly Googleflags) + * ``GLOG``: for logging + * ``google_cloud_cpp_storage``: for Google Cloud Storage support, requires + system cURL and can use the ``BUNDLED`` method described below + * ``gRPC``: for remote procedure calls + * ``GTest``: Googletest, for testing + * ``LLVM``: a dependency of Gandiva + * ``Lz4``: for data compression + * ``ORC``: for Apache ORC format support + * ``re2``: for compute kernels and Gandiva, a dependency of gRPC + * ``Protobuf``: Google Protocol Buffers, for data serialization + * ``RapidJSON``: for data serialization + * ``Snappy``: for data compression + * ``Thrift``: Apache Thrift, for data serialization + * ``utf8proc``: for compute kernels + * ``ZLIB``: for data compression + * ``zstd``: for data compression + +The CMake option ``ARROW_DEPENDENCY_SOURCE`` is a global option that instructs +the build system how to resolve each dependency. There are a few options: + +* ``AUTO``: Try to find package in the system default locations and build from + source if not found +* ``BUNDLED``: Building the dependency automatically from source +* ``SYSTEM``: Finding the dependency in system paths using CMake's built-in + ``find_package`` function, or using ``pkg-config`` for packages that do not + have this feature +* ``CONDA``: Use ``$CONDA_PREFIX`` as alternative ``SYSTEM`` PATH +* ``VCPKG``: Find dependencies installed by vcpkg, and if not found, run + ``vcpkg install`` to install them +* ``BREW``: Use Homebrew default paths as an alternative ``SYSTEM`` path + +The default method is ``AUTO`` unless you are developing within an active conda +environment (detected by presence of the ``$CONDA_PREFIX`` environment +variable), in which case it is ``CONDA``. + +Individual Dependency Resolution +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +While ``-DARROW_DEPENDENCY_SOURCE=$SOURCE`` sets a global default for all +packages, the resolution strategy can be overridden for individual packages by +setting ``-D$PACKAGE_NAME_SOURCE=..``. For example, to build Protocol Buffers +from source, set + +.. code-block:: shell + + -DProtobuf_SOURCE=BUNDLED + +This variable is unfortunately case-sensitive; the name used for each package +is listed above, but the most up-to-date listing can be found in +`cpp/cmake_modules/ThirdpartyToolchain.cmake <https://github.com/apache/arrow/blob/master/cpp/cmake_modules/ThirdpartyToolchain.cmake>`_. + +Bundled Dependency Versions +~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +When using the ``BUNDLED`` method to build a dependency from source, the +version number from ``cpp/thirdparty/versions.txt`` is used. There is also a +dependency source downloader script (see below), which can be used to set up +offline builds. + +When using ``BUNDLED`` for dependency resolution (and if you use either the +jemalloc or mimalloc allocators, which are recommended), statically linking the +Arrow libraries in a third party project is more complex. See below for +instructions about how to configure your build system in this case. 
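+
+As a concrete example of the options described in this section, a
+configuration that prefers system packages but builds a couple of
+dependencies from source might look like the following sketch (the package
+selection here is only illustrative; names come from the case-sensitive list
+above):
+
+.. code-block:: shell
+
+   cmake .. -GNinja \
+         -DARROW_DEPENDENCY_SOURCE=SYSTEM \
+         -DProtobuf_SOURCE=BUNDLED \
+         -Dzstd_SOURCE=BUNDLED \
+         -DARROW_WITH_ZSTD=ON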
+ +Boost-related Options +~~~~~~~~~~~~~~~~~~~~~ + +We depend on some Boost C++ libraries for cross-platform support. In most cases, +the Boost version available in your package manager may be new enough, and the +build system will find it automatically. If you have Boost installed in a +non-standard location, you can specify it by passing +``-DBOOST_ROOT=$MY_BOOST_ROOT`` or setting the ``BOOST_ROOT`` environment +variable. + +Offline Builds +~~~~~~~~~~~~~~ + +If you do not use the above variables to direct the Arrow build system to +preinstalled dependencies, they will be built automatically by the Arrow build +system. The source archive for each dependency will be downloaded via the +internet, which can cause issues in environments with limited access to the +internet. + +To enable offline builds, you can download the source artifacts yourself and +use environment variables of the form ``ARROW_$LIBRARY_URL`` to direct the +build system to read from a local file rather than accessing the internet. + +To make this easier for you, we have prepared a script +``thirdparty/download_dependencies.sh`` which will download the correct version +of each dependency to a directory of your choosing. It will print a list of +bash-style environment variable statements at the end to use for your build +script. + +.. code-block:: shell + + # Download tarballs into $HOME/arrow-thirdparty + $ ./thirdparty/download_dependencies.sh $HOME/arrow-thirdparty + +You can then invoke CMake to create the build directory and it will use the +declared environment variable pointing to downloaded archives instead of +downloading them (one for each build dir!). + +Statically Linking +~~~~~~~~~~~~~~~~~~ + +When ``-DARROW_BUILD_STATIC=ON``, all build dependencies built as static +libraries by the Arrow build system will be merged together to create a static +library ``arrow_bundled_dependencies``. In UNIX-like environments (Linux, macOS, +MinGW), this is called ``libarrow_bundled_dependencies.a`` and on Windows with +Visual Studio ``arrow_bundled_dependencies.lib``. This "dependency bundle" +library is installed in the same place as the other Arrow static libraries. + +If you are using CMake, the bundled dependencies will automatically be included +when linking if you use the ``arrow_static`` CMake target. In other build +systems, you may need to explicitly link to the dependency bundle. We created +an `example CMake-based build configuration +<https://github.com/apache/arrow/tree/master/cpp/examples/minimal_build>`_ to +show you a working example. + +On Linux and macOS, if your application does not link to the ``pthread`` +library already, you must include ``-pthread`` in your linker setup. In CMake +this can be accomplished with the ``Threads`` built-in package: + +.. code-block:: cmake + + set(THREADS_PREFER_PTHREAD_FLAG ON) + find_package(Threads REQUIRED) + target_link_libraries(my_target PRIVATE Threads::Threads) + +Extra debugging help +~~~~~~~~~~~~~~~~~~~~ + +If you use the CMake option ``-DARROW_EXTRA_ERROR_CONTEXT=ON`` it will compile +the libraries with extra debugging information on error checks inside the +``RETURN_NOT_OK`` macro. In unit tests with ``ASSERT_OK``, this will yield error +outputs like: + +.. 
code-block:: shell
+
+   ../src/arrow/ipc/ipc-read-write-test.cc:609: Failure
+   Failed
+   ../src/arrow/ipc/metadata-internal.cc:508 code: TypeToFlatbuffer(fbb, *field.type(), &children, &layout, &type_enum, dictionary_memo, &type_offset)
+   ../src/arrow/ipc/metadata-internal.cc:598 code: FieldToFlatbuffer(fbb, *schema.field(i), dictionary_memo, &offset)
+   ../src/arrow/ipc/metadata-internal.cc:651 code: SchemaToFlatbuffer(fbb, schema, dictionary_memo, &fb_schema)
+   ../src/arrow/ipc/writer.cc:697 code: WriteSchemaMessage(schema_, dictionary_memo_, &schema_fb)
+   ../src/arrow/ipc/writer.cc:730 code: WriteSchema()
+   ../src/arrow/ipc/writer.cc:755 code: schema_writer.Write(&dictionaries_)
+   ../src/arrow/ipc/writer.cc:778 code: CheckStarted()
+   ../src/arrow/ipc/ipc-read-write-test.cc:574 code: writer->WriteRecordBatch(batch)
+   NotImplemented: Unable to convert type: decimal(19, 4)
+
+Deprecations and API Changes
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+We use the compiler definition ``ARROW_NO_DEPRECATED_API`` to disable APIs that
+have been deprecated. It is a good practice to compile third party applications
+with this flag to proactively catch and account for API changes.
+
+Modular Build Targets
+~~~~~~~~~~~~~~~~~~~~~
+
+Since there are several major parts of the C++ project, we have provided
+modular CMake targets for building each library component, group of unit tests
+and benchmarks, and their dependencies:
+
+* ``make arrow`` for Arrow core libraries
+* ``make parquet`` for Parquet libraries
+* ``make gandiva`` for Gandiva (LLVM expression compiler) libraries
+* ``make plasma`` for Plasma libraries and server
+
+.. note::
+   If you have selected Ninja as CMake generator, replace ``make arrow`` with
+   ``ninja arrow``, and so on.
+
+To build the unit tests or benchmarks, add ``-tests`` or ``-benchmarks``
+to the target name. So ``make arrow-tests`` will build the Arrow core unit
+tests. Using the ``-all`` target, e.g. ``parquet-all``, will build everything.
+
+If you wish to only build and install one or more project subcomponents, we
+have provided the CMake option ``ARROW_OPTIONAL_INSTALL`` to only install
+targets that have been built. For example, if you only wish to build the
+Parquet libraries, its tests, and its dependencies, you can run:
+
+.. code-block:: shell
+
+   cmake .. -DARROW_PARQUET=ON \
+         -DARROW_OPTIONAL_INSTALL=ON \
+         -DARROW_BUILD_TESTS=ON
+   make parquet
+   make install
+
+If you omit an explicit target when invoking ``make``, all targets will be
+built.
+
+Debugging with Xcode on macOS
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Xcode is the IDE provided with macOS and can be used to develop and debug Arrow
+by generating an Xcode project:
+
+.. code-block:: shell
+
+   cd cpp
+   mkdir xcode-build
+   cd xcode-build
+   cmake .. -G Xcode -DARROW_BUILD_TESTS=ON -DCMAKE_BUILD_TYPE=DEBUG
+   open arrow.xcodeproj
+
+This will generate a project and open it in the Xcode app. As an alternative,
+the command ``xcodebuild`` will perform a command-line build using the
+generated project. It is recommended to use the "Automatically Create Schemes"
+option when first launching the project. Selecting an auto-generated scheme
+will allow you to build and run a unit test with breakpoints enabled.
diff --git a/src/arrow/docs/source/developers/cpp/conventions.rst b/src/arrow/docs/source/developers/cpp/conventions.rst
new file mode 100644
index 000000000..9db15fbcf
--- /dev/null
+++ b/src/arrow/docs/source/developers/cpp/conventions.rst
@@ -0,0 +1,90 @@
+.. Licensed to the Apache Software Foundation (ASF) under one
+..
diff --git a/src/arrow/docs/source/developers/cpp/conventions.rst b/src/arrow/docs/source/developers/cpp/conventions.rst
new file mode 100644
index 000000000..9db15fbcf
--- /dev/null
+++ b/src/arrow/docs/source/developers/cpp/conventions.rst
@@ -0,0 +1,90 @@
+.. Licensed to the Apache Software Foundation (ASF) under one
+.. or more contributor license agreements. See the NOTICE file
+.. distributed with this work for additional information
+.. regarding copyright ownership. The ASF licenses this file
+.. to you under the Apache License, Version 2.0 (the
+.. "License"); you may not use this file except in compliance
+.. with the License. You may obtain a copy of the License at
+
+.. http://www.apache.org/licenses/LICENSE-2.0
+
+.. Unless required by applicable law or agreed to in writing,
+.. software distributed under the License is distributed on an
+.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+.. KIND, either express or implied. See the License for the
+.. specific language governing permissions and limitations
+.. under the License.
+
+.. highlight:: cpp
+
+===========
+Conventions
+===========
+
+This section describes the abstractions and development approaches we use to
+solve problems common to many parts of the C++ project.
+
+File Naming
+===========
+
+C++ source and header files should use underscores for word separation, not hyphens.
+Compiled executables, however, will automatically use hyphens (such that
+e.g. ``src/arrow/scalar_test.cc`` will be compiled into ``arrow-scalar-test``).
+
+C++ header files use the ``.h`` extension. Any header file name not
+containing ``internal`` is considered to be a public header, and will be
+automatically installed by the build.
+
+Comments and Docstrings
+=======================
+
+Regular comments start with ``//``.
+
+Doxygen docstrings start with ``///``, and Doxygen directives start with ``\``,
+like this::
+
+  /// \brief Allocate a fixed size mutable buffer from a memory pool, zero its padding.
+  ///
+  /// \param[in] size size of buffer to allocate
+  /// \param[in] pool a memory pool
+  ARROW_EXPORT
+  Result<std::unique_ptr<Buffer>> AllocateBuffer(const int64_t size,
+                                                 MemoryPool* pool = NULLPTR);
+
+The summary line of a docstring uses the infinitive, not the indicative
+(for example, "Allocate a buffer" rather than "Allocates a buffer").
+
+Memory Pools
+============
+
+We provide a default memory pool with ``arrow::default_memory_pool()``.
+
+Error Handling and Exceptions
+=============================
+
+For error handling, we return ``arrow::Status`` values instead of throwing C++
+exceptions. Since the Arrow C++ libraries are intended to be useful as a
+component in larger C++ projects, using ``Status`` objects can help with good
+code hygiene by making explicit when a function is expected to be able to fail.
+
+A more recent option is to return an ``arrow::Result<T>`` object that can
+represent either a successful result with a ``T`` value, or an error result
+with a ``Status`` value.
+
+For expressing internal invariants and "cannot fail" errors, we use ``DCHECK`` macros
+defined in ``arrow/util/logging.h``. These checks are disabled in release builds
+and are intended to catch internal development errors, particularly when
+refactoring. These macros are not to be included in any public header files.
+
+Since we do not use exceptions, we avoid doing expensive work in object
+constructors. Objects that are expensive to construct may often have private
+constructors, with public static factory methods that return ``Status`` or
+``Result<T>``.
+
+There are a number of object constructors, such as those of ``arrow::Schema``
+and ``arrow::RecordBatch``, where larger STL container objects like
+``std::vector`` may be created.
+While it is possible for ``std::bad_alloc`` to be thrown in these
+constructors, the circumstances under which that would happen are somewhat
+esoteric, and it is likely that an application would have encountered other,
+more serious problems before ``std::bad_alloc`` is thrown in a constructor.
diff --git a/src/arrow/docs/source/developers/cpp/development.rst b/src/arrow/docs/source/developers/cpp/development.rst
new file mode 100644
index 000000000..4098f1c4e
--- /dev/null
+++ b/src/arrow/docs/source/developers/cpp/development.rst
@@ -0,0 +1,294 @@
+.. Licensed to the Apache Software Foundation (ASF) under one
+.. or more contributor license agreements. See the NOTICE file
+.. distributed with this work for additional information
+.. regarding copyright ownership. The ASF licenses this file
+.. to you under the Apache License, Version 2.0 (the
+.. "License"); you may not use this file except in compliance
+.. with the License. You may obtain a copy of the License at
+
+.. http://www.apache.org/licenses/LICENSE-2.0
+
+.. Unless required by applicable law or agreed to in writing,
+.. software distributed under the License is distributed on an
+.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+.. KIND, either express or implied. See the License for the
+.. specific language governing permissions and limitations
+.. under the License.
+
+======================
+Development Guidelines
+======================
+
+This section provides information for developers who wish to contribute to the
+C++ codebase.
+
+.. note::
+
+   Since most of the project's developers work on Linux or macOS, not all
+   features or developer tools are uniformly supported on Windows. If you are
+   on Windows, have a look at :ref:`developers-cpp-windows`.
+
+Compiler warning levels
+=======================
+
+The ``BUILD_WARNING_LEVEL`` CMake option switches between sets of predetermined
+compiler warning levels that we use for code tidiness. For release builds, the
+default warning level is ``PRODUCTION``, while for debug builds the default is
+``CHECKIN``.
+
+When using ``CHECKIN`` for debug builds, ``-Werror`` is added when compiling
+with gcc and clang, causing the build to fail on any warning; with MSVC,
+``/WX`` is set to the same effect.
+
+Running unit tests
+==================
+
+The ``-DARROW_BUILD_TESTS=ON`` CMake option enables building of unit test
+executables. You can then either run them individually, by launching the
+desired executable, or run them all at once by launching the ``ctest``
+executable (which is part of the CMake suite).
+
+A possible invocation is something like::
+
+   $ ctest -j16 --output-on-failure
+
+where the ``-j16`` option runs up to 16 tests in parallel, taking advantage
+of multiple CPU cores and hardware threads.
+
+Running benchmarks
+==================
+
+The ``-DARROW_BUILD_BENCHMARKS=ON`` CMake option enables building of benchmark
+executables. You can then run benchmarks individually by launching the
+corresponding executable from the command line, e.g.::
+
+   $ ./build/release/arrow-builder-benchmark
+
+.. note::
+   For meaningful benchmark numbers, it is very strongly recommended to build
+   in ``Release`` mode, so as to enable compiler optimizations.
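+
+The benchmark executables are based on Google Benchmark, so the usual Google
+Benchmark command-line flags (such as ``--benchmark_filter``) should be
+accepted. As a sketch (the filter pattern below is purely illustrative, not
+the name of an actual benchmark):
+
+.. code-block:: shell
+
+   # Run only benchmarks whose name matches a regular expression, and repeat
+   # each of them a few times for more stable numbers.
+   $ ./build/release/arrow-builder-benchmark \
+       --benchmark_filter=BuildPrimitiveArray \
+       --benchmark_repetitions=3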
+
+Code Style, Linting, and CI
+===========================
+
+This project follows `Google's C++ Style Guide
+<https://google.github.io/styleguide/cppguide.html>`_ with minor exceptions:
+
+* We relax the line length restriction to 90 characters.
+* We use the ``NULLPTR`` macro in header files (instead of ``nullptr``) defined
+  in ``src/arrow/util/macros.h`` to support building C++/CLI (ARROW-1134)
+* We relax the guide's rules regarding structs. For public headers we should
+  use struct only for objects that are principally simple data containers where
+  it is OK to expose all the internal members and any methods are primarily
+  conveniences. For private headers the rules are relaxed further and structs
+  can be used where convenient for types that do not need access control even
+  though they may not be simple data containers.
+
+Our continuous integration builds on GitHub Actions run the unit test
+suites on a variety of platforms and configurations, including using
+Address Sanitizer and Undefined Behavior Sanitizer to check for various
+patterns of misbehaviour such as memory leaks. In addition, the
+codebase is subjected to a number of code style and code cleanliness checks.
+
+In order to have a passing CI build, your modified git branch must pass the
+following checks:
+
+* C++ builds with the project's active version of ``clang`` without
+  compiler warnings with ``-DBUILD_WARNING_LEVEL=CHECKIN``. Note that
+  there are classes of warnings (such as ``-Wdocumentation``, see more
+  on this below) that are not caught by ``gcc``.
+* CMake files pass style checks; these can be fixed by running
+  ``archery lint --cmake-format --fix``. This requires Python
+  3 and `cmake_format <https://github.com/cheshirekow/cmake_format>`_ (note:
+  this currently does not work on Windows)
+* Passes various C++ (and other) style checks, checked with the ``lint``
+  subcommand to :ref:`Archery <archery>`. This can also be fixed locally
+  by running ``archery lint --cpplint --fix``.
+
+In order to account for variations in the behavior of ``clang-format`` between
+major versions of LLVM, we pin the version of ``clang-format`` used (currently
+LLVM 8).
+
+Depending on how you installed clang-format, the build system may not be able
+to find it. You can provide an explicit path to your LLVM installation (or the
+root path for the clang tools) with the environment variable
+``$CLANG_TOOLS_PATH`` or by passing ``-DClangTools_PATH=$PATH_TO_CLANG_TOOLS`` when
+invoking CMake.
+
+To make linting more reproducible for everyone, we provide a ``docker-compose``
+target that is executable from the root of the repository:
+
+.. code-block:: shell
+
+   docker-compose run ubuntu-lint
+
+Cleaning includes with include-what-you-use (IWYU)
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+We occasionally use Google's `include-what-you-use
+<https://github.com/include-what-you-use/include-what-you-use>`_ tool, also
+known as IWYU, to remove unnecessary imports.
+
+To begin using IWYU, you must first build it by following the instructions in
+the project's documentation. Once the ``include-what-you-use`` executable is in
+your ``$PATH``, you must run CMake with ``-DCMAKE_EXPORT_COMPILE_COMMANDS=ON``
+in a new out-of-source CMake build directory like so:
+
+.. 
code-block:: shell + + mkdir -p $ARROW_ROOT/cpp/iwyu + cd $ARROW_ROOT/cpp/iwyu + cmake -DCMAKE_EXPORT_COMPILE_COMMANDS=ON \ + -DARROW_PYTHON=ON \ + -DARROW_PARQUET=ON \ + -DARROW_FLIGHT=ON \ + -DARROW_PLASMA=ON \ + -DARROW_GANDIVA=ON \ + -DARROW_BUILD_BENCHMARKS=ON \ + -DARROW_BUILD_BENCHMARKS_REFERENCE=ON \ + -DARROW_BUILD_TESTS=ON \ + -DARROW_BUILD_UTILITIES=ON \ + -DARROW_S3=ON \ + -DARROW_WITH_BROTLI=ON \ + -DARROW_WITH_BZ2=ON \ + -DARROW_WITH_LZ4=ON \ + -DARROW_WITH_SNAPPY=ON \ + -DARROW_WITH_ZLIB=ON \ + -DARROW_WITH_ZSTD=ON .. + +In order for IWYU to run on the desired component in the codebase, it must be +enabled by the CMake configuration flags. Once this is done, you can run IWYU +on the whole codebase by running a helper ``iwyu.sh`` script: + +.. code-block:: shell + + IWYU_SH=$ARROW_ROOT/cpp/build-support/iwyu/iwyu.sh + ./$IWYU_SH + +Since this is very time consuming, you can check a subset of files matching +some string pattern with the special "match" option + +.. code-block:: shell + + ./$IWYU_SH match $PATTERN + +For example, if you wanted to do IWYU checks on all files in +``src/arrow/array``, you could run + +.. code-block:: shell + + ./$IWYU_SH match arrow/array + +Checking for ABI and API stability +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +To build ABI compliance reports, you need to install the two tools +``abi-dumper`` and ``abi-compliance-checker``. + +Build Arrow C++ in Debug mode, alternatively you could use ``-Og`` which also +builds with the necessary symbols but includes a bit of code optimization. +Once the build has finished, you can generate ABI reports using: + +.. code-block:: shell + + abi-dumper -lver 9 debug/libarrow.so -o ABI-9.dump + +The above version number is freely selectable. As we want to compare versions, +you should now ``git checkout`` the version you want to compare it to and re-run +the above command using a different version number. Once both reports are +generated, you can build a comparison report using + +.. code-block:: shell + + abi-compliance-checker -l libarrow -d1 ABI-PY-9.dump -d2 ABI-PY-10.dump + +The report is then generated in ``compat_reports/libarrow`` as a HTML. + +API Documentation +================= + +We use Doxygen style comments (``///``) in header files for comments +that we wish to show up in API documentation for classes and +functions. + +When using ``clang`` and building with +``-DBUILD_WARNING_LEVEL=CHECKIN``, the ``-Wdocumentation`` flag is +used which checks for some common documentation inconsistencies, like +documenting some, but not all function parameters with ``\param``. See +the `LLVM documentation warnings section +<https://releases.llvm.org/7.0.1/tools/clang/docs/DiagnosticsReference.html#wdocumentation>`_ +for more about this. + +While we publish the API documentation as part of the main Sphinx-based +documentation site, you can also build the C++ API documentation anytime using +Doxygen. Run the following command from the ``cpp/apidoc`` directory: + +.. code-block:: shell + + doxygen Doxyfile + +This requires `Doxygen <https://www.doxygen.org>`_ to be installed. + +Apache Parquet Development +========================== + +To build the C++ libraries for Apache Parquet, add the flag +``-DARROW_PARQUET=ON`` when invoking CMake. +To build Apache Parquet with encryption support, add the flag +``-DPARQUET_REQUIRE_ENCRYPTION=ON`` when invoking CMake. The Parquet libraries and unit tests +can be built with the ``parquet`` make target: + +.. 
code-block:: shell
+
+   make parquet
+
+On Linux and macOS, if you do not have Apache Thrift installed on your system,
+or you are building with ``-DThrift_SOURCE=BUNDLED``, you must install the
+``bison`` and ``flex`` packages. On Windows we handle these build dependencies
+automatically when building Thrift from source.
+
+Running ``ctest -L unittest`` will run all built C++ unit tests, while ``ctest -L
+parquet`` will run only the Parquet unit tests. The unit tests rely on an
+environment variable ``PARQUET_TEST_DATA`` that points to data provided by a git
+submodule of the repository https://github.com/apache/parquet-testing:
+
+.. code-block:: shell
+
+   git submodule update --init
+   export PARQUET_TEST_DATA=$ARROW_ROOT/cpp/submodules/parquet-testing/data
+
+Here ``$ARROW_ROOT`` is the absolute path to the Arrow codebase.
+
+Arrow Flight RPC
+================
+
+In addition to the Arrow dependencies, Flight requires:
+
+* gRPC (>= 1.14, roughly)
+* Protobuf (>= 3.6, earlier versions may work)
+* c-ares (used by gRPC)
+
+By default, Arrow will try to download and build these dependencies
+when building Flight.
+
+The optional ``flight`` libraries and tests can be built by passing
+``-DARROW_FLIGHT=ON``.
+
+.. code-block:: shell
+
+   cmake .. -DARROW_FLIGHT=ON -DARROW_BUILD_TESTS=ON
+   make
+
+You can also use existing installations of the extra dependencies.
+When building, set the environment variables ``gRPC_ROOT`` and/or
+``Protobuf_ROOT`` and/or ``c-ares_ROOT``.
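+
+As a rough sketch, pointing the build at preinstalled Flight dependencies might
+look like the following (the installation prefixes are placeholders, not
+defaults shipped by any package):
+
+.. code-block:: shell
+
+   # Prefixes below are illustrative; point them at your actual installations.
+   export gRPC_ROOT=/opt/grpc
+   export Protobuf_ROOT=/opt/protobuf
+   cmake .. -DARROW_FLIGHT=ON -DARROW_BUILD_TESTS=ON
+   make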
+
+We are developing against recent versions of gRPC. The ``grpc-cpp`` package
+available from https://conda-forge.org/ is one reliable way to obtain gRPC in a
+cross-platform way. You may try using system libraries for gRPC and Protobuf,
+but these are likely to be too old. On macOS, you can
+try `Homebrew <https://brew.sh/>`_:
+
+.. code-block:: shell
+
+   brew install grpc
diff --git a/src/arrow/docs/source/developers/cpp/fuzzing.rst b/src/arrow/docs/source/developers/cpp/fuzzing.rst
new file mode 100644
index 000000000..41398a13d
--- /dev/null
+++ b/src/arrow/docs/source/developers/cpp/fuzzing.rst
@@ -0,0 +1,99 @@
+.. Licensed to the Apache Software Foundation (ASF) under one
+.. or more contributor license agreements. See the NOTICE file
+.. distributed with this work for additional information
+.. regarding copyright ownership. The ASF licenses this file
+.. to you under the Apache License, Version 2.0 (the
+.. "License"); you may not use this file except in compliance
+.. with the License. You may obtain a copy of the License at
+
+.. http://www.apache.org/licenses/LICENSE-2.0
+
+.. Unless required by applicable law or agreed to in writing,
+.. software distributed under the License is distributed on an
+.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+.. KIND, either express or implied. See the License for the
+.. specific language governing permissions and limitations
+.. under the License.
+
+=================
+Fuzzing Arrow C++
+=================
+
+To make the handling of invalid input more robust, we have enabled
+fuzz testing on several parts of the Arrow C++ feature set, currently:
+
+* the IPC stream format
+* the IPC file format
+* the Parquet file format
+
+We welcome any contribution to expand the scope of fuzz testing and cover
+additional areas that ingest potentially invalid or malicious data.
+
+Fuzz Targets and Utilities
+==========================
+
+By passing the ``-DARROW_FUZZING=ON`` CMake option, you will build
+the fuzz targets corresponding to the aforementioned Arrow features, as well
+as additional related utilities.
+
+Generating the seed corpus
+--------------------------
+
+Fuzzing essentially explores the domain space by randomly mutating previously
+tested inputs, without having any high-level understanding of the area being
+fuzz-tested. However, the domain space is so huge that this strategy alone
+may fail to actually produce any "interesting" inputs.
+
+To guide the process, it is therefore important to provide a *seed corpus*
+of valid (or invalid, but remarkable) inputs from which the fuzzing
+infrastructure can derive new inputs for testing. A script is provided
+to automate that task. Assuming the fuzzing executables can be found in
+``build/debug``, the seed corpus can be generated as follows:
+
+.. code-block:: shell
+
+   $ ./build-support/fuzzing/generate_corpuses.sh build/debug
+
+Continuous fuzzing infrastructure
+=================================
+
+The process of fuzz testing is computationally intensive and therefore
+benefits from dedicated computing facilities. Arrow C++ is exercised by
+the `OSS-Fuzz`_ continuous fuzzing infrastructure operated by Google.
+
+Issues found by OSS-Fuzz are reported to, and visible to, a limited set of
+`core developers <https://github.com/google/oss-fuzz/blob/master/projects/arrow/project.yaml>`_.
+If you are an Arrow core developer and want to be added to that list, you can
+ask on the :ref:`mailing-list <contributing>`.
+
+.. _OSS-Fuzz: https://google.github.io/oss-fuzz/
+
+Reproducing locally
+===================
+
+When a crash is found by fuzzing, it is often useful to download the data
+used to produce the crash, and use it to reproduce the crash so as to debug
+and investigate.
+
+Assuming you are in a subdirectory inside ``cpp``, the following command
+would allow you to build the fuzz targets with debug information and the
+various sanitizer checks enabled.
+
+.. code-block:: shell
+
+   $ cmake .. -GNinja \
+       -DCMAKE_BUILD_TYPE=Debug \
+       -DARROW_USE_ASAN=on \
+       -DARROW_USE_UBSAN=on \
+       -DARROW_FUZZING=on
+
+Then, assuming you have downloaded the crashing data file (let's call it
+``testcase-arrow-ipc-file-fuzz-123465``), you can reproduce the crash
+by running the affected fuzz target on that file:
+
+.. code-block:: shell
+
+   $ build/debug/arrow-ipc-file-fuzz testcase-arrow-ipc-file-fuzz-123465
+
+(you may want to run that command under a debugger so as to inspect the
+program state more closely)
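+
+For instance, a minimal sketch of doing that under ``gdb`` (any debugger will
+do; the crash file name is the hypothetical one from above):
+
+.. code-block:: shell
+
+   # Run the fuzz target under gdb and show a backtrace at the crash site
+   $ gdb --args build/debug/arrow-ipc-file-fuzz testcase-arrow-ipc-file-fuzz-123465
+   (gdb) run
+   (gdb) backtrace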
diff --git a/src/arrow/docs/source/developers/cpp/index.rst b/src/arrow/docs/source/developers/cpp/index.rst
new file mode 100644
index 000000000..36c9778be
--- /dev/null
+++ b/src/arrow/docs/source/developers/cpp/index.rst
@@ -0,0 +1,31 @@
+.. Licensed to the Apache Software Foundation (ASF) under one
+.. or more contributor license agreements. See the NOTICE file
+.. distributed with this work for additional information
+.. regarding copyright ownership. The ASF licenses this file
+.. to you under the Apache License, Version 2.0 (the
+.. "License"); you may not use this file except in compliance
+.. with the License. You may obtain a copy of the License at
+
+.. http://www.apache.org/licenses/LICENSE-2.0
+
+.. Unless required by applicable law or agreed to in writing,
+.. software distributed under the License is distributed on an
+.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+.. KIND, either express or implied. See the License for the
+.. specific language governing permissions and limitations
+.. under the License.
+
+.. _cpp-development:
+
+***************
+C++ Development
+***************
+
+.. toctree::
+   :maxdepth: 2
+
+   building
+   development
+   windows
+   conventions
+   fuzzing
diff --git a/src/arrow/docs/source/developers/cpp/windows.rst b/src/arrow/docs/source/developers/cpp/windows.rst
new file mode 100644
index 000000000..ee5a613bc
--- /dev/null
+++ b/src/arrow/docs/source/developers/cpp/windows.rst
@@ -0,0 +1,412 @@
+.. Licensed to the Apache Software Foundation (ASF) under one
+.. or more contributor license agreements. See the NOTICE file
+.. distributed with this work for additional information
+.. regarding copyright ownership. The ASF licenses this file
+.. to you under the Apache License, Version 2.0 (the
+.. "License"); you may not use this file except in compliance
+.. with the License. You may obtain a copy of the License at
+
+.. http://www.apache.org/licenses/LICENSE-2.0
+
+.. Unless required by applicable law or agreed to in writing,
+.. software distributed under the License is distributed on an
+.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+.. KIND, either express or implied. See the License for the
+.. specific language governing permissions and limitations
+.. under the License.
+
+.. _developers-cpp-windows:
+
+=====================
+Developing on Windows
+=====================
+
+As with Linux and macOS, we have worked to enable builds to work "out of the box"
+with CMake for a reasonably large subset of the project.
+
+.. _windows-system-setup:
+
+System Setup
+============
+
+Microsoft provides the free Visual Studio Community edition. When doing
+development in the shell, you must initialize the development environment
+each time you open the shell.
+
+For Visual Studio 2017, execute the following batch script:
+
+.. code-block:: shell
+
+   "C:\Program Files (x86)\Microsoft Visual Studio\2017\Community\Common7\Tools\VsDevCmd.bat" -arch=amd64
+
+For Visual Studio 2019, the script is:
+
+.. code-block:: shell
+
+   "C:\Program Files (x86)\Microsoft Visual Studio\2019\Community\Common7\Tools\VsDevCmd.bat" -arch=amd64
+
+One can configure a console emulator like `cmder <https://cmder.net/>`_ to
+automatically launch this when starting a new development console.
+
+Using conda-forge for build dependencies
+========================================
+
+`Miniconda <https://conda.io/miniconda.html>`_ is a minimal Python distribution
+including the `conda <https://conda.io>`_ package manager. Some members of the
+Apache Arrow community participate in the maintenance of `conda-forge
+<https://conda-forge.org/>`_, a community-maintained cross-platform package
+repository for conda.
+
+To use ``conda-forge`` for your C++ build dependencies on Windows, first
+download and install a 64-bit distribution from the `Miniconda homepage
+<https://conda.io/miniconda.html>`_.
+
+To configure ``conda`` to use the ``conda-forge`` channel by default, launch a
+command prompt (``cmd.exe``), run the initialization command shown
+:ref:`above<windows-system-setup>` (``vcvarsall.bat`` or ``VsDevCmd.bat``), then
+run the command:
+
+.. code-block:: shell
+
+   conda config --add channels conda-forge
+
+Now, you can bootstrap a build environment (call from the root directory of the
+Arrow codebase):
+
+.. code-block:: shell
+
+   conda create -y -n arrow-dev --file=ci\conda_env_cpp.txt
+
+Then "activate" this conda environment with:
+
+.. 
code-block:: shell + + activate arrow-dev + +If the environment has been activated, the Arrow build system will +automatically see the ``%CONDA_PREFIX%`` environment variable and use that for +resolving the build dependencies. This is equivalent to setting + +.. code-block:: shell + + -DARROW_DEPENDENCY_SOURCE=SYSTEM ^ + -DARROW_PACKAGE_PREFIX=%CONDA_PREFIX%\Library + +To use the Visual Studio IDE with this conda environment activated, launch it by +running the command ``devenv`` from the same command prompt. + +Note that dependencies installed as conda packages are built in release mode and +cannot link with debug builds. If you intend to use ``-DCMAKE_BUILD_TYPE=debug`` +then you must build the packages from source. +``-DCMAKE_BUILD_TYPE=relwithdebinfo`` is also available, which produces a build +that can both be linked with release libraries and be debugged. + +.. note:: + + If you run into any problems using conda packages for dependencies, a very + common problem is mixing packages from the ``defaults`` channel with those + from ``conda-forge``. You can examine the installed packages in your + environment (and their origin) with ``conda list`` + +Using vcpkg for build dependencies +======================================== + +`vcpkg <https://github.com/microsoft/vcpkg>`_ is an open source package manager +from Microsoft. It hosts community-contributed ports of C and C++ packages and +their dependencies. Arrow includes a manifest file `cpp/vcpkg.json +<https://github.com/apache/arrow/blob/master/cpp/vcpkg.json>`_ that specifies +which vcpkg packages are required to build the C++ library. + +To use vcpkg for C++ build dependencies on Windows, first +`install <https://docs.microsoft.com/en-us/cpp/build/install-vcpkg>`_ and +`integrate <https://docs.microsoft.com/en-us/cpp/build/integrate-vcpkg>`_ +vcpkg. Then change working directory in ``cmd.exe`` to the root directory +of Arrow and run the command: + +.. code-block:: shell + + vcpkg install ^ + --triplet x64-windows ^ + --x-manifest-root cpp ^ + --feature-flags=versions ^ + --clean-after-build + +On Windows, vcpkg builds dynamic link libraries by default. Use the triplet +``x64-windows-static`` to build static libraries. vcpkg downloads source +packages and compiles them locally, so installing dependencies with vcpkg is +more time-consuming than with conda. + +Then in your ``cmake`` command, to use dependencies installed by vcpkg, set: + +.. 
code-block:: shell + + -DARROW_DEPENDENCY_SOURCE=VCPKG + +You can optionally set other variables to override the default CMake +configurations for vcpkg, including: + +* ``-DCMAKE_TOOLCHAIN_FILE``: by default, the CMake scripts automatically find + the location of the vcpkg CMake toolchain file ``vcpkg.cmake``; use this to + instead specify its location +* ``-DVCPKG_TARGET_TRIPLET``: by default, the CMake scripts attempt to infer the + vcpkg + `triplet <https://github.com/microsoft/vcpkg/blob/master/docs/users/triplets.md>`_; + use this to instead specify the triplet +* ``-DARROW_DEPENDENCY_USE_SHARED``: default is ``ON``; set to ``OFF`` for + static libraries +* ``-DVCPKG_MANIFEST_MODE``: default is ``ON``; set to ``OFF`` to ignore the + ``vcpkg.json`` manifest file and only look for vcpkg packages that are + already installed under the directory where vcpkg is installed + + +Building using Visual Studio (MSVC) Solution Files +================================================== + +Change working directory in ``cmd.exe`` to the root directory of Arrow and do +an out of source build by generating a MSVC solution: + +.. code-block:: shell + + cd cpp + mkdir build + cd build + cmake .. -G "Visual Studio 15 2017" -A x64 ^ + -DARROW_BUILD_TESTS=ON + cmake --build . --config Release + +For newer versions of Visual Studio, specify the generator +``Visual Studio 16 2019`` or see ``cmake --help`` for available +generators. + +Building with Ninja and clcache +=============================== + +The `Ninja <https://ninja-build.org/>`_ build system offers better build +parallelization, and the optional `clcache +<https://github.com/frerich/clcache/>`_ compiler cache keeps track of +past compilations to avoid running them over and over again (in a way similar +to the Unix-specific ``ccache``). + +Newer versions of Visual Studio include Ninja. To see if your Visual Studio +includes Ninja, run the initialization command shown +:ref:`above<windows-system-setup>` (``vcvarsall.bat`` or ``VsDevCmd.bat``), then +run ``ninja --version``. + +If Ninja is not included in your version of Visual Studio, and you are using +conda, activate your conda environment and install Ninja and clcache: + +.. code-block:: shell + + activate arrow-dev + conda install -c conda-forge ninja + pip install git+https://github.com/frerich/clcache.git + +If you are not using conda, +`install Ninja from another source <https://github.com/ninja-build/ninja/wiki/Pre-built-Ninja-packages>`_ +and optionally +`install clcache from another source <https://github.com/frerich/clcache/wiki/Installation>`_ +. + +After installation is complete, change working directory in ``cmd.exe`` to the root directory of Arrow and +do an out of source build by generating Ninja files: + +.. code-block:: shell + + cd cpp + mkdir build + cd build + cmake -G "Ninja" ^ + -DCMAKE_C_COMPILER=clcache ^ + -DCMAKE_CXX_COMPILER=clcache ^ + -DARROW_BUILD_TESTS=ON ^ + -DGTest_SOURCE=BUNDLED .. + cmake --build . --config Release + +Setting ``CMAKE_C_COMPILER`` and ``CMAKE_CXX_COMPILER`` in the command line +of ``cmake`` is the preferred method of using ``clcache``. Alternatively, you +can set ``CC`` and ``CXX`` environment variables before calling ``cmake``: + +.. code-block:: shell + + ... + set CC=clcache + set CXX=clcache + cmake -G "Ninja" ^ + ... + + + +Building with NMake +=================== + +Change working directory in ``cmd.exe`` to the root directory of Arrow and +do an out of source build using ``nmake``: + +.. 
code-block:: shell
+
+   cd cpp
+   mkdir build
+   cd build
+   cmake -G "NMake Makefiles" ..
+   nmake
+
+Building on MSYS2
+=================
+
+You can build from an MSYS2 terminal, ``cmd.exe`` or a PowerShell terminal.
+
+From an MSYS2 terminal:
+
+.. code-block:: shell
+
+   cd cpp
+   mkdir build
+   cd build
+   cmake -G "MSYS Makefiles" ..
+   make
+
+From ``cmd.exe`` or a PowerShell terminal, you can use the following batch
+file:
+
+.. code-block:: batch
+
+   setlocal
+
+   REM For 64bit
+   set MINGW_PACKAGE_PREFIX=mingw-w64-x86_64
+   set MINGW_PREFIX=c:\msys64\mingw64
+   set MSYSTEM=MINGW64
+
+   set PATH=%MINGW_PREFIX%\bin;c:\msys64\usr\bin;%PATH%
+
+   rmdir /S /Q cpp\build
+   mkdir cpp\build
+   pushd cpp\build
+   cmake -G "MSYS Makefiles" .. || exit /B
+   make || exit /B
+   popd
+
+Debug builds
+============
+
+To build a Debug version of Arrow, you should have pre-installed a Debug
+version of Boost. It's recommended to configure ``cmake`` with the following
+variables for a Debug build:
+
+* ``-DARROW_BOOST_USE_SHARED=OFF``: enables static linking with boost debug
+  libs and simplifies run-time loading of 3rd parties
+* ``-DBOOST_ROOT``: sets the root directory of boost libs. (Optional)
+* ``-DBOOST_LIBRARYDIR``: sets the directory with boost lib files. (Optional)
+
+The command line to build Arrow in Debug mode will look something like this:
+
+.. code-block:: shell
+
+   cd cpp
+   mkdir build
+   cd build
+   cmake .. -G "Visual Studio 15 2017" -A x64 ^
+         -DARROW_BOOST_USE_SHARED=OFF ^
+         -DCMAKE_BUILD_TYPE=Debug ^
+         -DBOOST_ROOT=C:/local/boost_1_63_0 ^
+         -DBOOST_LIBRARYDIR=C:/local/boost_1_63_0/lib64-msvc-14.0
+   cmake --build . --config Debug
+
+Windows dependency resolution issues
+====================================
+
+Because Windows uses ``.lib`` files for both static and dynamic linking of
+dependencies, the static library may sometimes be named something different,
+such as ``%PACKAGE%_static.lib``, to distinguish it. If you are statically
+linking some dependencies, we provide the following options:
+
+* ``-DBROTLI_MSVC_STATIC_LIB_SUFFIX=%BROTLI_SUFFIX%``
+* ``-DSNAPPY_MSVC_STATIC_LIB_SUFFIX=%SNAPPY_SUFFIX%``
+* ``-DLZ4_MSVC_STATIC_LIB_SUFFIX=%LZ4_SUFFIX%``
+* ``-DZSTD_MSVC_STATIC_LIB_SUFFIX=%ZSTD_SUFFIX%``
+
+To get the latest build instructions, you can reference `ci/appveyor-cpp-build.bat
+<https://github.com/apache/arrow/blob/master/ci/appveyor-cpp-build.bat>`_,
+which is used by automated Appveyor builds.
+
+Statically linking to Arrow on Windows
+======================================
+
+The Arrow headers on Windows static library builds (enabled by the CMake
+option ``ARROW_BUILD_STATIC``) use the preprocessor macro ``ARROW_STATIC`` to
+suppress dllimport/dllexport marking of symbols. Projects that statically link
+against Arrow on Windows additionally need this definition. The Unix builds do
+not use the macro.
+
+Replicating Appveyor Builds
+===========================
+
+For developers more familiar with Linux who need to replicate a failing
+AppVeyor build, here are some rough notes on replicating the
+``Static_Crt_Build`` job (``make unittest`` will probably still fail, but many
+unit tests can be built with their individual make targets).
+
+1. Microsoft offers trial VMs for `Windows with Microsoft Visual Studio
+   <https://developer.microsoft.com/en-us/windows/downloads/virtual-machines>`_.
+   Download and install a version.
+2. Run the VM and install `Git <https://git-scm.com/>`_, `CMake
+   <https://cmake.org/>`_, and Miniconda or Anaconda (these instructions assume
+   Anaconda).
+   Also install the `"Build Tools for Visual Studio"
+   <https://visualstudio.microsoft.com/downloads/#build-tools-for-visual-studio-2019>`_.
+   Make sure to select the C++ toolchain in the installer wizard, and reboot
+   after installation.
+3. Download `pre-built Boost debug binaries
+   <https://sourceforge.net/projects/boost/files/boost-binaries/>`_ and install
+   them.
+
+   Run this from an Anaconda/Miniconda command prompt (*not* a PowerShell prompt),
+   and make sure to run "vcvarsall.bat x64" first. The location of vcvarsall.bat
+   depends on your installation; it may be under a different path than commonly
+   indicated,
+   e.g. "``C:\Program Files (x86)\Microsoft Visual Studio\2019\BuildTools\VC\Auxiliary\Build\vcvarsall.bat``"
+   with the 2019 build tools.
+
+.. code-block:: shell
+
+   cd $EXTRACT_BOOST_DIRECTORY
+   .\bootstrap.bat
+   @rem This is for static libraries needed for static_crt_build in appveyor
+   .\b2 link=static --with-filesystem --with-regex --with-system install
+   @rem this should put libraries and headers in c:\Boost
+
+4. Activate anaconda/miniconda:
+
+.. code-block:: shell
+
+   @rem this might differ for miniconda
+   C:\Users\User\Anaconda3\Scripts\activate
+
+5. Clone the Arrow source code and change into its directory.
+6. Set up environment variables:
+
+.. code-block:: shell
+
+   @rem Change the build type based on which appveyor job you want.
+   SET JOB=Static_Crt_Build
+   SET GENERATOR=Ninja
+   SET APPVEYOR_BUILD_WORKER_IMAGE=Visual Studio 2017
+   SET USE_CLCACHE=false
+   SET ARROW_BUILD_GANDIVA=OFF
+   SET ARROW_LLVM_VERSION=8.0.*
+   SET PYTHON=3.6
+   SET ARCH=64
+   SET PATH=C:\Users\User\Anaconda3;C:\Users\User\Anaconda3\Scripts;C:\Users\User\Anaconda3\Library\bin;%PATH%
+   SET BOOST_LIBRARYDIR=C:\Boost\lib
+   SET BOOST_ROOT=C:\Boost
+
+7. Run the AppVeyor scripts:
+
+.. code-block:: shell
+
+   conda install -c conda-forge --file .\ci\conda_env_cpp.txt
+   .\ci\appveyor-cpp-setup.bat
+   @rem this might fail, but at this point most unit tests should be buildable
+   @rem by their individual targets; see the last line for an example.
+   .\ci\appveyor-cpp-build.bat
+   @rem you can also just invoke cmake directly with the desired options
+   cmake --build . --config Release --target arrow-compute-hash-test
diff --git a/src/arrow/docs/source/developers/crossbow.rst b/src/arrow/docs/source/developers/crossbow.rst
new file mode 100644
index 000000000..cb49a2446
--- /dev/null
+++ b/src/arrow/docs/source/developers/crossbow.rst
@@ -0,0 +1,258 @@
+.. Licensed to the Apache Software Foundation (ASF) under one
+.. or more contributor license agreements. See the NOTICE file
+.. distributed with this work for additional information
+.. regarding copyright ownership. The ASF licenses this file
+.. to you under the Apache License, Version 2.0 (the
+.. "License"); you may not use this file except in compliance
+.. with the License. You may obtain a copy of the License at
+
+.. http://www.apache.org/licenses/LICENSE-2.0
+
+.. Unless required by applicable law or agreed to in writing,
+.. software distributed under the License is distributed on an
+.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+.. KIND, either express or implied. See the License for the
+.. specific language governing permissions and limitations
+.. under the License.
+
+Packaging and Testing with Crossbow
+===================================
+
+The content of the ``arrow/dev/tasks`` directory aims to automate the process of
+Arrow packaging and integration testing.
+
+Packages:
+ - C++ and Python `conda-forge packages`_ for Linux, Mac and Windows
+ - Python `Wheels`_ for Linux, Mac and Windows
+ - C++ and GLib `Linux packages`_ for multiple distributions
+ - Java for Gandiva
+
+Integration tests:
+ - Various docker tests
+ - Pandas
+ - Dask
+ - Turbodbc
+ - HDFS
+ - Spark
+
+Architecture
+------------
+
+Executors
+~~~~~~~~~
+
+Individual jobs are executed on public CI services, currently:
+
+- Linux: TravisCI, CircleCI, Azure Pipelines
+- Mac: TravisCI, Azure Pipelines
+- Windows: AppVeyor, Azure Pipelines
+
+Queue
+~~~~~
+
+Because of the nature of how the CI services work, the scheduling of
+jobs happens through an additional git repository, which acts like a job
+queue for the tasks. Anyone can host a ``queue`` repository, which is usually
+called ``crossbow``.
+
+A job is a git commit on a particular git branch, containing only the required
+configuration file to run the requested build (like ``.travis.yml``,
+``appveyor.yml`` or ``azure-pipelines.yml``).
+
+Scheduler
+~~~~~~~~~
+
+Crossbow handles version generation, task rendering and
+submission. The tasks are defined in ``tasks.yml``.
+
+Install
+-------
+
+The following guide depends on GitHub, but theoretically any git
+server can be used.
+
+If you are not using the `ursacomputing/crossbow <https://github.com/ursacomputing/crossbow>`_
+repository, you will need to complete the first two steps, otherwise proceed
+to step 3:
+
+1. `Create the queue repository`_
+
+2. Enable `TravisCI`_, `Appveyor`_, `Azure Pipelines`_ and `CircleCI`_
+   integrations for the newly created queue repository.
+
+   - turn off Travis’ `auto cancellation`_ feature on branches
+
+3. Clone either ursacomputing/crossbow if you are using that, or the newly
+   created repository next to the arrow repository:
+
+   By default the script looks for ``crossbow`` next to the arrow repository, but
+   this can be configured through command line arguments.
+
+   .. code:: bash
+
+      git clone https://github.com/<user>/crossbow crossbow
+
+   **Important note:** Crossbow only supports GitHub token based
+   authentication. Although it overwrites the repository urls provided with ssh
+   protocol, it's advisable to use the HTTPS repository URLs.
+
+4. `Create a Personal Access Token`_ with ``repo`` and ``workflow`` permissions (other
+   permissions are not needed)
+
+5. Locally export the token as an environment variable:
+
+   .. code:: bash
+
+      export CROSSBOW_GITHUB_TOKEN=<token>
+
+   or pass it as an argument to the CLI script via ``--github-token``
+
+6. Export the previously created GitHub token on both CI services:
+
+   Use the ``CROSSBOW_GITHUB_TOKEN`` encrypted environment variable. You can
+   set it at the following URLs, where ``ghuser`` is the GitHub
+   username and ``ghrepo`` is the GitHub repository name (typically
+   ``crossbow``):
+
+   - TravisCI: ``https://travis-ci.org/<ghuser>/<ghrepo>/settings``
+   - Appveyor:
+     ``https://ci.appveyor.com/project/<ghuser>/<ghrepo>/settings/environment``
+   - CircleCI:
+     ``https://circleci.com/gh/<ghuser>/<ghrepo>/edit#env-vars``
+
+   On Appveyor, check the ``skip branches without appveyor.yml`` checkbox
+   on the web UI under the crossbow repository’s settings.
+
+7. Install Python (minimum supported version is 3.6):
+
+   Miniconda is preferred; see the installation instructions:
+   https://conda.io/docs/user-guide/install/index.html
+
+8. Install the archery toolset containing crossbow itself:
+
+   .. code:: bash
+
+      pip install -e "arrow/dev/archery[crossbow]"
+
+9. Try running it:
+
+   .. 
code:: bash + + $ archery crossbow --help + +Usage +----- + +The script does the following: + +1. Detects the current repository, thus supports forks. The following + snippet will build kszucs’s fork instead of the upstream apache/arrow + repository. + + .. code:: bash + + $ git clone https://github.com/kszucs/arrow + $ git clone https://github.com/kszucs/crossbow + + $ cd arrow/dev/tasks + $ archery crossbow submit --help # show the available options + $ archery crossbow submit conda-win conda-linux conda-osx + +2. Gets the HEAD commit of the currently checked out branch and + generates the version number based on `setuptools_scm`_. So to build + a particular branch check out before running the script: + + .. code:: bash + + git checkout ARROW-<ticket number> + archery crossbow submit --dry-run conda-linux conda-osx + + Note that the arrow branch must be pushed beforehand, because the + script will clone the selected branch. + +3. Reads and renders the required build configurations with the + parameters substituted. + +4. Create a branch per task, prefixed with the job id. For example to + build conda recipes on linux it will create a new branch: + ``crossbow@build-<id>-conda-linux``. + +5. Pushes the modified branches to GitHub which triggers the builds. For + authentication it uses GitHub OAuth tokens described in the install + section. + +Query the build status +~~~~~~~~~~~~~~~~~~~~~~ + +Build id (which has a corresponding branch in the queue repository) is returned +by the ``submit`` command. + +.. code:: bash + + archery crossbow status <build id / branch name> + +Download the build artifacts +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. code:: bash + + archery crossbow artifacts <build id / branch name> + +Examples +~~~~~~~~ + +Submit command accepts a list of task names and/or a list of task-group names +to select which tasks to build. + +Run multiple builds: + +.. code:: bash + + $ archery crossbow submit debian-stretch conda-linux-gcc-py37-r40 + Repository: https://github.com/kszucs/arrow@tasks + Commit SHA: 810a718836bb3a8cefc053055600bdcc440e6702 + Version: 0.9.1.dev48+g810a7188.d20180414 + Pushed branches: + - debian-stretch + - conda-linux-gcc-py37-r40 + +Just render without applying or committing the changes: + +.. code:: bash + + $ archery crossbow submit --dry-run task_name + +Run only ``conda`` package builds and a Linux one: + +.. code:: bash + + $ archery crossbow submit --group conda centos-7 + +Run ``wheel`` builds: + +.. code:: bash + + $ archery crossbow submit --group wheel + +There are multiple task groups in the ``tasks.yml`` like docker, integration +and cpp-python for running docker based tests. + +``archery crossbow submit`` supports multiple options and arguments, for more +see its help page: + +.. code:: bash + + $ archery crossbow submit --help + + +.. _conda-forge packages: conda-recipes +.. _Wheels: python-wheels +.. _Linux packages: linux-packages +.. _Create the queue repository: https://help.github.com/articles/creating-a-new-repository +.. _TravisCI: https://travis-ci.org/getting_started +.. _Appveyor: https://www.appveyor.com/docs/ +.. _CircleCI: https://circleci.com/docs/2.0/getting-started/ +.. _Azure Pipelines: https://docs.microsoft.com/en-us/azure/devops/pipelines/get-started/pipelines-sign-up +.. _auto cancellation: https://docs.travis-ci.com/user/customizing-the-build/#Building-only-the-latest-commit +.. _Create a Personal Access Token: https://help.github.com/articles/creating-a-personal-access-token-for-the-command-line/ +.. 
_setuptools_scm: https://pypi.python.org/pypi/setuptools_scm diff --git a/src/arrow/docs/source/developers/docker.rst b/src/arrow/docs/source/developers/docker.rst new file mode 100644 index 000000000..36b468752 --- /dev/null +++ b/src/arrow/docs/source/developers/docker.rst @@ -0,0 +1,226 @@ +.. Licensed to the Apache Software Foundation (ASF) under one +.. or more contributor license agreements. See the NOTICE file +.. distributed with this work for additional information +.. regarding copyright ownership. The ASF licenses this file +.. to you under the Apache License, Version 2.0 (the +.. "License"); you may not use this file except in compliance +.. with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, +.. software distributed under the License is distributed on an +.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +.. KIND, either express or implied. See the License for the +.. specific language governing permissions and limitations +.. under the License. + +.. _docker-builds: + +Running Docker Builds +===================== + +Most of our Linux based Continuous Integration tasks are decoupled from public +CI services using `Docker <https://docs.docker.com/>`_ and +`docker-compose <https://docs.docker.com/compose/>`_. Keeping the CI configuration +minimal makes local reproducibility possible. + +Usage +----- + +There are multiple ways to execute the docker based builds. +The recommended way is to use the :ref:`Archery <archery>` tool: + +Examples +~~~~~~~~ + +**List the available images:** + +.. code:: bash + + archery docker images + +**Execute a build:** + +.. code:: bash + + archery docker run conda-python + +Archery calls the following docker-compose commands: + +.. code:: bash + + docker-compose pull --ignore-pull-failures conda-cpp + docker-compose pull --ignore-pull-failures conda-python + docker-compose build conda-cpp + docker-compose build conda-python + docker-compose run --rm conda-python + +**Show the docker-compose commands instead of executing them:** + +.. code:: bash + + archery docker run --dry-run conda-python + +**To disable the image pulling:** + +.. code:: bash + + archery docker run --no-cache conda-python + +Which translates to: + +.. code:: bash + + docker-compose build --no-cache conda-cpp + docker-compose build --no-cache conda-python + docker-compose run --rm conda-python + +**To disable the cache only for the leaf image:** + +Useful to force building the development version of a dependency. +In case of the example below the command builds the +``conda-cpp > conda-python > conda-python-pandas`` branch of the image tree +where the leaf image is ``conda-python-pandas``. + +.. code:: bash + + PANDAS=master archery docker run --no-leaf-cache conda-python-pandas + +Which translates to: + +.. code:: bash + + export PANDAS=master + docker-compose pull --ignore-pull-failures conda-cpp + docker-compose pull --ignore-pull-failures conda-python + docker-compose build conda-cpp + docker-compose build conda-python + docker-compose build --no-cache conda-python-pandas + docker-compose run --rm conda-python-pandas + +Note that it doesn't pull the conda-python-pandas image and disable the cache +when building it. + +``PANDAS`` is a `build parameter <Docker Build Parameters>`_, see the +defaults in the .env file. 
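+
+Other build parameters can be overridden in the same way. A sketch, assuming
+the parameter below is defined in ``.env`` (check that file for the parameters
+that actually exist and their defaults):
+
+.. code:: bash
+
+   # Build and run the C++ image against a different Ubuntu version
+   UBUNTU=20.04 archery docker run ubuntu-cpp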
+
+**To entirely skip building the image:**
+
+The layer-caching mechanism of docker-compose can be less reliable than
+docker's, depending on the version, the ``cache_from`` build entry, and the
+backend used (docker-py, docker-cli, docker-cli with buildkit). This can lead
+to different layer hashes, even when executing the same build command
+repeatedly, eventually causing cache misses and full image rebuilds.
+
+*If the image has already been built but the cache doesn't work properly*, it
+can be useful to skip the build phases:
+
+.. code:: bash
+
+   # first run ensures that the image is built
+   archery docker run conda-python
+
+   # if the second run tries to build the image again and none of the files
+   # referenced in the relevant dockerfile have changed, then it indicates a
+   # cache miss caused by the issue described above
+   archery docker run conda-python
+
+   # since the image is properly built with the first command, there is no
+   # need to rebuild it, so manually disable the pull and build phases to
+   # save some time
+   archery docker run --no-pull --no-build conda-python
+
+**Pass environment variables to the container:**
+
+Most of the build scripts used within the containers can be configured through
+environment variables. Pass them using the ``--env`` or ``-e`` CLI options,
+similar to the ``docker run`` and ``docker-compose run`` interfaces.
+
+.. code:: bash
+
+   archery docker run --env CMAKE_BUILD_TYPE=release ubuntu-cpp
+
+For the available environment variables in the C++ builds, see the
+``ci/scripts/cpp_build.sh`` script.
+
+**Run the image with a custom command:**
+
+Custom docker commands may be passed as the second argument to
+``archery docker run``.
+
+The following example starts an interactive ``bash`` session in the container,
+which is useful for debugging the build interactively:
+
+.. code:: bash
+
+   archery docker run ubuntu-cpp bash
+
+Docker Volume Caches
+~~~~~~~~~~~~~~~~~~~~
+
+Most of the compose containers have specific directories mounted from the host
+to reuse ``ccache`` and ``maven`` artifacts. These docker volumes are placed
+in the ``.docker`` directory.
+
+In order to clean up the cache, simply delete one or more of these directories
+(or the whole ``.docker`` directory).
+
+
+Development
+-----------
+
+The docker-compose configuration is tuned towards reusable development
+containers using hierarchical images. For example, multiple language bindings
+are dependent on the C++ implementation, so instead of redefining the
+C++ environment in multiple Dockerfiles, we can reuse the exact same base C++
+image when building GLib, Ruby, R and Python bindings.
+This reduces duplication and streamlines maintenance, but makes the
+docker-compose configuration more complicated.
+
+Docker Build Parameters
+~~~~~~~~~~~~~~~~~~~~~~~
+
+The build-time parameters are pushed down to the Dockerfiles to make the
+image building more flexible. These parameters are usually called docker
+build args, but we pass these values as environment variables to
+docker-compose.yml. The build parameters are extensively used for:
+
+- defining the docker registry used for caching
+- platform architectures
+- operating systems and versions
+- defining various versions of dependencies
+
+The default parameter values are stored in the top level .env file.
+For detailed examples see the docker-compose.yml.
+
+Build Scripts
+~~~~~~~~~~~~~
+
+The scripts maintained under the ci/scripts directory should be kept
+parametrizable but reasonably minimal, so that each clearly encapsulates
+the task it is responsible for.
+For example:
+
+- ``cpp_build.sh``: build the C++ implementation without running the tests.
+- ``cpp_test.sh``: execute the C++ tests.
+- ``python_build.sh``: build the Python bindings without running the tests.
+- ``python_test.sh``: execute the Python tests.
+- ``docs_build.sh``: build the Sphinx documentation.
+- ``integration_dask.sh``: execute the Dask integration tests.
+- ``integration_pandas.sh``: execute the pandas integration tests.
+- ``install_minio.sh``: install the MinIO server for multiple platforms.
+- ``install_conda.sh``: install Miniconda for multiple platforms.
+- ``install_gcs_testbench.sh``: install the GCS testbench for multiple platforms.
+
+The parametrization (like the C++ CMake options) is achieved via environment
+variables with useful defaults to keep the build configurations declarative.
+
+A good example is the ``cpp_build.sh`` build script, which forwards environment
+variables as CMake options, so the same script can be invoked in various
+configurations without needing to change it. For examples, see how the
+environment variables are passed to the C++ images in docker-compose.yml.
+
+Adding New Images
+~~~~~~~~~~~~~~~~~
+
+See the inline comments available in the docker-compose.yml file.
diff --git a/src/arrow/docs/source/developers/documentation.rst b/src/arrow/docs/source/developers/documentation.rst
new file mode 100644
index 000000000..813cc9cbd
--- /dev/null
+++ b/src/arrow/docs/source/developers/documentation.rst
@@ -0,0 +1,103 @@
+.. Licensed to the Apache Software Foundation (ASF) under one
+.. or more contributor license agreements. See the NOTICE file
+.. distributed with this work for additional information
+.. regarding copyright ownership. The ASF licenses this file
+.. to you under the Apache License, Version 2.0 (the
+.. "License"); you may not use this file except in compliance
+.. with the License. You may obtain a copy of the License at
+
+.. http://www.apache.org/licenses/LICENSE-2.0
+
+.. Unless required by applicable law or agreed to in writing,
+.. software distributed under the License is distributed on an
+.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+.. KIND, either express or implied. See the License for the
+.. specific language governing permissions and limitations
+.. under the License.
+
+.. _building-docs:
+
+Building the Documentation
+==========================
+
+Prerequisites
+-------------
+
+The documentation build process uses `Doxygen <http://www.doxygen.nl/>`_ and
+`Sphinx <http://www.sphinx-doc.org/>`_ along with a few extensions.
+
+If you're using Conda, the required software can be installed in a single line:
+
+.. code-block:: shell
+
+   conda install -c conda-forge --file=ci/conda_env_sphinx.txt
+
+Otherwise, you'll first need to install `Doxygen <http://www.doxygen.nl/>`_
+yourself (for example from your distribution's official repositories, if
+using Linux). Then you can install the Python-based requirements with the
+following command:
+
+.. code-block:: shell
+
+   pip install -r docs/requirements.txt
+
+Building
+--------
+
+.. note::
+
+   If you are building the documentation on Windows, not all sections
+   may build properly.
+
+These two steps are mandatory and must be executed in order.
+
+#. Process the C++ API using Doxygen
+
+   .. code-block:: shell
+
+      pushd cpp/apidoc
+      doxygen
+      popd
+
+#. Build the complete documentation using Sphinx.
+
+   .. note::
+
+      This step requires that the pyarrow library is installed
+      in your Python environment.
One way to accomplish + this is to follow the build instructions at :ref:`python-development` + and then run ``python setup.py install`` in arrow/python + (it is best to do this in a dedicated conda/virtual environment). + + .. code-block:: shell + + pushd docs + make html + popd + +.. note:: + + Note that building the documentation may fail if your build of pyarrow is + not sufficiently comprehensive. Portions of the Python API documentation + will also not build without CUDA support having been built. + +After these steps are completed, the documentation is rendered in HTML +format in ``docs/_build/html``. In particular, you can point your browser +at ``docs/_build/html/index.html`` to read the docs and review any changes +you made. + +Building with Docker +-------------------- + +You can use :ref:`Archery <archery>` to build the documentation within a +Docker container. + +.. code-block:: shell + + archery docker run ubuntu-docs + +The final output is located under ``docs/_build/html``. + +.. seealso:: + + :ref:`docker-builds`. diff --git a/src/arrow/docs/source/developers/experimental_repos.rst b/src/arrow/docs/source/developers/experimental_repos.rst new file mode 100644 index 000000000..f13adba2b --- /dev/null +++ b/src/arrow/docs/source/developers/experimental_repos.rst @@ -0,0 +1,65 @@ +.. Licensed to the Apache Software Foundation (ASF) under one +.. or more contributor license agreements. See the NOTICE file +.. distributed with this work for additional information +.. regarding copyright ownership. The ASF licenses this file +.. to you under the Apache License, Version 2.0 (the +.. "License"); you may not use this file except in compliance +.. with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, +.. software distributed under the License is distributed on an +.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +.. KIND, either express or implied. See the License for the +.. specific language governing permissions and limitations +.. under the License. + +Experimental repositories +========================= + +Apache Arrow has an explicit policy over developing experimental repositories +in the context of +`rules for revolutionaries <https://grep.codeconsult.ch/2020/04/07/rules-for-revolutionaries-2000-edition/>`_. + +The main motivation for this policy is to offer a lightweight mechanism to +conduct experimental work, with the necessary creative freedom, within the ASF +and the Apache Arrow governance model. This policy allows committers to work on +new repositories, as they offer many important tools to manage it (e.g. github +issues, “watch”, “github stars” to measure overall interest). + +Process ++++++++ + +* A committer *may* initiate experimental work by creating a separate git + repository within the Apache Arrow (e.g. via `selfserve <https://selfserve.apache.org/>`_) + and announcing it on the mailing list, together with its goals, and a link to the + newly created repository. +* The committer *must* initiate an email thread with the sole purpose of + presenting updates to the community about the status of the repo. +* There *must not* be official releases from the repository. +* Any decision to make the experimental repo official in any way, whether by merging or migrating, *must* be discussed and voted on in the mailing list. +* The committer is responsible for managing issues, documentation, CI of the repository, + including licensing checks. 
+* The committer decides when the repository is archived. + +Repository management ++++++++++++++++++++++ + +* The repository *must* be under ``apache/`` +* The repository’s name *must* be prefixed by ``arrow-experimental-`` +* The committer has full permissions over the repository (within possible in ASF) +* Push / merge permissions *must only* be granted to Apache Arrow committers + +Development process ++++++++++++++++++++ + +* The repository must follow the ASF requirements about 3rd party code. +* The committer decides how to manage issues, PRs, etc. + +Divergences ++++++++++++ + +* If any of the “must” above fails to materialize and no correction measure + is taken by the committer upon request, the PMC *should* take ownership + and decide what to do. diff --git a/src/arrow/docs/source/developers/python.rst b/src/arrow/docs/source/developers/python.rst new file mode 100644 index 000000000..3795512ef --- /dev/null +++ b/src/arrow/docs/source/developers/python.rst @@ -0,0 +1,565 @@ +.. Licensed to the Apache Software Foundation (ASF) under one +.. or more contributor license agreements. See the NOTICE file +.. distributed with this work for additional information +.. regarding copyright ownership. The ASF licenses this file +.. to you under the Apache License, Version 2.0 (the +.. "License"); you may not use this file except in compliance +.. with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, +.. software distributed under the License is distributed on an +.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +.. KIND, either express or implied. See the License for the +.. specific language governing permissions and limitations +.. under the License. + +.. currentmodule:: pyarrow +.. _python-development: + +================== +Python Development +================== + +This page provides general Python development guidelines and source build +instructions for all platforms. + +Coding Style +============ + +We follow a similar PEP8-like coding style to the `pandas project +<https://github.com/pandas-dev/pandas>`_. To check style issues, use the +:ref:`Archery <archery>` subcommand ``lint``: + +.. code-block:: shell + + pip install -e arrow/dev/archery[lint] + +.. code-block:: shell + + archery lint --python + +Some of the issues can be automatically fixed by passing the ``--fix`` option: + +.. code-block:: shell + + archery lint --python --fix + +Unit Testing +============ + +We are using `pytest <https://docs.pytest.org/en/latest/>`_ to develop our unit +test suite. After building the project (see below) you can run its unit tests +like so: + +.. code-block:: shell + + pytest pyarrow + +Package requirements to run the unit tests are found in +``requirements-test.txt`` and can be installed if needed with ``pip install -r +requirements-test.txt``. + +The project has a number of custom command line options for its test +suite. Some tests are disabled by default, for example. To see all the options, +run + +.. code-block:: shell + + pytest pyarrow --help + +and look for the "custom options" section. + +Test Groups +----------- + +We have many tests that are grouped together using pytest marks. Some of these +are disabled by default. To enable a test group, pass ``--$GROUP_NAME``, +e.g. ``--parquet``. To disable a test group, prepend ``disable``, so +``--disable-parquet`` for example. 
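+
+As an illustration, the Parquet test group could be enabled or disabled like
+this (exactly which tests then run depends on how pyarrow was built):
+
+.. code-block:: shell
+
+   # run the default test selection plus the Parquet test group
+   pytest pyarrow --parquet
+
+   # run the default test selection with the Parquet group explicitly disabled
+   pytest pyarrow --disable-parquet
+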
To run **only** the unit tests for a +particular group, prepend ``only-`` instead, for example ``--only-parquet``. + +The test groups currently include: + +* ``gandiva``: tests for Gandiva expression compiler (uses LLVM) +* ``hdfs``: tests that use libhdfs or libhdfs3 to access the Hadoop filesystem +* ``hypothesis``: tests that use the ``hypothesis`` module for generating + random test cases. Note that ``--hypothesis`` doesn't work due to a quirk + with pytest, so you have to pass ``--enable-hypothesis`` +* ``large_memory``: Test requiring a large amount of system RAM +* ``orc``: Apache ORC tests +* ``parquet``: Apache Parquet tests +* ``plasma``: Plasma Object Store tests +* ``s3``: Tests for Amazon S3 +* ``tensorflow``: Tests that involve TensorFlow +* ``flight``: Flight RPC tests + +Benchmarking +------------ + +For running the benchmarks, see :ref:`python-benchmarks`. + +Building on Linux and MacOS +============================= + +System Requirements +------------------- + +On macOS, any modern XCode (6.4 or higher; the current version is 10) is +sufficient. + +On Linux, for this guide, we require a minimum of gcc 4.8, or clang 3.7 or +higher. You can check your version by running + +.. code-block:: shell + + $ gcc --version + +If the system compiler is older than gcc 4.8, it can be set to a newer version +using the ``$CC`` and ``$CXX`` environment variables: + +.. code-block:: shell + + export CC=gcc-4.8 + export CXX=g++-4.8 + +Environment Setup and Build +--------------------------- + +First, let's clone the Arrow git repository: + +.. code-block:: shell + + mkdir repos + cd repos + git clone https://github.com/apache/arrow.git + +You should now see + +.. code-block:: shell + + $ ls -l + total 8 + drwxrwxr-x 12 wesm wesm 4096 Apr 15 19:19 arrow/ + +Pull in the test data and setup the environment variables: + +.. code-block:: shell + + pushd arrow + git submodule init + git submodule update + export PARQUET_TEST_DATA="${PWD}/cpp/submodules/parquet-testing/data" + export ARROW_TEST_DATA="${PWD}/testing/data" + popd + +Using Conda +~~~~~~~~~~~ + +.. note:: + + Using conda to build Arrow on macOS is complicated by the + fact that the `conda-forge compilers require an older macOS SDK <https://stackoverflow.com/a/55798942>`_. + Conda offers some `installation instructions <https://docs.conda.io/projects/conda-build/en/latest/resources/compiler-tools.html#macos-sdk>`_; + the alternative would be to use :ref:`Homebrew <python-homebrew>` and + ``pip`` instead. + +Let's create a conda environment with all the C++ build and Python dependencies +from conda-forge, targeting development for Python 3.7: + +On Linux and macOS: + +.. code-block:: shell + + conda create -y -n pyarrow-dev -c conda-forge \ + --file arrow/ci/conda_env_unix.txt \ + --file arrow/ci/conda_env_cpp.txt \ + --file arrow/ci/conda_env_python.txt \ + --file arrow/ci/conda_env_gandiva.txt \ + compilers \ + python=3.7 \ + pandas + +As of January 2019, the ``compilers`` package is needed on many Linux +distributions to use packages from conda-forge. + +With this out of the way, you can now activate the conda environment + +.. code-block:: shell + + conda activate pyarrow-dev + +For Windows, see the `Building on Windows`_ section below. + +We need to set some environment variables to let Arrow's build system know +about our build toolchain: + +.. code-block:: shell + + export ARROW_HOME=$CONDA_PREFIX + +Using pip +~~~~~~~~~ + +.. 
warning:: + + If you installed Python using the Anaconda distribution or `Miniconda + <https://conda.io/miniconda.html>`_, you cannot currently use ``virtualenv`` + to manage your development. Please follow the conda-based development + instructions instead. + +.. _python-homebrew: + +On macOS, use Homebrew to install all dependencies required for +building Arrow C++: + +.. code-block:: shell + + brew update && brew bundle --file=arrow/cpp/Brewfile + +See :ref:`here <cpp-build-dependency-management>` for a list of dependencies you +may need. + +On Debian/Ubuntu, you need the following minimal set of dependencies. All other +dependencies will be automatically built by Arrow's third-party toolchain. + +.. code-block:: shell + + $ sudo apt-get install libjemalloc-dev libboost-dev \ + libboost-filesystem-dev \ + libboost-system-dev \ + libboost-regex-dev \ + python-dev \ + autoconf \ + flex \ + bison + +If you are building Arrow for Python 3, install ``python3-dev`` instead of ``python-dev``. + +On Arch Linux, you can get these dependencies via pacman. + +.. code-block:: shell + + $ sudo pacman -S jemalloc boost + +Now, let's create a Python virtualenv with all Python dependencies in the same +folder as the repositories and a target installation folder: + +.. code-block:: shell + + virtualenv pyarrow + source ./pyarrow/bin/activate + pip install -r arrow/python/requirements-build.txt \ + -r arrow/python/requirements-test.txt + + # This is the folder where we will install the Arrow libraries during + # development + mkdir dist + +If your cmake version is too old on Linux, you could get a newer one via +``pip install cmake``. + +We need to set some environment variables to let Arrow's build system know +about our build toolchain: + +.. code-block:: shell + + export ARROW_HOME=$(pwd)/dist + export LD_LIBRARY_PATH=$(pwd)/dist/lib:$LD_LIBRARY_PATH + +Build and test +-------------- + +Now build and install the Arrow C++ libraries: + +.. code-block:: shell + + mkdir arrow/cpp/build + pushd arrow/cpp/build + + cmake -DCMAKE_INSTALL_PREFIX=$ARROW_HOME \ + -DCMAKE_INSTALL_LIBDIR=lib \ + -DARROW_WITH_BZ2=ON \ + -DARROW_WITH_ZLIB=ON \ + -DARROW_WITH_ZSTD=ON \ + -DARROW_WITH_LZ4=ON \ + -DARROW_WITH_SNAPPY=ON \ + -DARROW_WITH_BROTLI=ON \ + -DARROW_PARQUET=ON \ + -DARROW_PYTHON=ON \ + -DARROW_BUILD_TESTS=ON \ + .. + make -j4 + make install + popd + +There are a number of optional components that can can be switched ON by +adding flags with ``ON``: + +* ``ARROW_FLIGHT``: RPC framework +* ``ARROW_GANDIVA``: LLVM-based expression compiler +* ``ARROW_ORC``: Support for Apache ORC file format +* ``ARROW_PARQUET``: Support for Apache Parquet file format +* ``ARROW_PLASMA``: Shared memory object store + +Anything set to ``ON`` above can also be turned off. Note that some compression +libraries are needed for Parquet support. + +If multiple versions of Python are installed in your environment, you may have +to pass additional parameters to cmake so that it can find the right +executable, headers and libraries. For example, specifying +``-DPython3_EXECUTABLE=$VIRTUAL_ENV/bin/python`` (assuming that you're in +virtualenv) enables cmake to choose the python executable which you are using. + +.. note:: + + On Linux systems with support for building on multiple architectures, + ``make`` may install libraries in the ``lib64`` directory by default. For + this reason we recommend passing ``-DCMAKE_INSTALL_LIBDIR=lib`` because the + Python build scripts assume the library directory is ``lib`` + +.. 
note:: + + If you have conda installed but are not using it to manage dependencies, + and you have trouble building the C++ library, you may need to set + ``-DARROW_DEPENDENCY_SOURCE=AUTO`` or some other value (described + :ref:`here <cpp-build-dependency-management>`) + to explicitly tell CMake not to use conda. + +.. note:: + + With older versions of ``cmake`` (<3.15) you might need to pass ``-DPYTHON_EXECUTABLE`` + instead of ``-DPython3_EXECUTABLE``. See `cmake documentation <https://cmake.org/cmake/help/latest/module/FindPython3.html#artifacts-specification>` + for more details. + +For any other C++ build challenges, see :ref:`cpp-development`. + +Now, build pyarrow: + +.. code-block:: shell + + pushd arrow/python + export PYARROW_WITH_PARQUET=1 + python setup.py build_ext --inplace + popd + +If you did not build one of the optional components, set the corresponding +``PYARROW_WITH_$COMPONENT`` environment variable to 0. + +Now you are ready to install test dependencies and run `Unit Testing`_, as +described above. + +To build a self-contained wheel (including the Arrow and Parquet C++ +libraries), one can set ``--bundle-arrow-cpp``: + +.. code-block:: shell + + pip install wheel # if not installed + python setup.py build_ext --build-type=$ARROW_BUILD_TYPE \ + --bundle-arrow-cpp bdist_wheel + +Docker examples +~~~~~~~~~~~~~~~ + +If you are having difficulty building the Python library from source, take a +look at the ``python/examples/minimal_build`` directory which illustrates a +complete build and test from source both with the conda and pip/virtualenv +build methods. + +Building with CUDA support +~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The :mod:`pyarrow.cuda` module offers support for using Arrow platform +components with Nvidia's CUDA-enabled GPU devices. To build with this support, +pass ``-DARROW_CUDA=ON`` when building the C++ libraries, and set the following +environment variable when building pyarrow: + +.. code-block:: shell + + export PYARROW_WITH_CUDA=1 + +Debugging +--------- + +Since pyarrow depends on the Arrow C++ libraries, debugging can +frequently involve crossing between Python and C++ shared libraries. + +Using gdb on Linux +~~~~~~~~~~~~~~~~~~ + +To debug the C++ libraries with gdb while running the Python unit + test, first start pytest with gdb: + +.. code-block:: shell + + gdb --args python -m pytest pyarrow/tests/test_to_run.py -k $TEST_TO_MATCH + +To set a breakpoint, use the same gdb syntax that you would when +debugging a C++ unittest, for example: + +.. code-block:: shell + + (gdb) b src/arrow/python/arrow_to_pandas.cc:1874 + No source file named src/arrow/python/arrow_to_pandas.cc. + Make breakpoint pending on future shared library load? (y or [n]) y + Breakpoint 1 (src/arrow/python/arrow_to_pandas.cc:1874) pending. + +Building on Windows +=================== + +Building on Windows requires one of the following compilers to be installed: + +- `Build Tools for Visual Studio 2017 <https://download.visualstudio.microsoft.com/download/pr/3e542575-929e-4297-b6c6-bef34d0ee648/639c868e1219c651793aff537a1d3b77/vs_buildtools.exe>`_ +- Visual Studio 2017 + +During the setup of Build Tools ensure at least one Windows SDK is selected. + +Visual Studio 2019 and its build tools are currently not supported. + +We bootstrap a conda environment similar to above, but skipping some of the +Linux/macOS-only packages: + +First, starting from fresh clones of Apache Arrow: + +.. code-block:: shell + + git clone https://github.com/apache/arrow.git + +.. 
code-block:: shell + + conda create -y -n pyarrow-dev -c conda-forge ^ + --file arrow\ci\conda_env_cpp.txt ^ + --file arrow\ci\conda_env_python.txt ^ + --file arrow\ci\conda_env_gandiva.txt ^ + python=3.7 + conda activate pyarrow-dev + +Now, we build and install Arrow C++ libraries. + +We set a number of environment variables: + +- the path of the installation directory of the Arrow C++ libraries as + ``ARROW_HOME`` +- add the path of installed DLL libraries to ``PATH`` +- and choose the compiler to be used + +.. code-block:: shell + + set ARROW_HOME=%cd%\arrow-dist + set PATH=%ARROW_HOME%\bin;%PATH% + set PYARROW_CMAKE_GENERATOR=Visual Studio 15 2017 Win64 + +Let's configure, build and install the Arrow C++ libraries: + +.. code-block:: shell + + mkdir arrow\cpp\build + pushd arrow\cpp\build + cmake -G "%PYARROW_CMAKE_GENERATOR%" ^ + -DCMAKE_INSTALL_PREFIX=%ARROW_HOME% ^ + -DCMAKE_UNITY_BUILD=ON ^ + -DARROW_CXXFLAGS="/WX /MP" ^ + -DARROW_WITH_LZ4=on ^ + -DARROW_WITH_SNAPPY=on ^ + -DARROW_WITH_ZLIB=on ^ + -DARROW_WITH_ZSTD=on ^ + -DARROW_PARQUET=on ^ + -DARROW_PYTHON=on ^ + .. + cmake --build . --target INSTALL --config Release + popd + +Now, we can build pyarrow: + +.. code-block:: shell + + pushd arrow\python + set PYARROW_WITH_PARQUET=1 + python setup.py build_ext --inplace + popd + +.. note:: + + For building pyarrow, the above defined environment variables need to also + be set. Remember this if to want to re-build ``pyarrow`` after your initial build. + +Then run the unit tests with: + +.. code-block:: shell + + pushd arrow\python + py.test pyarrow -v + popd + +.. note:: + + With the above instructions the Arrow C++ libraries are not bundled with + the Python extension. This is recommended for development as it allows the + C++ libraries to be re-built separately. + + As a consequence however, ``python setup.py install`` will also not install + the Arrow C++ libraries. Therefore, to use ``pyarrow`` in python, ``PATH`` + must contain the directory with the Arrow .dll-files. + + If you want to bundle the Arrow C++ libraries with ``pyarrow`` add + ``--bundle-arrow-cpp`` as build parameter: + + ``python setup.py build_ext --bundle-arrow-cpp`` + + Important: If you combine ``--bundle-arrow-cpp`` with ``--inplace`` the + Arrow C++ libraries get copied to the python source tree and are not cleared + by ``python setup.py clean``. They remain in place and will take precedence + over any later Arrow C++ libraries contained in ``PATH``. This can lead to + incompatibilities when ``pyarrow`` is later built without + ``--bundle-arrow-cpp``. + +Running C++ unit tests for Python integration +--------------------------------------------- + +Running C++ unit tests should not be necessary for most developers. If you do +want to run them, you need to pass ``-DARROW_BUILD_TESTS=ON`` during +configuration of the Arrow C++ library build: + +.. code-block:: shell + + mkdir arrow\cpp\build + pushd arrow\cpp\build + cmake -G "%PYARROW_CMAKE_GENERATOR%" ^ + -DCMAKE_INSTALL_PREFIX=%ARROW_HOME% ^ + -DARROW_CXXFLAGS="/WX /MP" ^ + -DARROW_PARQUET=on ^ + -DARROW_PYTHON=on ^ + -DARROW_BUILD_TESTS=ON ^ + .. + cmake --build . --target INSTALL --config Release + popd + + +Getting ``arrow-python-test.exe`` (C++ unit tests for python integration) to +run is a bit tricky because your ``%PYTHONHOME%`` must be configured to point +to the active conda environment: + +.. 
code-block:: shell + + set PYTHONHOME=%CONDA_PREFIX% + pushd arrow\cpp\build\release\Release + arrow-python-test.exe + popd + +To run all tests of the Arrow C++ library, you can also run ``ctest``: + +.. code-block:: shell + + set PYTHONHOME=%CONDA_PREFIX% + pushd arrow\cpp\build + ctest + popd + +Windows Caveats +--------------- + +Some components are not supported yet on Windows: + +* Flight RPC +* Plasma diff --git a/src/arrow/docs/source/example.gz b/src/arrow/docs/source/example.gz Binary files differnew file mode 100644 index 000000000..4fc60405c --- /dev/null +++ b/src/arrow/docs/source/example.gz diff --git a/src/arrow/docs/source/format/Arrow.graffle b/src/arrow/docs/source/format/Arrow.graffle Binary files differnew file mode 100644 index 000000000..f4eead922 --- /dev/null +++ b/src/arrow/docs/source/format/Arrow.graffle diff --git a/src/arrow/docs/source/format/Arrow.png b/src/arrow/docs/source/format/Arrow.png Binary files differnew file mode 100644 index 000000000..1b09aa2d8 --- /dev/null +++ b/src/arrow/docs/source/format/Arrow.png diff --git a/src/arrow/docs/source/format/CDataInterface.rst b/src/arrow/docs/source/format/CDataInterface.rst new file mode 100644 index 000000000..20446411a --- /dev/null +++ b/src/arrow/docs/source/format/CDataInterface.rst @@ -0,0 +1,948 @@ +.. Licensed to the Apache Software Foundation (ASF) under one +.. or more contributor license agreements. See the NOTICE file +.. distributed with this work for additional information +.. regarding copyright ownership. The ASF licenses this file +.. to you under the Apache License, Version 2.0 (the +.. "License"); you may not use this file except in compliance +.. with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, +.. software distributed under the License is distributed on an +.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +.. KIND, either express or implied. See the License for the +.. specific language governing permissions and limitations +.. under the License. + +.. _c-data-interface: + +========================== +The Arrow C data interface +========================== + +Rationale +========= + +Apache Arrow is designed to be a universal in-memory format for the representation +of tabular ("columnar") data. However, some projects may face a difficult +choice between either depending on a fast-evolving project such as the +Arrow C++ library, or having to reimplement adapters for data interchange, +which may require significant, redundant development effort. + +The Arrow C data interface defines a very small, stable set of C definitions +that can be easily *copied* in any project's source code and used for columnar +data interchange in the Arrow format. For non-C/C++ languages and runtimes, +it should be almost as easy to translate the C definitions into the +corresponding C FFI declarations. + +Applications and libraries can therefore work with Arrow memory without +necessarily using Arrow libraries or reinventing the wheel. Developers can +choose between tight integration +with the Arrow *software project* (benefitting from the growing array of +facilities exposed by e.g. the C++ or Java implementations of Apache Arrow, +but with the cost of a dependency) or minimal integration with the Arrow +*format* only. + +Goals +----- + +* Expose an ABI-stable interface. 
+* Make it easy for third-party projects to implement support for (including partial + support where sufficient), with little initial investment. +* Allow zero-copy sharing of Arrow data between independent runtimes + and components running in the same process. +* Match the Arrow array concepts closely to avoid the development of + yet another marshalling layer. +* Avoid the need for one-to-one adaptation layers such as the limited + JPype-based bridge between Java and Python. +* Enable integration without an explicit dependency (either at compile-time + or runtime) on the Arrow software project. + +Ideally, the Arrow C data interface can become a low-level *lingua franca* +for sharing columnar data at runtime and establish Arrow as the universal +building block in the columnar processing ecosystem. + +Non-goals +--------- + +* Expose a C API mimicking operations available in higher-level runtimes + (such as C++, Java...). +* Data sharing between distinct processes or storage persistence. + + +Comparison with the Arrow IPC format +------------------------------------ + +Pros of the C data interface vs. the IPC format: + +* No dependency on Flatbuffers. +* No buffer reassembly (data is already exposed in logical Arrow format). +* Zero-copy by design. +* Easy to reimplement from scratch. +* Minimal C definition that can be easily copied into other codebases. +* Resource lifetime management through a custom release callback. + +Pros of the IPC format vs. the data interface: + +* Works across processes and machines. +* Allows data storage and persistence. +* Being a streamable format, the IPC format has room for composing more features + (such as integrity checks, compression...). +* Does not require explicit C data access. + +Data type description -- format strings +======================================= + +A data type is described using a format string. The format string only +encodes information about the top-level type; for nested type, child types +are described separately. Also, metadata is encoded in a separate string. + +The format strings are designed to be easily parsable, even from a language +such as C. 
The most common primitive formats have one-character format +strings: + ++-----------------+--------------------------+------------+ +| Format string | Arrow data type | Notes | ++=================+==========================+============+ +| ``n`` | null | | ++-----------------+--------------------------+------------+ +| ``b`` | boolean | | ++-----------------+--------------------------+------------+ +| ``c`` | int8 | | ++-----------------+--------------------------+------------+ +| ``C`` | uint8 | | ++-----------------+--------------------------+------------+ +| ``s`` | int16 | | ++-----------------+--------------------------+------------+ +| ``S`` | uint16 | | ++-----------------+--------------------------+------------+ +| ``i`` | int32 | | ++-----------------+--------------------------+------------+ +| ``I`` | uint32 | | ++-----------------+--------------------------+------------+ +| ``l`` | int64 | | ++-----------------+--------------------------+------------+ +| ``L`` | uint64 | | ++-----------------+--------------------------+------------+ +| ``e`` | float16 | | ++-----------------+--------------------------+------------+ +| ``f`` | float32 | | ++-----------------+--------------------------+------------+ +| ``g`` | float64 | | ++-----------------+--------------------------+------------+ + ++-----------------+---------------------------------------------------+------------+ +| Format string | Arrow data type | Notes | ++=================+===================================================+============+ +| ``z`` | binary | | ++-----------------+---------------------------------------------------+------------+ +| ``Z`` | large binary | | ++-----------------+---------------------------------------------------+------------+ +| ``u`` | utf-8 string | | ++-----------------+---------------------------------------------------+------------+ +| ``U`` | large utf-8 string | | ++-----------------+---------------------------------------------------+------------+ +| ``d:19,10`` | decimal128 [precision 19, scale 10] | | ++-----------------+---------------------------------------------------+------------+ +| ``d:19,10,NNN`` | decimal bitwidth = NNN [precision 19, scale 10] | | ++-----------------+---------------------------------------------------+------------+ +| ``w:42`` | fixed-width binary [42 bytes] | | ++-----------------+---------------------------------------------------+------------+ + +Temporal types have multi-character format strings starting with ``t``: + ++-----------------+---------------------------------------------------+------------+ +| Format string | Arrow data type | Notes | ++=================+===================================================+============+ +| ``tdD`` | date32 [days] | | ++-----------------+---------------------------------------------------+------------+ +| ``tdm`` | date64 [milliseconds] | | ++-----------------+---------------------------------------------------+------------+ +| ``tts`` | time32 [seconds] | | ++-----------------+---------------------------------------------------+------------+ +| ``ttm`` | time32 [milliseconds] | | ++-----------------+---------------------------------------------------+------------+ +| ``ttu`` | time64 [microseconds] | | ++-----------------+---------------------------------------------------+------------+ +| ``ttn`` | time64 [nanoseconds] | | ++-----------------+---------------------------------------------------+------------+ +| ``tss:...`` | timestamp [seconds] with timezone "..." 
| \(1) | ++-----------------+---------------------------------------------------+------------+ +| ``tsm:...`` | timestamp [milliseconds] with timezone "..." | \(1) | ++-----------------+---------------------------------------------------+------------+ +| ``tsu:...`` | timestamp [microseconds] with timezone "..." | \(1) | ++-----------------+---------------------------------------------------+------------+ +| ``tsn:...`` | timestamp [nanoseconds] with timezone "..." | \(1) | ++-----------------+---------------------------------------------------+------------+ +| ``tDs`` | duration [seconds] | | ++-----------------+---------------------------------------------------+------------+ +| ``tDm`` | duration [milliseconds] | | ++-----------------+---------------------------------------------------+------------+ +| ``tDu`` | duration [microseconds] | | ++-----------------+---------------------------------------------------+------------+ +| ``tDn`` | duration [nanoseconds] | | ++-----------------+---------------------------------------------------+------------+ +| ``tiM`` | interval [months] | | ++-----------------+---------------------------------------------------+------------+ +| ``tiD`` | interval [days, time] | | ++-----------------+---------------------------------------------------+------------+ +| ``tin`` | interval [month, day, nanoseconds] | | ++-----------------+---------------------------------------------------+------------+ + + +Dictionary-encoded types do not have a specific format string. Instead, the +format string of the base array represents the dictionary index type, and the +value type can be read from the dependent dictionary array (see below +"Dictionary-encoded arrays"). + +Nested types have multiple-character format strings starting with ``+``. The +names and types of child fields are read from the child arrays. + ++------------------------+---------------------------------------------------+------------+ +| Format string | Arrow data type | Notes | ++========================+===================================================+============+ +| ``+l`` | list | | ++------------------------+---------------------------------------------------+------------+ +| ``+L`` | large list | | ++------------------------+---------------------------------------------------+------------+ +| ``+w:123`` | fixed-sized list [123 items] | | ++------------------------+---------------------------------------------------+------------+ +| ``+s`` | struct | | ++------------------------+---------------------------------------------------+------------+ +| ``+m`` | map | \(2) | ++------------------------+---------------------------------------------------+------------+ +| ``+ud:I,J,...`` | dense union with type ids I,J... | | ++------------------------+---------------------------------------------------+------------+ +| ``+us:I,J,...`` | sparse union with type ids I,J... | | ++------------------------+---------------------------------------------------+------------+ + +Notes: + +(1) + The timezone string is appended as-is after the colon character ``:``, without + any quotes. If the timezone is empty, the colon ``:`` must still be included. + +(2) + As specified in the Arrow columnar format, the map type has a single child type + named ``entries``, itself a 2-child struct type of ``(key, value)``. + +Examples +-------- + +* A dictionary-encoded ``decimal128(precision = 12, scale = 5)`` array + with ``int16`` indices has format string ``s``, and its dependent dictionary + array has format string ``d:12,5``. 
+* A ``list<uint64>`` array has format string ``+l``, and its single child + has format string ``L``. +* A ``struct<ints: int32, floats: float32>`` has format string ``+s``; its two + children have names ``ints`` and ``floats``, and format strings ``i`` and + ``f`` respectively. +* A ``map<string, float64>`` array has format string ``+m``; its single child + has name ``entries`` and format string ``+s``; its two grandchildren have names + ``key`` and ``value``, and format strings ``u`` and ``g`` respectively. +* A ``sparse_union<ints: int32, floats: float32>`` with type ids ``4, 5`` + has format string ``+us:4,5``; its two children have names ``ints`` and + ``floats``, and format strings ``i`` and ``f`` respectively. + + +Structure definitions +===================== + +The following free-standing definitions are enough to support the Arrow +C data interface in your project. Like the rest of the Arrow project, they +are available under the Apache License 2.0. + +.. code-block:: c + + #define ARROW_FLAG_DICTIONARY_ORDERED 1 + #define ARROW_FLAG_NULLABLE 2 + #define ARROW_FLAG_MAP_KEYS_SORTED 4 + + struct ArrowSchema { + // Array type description + const char* format; + const char* name; + const char* metadata; + int64_t flags; + int64_t n_children; + struct ArrowSchema** children; + struct ArrowSchema* dictionary; + + // Release callback + void (*release)(struct ArrowSchema*); + // Opaque producer-specific data + void* private_data; + }; + + struct ArrowArray { + // Array data description + int64_t length; + int64_t null_count; + int64_t offset; + int64_t n_buffers; + int64_t n_children; + const void** buffers; + struct ArrowArray** children; + struct ArrowArray* dictionary; + + // Release callback + void (*release)(struct ArrowArray*); + // Opaque producer-specific data + void* private_data; + }; + +The ArrowSchema structure +------------------------- + +The ``ArrowSchema`` structure describes the type and metadata of an exported +array or record batch. It has the following fields: + +.. c:member:: const char* ArrowSchema.format + + Mandatory. A null-terminated, UTF8-encoded string describing + the data type. If the data type is nested, child types are not + encoded here but in the :c:member:`ArrowSchema.children` structures. + + Consumers MAY decide not to support all data types, but they + should document this limitation. + +.. c:member:: const char* ArrowSchema.name + + Optional. A null-terminated, UTF8-encoded string of the field + or array name. This is mainly used to reconstruct child fields + of nested types. + + Producers MAY decide not to provide this information, and consumers + MAY decide to ignore it. If omitted, MAY be NULL or an empty string. + +.. c:member:: const char* ArrowSchema.metadata + + Optional. A binary string describing the type's metadata. + If the data type is nested, child types are not encoded here but + in the :c:member:`ArrowSchema.children` structures. + + This string is not null-terminated but follows a specific format:: + + int32: number of key/value pairs (noted N below) + int32: byte length of key 0 + key 0 (not null-terminated) + int32: byte length of value 0 + value 0 (not null-terminated) + ... + int32: byte length of key N - 1 + key N - 1 (not null-terminated) + int32: byte length of value N - 1 + value N - 1 (not null-terminated) + + Integers are stored in native endianness. 
For example, the metadata + ``[('key1', 'value1')]`` is encoded on a little-endian machine as:: + + \x01\x00\x00\x00\x04\x00\x00\x00key1\x06\x00\x00\x00value1 + + On a big-endian machine, the same example would be encoded as:: + + \x00\x00\x00\x01\x00\x00\x00\x04key1\x00\x00\x00\x06value1 + + If omitted, this field MUST be NULL (not an empty string). + + Consumers MAY choose to ignore this information. + +.. c:member:: int64_t ArrowSchema.flags + + Optional. A bitfield of flags enriching the type description. + Its value is computed by OR'ing together the flag values. + The following flags are available: + + * ``ARROW_FLAG_NULLABLE``: whether this field is semantically nullable + (regardless of whether it actually has null values). + * ``ARROW_FLAG_DICTIONARY_ORDERED``: for dictionary-encoded types, + whether the ordering of dictionary indices is semantically meaningful. + * ``ARROW_FLAG_MAP_KEYS_SORTED``: for map types, whether the keys within + each map value are sorted. + + If omitted, MUST be 0. + + Consumers MAY choose to ignore some or all of the flags. Even then, + they SHOULD keep this value around so as to propagate its information + to their own consumers. + +.. c:member:: int64_t ArrowSchema.n_children + + Mandatory. The number of children this type has. + +.. c:member:: ArrowSchema** ArrowSchema.children + + Optional. A C array of pointers to each child type of this type. + There must be :c:member:`ArrowSchema.n_children` pointers. + + MAY be NULL only if :c:member:`ArrowSchema.n_children` is 0. + +.. c:member:: ArrowSchema* ArrowSchema.dictionary + + Optional. A pointer to the type of dictionary values. + + MUST be present if the ArrowSchema represents a dictionary-encoded type. + MUST be NULL otherwise. + +.. c:member:: void (*ArrowSchema.release)(struct ArrowSchema*) + + Mandatory. A pointer to a producer-provided release callback. + + See below for memory management and release callback semantics. + +.. c:member:: void* ArrowSchema.private_data + + Optional. An opaque pointer to producer-provided private data. + + Consumers MUST not process this member. Lifetime of this member + is handled by the producer, and especially by the release callback. + + +The ArrowArray structure +------------------------ + +The ``ArrowArray`` describes the data of an exported array or record batch. +For the ``ArrowArray`` structure to be interpreted type, the array type +or record batch schema must already be known. This is either done by +convention -- for example a producer API that always produces the same data +type -- or by passing a ``ArrowSchema`` on the side. + +It has the following fields: + +.. c:member:: int64_t ArrowArray.length + + Mandatory. The logical length of the array (i.e. its number of items). + +.. c:member:: int64_t ArrowArray.null_count + + Mandatory. The number of null items in the array. MAY be -1 if not + yet computed. + +.. c:member:: int64_t ArrowArray.offset + + Mandatory. The logical offset inside the array (i.e. the number of items + from the physical start of the buffers). MUST be 0 or positive. + + Producers MAY specify that they will only produce 0-offset arrays to + ease implementation of consumer code. + Consumers MAY decide not to support non-0-offset arrays, but they + should document this limitation. + +.. c:member:: int64_t ArrowArray.n_buffers + + Mandatory. The number of physical buffers backing this array. The + number of buffers is a function of the data type, as described in the + :ref:`Columnar format specification <format_columnar>`. 
+ + Buffers of children arrays are not included. + +.. c:member:: const void** ArrowArray.buffers + + Mandatory. A C array of pointers to the start of each physical buffer + backing this array. Each `void*` pointer is the physical start of + a contiguous buffer. There must be :c:member:`ArrowArray.n_buffers` pointers. + + The producer MUST ensure that each contiguous buffer is large enough to + represent `length + offset` values encoded according to the + :ref:`Columnar format specification <format_columnar>`. + + It is recommended, but not required, that the memory addresses of the + buffers be aligned at least according to the type of primitive data that + they contain. Consumers MAY decide not to support unaligned memory. + + The pointer to the null bitmap buffer, if the data type specifies one, + MAY be NULL only if :c:member:`ArrowArray.null_count` is 0. + + Buffers of children arrays are not included. + +.. c:member:: int64_t ArrowArray.n_children + + Mandatory. The number of children this array has. The number of children + is a function of the data type, as described in the + :ref:`Columnar format specification <format_columnar>`. + +.. c:member:: ArrowArray** ArrowArray.children + + Optional. A C array of pointers to each child array of this array. + There must be :c:member:`ArrowArray.n_children` pointers. + + MAY be NULL only if :c:member:`ArrowArray.n_children` is 0. + +.. c:member:: ArrowArray* ArrowArray.dictionary + + Optional. A pointer to the underlying array of dictionary values. + + MUST be present if the ArrowArray represents a dictionary-encoded array. + MUST be NULL otherwise. + +.. c:member:: void (*ArrowArray.release)(struct ArrowArray*) + + Mandatory. A pointer to a producer-provided release callback. + + See below for memory management and release callback semantics. + +.. c:member:: void* ArrowArray.private_data + + Optional. An opaque pointer to producer-provided private data. + + Consumers MUST not process this member. Lifetime of this member + is handled by the producer, and especially by the release callback. + + +Dictionary-encoded arrays +------------------------- + +For dictionary-encoded arrays, the :c:member:`ArrowSchema.format` string +encodes the *index* type. The dictionary *value* type can be read +from the :c:member:`ArrowSchema.dictionary` structure. + +The same holds for :c:member:`ArrowArray` structure: while the parent +structure points to the index data, the :c:member:`ArrowArray.dictionary` +points to the dictionary values array. + +Extension arrays +---------------- + +For extension arrays, the :c:member:`ArrowSchema.format` string encodes the +*storage* type. Information about the extension type is encoded in the +:c:member:`ArrowSchema.metadata` string, similarly to the +:ref:`IPC format <format_metadata_extension_types>`. Specifically, the +metadata key ``ARROW:extension:name`` encodes the extension type name, +and the metadata key ``ARROW:extension:metadata`` encodes the +implementation-specific serialization of the extension type (for +parameterized extension types). The base64 encoding of metadata values +ensures that any possible serialization is representable. + +The ``ArrowArray`` structure exported from an extension array simply points +to the storage data of the extension array. + +Memory management +----------------- + +The ``ArrowSchema`` and ``ArrowArray`` structures follow the same conventions +for memory management. 
The term *"base structure"* below refers to the +``ArrowSchema`` or ``ArrowArray`` that is passed between producer and consumer +-- not any child structure thereof. + +Member allocation +''''''''''''''''' + +It is intended for the base structure to be stack- or heap-allocated by the +consumer. In this case, the producer API should take a pointer to the +consumer-allocated structure. + +However, any data pointed to by the struct MUST be allocated and maintained +by the producer. This includes the format and metadata strings, the arrays +of buffer and children pointers, etc. + +Therefore, the consumer MUST not try to interfere with the producer's +handling of these members' lifetime. The only way the consumer influences +data lifetime is by calling the base structure's ``release`` callback. + +.. _c-data-interface-released: + +Released structure +'''''''''''''''''' + +A released structure is indicated by setting its ``release`` callback to NULL. +Before reading and interpreting a structure's data, consumers SHOULD check +for a NULL release callback and treat it accordingly (probably by erroring +out). + +Release callback semantics -- for consumers +''''''''''''''''''''''''''''''''''''''''''' + +Consumers MUST call a base structure's release callback when they won't be using +it anymore, but they MUST not call any of its children's release callbacks +(including the optional dictionary). The producer is responsible for releasing +the children. + +In any case, a consumer MUST not try to access the base structure anymore +after calling its release callback -- including any associated data such +as its children. + +Release callback semantics -- for producers +''''''''''''''''''''''''''''''''''''''''''' + +If producers need additional information for lifetime handling (for +example, a C++ producer may want to use ``shared_ptr`` for array and +buffer lifetime), they MUST use the ``private_data`` member to locate the +required bookkeeping information. + +The release callback MUST not assume that the structure will be located +at the same memory location as when it was originally produced. The consumer +is free to move the structure around (see "Moving an array"). + +The release callback MUST walk all children structures (including the optional +dictionary) and call their own release callbacks. + +The release callback MUST free any data area directly owned by the structure +(such as the buffers and children members). + +The release callback MUST mark the structure as released, by setting +its ``release`` member to NULL. + +Below is a good starting point for implementing a release callback, where the +TODO area must be filled with producer-specific deallocation code: + +.. code-block:: c + + static void ReleaseExportedArray(struct ArrowArray* array) { + // This should not be called on already released array + assert(array->format != NULL); + + // Release children + for (int64_t i = 0; i < array->n_children; ++i) { + struct ArrowArray* child = array->children[i]; + if (child->release != NULL) { + child->release(child); + assert(child->release == NULL); + } + } + + // Release dictionary + struct ArrowArray* dict = array->dictionary; + if (dict != NULL && dict->release != NULL) { + dict->release(dict); + assert(dict->release == NULL); + } + + // TODO here: release and/or deallocate all data directly owned by + // the ArrowArray struct, such as the private_data. 
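+     // For example, if private_data points to a single heap-allocated
+     // bookkeeping struct, this could simply be:
+     //   free(array->private_data);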
+ + // Mark array released + array->release = NULL; + } + + +Moving an array +''''''''''''''' + +The consumer can *move* the ``ArrowArray`` structure by bitwise copying or +shallow member-wise copying. Then it MUST mark the source structure released +(see "released structure" above for how to do it) but *without* calling the +release callback. This ensures that only one live copy of the struct is +active at any given time and that lifetime is correctly communicated to +the producer. + +As usual, the release callback will be called on the destination structure +when it is not needed anymore. + +Moving child arrays +~~~~~~~~~~~~~~~~~~~ + +It is also possible to move one or several child arrays, but the parent +``ArrowArray`` structure MUST be released immediately afterwards, as it +won't point to valid child arrays anymore. + +The main use case for this is to keep alive only a subset of child arrays +(for example if you are only interested in certain columns of the data), +while releasing the others. + +.. note:: + + For moving to work correctly, the ``ArrowArray`` structure has to be + trivially relocatable. Therefore, pointer members inside the ``ArrowArray`` + structure (including ``private_data``) MUST not point inside the structure + itself. Also, external pointers to the structure MUST not be separately + stored by the producer. Instead, the producer MUST use the ``private_data`` + member so as to remember any necessary bookkeeping information. + +Record batches +-------------- + +A record batch can be trivially considered as an equivalent struct array with +additional top-level metadata. + +Example use case +================ + +A C++ database engine wants to provide the option to deliver results in Arrow +format, but without imposing themselves a dependency on the Arrow software +libraries. With the Arrow C data interface, the engine can let the caller pass +a pointer to a ``ArrowArray`` structure, and fill it with the next chunk of +results. + +It can do so without including the Arrow C++ headers or linking with the +Arrow DLLs. Furthermore, the database engine's C API can benefit other +runtimes and libraries that know about the Arrow C data interface, +through e.g. a C FFI layer. + +C producer examples +=================== + +Exporting a simple ``int32`` array +---------------------------------- + +Export a non-nullable ``int32`` type with empty metadata. In this case, +all ``ArrowSchema`` members point to statically-allocated data, so the +release callback is trivial. + +.. code-block:: c + + static void release_int32_type(struct ArrowSchema* schema) { + // Mark released + schema->release = NULL; + } + + void export_int32_type(struct ArrowSchema* schema) { + *schema = (struct ArrowSchema) { + // Type description + .format = "i", + .name = "", + .metadata = NULL, + .flags = 0, + .n_children = 0, + .children = NULL, + .dictionary = NULL, + // Bookkeeping + .release = &release_int32_type + }; + } + +Export a C-malloc()ed array of the same type as a Arrow array, transferring +ownership to the consumer through the release callback: + +.. 
code-block:: c + + static void release_int32_array(struct ArrowArray* array) { + assert(array->n_buffers == 2); + // Free the buffers and the buffers array + free((void *) array->buffers[1]); + free(array->buffers); + // Mark released + array->release = NULL; + } + + void export_int32_array(const int32_t* data, int64_t nitems, + struct ArrowArray* array) { + // Initialize primitive fields + *array = (struct ArrowArray) { + // Data description + .length = nitems, + .offset = 0, + .null_count = 0, + .n_buffers = 2, + .n_children = 0, + .children = NULL, + .dictionary = NULL, + // Bookkeeping + .release = &release_int32_array + }; + // Allocate list of buffers + array->buffers = (const void**) malloc(sizeof(void*) * array->n_buffers); + assert(array->buffers != NULL); + array->buffers[0] = NULL; // no nulls, null bitmap can be omitted + array->buffers[1] = data; + } + +Exporting a ``struct<float32, utf8>`` array +------------------------------------------- + +Export the array type as a ``ArrowSchema`` with C-malloc()ed children: + +.. code-block:: c + + static void release_malloced_type(struct ArrowSchema* schema) { + int i; + for (i = 0; i < schema->n_children; ++i) { + struct ArrowSchema* child = schema->children[i]; + if (child->release != NULL) { + child->release(child); + } + } + free(schema->children); + // Mark released + schema->release = NULL; + } + + void export_float32_utf8_type(struct ArrowSchema* schema) { + struct ArrowSchema* child; + + // + // Initialize parent type + // + *schema = (struct ArrowSchema) { + // Type description + .format = "+s", + .name = "", + .metadata = NULL, + .flags = 0, + .n_children = 2, + .dictionary = NULL, + // Bookkeeping + .release = &release_malloced_type + }; + // Allocate list of children types + schema->children = malloc(sizeof(struct ArrowSchema*) * schema->n_children); + + // + // Initialize child type #0 + // + child = schema->children[0] = malloc(sizeof(struct ArrowSchema)); + *child = (struct ArrowSchema) { + // Type description + .format = "f", + .name = "floats", + .metadata = NULL, + .flags = ARROW_FLAG_NULLABLE, + .n_children = 0, + .dictionary = NULL, + .children = NULL, + // Bookkeeping + .release = &release_malloced_type + }; + + // + // Initialize child type #1 + // + child = schema->children[1] = malloc(sizeof(struct ArrowSchema)); + *child = (struct ArrowSchema) { + // Type description + .format = "u", + .name = "strings", + .metadata = NULL, + .flags = ARROW_FLAG_NULLABLE, + .n_children = 0, + .dictionary = NULL, + .children = NULL, + // Bookkeeping + .release = &release_malloced_type + }; + } + +Export C-malloc()ed arrays in Arrow-compatible layout as an Arrow struct array, +transferring ownership to the consumer: + +.. 
code-block:: c + + static void release_malloced_array(struct ArrowArray* array) { + int i; + // Free children + for (i = 0; i < array->n_children; ++i) { + struct ArrowArray* child = array->children[i]; + if (child->release != NULL) { + child->release(child); + } + } + free(array->children); + // Free buffers + for (i = 0; i < array->n_buffers; ++i) { + free((void *) array->buffers[i]); + } + free(array->buffers); + // Mark released + array->release = NULL; + } + + void export_float32_utf8_array( + int64_t nitems, + const uint8_t* float32_nulls, const float* float32_data, + const uint8_t* utf8_nulls, const int32_t* utf8_offsets, const uint8_t* utf8_data, + struct ArrowArray* array) { + struct ArrowArray* child; + + // + // Initialize parent array + // + *array = (struct ArrowArray) { + // Data description + .length = nitems, + .offset = 0, + .null_count = 0, + .n_buffers = 1, + .n_children = 2, + .dictionary = NULL, + // Bookkeeping + .release = &release_malloced_array + }; + // Allocate list of parent buffers + array->buffers = malloc(sizeof(void*) * array->n_buffers); + array->buffers[0] = NULL; // no nulls, null bitmap can be omitted + // Allocate list of children arrays + array->children = malloc(sizeof(struct ArrowArray*) * array->n_children); + + // + // Initialize child array #0 + // + child = array->children[0] = malloc(sizeof(struct ArrowArray)); + *child = (struct ArrowArray) { + // Data description + .length = nitems, + .offset = 0, + .null_count = -1, + .n_buffers = 2, + .n_children = 0, + .dictionary = NULL, + .children = NULL, + // Bookkeeping + .release = &release_malloced_array + }; + child->buffers = malloc(sizeof(void*) * array->n_buffers); + child->buffers[0] = float32_nulls; + child->buffers[1] = float32_data; + + // + // Initialize child array #1 + // + child = array->children[1] = malloc(sizeof(struct ArrowArray)); + *child = (struct ArrowArray) { + // Data description + .length = nitems, + .offset = 0, + .null_count = -1, + .n_buffers = 3, + .n_children = 0, + .dictionary = NULL, + .children = NULL, + // Bookkeeping + .release = &release_malloced_array + }; + child->buffers = malloc(sizeof(void*) * array->n_buffers); + child->buffers[0] = utf8_nulls; + child->buffers[1] = utf8_offsets; + child->buffers[2] = utf8_data; + } + + +Why two distinct structures? +============================ + +In many cases, the same type or schema description applies to multiple, +possibly short, batches of data. To avoid paying the cost of exporting +and importing the type description for each batch, the ``ArrowSchema`` +can be passed once, separately, at the beginning of the conversation between +producer and consumer. + +In other cases yet, the data type is fixed by the producer API, and may not +need to be communicated at all. + +However, if a producer is focused on one-shot exchange of data, it can +communicate the ``ArrowSchema`` and ``ArrowArray`` structures in the same +API call. + +Updating this specification +=========================== + +Once this specification is supported in an official Arrow release, the C +ABI is frozen. This means the ``ArrowSchema`` and ``ArrowArray`` structure +definitions should not change in any way -- including adding new members. + +Backwards-compatible changes are allowed, for example new +:c:member:`ArrowSchema.flags` values or expanded possibilities for +the :c:member:`ArrowSchema.format` string. + +Any incompatible changes should be part of a new specification, for example +"Arrow C data interface v2". 
+ +Inspiration +=========== + +The Arrow C data interface is inspired by the `Python buffer protocol`_, +which has proven immensely successful in allowing various Python libraries +exchange numerical data with no knowledge of each other and near-zero +adaptation cost. + + +.. _Python buffer protocol: https://www.python.org/dev/peps/pep-3118/ diff --git a/src/arrow/docs/source/format/CStreamInterface.rst b/src/arrow/docs/source/format/CStreamInterface.rst new file mode 100644 index 000000000..b8ccce355 --- /dev/null +++ b/src/arrow/docs/source/format/CStreamInterface.rst @@ -0,0 +1,218 @@ +.. Licensed to the Apache Software Foundation (ASF) under one +.. or more contributor license agreements. See the NOTICE file +.. distributed with this work for additional information +.. regarding copyright ownership. The ASF licenses this file +.. to you under the Apache License, Version 2.0 (the +.. "License"); you may not use this file except in compliance +.. with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, +.. software distributed under the License is distributed on an +.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +.. KIND, either express or implied. See the License for the +.. specific language governing permissions and limitations +.. under the License. + +.. highlight:: c + +.. _c-stream-interface: + +============================ +The Arrow C stream interface +============================ + +.. warning:: + This interface is experimental and may evolve based on feedback from + early users. ABI stability is not guaranteed yet. Feel free to + `contact us <https://arrow.apache.org/community/>`__. + +The C stream interface builds on the structures defined in the +:ref:`C data interface <c-data-interface>` and combines them into a higher-level +specification so as to ease the communication of streaming data within a single +process. + +Semantics +========= + +An Arrow C stream exposes a streaming source of data chunks, each with the +same schema. Chunks are obtained by calling a blocking pull-style iteration +function. + +Structure definition +==================== + +The C stream interface is defined by a single ``struct`` definition:: + + struct ArrowArrayStream { + // Callbacks providing stream functionality + int (*get_schema)(struct ArrowArrayStream*, struct ArrowSchema* out); + int (*get_next)(struct ArrowArrayStream*, struct ArrowArray* out); + const char* (*get_last_error)(struct ArrowArrayStream*); + + // Release callback + void (*release)(struct ArrowArrayStream*); + + // Opaque producer-specific data + void* private_data; + }; + +The ArrowArrayStream structure +------------------------------ + +The ``ArrowArrayStream`` provides the required callbacks to interact with a +streaming source of Arrow arrays. It has the following fields: + +.. c:member:: int (*ArrowArrayStream.get_schema)(struct ArrowArrayStream*, struct ArrowSchema* out) + + *Mandatory.* This callback allows the consumer to query the schema of + the chunks of data in the stream. The schema is the same for all + data chunks. + + This callback must NOT be called on a released ``ArrowArrayStream``. + + *Return value:* 0 on success, a non-zero + :ref:`error code <c-stream-interface-error-codes>` otherwise. + +.. c:member:: int (*ArrowArrayStream.get_next)(struct ArrowArrayStream*, struct ArrowArray* out) + + *Mandatory.* This callback allows the consumer to get the next chunk + of data in the stream. 
+ + This callback must NOT be called on a released ``ArrowArrayStream``. + + *Return value:* 0 on success, a non-zero + :ref:`error code <c-stream-interface-error-codes>` otherwise. + + On success, the consumer must check whether the ``ArrowArray`` is + marked :ref:`released <c-data-interface-released>`. If the + ``ArrowArray`` is released, then the end of stream has been reached. + Otherwise, the ``ArrowArray`` contains a valid data chunk. + +.. c:member:: const char* (*ArrowArrayStream.get_last_error)(struct ArrowArrayStream*) + + *Mandatory.* This callback allows the consumer to get a textual description + of the last error. + + This callback must ONLY be called if the last operation on the + ``ArrowArrayStream`` returned an error. It must NOT be called on a + released ``ArrowArrayStream``. + + *Return value:* a pointer to a NULL-terminated character string (UTF8-encoded). + NULL can also be returned if no detailed description is available. + + The returned pointer is only guaranteed to be valid until the next call of + one of the stream's callbacks. The character string it points to should + be copied to consumer-managed storage if it is intended to survive longer. + +.. c:member:: void (*ArrowArrayStream.release)(struct ArrowArrayStream*) + + *Mandatory.* A pointer to a producer-provided release callback. + +.. c:member:: void* ArrowArrayStream.private_data + + *Optional.* An opaque pointer to producer-provided private data. + + Consumers MUST not process this member. Lifetime of this member + is handled by the producer, and especially by the release callback. + + +.. _c-stream-interface-error-codes: + +Error codes +----------- + +The ``get_schema`` and ``get_next`` callbacks may return an error under the form +of a non-zero integer code. Such error codes should be interpreted like +``errno`` numbers (as defined by the local platform). Note that the symbolic +forms of these constants are stable from platform to platform, but their numeric +values are platform-specific. + +In particular, it is recommended to recognize the following values: + +* ``EINVAL``: for a parameter or input validation error +* ``ENOMEM``: for a memory allocation failure (out of memory) +* ``EIO``: for a generic input/output error + +.. seealso:: + `Standard POSIX error codes <https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/errno.h.html>`__. + + `Error codes recognized by the Windows C runtime library + <https://docs.microsoft.com/en-us/cpp/c-runtime-library/errno-doserrno-sys-errlist-and-sys-nerr>`__. + +Result lifetimes +---------------- + +The data returned by the ``get_schema`` and ``get_next`` callbacks must be +released independently. Their lifetimes are not tied to that of the +``ArrowArrayStream``. + +Stream lifetime +--------------- + +Lifetime of the C stream is managed using a release callback with similar +usage as in the :ref:`C data interface <c-data-interface-released>`. 
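+
+For illustration, a stream producer's release callback could follow the same
+pattern as the array release callback shown for the C data interface. The
+``MyStreamPrivate`` struct below is a hypothetical placeholder for whatever
+bookkeeping the producer keeps in ``private_data``::
+
+   // Hypothetical bookkeeping kept by the producer in private_data
+   struct MyStreamPrivate {
+     // ... handles to the underlying data source, current position, etc. ...
+     int64_t chunks_produced;
+   };
+
+   static void MyStream_release(struct ArrowArrayStream* stream) {
+     // This should not be called on an already released stream
+     assert(stream->release != NULL);
+     // Free the producer-owned bookkeeping data
+     free(stream->private_data);
+     // Mark the stream released
+     stream->release = NULL;
+   }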
+
+
+C consumer example
+==================
+
+Let's say a particular database provides the following C API to execute
+a SQL query and return the result set as an Arrow C stream::
+
+   void MyDB_Query(const char* query, struct ArrowArrayStream* result_set);
+
+Then a consumer could use the following code to iterate over the results::
+
+   static void handle_error(int errcode, struct ArrowArrayStream* stream) {
+      // Print stream error
+      const char* errdesc = stream->get_last_error(stream);
+      if (errdesc != NULL) {
+         fputs(errdesc, stderr);
+      } else {
+         fputs(strerror(errcode), stderr);
+      }
+      // Release stream and abort
+      stream->release(stream);
+      exit(1);
+   }
+
+   void run_query() {
+      struct ArrowArrayStream stream;
+      struct ArrowSchema schema;
+      struct ArrowArray chunk;
+      int errcode;
+
+      MyDB_Query("SELECT * FROM my_table", &stream);
+
+      // Query result set schema
+      errcode = stream.get_schema(&stream, &schema);
+      if (errcode != 0) {
+         handle_error(errcode, &stream);
+      }
+
+      int64_t num_rows = 0;
+
+      // Iterate over results: loop until error or end of stream
+      while ((errcode = stream.get_next(&stream, &chunk)) == 0 &&
+             chunk.release != NULL) {
+         // Do something with chunk...
+         fprintf(stderr, "Result chunk: got %lld rows\n", (long long) chunk.length);
+         num_rows += chunk.length;
+
+         // Release chunk
+         chunk.release(&chunk);
+      }
+
+      // Was it an error?
+      if (errcode != 0) {
+         handle_error(errcode, &stream);
+      }
+
+      fprintf(stderr, "Result stream ended: total %lld rows\n", (long long) num_rows);
+
+      // Release schema and stream
+      schema.release(&schema);
+      stream.release(&stream);
+   }
diff --git a/src/arrow/docs/source/format/Columnar.rst b/src/arrow/docs/source/format/Columnar.rst
new file mode 100644
index 000000000..85261e7d9
--- /dev/null
+++ b/src/arrow/docs/source/format/Columnar.rst
@@ -0,0 +1,1221 @@
+.. Licensed to the Apache Software Foundation (ASF) under one
+.. or more contributor license agreements. See the NOTICE file
+.. distributed with this work for additional information
+.. regarding copyright ownership. The ASF licenses this file
+.. to you under the Apache License, Version 2.0 (the
+.. "License"); you may not use this file except in compliance
+.. with the License. You may obtain a copy of the License at
+
+.. http://www.apache.org/licenses/LICENSE-2.0
+
+.. Unless required by applicable law or agreed to in writing,
+.. software distributed under the License is distributed on an
+.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+.. KIND, either express or implied. See the License for the
+.. specific language governing permissions and limitations
+.. under the License.
+
+.. _format_columnar:
+
+*********************
+Arrow Columnar Format
+*********************
+
+*Version: 1.0*
+
+The "Arrow Columnar Format" includes a language-agnostic in-memory
+data structure specification, metadata serialization, and a protocol
+for serialization and generic data transport.
+
+This document is intended to provide adequate detail to create a new
+implementation of the columnar format without the aid of an existing
+implementation. We utilize Google's `Flatbuffers`_ project for
+metadata serialization, so it will be necessary to refer to the
+project's `Flatbuffers protocol definition files`_
+while reading this document.
+ +The columnar format has some key features: + +* Data adjacency for sequential access (scans) +* O(1) (constant-time) random access +* SIMD and vectorization-friendly +* Relocatable without "pointer swizzling", allowing for true zero-copy + access in shared memory + +The Arrow columnar format provides analytical performance and data +locality guarantees in exchange for comparatively more expensive +mutation operations. This document is concerned only with in-memory +data representation and serialization details; issues such as +coordinating mutation of data structures are left to be handled by +implementations. + +Terminology +=========== + +Since different projects have used different words to describe various +concepts, here is a small glossary to help disambiguate. + +* **Array** or **Vector**: a sequence of values with known length all + having the same type. These terms are used interchangeably in + different Arrow implementations, but we use "array" in this + document. +* **Slot**: a single logical value in an array of some particular data type +* **Buffer** or **Contiguous memory region**: a sequential virtual + address space with a given length. Any byte can be reached via a + single pointer offset less than the region's length. +* **Physical Layout**: The underlying memory layout for an array + without taking into account any value semantics. For example, a + 32-bit signed integer array and 32-bit floating point array have the + same layout. +* **Parent** and **child arrays**: names to express relationships + between physical value arrays in a nested type structure. For + example, a ``List<T>``-type parent array has a T-type array as its + child (see more on lists below). +* **Primitive type**: a data type having no child types. This includes + such types as fixed bit-width, variable-size binary, and null types. +* **Nested type**: a data type whose full structure depends on one or + more other child types. Two fully-specified nested types are equal + if and only if their child types are equal. For example, ``List<U>`` + is distinct from ``List<V>`` iff U and V are different types. +* **Logical type**: An application-facing semantic value type that is + implemented using some physical layout. For example, Decimal + values are stored as 16 bytes in a fixed-size binary + layout. Similarly, strings can be stored as ``List<1-byte>``. A + timestamp may be stored as 64-bit fixed-size layout. + +.. _format_layout: + +Physical Memory Layout +====================== + +Arrays are defined by a few pieces of metadata and data: + +* A logical data type. +* A sequence of buffers. +* A length as a 64-bit signed integer. Implementations are permitted + to be limited to 32-bit lengths, see more on this below. +* A null count as a 64-bit signed integer. +* An optional **dictionary**, for dictionary-encoded arrays. + +Nested arrays additionally have a sequence of one or more sets of +these items, called the **child arrays**. + +Each logical data type has a well-defined physical layout. Here are +the different physical layouts defined by Arrow: + +* **Primitive (fixed-size)**: a sequence of values each having the + same byte or bit width +* **Variable-size Binary**: a sequence of values each having a variable + byte length. Two variants of this layout are supported using 32-bit + and 64-bit length encoding. +* **Fixed-size List**: a nested layout where each value has the same + number of elements taken from a child data type. 
+* **Variable-size List**: a nested layout where each value is a + variable-length sequence of values taken from a child data type. Two + variants of this layout are supported using 32-bit and 64-bit length + encoding. +* **Struct**: a nested layout consisting of a collection of named + child **fields** each having the same length but possibly different + types. +* **Sparse** and **Dense Union**: a nested layout representing a + sequence of values, each of which can have type chosen from a + collection of child array types. +* **Null**: a sequence of all null values, having null logical type + +The Arrow columnar memory layout only applies to *data* and not +*metadata*. Implementations are free to represent metadata in-memory +in whichever form is convenient for them. We handle metadata +**serialization** in an implementation-independent way using +`Flatbuffers`_, detailed below. + +Buffer Alignment and Padding +---------------------------- + +Implementations are recommended to allocate memory on aligned +addresses (multiple of 8- or 64-bytes) and pad (overallocate) to a +length that is a multiple of 8 or 64 bytes. When serializing Arrow +data for interprocess communication, these alignment and padding +requirements are enforced. If possible, we suggest that you prefer +using 64-byte alignment and padding. Unless otherwise noted, padded +bytes do not need to have a specific value. + +The alignment requirement follows best practices for optimized memory +access: + +* Elements in numeric arrays will be guaranteed to be retrieved via aligned access. +* On some architectures alignment can help limit partially used cache lines. + +The recommendation for 64 byte alignment comes from the `Intel +performance guide`_ that recommends alignment of memory to match SIMD +register width. The specific padding length was chosen because it +matches the largest SIMD instruction registers available on widely +deployed x86 architecture (Intel AVX-512). + +The recommended padding of 64 bytes allows for using `SIMD`_ +instructions consistently in loops without additional conditional +checks. This should allow for simpler, efficient and CPU +cache-friendly code. In other words, we can load the entire 64-byte +buffer into a 512-bit wide SIMD register and get data-level +parallelism on all the columnar values packed into the 64-byte +buffer. Guaranteed padding can also allow certain compilers to +generate more optimized code directly (e.g. One can safely use Intel's +``-qopt-assume-safe-padding``). + +Array lengths +------------- + +Array lengths are represented in the Arrow metadata as a 64-bit signed +integer. An implementation of Arrow is considered valid even if it only +supports lengths up to the maximum 32-bit signed integer, though. If using +Arrow in a multi-language environment, we recommend limiting lengths to +2 :sup:`31` - 1 elements or less. Larger data sets can be represented using +multiple array chunks. + +Null count +---------- + +The number of null value slots is a property of the physical array and +considered part of the data structure. The null count is represented +in the Arrow metadata as a 64-bit signed integer, as it may be as +large as the array length. + +Validity bitmaps +---------------- + +Any value in an array may be semantically null, whether primitive or nested +type. + +All array types, with the exception of union types (more on these later), +utilize a dedicated memory buffer, known as the validity (or "null") bitmap, to +encode the nullness or non-nullness of each value slot. 
The validity bitmap +must be large enough to have at least 1 bit for each array slot. + +Whether any array slot is valid (non-null) is encoded in the respective bits of +this bitmap. A 1 (set bit) for index ``j`` indicates that the value is not null, +while a 0 (bit not set) indicates that it is null. Bitmaps are to be +initialized to be all unset at allocation time (this includes padding): :: + + is_valid[j] -> bitmap[j / 8] & (1 << (j % 8)) + +We use `least-significant bit (LSB) numbering`_ (also known as +bit-endianness). This means that within a group of 8 bits, we read +right-to-left: :: + + values = [0, 1, null, 2, null, 3] + + bitmap + j mod 8 7 6 5 4 3 2 1 0 + 0 0 1 0 1 0 1 1 + +Arrays having a 0 null count may choose to not allocate the validity +bitmap. Implementations may choose to always allocate one anyway as a +matter of convenience, but this should be noted when memory is being +shared. + +Nested type arrays except for union types have their own validity bitmap and +null count regardless of the null count and valid bits of their child arrays. + +Array slots which are null are not required to have a particular +value; any "masked" memory can have any value and need not be zeroed, +though implementations frequently choose to zero memory for null +values. + +Fixed-size Primitive Layout +--------------------------- + +A primitive value array represents an array of values each having the +same physical slot width typically measured in bytes, though the spec +also provides for bit-packed types (e.g. boolean values encoded in +bits). + +Internally, the array contains a contiguous memory buffer whose total +size is at least as large as the slot width multiplied by the array +length. For bit-packed types, the size is rounded up to the nearest +byte. + +The associated validity bitmap is contiguously allocated (as described +above) but does not need to be adjacent in memory to the values +buffer. + +**Example Layout: Int32 Array** + +For example a primitive array of int32s: :: + + [1, null, 2, 4, 8] + +Would look like: :: + + * Length: 5, Null count: 1 + * Validity bitmap buffer: + + |Byte 0 (validity bitmap) | Bytes 1-63 | + |-------------------------|-----------------------| + | 00011101 | 0 (padding) | + + * Value Buffer: + + |Bytes 0-3 | Bytes 4-7 | Bytes 8-11 | Bytes 12-15 | Bytes 16-19 | Bytes 20-63 | + |------------|-------------|-------------|-------------|-------------|-------------| + | 1 | unspecified | 2 | 4 | 8 | unspecified | + +**Example Layout: Non-null int32 Array** + +``[1, 2, 3, 4, 8]`` has two possible layouts: :: + + * Length: 5, Null count: 0 + * Validity bitmap buffer: + + | Byte 0 (validity bitmap) | Bytes 1-63 | + |--------------------------|-----------------------| + | 00011111 | 0 (padding) | + + * Value Buffer: + + |Bytes 0-3 | Bytes 4-7 | Bytes 8-11 | bytes 12-15 | bytes 16-19 | Bytes 20-63 | + |------------|-------------|-------------|-------------|-------------|-------------| + | 1 | 2 | 3 | 4 | 8 | unspecified | + +or with the bitmap elided: :: + + * Length 5, Null count: 0 + * Validity bitmap buffer: Not required + * Value Buffer: + + |Bytes 0-3 | Bytes 4-7 | Bytes 8-11 | bytes 12-15 | bytes 16-19 | Bytes 20-63 | + |------------|-------------|-------------|-------------|-------------|-------------| + | 1 | 2 | 3 | 4 | 8 | unspecified | + +Variable-size Binary Layout +--------------------------- + +Each value in this layout consists of 0 or more bytes. 
While primitive
+arrays have a single values buffer, variable-size binary arrays have an
+**offsets** buffer and a **data** buffer.
+
+The offsets buffer contains ``length + 1`` signed integers (either
+32-bit or 64-bit, depending on the logical type), which encode the
+start position of each slot in the data buffer. The length of the
+value in each slot is computed using the difference between the offset
+at that slot's index and the subsequent offset. For example, the
+position and length of slot j are computed as:
+
+::
+
+    slot_position = offsets[j]
+    slot_length = offsets[j + 1] - offsets[j]  // (for 0 <= j < length)
+
+It should be noted that a null value may have a positive slot length.
+That is, a null value may occupy a **non-empty** memory space in the data
+buffer. When this is true, the content of the corresponding memory space
+is undefined.
+
+Generally the first value in the offsets array is 0, and the last offset
+is the length of the values array. When serializing this layout, we
+recommend normalizing the offsets to start at 0.
+
+Variable-size List Layout
+-------------------------
+
+List is a nested type which is semantically similar to variable-size
+binary. It is defined by two buffers, a validity bitmap and an offsets
+buffer, and a child array. The offsets are the same as in the
+variable-size binary case, and both 32-bit and 64-bit signed integer
+offsets are supported. Rather than referencing an additional data
+buffer, these offsets reference the child array.
+
+A list type is specified like ``List<T>``, where ``T`` is any type
+(primitive or nested). In these examples we use 32-bit offsets where
+the 64-bit offset version would be denoted by ``LargeList<T>``.
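+
+To make the formulas above concrete, a consumer reading a slot of a
+variable-size binary array combines the validity bitmap check with the
+offsets lookup.  The following is a minimal sketch, not part of the
+specification; the helper names are illustrative and 32-bit offsets are
+assumed::
+
+    #include <stdint.h>
+
+    // Validity check using LSB bit numbering (see "Validity bitmaps" above).
+    // If the bitmap was elided (null count 0), this check is skipped.
+    static int is_valid(const uint8_t* validity_bitmap, int64_t j) {
+      return (validity_bitmap[j / 8] >> (j % 8)) & 1;
+    }
+
+    // Locate slot j of a variable-size binary array.  For List<T>, the same
+    // offsets arithmetic applies, but the resulting range indexes the child
+    // array rather than a data buffer.
+    static void get_binary_slot(const int32_t* offsets, const uint8_t* data,
+                                int64_t j, const uint8_t** out_data,
+                                int32_t* out_length) {
+      *out_data = data + offsets[j];
+      *out_length = offsets[j + 1] - offsets[j];
+    }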
+ +**Example Layout: ``List<Int8>`` Array** + +We illustrate an example of ``List<Int8>`` with length 4 having values:: + + [[12, -7, 25], null, [0, -127, 127, 50], []] + +will have the following representation: :: + + * Length: 4, Null count: 1 + * Validity bitmap buffer: + + | Byte 0 (validity bitmap) | Bytes 1-63 | + |--------------------------|-----------------------| + | 00001101 | 0 (padding) | + + * Offsets buffer (int32) + + | Bytes 0-3 | Bytes 4-7 | Bytes 8-11 | Bytes 12-15 | Bytes 16-19 | Bytes 20-63 | + |------------|-------------|-------------|-------------|-------------|-------------| + | 0 | 3 | 3 | 7 | 7 | unspecified | + + * Values array (Int8array): + * Length: 7, Null count: 0 + * Validity bitmap buffer: Not required + * Values buffer (int8) + + | Bytes 0-6 | Bytes 7-63 | + |------------------------------|-------------| + | 12, -7, 25, 0, -127, 127, 50 | unspecified | + +**Example Layout: ``List<List<Int8>>``** + +``[[[1, 2], [3, 4]], [[5, 6, 7], null, [8]], [[9, 10]]]`` + +will be represented as follows: :: + + * Length 3 + * Nulls count: 0 + * Validity bitmap buffer: Not required + * Offsets buffer (int32) + + | Bytes 0-3 | Bytes 4-7 | Bytes 8-11 | Bytes 12-15 | Bytes 16-63 | + |------------|------------|------------|-------------|-------------| + | 0 | 2 | 5 | 6 | unspecified | + + * Values array (`List<Int8>`) + * Length: 6, Null count: 1 + * Validity bitmap buffer: + + | Byte 0 (validity bitmap) | Bytes 1-63 | + |--------------------------|-------------| + | 00110111 | 0 (padding) | + + * Offsets buffer (int32) + + | Bytes 0-27 | Bytes 28-63 | + |----------------------|-------------| + | 0, 2, 4, 7, 7, 8, 10 | unspecified | + + * Values array (Int8): + * Length: 10, Null count: 0 + * Validity bitmap buffer: Not required + + | Bytes 0-9 | Bytes 10-63 | + |-------------------------------|-------------| + | 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 | unspecified | + +Fixed-Size List Layout +---------------------- + +Fixed-Size List is a nested type in which each array slot contains a +fixed-size sequence of values all having the same type. + +A fixed size list type is specified like ``FixedSizeList<T>[N]``, +where ``T`` is any type (primitive or nested) and ``N`` is a 32-bit +signed integer representing the length of the lists. + +A fixed size list array is represented by a values array, which is a +child array of type T. T may also be a nested type. The value in slot +``j`` of a fixed size list array is stored in an ``N``-long slice of +the values array, starting at an offset of ``j * N``. + +**Example Layout: ``FixedSizeList<byte>[4]`` Array** + +Here we illustrate ``FixedSizeList<byte>[4]``. + +For an array of length 4 with respective values: :: + + [[192, 168, 0, 12], null, [192, 168, 0, 25], [192, 168, 0, 1]] + +will have the following representation: :: + + * Length: 4, Null count: 1 + * Validity bitmap buffer: + + | Byte 0 (validity bitmap) | Bytes 1-63 | + |--------------------------|-----------------------| + | 00001101 | 0 (padding) | + + * Values array (byte array): + * Length: 16, Null count: 0 + * validity bitmap buffer: Not required + + | Bytes 0-3 | Bytes 4-7 | Bytes 8-15 | + |-----------------|-------------|---------------------------------| + | 192, 168, 0, 12 | unspecified | 192, 168, 0, 25, 192, 168, 0, 1 | + + +Struct Layout +------------- + +A struct is a nested type parameterized by an ordered sequence of +types (which can all be distinct), called its fields. Each field must +have a UTF8-encoded name, and these field names are part of the type +metadata. 
+ +A struct array does not have any additional allocated physical storage +for its values. A struct array must still have an allocated validity +bitmap, if it has one or more null values. + +Physically, a struct array has one child array for each field. The +child arrays are independent and need not be adjacent to each other in +memory. + +For example, the struct (field names shown here as strings for illustration +purposes):: + + Struct < + name: VarBinary + age: Int32 + > + +has two child arrays, one ``VarBinary`` array (using variable-size binary +layout) and one 4-byte primitive value array having ``Int32`` logical +type. + +**Example Layout: ``Struct<VarBinary, Int32>``** + +The layout for ``[{'joe', 1}, {null, 2}, null, {'mark', 4}]`` would be: :: + + * Length: 4, Null count: 1 + * Validity bitmap buffer: + + |Byte 0 (validity bitmap) | Bytes 1-63 | + |-------------------------|-----------------------| + | 00001011 | 0 (padding) | + + * Children arrays: + * field-0 array (`VarBinary`): + * Length: 4, Null count: 2 + * Validity bitmap buffer: + + | Byte 0 (validity bitmap) | Bytes 1-63 | + |--------------------------|-----------------------| + | 00001001 | 0 (padding) | + + * Offsets buffer: + + | Bytes 0-19 | + |----------------| + | 0, 3, 3, 3, 7 | + + * Values array: + * Length: 7, Null count: 0 + * Validity bitmap buffer: Not required + + * Value buffer: + + | Bytes 0-6 | + |----------------| + | joemark | + + * field-1 array (int32 array): + * Length: 4, Null count: 1 + * Validity bitmap buffer: + + | Byte 0 (validity bitmap) | Bytes 1-63 | + |--------------------------|-----------------------| + | 00001011 | 0 (padding) | + + * Value Buffer: + + |Bytes 0-3 | Bytes 4-7 | Bytes 8-11 | Bytes 12-15 | Bytes 16-63 | + |------------|-------------|-------------|-------------|-------------| + | 1 | 2 | unspecified | 4 | unspecified | + +While a struct does not have physical storage for each of its semantic +slots (i.e. each scalar C-like struct), an entire struct slot can be +set to null via the validity bitmap. Any of the child field arrays can +have null values according to their respective independent validity +bitmaps. This implies that for a particular struct slot the validity +bitmap for the struct array might indicate a null slot when one or +more of its child arrays has a non-null value in their corresponding +slot. When reading the struct array the parent validity bitmap takes +priority. This is illustrated in the example above, the child arrays +have valid entries for the null struct but are 'hidden' from the +consumer by the parent array's validity bitmap. However, when treated +independently corresponding values of the children array will be +non-null. + +Union Layout +------------ + +A union is defined by an ordered sequence of types; each slot in the +union can have a value chosen from these types. The types are named +like a struct's fields, and the names are part of the type metadata. + +Unlike other data types, unions do not have their own validity bitmap. Instead, +the nullness of each slot is determined exclusively by the child arrays which +are composed to create the union. + +We define two distinct union types, "dense" and "sparse", that are +optimized for different use cases. + +Dense Union +~~~~~~~~~~~ + +Dense union represents a mixed-type array with 5 bytes of overhead for +each value. Its physical layout is as follows: + +* One child array for each type +* Types buffer: A buffer of 8-bit signed integers. 
Each type in the
+  union has a corresponding type id whose values are found in this
+  buffer. A union with more than 127 possible types can be modeled as
+  a union of unions.
+* Offsets buffer: A buffer of signed int32 values indicating the
+  relative offset into the respective child array for the type in a
+  given slot. The respective offsets for each child value array must
+  be in order / increasing.
+
+Critically, the dense union allows for minimal overhead in the ubiquitous
+union-of-structs with non-overlapping-fields use case (``Union<s1: Struct1, s2:
+Struct2, s3: Struct3, ...>``).
+
+**Example Layout: Dense union**
+
+An example layout for logical union of: ``Union<f: float, i: int32>``
+having the values: ``[{f=1.2}, null, {f=3.4}, {i=5}]``
+
+::
+
+    * Length: 4, Null count: 0
+    * Types buffer:
+
+      |Byte 0   | Byte 1      | Byte 2   | Byte 3   | Bytes 4-63  |
+      |---------|-------------|----------|----------|-------------|
+      | 0       | 0           | 0        | 1        | unspecified |
+
+    * Offset buffer:
+
+      |Bytes 0-3 | Bytes 4-7   | Bytes 8-11 | Bytes 12-15 | Bytes 16-63 |
+      |----------|-------------|------------|-------------|-------------|
+      | 0        | 1           | 2          | 0           | unspecified |
+
+    * Children arrays:
+      * Field-0 array (f: float):
+        * Length: 3, Null count: 1
+        * Validity bitmap buffer: 00000101
+
+        * Value Buffer:
+
+          | Bytes 0-11     | Bytes 12-63 |
+          |----------------|-------------|
+          | 1.2, null, 3.4 | unspecified |
+
+
+      * Field-1 array (i: int32):
+        * Length: 1, Null count: 0
+        * Validity bitmap buffer: Not required
+
+        * Value Buffer:
+
+          | Bytes 0-3 | Bytes 4-63  |
+          |-----------|-------------|
+          | 5         | unspecified |
+
+Sparse Union
+~~~~~~~~~~~~
+
+A sparse union has the same structure as a dense union, with the omission of
+the offsets array. In this case, the child arrays are each equal in length to
+the length of the union.
+
+While a sparse union may use significantly more space compared with a
+dense union, it has some advantages that may be desirable in certain
+use cases:
+
+* A sparse union is more amenable to vectorized expression evaluation in some use cases.
+* Equal-length arrays can be interpreted as a union by only defining the types array.
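+
+For both union variants, reading a slot means selecting the child identified
+by the types buffer and then picking an index inside that child: the offsets
+buffer supplies the index for a dense union, while a sparse union uses the
+slot index itself.  The sketch below is illustrative only; the ``UnionArray``
+struct is hypothetical, and the declared child type ids come from the type
+metadata (they are not necessarily equal to the child's position)::
+
+    #include <stdint.h>
+
+    struct UnionArray {
+      const int8_t* type_ids;        // types buffer, one entry per slot
+      const int32_t* value_offsets;  // dense unions only; NULL for sparse
+      const int8_t* child_type_ids;  // declared type id of each child
+      int num_children;
+    };
+
+    // Find which child holds slot j, and at which index inside that child.
+    static void resolve_union_slot(const struct UnionArray* u, int64_t j,
+                                   int* out_child, int64_t* out_index) {
+      int8_t tid = u->type_ids[j];
+      *out_child = -1;
+      for (int c = 0; c < u->num_children; ++c) {
+        if (u->child_type_ids[c] == tid) {
+          *out_child = c;
+          break;
+        }
+      }
+      *out_index = (u->value_offsets != NULL) ? u->value_offsets[j]  // dense
+                                              : j;                   // sparse
+    }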
+ +**Example layout: ``SparseUnion<u0: Int32, u1: Float, u2: VarBinary>``** + +For the union array: :: + + [{u0=5}, {u1=1.2}, {u2='joe'}, {u1=3.4}, {u0=4}, {u2='mark'}] + +will have the following layout: :: + + * Length: 6, Null count: 0 + * Types buffer: + + | Byte 0 | Byte 1 | Byte 2 | Byte 3 | Byte 4 | Byte 5 | Bytes 6-63 | + |------------|-------------|-------------|-------------|-------------|--------------|-----------------------| + | 0 | 1 | 2 | 1 | 0 | 2 | unspecified (padding) | + + * Children arrays: + + * u0 (Int32): + * Length: 6, Null count: 4 + * Validity bitmap buffer: + + |Byte 0 (validity bitmap) | Bytes 1-63 | + |-------------------------|-----------------------| + |00010001 | 0 (padding) | + + * Value buffer: + + |Bytes 0-3 | Bytes 4-7 | Bytes 8-11 | Bytes 12-15 | Bytes 16-19 | Bytes 20-23 | Bytes 24-63 | + |------------|-------------|-------------|-------------|-------------|--------------|-----------------------| + | 5 | unspecified | unspecified | unspecified | 4 | unspecified | unspecified (padding) | + + * u1 (float): + * Length: 6, Null count: 4 + * Validity bitmap buffer: + + |Byte 0 (validity bitmap) | Bytes 1-63 | + |-------------------------|-----------------------| + | 00001010 | 0 (padding) | + + * Value buffer: + + |Bytes 0-3 | Bytes 4-7 | Bytes 8-11 | Bytes 12-15 | Bytes 16-19 | Bytes 20-23 | Bytes 24-63 | + |-------------|-------------|-------------|-------------|-------------|--------------|-----------------------| + | unspecified | 1.2 | unspecified | 3.4 | unspecified | unspecified | unspecified (padding) | + + * u2 (`VarBinary`) + * Length: 6, Null count: 4 + * Validity bitmap buffer: + + | Byte 0 (validity bitmap) | Bytes 1-63 | + |--------------------------|-----------------------| + | 00100100 | 0 (padding) | + + * Offsets buffer (int32) + + | Bytes 0-3 | Bytes 4-7 | Bytes 8-11 | Bytes 12-15 | Bytes 16-19 | Bytes 20-23 | Bytes 24-27 | Bytes 28-63 | + |------------|-------------|-------------|-------------|-------------|-------------|-------------|-------------| + | 0 | 0 | 0 | 3 | 3 | 3 | 7 | unspecified | + + * Values array (VarBinary): + * Length: 7, Null count: 0 + * Validity bitmap buffer: Not required + + | Bytes 0-6 | Bytes 7-63 | + |------------|-----------------------| + | joemark | unspecified (padding) | + +Only the slot in the array corresponding to the type index is considered. All +"unselected" values are ignored and could be any semantically correct array +value. + +Null Layout +----------- + +We provide a simplified memory-efficient layout for the Null data type +where all values are null. In this case no memory buffers are +allocated. + +.. _dictionary-encoded-layout: + +Dictionary-encoded Layout +------------------------- + +Dictionary encoding is a data representation technique to represent +values by integers referencing a **dictionary** usually consisting of +unique values. It can be effective when you have data with many +repeated values. + +Any array can be dictionary-encoded. The dictionary is stored as an optional +property of an array. When a field is dictionary encoded, the values are +represented by an array of non-negative integers representing the index of the +value in the dictionary. The memory layout for a dictionary-encoded array is +the same as that of a primitive integer layout. The dictionary is handled as a +separate columnar array with its own respective layout. 
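+
+Reading back a value is therefore a two-step lookup: consult the index array
+(including its validity bitmap), then fetch the referenced entry from the
+dictionary array.  A minimal sketch, assuming Int32 indices and a VarBinary
+dictionary (the struct and helper names are hypothetical)::
+
+    #include <stdint.h>
+
+    struct DictEncodedVarBinary {
+      const uint8_t* validity;      // validity bitmap of the indices array
+      const int32_t* indices;       // index type: Int32
+      const int32_t* dict_offsets;  // dictionary: VarBinary offsets buffer
+      const uint8_t* dict_data;     // dictionary: VarBinary data buffer
+    };
+
+    // Returns 1 and fills *out/*out_len if slot j is non-null, else returns 0.
+    static int get_dict_value(const struct DictEncodedVarBinary* col, int64_t j,
+                              const uint8_t** out, int32_t* out_len) {
+      if (col->validity != NULL && !((col->validity[j / 8] >> (j % 8)) & 1)) {
+        return 0;  // slot j is null
+      }
+      int32_t k = col->indices[j];
+      *out = col->dict_data + col->dict_offsets[k];
+      *out_len = col->dict_offsets[k + 1] - col->dict_offsets[k];
+      return 1;
+    }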
+ +As an example, you could have the following data: :: + + type: VarBinary + + ['foo', 'bar', 'foo', 'bar', null, 'baz'] + +In dictionary-encoded form, this could appear as: + +:: + + data VarBinary (dictionary-encoded) + index_type: Int32 + values: [0, 1, 0, 1, null, 2] + + dictionary + type: VarBinary + values: ['foo', 'bar', 'baz'] + +Note that a dictionary is permitted to contain duplicate values or +nulls: + +:: + + data VarBinary (dictionary-encoded) + index_type: Int32 + values: [0, 1, 3, 1, 4, 2] + + dictionary + type: VarBinary + values: ['foo', 'bar', 'baz', 'foo', null] + +The null count of such arrays is dictated only by the validity bitmap +of its indices, irrespective of any null values in the dictionary. + +Since unsigned integers can be more difficult to work with in some cases +(e.g. in the JVM), we recommend preferring signed integers over unsigned +integers for representing dictionary indices. Additionally, we recommend +avoiding using 64-bit unsigned integer indices unless they are required by an +application. + +We discuss dictionary encoding as it relates to serialization further +below. + +Buffer Listing for Each Layout +------------------------------ + +For the avoidance of ambiguity, we provide listing the order and type +of memory buffers for each layout. + +.. csv-table:: Buffer Layouts + :header: "Layout Type", "Buffer 0", "Buffer 1", "Buffer 2" + :widths: 30, 20, 20, 20 + + "Primitive",validity,data, + "Variable Binary",validity,offsets,data + "List",validity,offsets, + "Fixed-size List",validity,, + "Struct",validity,, + "Sparse Union",type ids,, + "Dense Union",type ids,offsets, + "Null",,, + "Dictionary-encoded",validity,data (indices), + +Logical Types +============= + +The `Schema.fbs`_ defines built-in logical types supported by the +Arrow columnar format. Each logical type uses one of the above +physical layouts. Nested logical types may have different physical +layouts depending on the particular realization of the type. + +We do not go into detail about the logical types definitions in this +document as we consider `Schema.fbs`_ to be authoritative. + +.. _format-ipc: + +Serialization and Interprocess Communication (IPC) +================================================== + +The primitive unit of serialized data in the columnar format is the +"record batch". Semantically, a record batch is an ordered collection +of arrays, known as its **fields**, each having the same length as one +another but potentially different data types. A record batch's field +names and types collectively form the batch's **schema**. + +In this section we define a protocol for serializing record batches +into a stream of binary payloads and reconstructing record batches +from these payloads without need for memory copying. + +The columnar IPC protocol utilizes a one-way stream of binary messages +of these types: + +* Schema +* RecordBatch +* DictionaryBatch + +We specify a so-called *encapsulated IPC message* format which +includes a serialized Flatbuffer type along with an optional message +body. We define this message format before describing how to serialize +each constituent IPC message type. + +Encapsulated message format +--------------------------- + +For simple streaming and file-based serialization, we define a +"encapsulated" message format for interprocess communication. Such +messages can be "deserialized" into in-memory Arrow array objects by +examining only the message metadata without any need to copy or move +any of the actual data. 
+ +The encapsulated binary message format is as follows: + +* A 32-bit continuation indicator. The value ``0xFFFFFFFF`` indicates + a valid message. This component was introduced in version 0.15.0 in + part to address the 8-byte alignment requirement of Flatbuffers +* A 32-bit little-endian length prefix indicating the metadata size +* The message metadata as using the ``Message`` type defined in + `Message.fbs`_ +* Padding bytes to an 8-byte boundary +* The message body, whose length must be a multiple of 8 bytes + +Schematically, we have: :: + + <continuation: 0xFFFFFFFF> + <metadata_size: int32> + <metadata_flatbuffer: bytes> + <padding> + <message body> + +The complete serialized message must be a multiple of 8 bytes so that messages +can be relocated between streams. Otherwise the amount of padding between the +metadata and the message body could be non-deterministic. + +The ``metadata_size`` includes the size of the ``Message`` plus +padding. The ``metadata_flatbuffer`` contains a serialized ``Message`` +Flatbuffer value, which internally includes: + +* A version number +* A particular message value (one of ``Schema``, ``RecordBatch``, or + ``DictionaryBatch``) +* The size of the message body +* A ``custom_metadata`` field for any application-supplied metadata + +When read from an input stream, generally the ``Message`` metadata is +initially parsed and validated to obtain the body size. Then the body +can be read. + +Schema message +-------------- + +The Flatbuffers files `Schema.fbs`_ contains the definitions for all +built-in logical data types and the ``Schema`` metadata type which +represents the schema of a given record batch. A schema consists of +an ordered sequence of fields, each having a name and type. A +serialized ``Schema`` does not contain any data buffers, only type +metadata. + +The ``Field`` Flatbuffers type contains the metadata for a single +array. This includes: + +* The field's name +* The field's logical type +* Whether the field is semantically nullable. While this has no + bearing on the array's physical layout, many systems distinguish + nullable and non-nullable fields and we want to allow them to + preserve this metadata to enable faithful schema round trips. +* A collection of child ``Field`` values, for nested types +* A ``dictionary`` property indicating whether the field is + dictionary-encoded or not. If it is, a dictionary "id" is assigned + to allow matching a subsequent dictionary IPC message with the + appropriate field. + +We additionally provide both schema-level and field-level +``custom_metadata`` attributes allowing for systems to insert their +own application defined metadata to customize behavior. + +RecordBatch message +------------------- + +A RecordBatch message contains the actual data buffers corresponding +to the physical memory layout determined by a schema. The metadata for +this message provides the location and size of each buffer, permitting +Array data structures to be reconstructed using pointer arithmetic and +thus no memory copying. + +The serialized form of the record batch is the following: + +* The ``data header``, defined as the ``RecordBatch`` type in + `Message.fbs`_. 
+* The ``body``, a flat sequence of memory buffers written end-to-end + with appropriate padding to ensure a minimum of 8-byte alignment + +The data header contains the following: + +* The length and null count for each flattened field in the record + batch +* The memory offset and length of each constituent ``Buffer`` in the + record batch's body + +Fields and buffers are flattened by a pre-order depth-first traversal +of the fields in the record batch. For example, let's consider the +schema :: + + col1: Struct<a: Int32, b: List<item: Int64>, c: Float64> + col2: Utf8 + +The flattened version of this is: :: + + FieldNode 0: Struct name='col1' + FieldNode 1: Int32 name='a' + FieldNode 2: List name='b' + FieldNode 3: Int64 name='item' + FieldNode 4: Float64 name='c' + FieldNode 5: Utf8 name='col2' + +For the buffers produced, we would have the following (refer to the +table above): :: + + buffer 0: field 0 validity + buffer 1: field 1 validity + buffer 2: field 1 values + buffer 3: field 2 validity + buffer 4: field 2 offsets + buffer 5: field 3 validity + buffer 6: field 3 values + buffer 7: field 4 validity + buffer 8: field 4 values + buffer 9: field 5 validity + buffer 10: field 5 offsets + buffer 11: field 5 data + +The ``Buffer`` Flatbuffers value describes the location and size of a +piece of memory. Generally these are interpreted relative to the +**encapsulated message format** defined below. + +The ``size`` field of ``Buffer`` is not required to account for padding +bytes. Since this metadata can be used to communicate in-memory pointer +addresses between libraries, it is recommended to set ``size`` to the actual +memory size rather than the padded size. + +Byte Order (`Endianness`_) +--------------------------- + +The Arrow format is little endian by default. + +Serialized Schema metadata has an endianness field indicating +endianness of RecordBatches. Typically this is the endianness of the +system where the RecordBatch was generated. The main use case is +exchanging RecordBatches between systems with the same Endianness. At +first we will return an error when trying to read a Schema with an +endianness that does not match the underlying system. The reference +implementation is focused on Little Endian and provides tests for +it. Eventually we may provide automatic conversion via byte swapping. + +IPC Streaming Format +-------------------- + +We provide a streaming protocol or "format" for record batches. It is +presented as a sequence of encapsulated messages, each of which +follows the format above. The schema comes first in the stream, and it +is the same for all of the record batches that follow. If any fields +in the schema are dictionary-encoded, one or more ``DictionaryBatch`` +messages will be included. ``DictionaryBatch`` and ``RecordBatch`` +messages may be interleaved, but before any dictionary key is used in +a ``RecordBatch`` it should be defined in a ``DictionaryBatch``. :: + + <SCHEMA> + <DICTIONARY 0> + ... + <DICTIONARY k - 1> + <RECORD BATCH 0> + ... + <DICTIONARY x DELTA> + ... + <DICTIONARY y DELTA> + ... + <RECORD BATCH n - 1> + <EOS [optional]: 0xFFFFFFFF 0x00000000> + +.. note:: An edge-case for interleaved dictionary and record batches occurs + when the record batches contain dictionary encoded arrays that are + completely null. In this case, the dictionary for the encoded column might + appear after the first record batch. 
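+
+As an illustration of the framing described under "Encapsulated message
+format", a reader can walk a stream by repeatedly reading the continuation
+indicator and the metadata length, then the metadata and body.  The sketch
+below is schematic only: it assumes a little-endian host, ``parse_body_length``
+stands in for Flatbuffers parsing of the ``Message`` metadata (which carries
+the body size), and a real reader would map or retain the body rather than
+skip it::
+
+    #include <stdint.h>
+    #include <stdio.h>
+    #include <stdlib.h>
+
+    // Hypothetical stand-in for Flatbuffers parsing of the Message metadata.
+    int64_t parse_body_length(const uint8_t* metadata, int32_t metadata_size);
+
+    static void walk_ipc_stream(FILE* in) {
+      for (;;) {
+        uint32_t continuation;
+        int32_t metadata_size;
+        if (fread(&continuation, 4, 1, in) != 1) break;   // stream ended
+        if (continuation != 0xFFFFFFFFu) break;           // not a valid message
+        if (fread(&metadata_size, 4, 1, in) != 1) break;  // little-endian prefix
+        if (metadata_size == 0) break;                    // explicit EOS marker
+
+        // The metadata length already includes padding to an 8-byte boundary.
+        uint8_t* metadata = (uint8_t*) malloc((size_t) metadata_size);
+        if (metadata == NULL ||
+            fread(metadata, 1, (size_t) metadata_size, in) != (size_t) metadata_size) {
+          free(metadata);
+          break;
+        }
+        int64_t body_length = parse_body_length(metadata, metadata_size);
+        fseek(in, (long) body_length, SEEK_CUR);  // a real reader reads the body
+        free(metadata);
+      }
+    }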
+ +When a stream reader implementation is reading a stream, after each +message, it may read the next 8 bytes to determine both if the stream +continues and the size of the message metadata that follows. Once the +message flatbuffer is read, you can then read the message body. + +The stream writer can signal end-of-stream (EOS) either by writing 8 bytes +containing the 4-byte continuation indicator (``0xFFFFFFFF``) followed by 0 +metadata length (``0x00000000``) or closing the stream interface. We +recommend the ".arrows" file extension for the streaming format although +in many cases these streams will not ever be stored as files. + +IPC File Format +--------------- + +We define a "file format" supporting random access that is an extension of +the stream format. The file starts and ends with a magic string ``ARROW1`` +(plus padding). What follows in the file is identical to the stream format. +At the end of the file, we write a *footer* containing a redundant copy of +the schema (which is a part of the streaming format) plus memory offsets and +sizes for each of the data blocks in the file. This enables random access to +any record batch in the file. See `File.fbs`_ for the precise details of the +file footer. + +Schematically we have: :: + + <magic number "ARROW1"> + <empty padding bytes [to 8 byte boundary]> + <STREAMING FORMAT with EOS> + <FOOTER> + <FOOTER SIZE: int32> + <magic number "ARROW1"> + +In the file format, there is no requirement that dictionary keys +should be defined in a ``DictionaryBatch`` before they are used in a +``RecordBatch``, as long as the keys are defined somewhere in the +file. Further more, it is invalid to have more than one **non-delta** +dictionary batch per dictionary ID (i.e. dictionary replacement is not +supported). Delta dictionaries are applied in the order they appear in +the file footer. We recommend the ".arrow" extension for files created with +this format. + +Dictionary Messages +------------------- + +Dictionaries are written in the stream and file formats as a sequence of record +batches, each having a single field. The complete semantic schema for a +sequence of record batches, therefore, consists of the schema along with all of +the dictionaries. The dictionary types are found in the schema, so it is +necessary to read the schema to first determine the dictionary types so that +the dictionaries can be properly interpreted: :: + + table DictionaryBatch { + id: long; + data: RecordBatch; + isDelta: boolean = false; + } + +The dictionary ``id`` in the message metadata can be referenced one or more times +in the schema, so that dictionaries can even be used for multiple fields. See +the :ref:`dictionary-encoded-layout` section for more about the semantics of +dictionary-encoded data. + +The dictionary ``isDelta`` flag allows existing dictionaries to be +expanded for future record batch materializations. A dictionary batch +with ``isDelta`` set indicates that its vector should be concatenated +with those of any previous batches with the same ``id``. In a stream +which encodes one column, the list of strings ``["A", "B", "C", "B", +"D", "C", "E", "A"]``, with a delta dictionary batch could take the +form: :: + + <SCHEMA> + <DICTIONARY 0> + (0) "A" + (1) "B" + (2) "C" + + <RECORD BATCH 0> + 0 + 1 + 2 + 1 + + <DICTIONARY 0 DELTA> + (3) "D" + (4) "E" + + <RECORD BATCH 1> + 3 + 2 + 4 + 0 + EOS + +Alternatively, if ``isDelta`` is set to false, then the dictionary +replaces the existing dictionary for the same ID. 
Using the same +example as above, an alternate encoding could be: :: + + + <SCHEMA> + <DICTIONARY 0> + (0) "A" + (1) "B" + (2) "C" + + <RECORD BATCH 0> + 0 + 1 + 2 + 1 + + <DICTIONARY 0> + (0) "A" + (1) "C" + (2) "D" + (3) "E" + + <RECORD BATCH 1> + 2 + 1 + 3 + 0 + EOS + + +Custom Application Metadata +--------------------------- + +We provide a ``custom_metadata`` field at three levels to provide a +mechanism for developers to pass application-specific metadata in +Arrow protocol messages. This includes ``Field``, ``Schema``, and +``Message``. + +The colon symbol ``:`` is to be used as a namespace separator. It can +be used multiple times in a key. + +The ``ARROW`` pattern is a reserved namespace for internal Arrow use +in the ``custom_metadata`` fields. For example, +``ARROW:extension:name``. + +.. _format_metadata_extension_types: + +Extension Types +--------------- + +User-defined "extension" types can be defined setting certain +``KeyValue`` pairs in ``custom_metadata`` in the ``Field`` metadata +structure. These extension keys are: + +* ``'ARROW:extension:name'`` for the string name identifying the + custom data type. We recommend that you use a "namespace"-style + prefix for extension type names to minimize the possibility of + conflicts with multiple Arrow readers and writers in the same + application. For example, use ``myorg.name_of_type`` instead of + simply ``name_of_type`` +* ``'ARROW:extension:metadata'`` for a serialized representation + of the ``ExtensionType`` necessary to reconstruct the custom type + +This extension metadata can annotate any of the built-in Arrow logical +types. The intent is that an implementation that does not support an +extension type can still handle the underlying data. For example a +16-byte UUID value could be embedded in ``FixedSizeBinary(16)``, and +implementations that do not have this extension type can still work +with the underlying binary values and pass along the +``custom_metadata`` in subsequent Arrow protocol messages. + +Extension types may or may not use the +``'ARROW:extension:metadata'`` field. Let's consider some example +extension types: + +* ``uuid`` represented as ``FixedSizeBinary(16)`` with empty metadata +* ``latitude-longitude`` represented as ``struct<latitude: double, + longitude: double>``, and empty metadata +* ``tensor`` (multidimensional array) stored as ``Binary`` values and + having serialized metadata indicating the data type and shape of + each value. This could be JSON like ``{'type': 'int8', 'shape': [4, + 5]}`` for a 4x5 cell tensor. +* ``trading-time`` represented as ``Timestamp`` with serialized + metadata indicating the market trading calendar the data corresponds + to + +Implementation guidelines +========================= + +An execution engine (or framework, or UDF executor, or storage engine, +etc) can implement only a subset of the Arrow spec and/or extend it +given the following constraints: + +Implementing a subset the spec +------------------------------ + +* **If only producing (and not consuming) arrow vectors**: Any subset + of the vector spec and the corresponding metadata can be implemented. +* **If consuming and producing vectors**: There is a minimal subset of + vectors to be supported. Production of a subset of vectors and + their corresponding metadata is always fine. Consumption of vectors + should at least convert the unsupported input vectors to the + supported subset (for example Timestamp.millis to timestamp.micros + or int32 to int64). 
+ +Extensibility +------------- + +An execution engine implementor can also extend their memory +representation with their own vectors internally as long as they are +never exposed. Before sending data to another system expecting Arrow +data, these custom vectors should be converted to a type that exist in +the Arrow spec. + +.. _Flatbuffers: http://github.com/google/flatbuffers +.. _Flatbuffers protocol definition files: https://github.com/apache/arrow/tree/master/format +.. _Schema.fbs: https://github.com/apache/arrow/blob/master/format/Schema.fbs +.. _Message.fbs: https://github.com/apache/arrow/blob/master/format/Message.fbs +.. _File.fbs: https://github.com/apache/arrow/blob/master/format/File.fbs +.. _least-significant bit (LSB) numbering: https://en.wikipedia.org/wiki/Bit_numbering +.. _Intel performance guide: https://software.intel.com/en-us/articles/practical-intel-avx-optimization-on-2nd-generation-intel-core-processors +.. _Endianness: https://en.wikipedia.org/wiki/Endianness +.. _SIMD: https://software.intel.com/en-us/cpp-compiler-developer-guide-and-reference-introduction-to-the-simd-data-layout-templates +.. _Parquet: https://parquet.apache.org/documentation/latest/ diff --git a/src/arrow/docs/source/format/Flight.rst b/src/arrow/docs/source/format/Flight.rst new file mode 100644 index 000000000..c79c56386 --- /dev/null +++ b/src/arrow/docs/source/format/Flight.rst @@ -0,0 +1,152 @@ +.. Licensed to the Apache Software Foundation (ASF) under one +.. or more contributor license agreements. See the NOTICE file +.. distributed with this work for additional information +.. regarding copyright ownership. The ASF licenses this file +.. to you under the Apache License, Version 2.0 (the +.. "License"); you may not use this file except in compliance +.. with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, +.. software distributed under the License is distributed on an +.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +.. KIND, either express or implied. See the License for the +.. specific language governing permissions and limitations +.. under the License. + +.. _flight-rpc: + +Arrow Flight RPC +================ + +Arrow Flight is an RPC framework for high-performance data services +based on Arrow data, and is built on top of gRPC_ and the :doc:`IPC +format <IPC>`. + +Flight is organized around streams of Arrow record batches, being +either downloaded from or uploaded to another service. A set of +metadata methods offers discovery and introspection of streams, as +well as the ability to implement application-specific methods. + +Methods and message wire formats are defined by Protobuf, enabling +interoperability with clients that may support gRPC and Arrow +separately, but not Flight. However, Flight implementations include +further optimizations to avoid overhead in usage of Protobuf (mostly +around avoiding excessive memory copies). + +.. _gRPC: https://grpc.io/ + +RPC Methods +----------- + +Flight defines a set of RPC methods for uploading/downloading data, +retrieving metadata about a data stream, listing available data +streams, and for implementing application-specific RPC methods. A +Flight service implements some subset of these methods, while a Flight +client can call any of these methods. Thus, one Flight client can +connect to any Flight service and perform basic operations. 
+ +Data streams are identified by descriptors, which are either a path or +an arbitrary binary command. A client that wishes to download the data +would: + +#. Construct or acquire a ``FlightDescriptor`` for the data set they + are interested in. A client may know what descriptor they want + already, or they may use methods like ``ListFlights`` to discover + them. +#. Call ``GetFlightInfo(FlightDescriptor)`` to get a ``FlightInfo`` + message containing details on where the data is located (as well as + other metadata, like the schema and possibly an estimate of the + dataset size). + + Flight does not require that data live on the same server as + metadata: this call may list other servers to connect to. The + ``FlightInfo`` message includes a ``Ticket``, an opaque binary + token that the server uses to identify the exact data set being + requested. +#. Connect to other servers (if needed). +#. Call ``DoGet(Ticket)`` to get back a stream of Arrow record + batches. + +To upload data, a client would: + +#. Construct or acquire a ``FlightDescriptor``, as before. +#. Call ``DoPut(FlightData)`` and upload a stream of Arrow record + batches. They would also include the ``FlightDescriptor`` with the + first message. + +See `Protocol Buffer Definitions`_ for full details on the methods and +messages involved. + +Authentication +-------------- + +Flight supports application-implemented authentication +methods. Authentication, if enabled, has two phases: at connection +time, the client and server can exchange any number of messages. Then, +the client can provide a token alongside each call, and the server can +validate that token. + +Applications may use any part of this; for instance, they may ignore +the initial handshake and send an externally acquired token on each +call, or they may establish trust during the handshake and not +validate a token for each call. (Note that the latter is not secure if +you choose to deploy a layer 7 load balancer, as is common with gRPC.) + +Error Handling +-------------- + +Arrow Flight defines its own set of error codes. The implementation +differs between languages (e.g. in C++, Unimplemented is a general +Arrow error status while it's a Flight-specific exception in Java), +but the following set is exposed: + ++----------------+-------------------------------------------+ +|Error Code |Description | ++================+===========================================+ +|UNKNOWN |An unknown error. The default if no other | +| |error applies. | ++----------------+-------------------------------------------+ +|INTERNAL |An error internal to the service | +| |implementation occurred. | ++----------------+-------------------------------------------+ +|INVALID_ARGUMENT|The client passed an invalid argument to | +| |the RPC. | ++----------------+-------------------------------------------+ +|TIMED_OUT |The operation exceeded a timeout or | +| |deadline. | ++----------------+-------------------------------------------+ +|NOT_FOUND |The requested resource (action, data | +| |stream) was not found. | ++----------------+-------------------------------------------+ +|ALREADY_EXISTS |The resource already exists. | ++----------------+-------------------------------------------+ +|CANCELLED |The operation was cancelled (either by the | +| |client or the server). | ++----------------+-------------------------------------------+ +|UNAUTHENTICATED |The client is not authenticated. 
| ++----------------+-------------------------------------------+ +|UNAUTHORIZED |The client is authenticated, but does not | +| |have permissions for the requested | +| |operation. | ++----------------+-------------------------------------------+ +|UNIMPLEMENTED |The RPC is not implemented. | ++----------------+-------------------------------------------+ +|UNAVAILABLE |The server is not available. May be emitted| +| |by the client for connectivity reasons. | ++----------------+-------------------------------------------+ + + +External Resources +------------------ + +- https://arrow.apache.org/blog/2018/10/09/0.11.0-release/ +- https://www.slideshare.net/JacquesNadeau5/apache-arrow-flight-overview + +Protocol Buffer Definitions +--------------------------- + +.. literalinclude:: ../../../format/Flight.proto + :language: protobuf + :linenos: diff --git a/src/arrow/docs/source/format/Guidelines.rst b/src/arrow/docs/source/format/Guidelines.rst new file mode 100644 index 000000000..40624521a --- /dev/null +++ b/src/arrow/docs/source/format/Guidelines.rst @@ -0,0 +1,24 @@ +.. Licensed to the Apache Software Foundation (ASF) under one +.. or more contributor license agreements. See the NOTICE file +.. distributed with this work for additional information +.. regarding copyright ownership. The ASF licenses this file +.. to you under the Apache License, Version 2.0 (the +.. "License"); you may not use this file except in compliance +.. with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, +.. software distributed under the License is distributed on an +.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +.. KIND, either express or implied. See the License for the +.. specific language governing permissions and limitations +.. under the License. + +:orphan: + +Implementation Guidelines +========================= + +The contents of this document have relocated to the main :ref:`Columnar +Specification <format_columnar>` page. diff --git a/src/arrow/docs/source/format/IPC.rst b/src/arrow/docs/source/format/IPC.rst new file mode 100644 index 000000000..65b47f7d7 --- /dev/null +++ b/src/arrow/docs/source/format/IPC.rst @@ -0,0 +1,24 @@ +.. Licensed to the Apache Software Foundation (ASF) under one +.. or more contributor license agreements. See the NOTICE file +.. distributed with this work for additional information +.. regarding copyright ownership. The ASF licenses this file +.. to you under the Apache License, Version 2.0 (the +.. "License"); you may not use this file except in compliance +.. with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, +.. software distributed under the License is distributed on an +.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +.. KIND, either express or implied. See the License for the +.. specific language governing permissions and limitations +.. under the License. + +:orphan: + +IPC +=== + +The contents of this document have relocated to the main :ref:`Columnar +Specification <format_columnar>` page. diff --git a/src/arrow/docs/source/format/Integration.rst b/src/arrow/docs/source/format/Integration.rst new file mode 100644 index 000000000..22d595e99 --- /dev/null +++ b/src/arrow/docs/source/format/Integration.rst @@ -0,0 +1,398 @@ +.. Licensed to the Apache Software Foundation (ASF) under one +.. 
or more contributor license agreements. See the NOTICE file +.. distributed with this work for additional information +.. regarding copyright ownership. The ASF licenses this file +.. to you under the Apache License, Version 2.0 (the +.. "License"); you may not use this file except in compliance +.. with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, +.. software distributed under the License is distributed on an +.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +.. KIND, either express or implied. See the License for the +.. specific language governing permissions and limitations +.. under the License. + +.. _format_integration_testing: + +Integration Testing +=================== + +Our strategy for integration testing between Arrow implementations is: + +* Test datasets are specified in a custom human-readable, JSON-based format + designed exclusively for Arrow's integration tests +* Each implementation provides a testing executable capable of converting + between the JSON and the binary Arrow file representation +* The test executable is also capable of validating the contents of a binary + file against a corresponding JSON file + +Running integration tests +------------------------- + +The integration test data generator and runner are implemented inside +the :ref:`Archery <archery>` utility. + +The integration tests are run using the ``archery integration`` command. + +.. code-block:: shell + + archery integration --help + +In order to run integration tests, you'll first need to build each component +you want to include. See the respective developer docs for C++, Java, etc. +for instructions on building those. + +Some languages may require additional build options to enable integration +testing. For C++, for example, you need to add ``-DARROW_BUILD_INTEGRATION=ON`` +to your cmake command. + +Depending on which components you have built, you can enable and add them to +the archery test run. For example, if you only have the C++ project built, run: + +.. code-block:: shell + + archery integration --with-cpp=1 + + +For Java, it may look like: + +.. code-block:: shell + + VERSION=0.11.0-SNAPSHOT + export ARROW_JAVA_INTEGRATION_JAR=$JAVA_DIR/tools/target/arrow-tools-$VERSION-jar-with-dependencies.jar + archery integration --with-cpp=1 --with-java=1 + +To run all tests, including Flight integration tests, do: + +.. code-block:: shell + + archery integration --with-all --run-flight + +Note that we run these tests in continuous integration, and the CI job uses +docker-compose. You may also run the docker-compose job locally, or at least +refer to it if you have questions about how to build other languages or enable +certain tests. + +See :ref:`docker-builds` for more information about the project's +``docker-compose`` configuration. + +JSON test data format +--------------------- + +A JSON representation of Arrow columnar data is provided for +cross-language integration testing purposes. +This representation is `not canonical <https://lists.apache.org/thread.html/6947fb7666a0f9cc27d9677d2dad0fb5990f9063b7cf3d80af5e270f%40%3Cdev.arrow.apache.org%3E>`_ +but it provides a human-readable way of verifying language implementations. + +See `here <https://github.com/apache/arrow/tree/master/docs/source/format/integration_json_examples>`_ +for some examples of this JSON data. + +.. can we check in more examples, e.g. from the generated_*.json test files? 
+ +The high level structure of a JSON integration test files is as follows: + +**Data file** :: + + { + "schema": /*Schema*/, + "batches": [ /*RecordBatch*/ ], + "dictionaries": [ /*DictionaryBatch*/ ], + } + +All files contain ``schema`` and ``batches``, while ``dictionaries`` is only +present if there are dictionary type fields in the schema. + +**Schema** :: + + { + "fields" : [ + /* Field */ + ], + "metadata" : /* Metadata */ + } + +**Field** :: + + { + "name" : "name_of_the_field", + "nullable" : /* boolean */, + "type" : /* Type */, + "children" : [ /* Field */ ], + "dictionary": { + "id": /* integer */, + "indexType": /* Type */, + "isOrdered": /* boolean */ + }, + "metadata" : /* Metadata */ + } + +The ``dictionary`` attribute is present if and only if the ``Field`` corresponds to a +dictionary type, and its ``id`` maps onto a column in the ``DictionaryBatch``. In this +case the ``type`` attribute describes the value type of the dictionary. + +For primitive types, ``children`` is an empty array. + +**Metadata** :: + + null | + [ { + "key": /* string */, + "value": /* string */ + } ] + +A key-value mapping of custom metadata. It may be omitted or null, in which case it is +considered equivalent to ``[]`` (no metadata). Duplicated keys are not forbidden here. + +**Type**: :: + + { + "name" : "null|struct|list|largelist|fixedsizelist|union|int|floatingpoint|utf8|largeutf8|binary|largebinary|fixedsizebinary|bool|decimal|date|time|timestamp|interval|duration|map" + } + +A ``Type`` will have other fields as defined in +`Schema.fbs <https://github.com/apache/arrow/tree/master/format/Schema.fbs>`_ +depending on its name. + +Int: :: + + { + "name" : "int", + "bitWidth" : /* integer */, + "isSigned" : /* boolean */ + } + +FloatingPoint: :: + + { + "name" : "floatingpoint", + "precision" : "HALF|SINGLE|DOUBLE" + } + +FixedSizeBinary: :: + + { + "name" : "fixedsizebinary", + "byteWidth" : /* byte width */ + } + +Decimal: :: + + { + "name" : "decimal", + "precision" : /* integer */, + "scale" : /* integer */ + } + +Timestamp: :: + + { + "name" : "timestamp", + "unit" : "$TIME_UNIT", + "timezone": "$timezone" + } + +``$TIME_UNIT`` is one of ``"SECOND|MILLISECOND|MICROSECOND|NANOSECOND"`` + +"timezone" is an optional string. + +Duration: :: + + { + "name" : "duration", + "unit" : "$TIME_UNIT" + } + +Date: :: + + { + "name" : "date", + "unit" : "DAY|MILLISECOND" + } + +Time: :: + + { + "name" : "time", + "unit" : "$TIME_UNIT", + "bitWidth": /* integer: 32 or 64 */ + } + +Interval: :: + + { + "name" : "interval", + "unit" : "YEAR_MONTH|DAY_TIME" + } + +Union: :: + + { + "name" : "union", + "mode" : "SPARSE|DENSE", + "typeIds" : [ /* integer */ ] + } + +The ``typeIds`` field in ``Union`` are the codes used to denote which member of +the union is active in each array slot. Note that in general these discriminants are not identical +to the index of the corresponding child array. + +List: :: + + { + "name": "list" + } + +The type that the list is a "list of" will be included in the ``Field``'s +"children" member, as a single ``Field`` there. For example, for a list of +``int32``, :: + + { + "name": "list_nullable", + "type": { + "name": "list" + }, + "nullable": true, + "children": [ + { + "name": "item", + "type": { + "name": "int", + "isSigned": true, + "bitWidth": 32 + }, + "nullable": true, + "children": [] + } + ] + } + +FixedSizeList: :: + + { + "name": "fixedsizelist", + "listSize": /* integer */ + } + +This type likewise comes with a length-1 "children" array. 
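+
+For illustration only (the field and child names here are arbitrary, not part of the
+specification), a ``fixedsizelist`` of three nullable ``int32`` values could be
+described as, ::
+
+    {
+      "name": "fixed_list_nullable",
+      "type": {
+        "name": "fixedsizelist",
+        "listSize": 3
+      },
+      "nullable": true,
+      "children": [
+        {
+          "name": "item",
+          "type": {
+            "name": "int",
+            "isSigned": true,
+            "bitWidth": 32
+          },
+          "nullable": true,
+          "children": []
+        }
+      ]
+    }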
+ +Struct: :: + + { + "name": "struct" + } + +The ``Field``'s "children" contains an array of ``Fields`` with meaningful +names and types. + +Map: :: + + { + "name": "map", + "keysSorted": /* boolean */ + } + +The ``Field``'s "children" contains a single ``struct`` field, which itself +contains 2 children, named "key" and "value". + +Null: :: + + { + "name": "null" + } + +Extension types are, as in the IPC format, represented as their underlying +storage type plus some dedicated field metadata to reconstruct the extension +type. For example, assuming a "uuid" extension type backed by a +FixedSizeBinary(16) storage, here is how a "uuid" field would be represented:: + + { + "name" : "name_of_the_field", + "nullable" : /* boolean */, + "type" : { + "name" : "fixedsizebinary", + "byteWidth" : 16 + }, + "children" : [], + "metadata" : [ + {"key": "ARROW:extension:name", "value": "uuid"}, + {"key": "ARROW:extension:metadata", "value": "uuid-serialized"} + ] + } + +**RecordBatch**:: + + { + "count": /* integer number of rows */, + "columns": [ /* FieldData */ ] + } + +**DictionaryBatch**:: + + { + "id": /* integer */, + "data": [ /* RecordBatch */ ] + } + +**FieldData**:: + + { + "name": "field_name", + "count" "field_length", + "$BUFFER_TYPE": /* BufferData */ + ... + "$BUFFER_TYPE": /* BufferData */ + "children": [ /* FieldData */ ] + } + +The "name" member of a ``Field`` in the ``Schema`` corresponds to the "name" +of a ``FieldData`` contained in the "columns" of a ``RecordBatch``. +For nested types (list, struct, etc.), ``Field``'s "children" each have a +"name" that corresponds to the "name" of a ``FieldData`` inside the +"children" of that ``FieldData``. +For ``FieldData`` inside of a ``DictionaryBatch``, the "name" field does not +correspond to anything. + +Here ``$BUFFER_TYPE`` is one of ``VALIDITY``, ``OFFSET`` (for +variable-length types, such as strings and lists), ``TYPE_ID`` (for unions), +or ``DATA``. + +``BufferData`` is encoded based on the type of buffer: + +* ``VALIDITY``: a JSON array of 1 (valid) and 0 (null). Data for non-nullable + ``Field`` still has a ``VALIDITY`` array, even though all values are 1. +* ``OFFSET``: a JSON array of integers for 32-bit offsets or + string-formatted integers for 64-bit offsets +* ``TYPE_ID``: a JSON array of integers +* ``DATA``: a JSON array of encoded values + +The value encoding for ``DATA`` is different depending on the logical +type: + +* For boolean type: an array of 1 (true) and 0 (false). +* For integer-based types (including timestamps): an array of JSON numbers. +* For 64-bit integers: an array of integers formatted as JSON strings, + so as to avoid loss of precision. +* For floating point types: an array of JSON numbers. Values are limited + to 3 decimal places to avoid loss of precision. +* For binary types, an array of uppercase hex-encoded strings, so as + to represent arbitrary binary data. +* For UTF-8 string types, an array of JSON strings. + +For "list" and "largelist" types, ``BufferData`` has ``VALIDITY`` and +``OFFSET``, and the rest of the data is inside "children". These child +``FieldData`` contain all of the same attributes as non-child data, so in +the example of a list of ``int32``, the child data has ``VALIDITY`` and +``DATA``. + +For "fixedsizelist", there is no ``OFFSET`` member because the offsets are +implied by the field's "listSize". + +Note that the "count" for these child data may not match the parent "count". 
+For example, if a ``RecordBatch`` has 7 rows and contains a ``FixedSizeList`` +of ``listSize`` 4, then the data inside the "children" of that ``FieldData`` +will have count 28. + +For "null" type, ``BufferData`` does not contain any buffers. diff --git a/src/arrow/docs/source/format/Layout.rst b/src/arrow/docs/source/format/Layout.rst new file mode 100644 index 000000000..4568f31c5 --- /dev/null +++ b/src/arrow/docs/source/format/Layout.rst @@ -0,0 +1,24 @@ +.. Licensed to the Apache Software Foundation (ASF) under one +.. or more contributor license agreements. See the NOTICE file +.. distributed with this work for additional information +.. regarding copyright ownership. The ASF licenses this file +.. to you under the Apache License, Version 2.0 (the +.. "License"); you may not use this file except in compliance +.. with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, +.. software distributed under the License is distributed on an +.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +.. KIND, either express or implied. See the License for the +.. specific language governing permissions and limitations +.. under the License. + +:orphan: + +Physical Memory Layout +====================== + +The contents of this document have relocated to the main :ref:`Columnar +Specification <format_columnar>` page. diff --git a/src/arrow/docs/source/format/Metadata.rst b/src/arrow/docs/source/format/Metadata.rst new file mode 100644 index 000000000..55045abb0 --- /dev/null +++ b/src/arrow/docs/source/format/Metadata.rst @@ -0,0 +1,24 @@ +.. Licensed to the Apache Software Foundation (ASF) under one +.. or more contributor license agreements. See the NOTICE file +.. distributed with this work for additional information +.. regarding copyright ownership. The ASF licenses this file +.. to you under the Apache License, Version 2.0 (the +.. "License"); you may not use this file except in compliance +.. with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, +.. software distributed under the License is distributed on an +.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +.. KIND, either express or implied. See the License for the +.. specific language governing permissions and limitations +.. under the License. + +:orphan: + +Format Metadata +=============== + +The contents of this document have relocated to the main :ref:`Columnar +Specification <format_columnar>` page. diff --git a/src/arrow/docs/source/format/Other.rst b/src/arrow/docs/source/format/Other.rst new file mode 100644 index 000000000..9504998d6 --- /dev/null +++ b/src/arrow/docs/source/format/Other.rst @@ -0,0 +1,63 @@ +.. Licensed to the Apache Software Foundation (ASF) under one +.. or more contributor license agreements. See the NOTICE file +.. distributed with this work for additional information +.. regarding copyright ownership. The ASF licenses this file +.. to you under the Apache License, Version 2.0 (the +.. "License"); you may not use this file except in compliance +.. with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, +.. software distributed under the License is distributed on an +.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +.. 
KIND, either express or implied. See the License for the +.. specific language governing permissions and limitations +.. under the License. + +Other Data Structures +===================== + +Our Flatbuffers protocol files have metadata for some other data +structures defined to allow other kinds of applications to take +advantage of common interprocess communication machinery. These data +structures are not considered to be part of the columnar format. + +An Arrow columnar implementation is not required to implement these +types. + +Tensor (Multi-dimensional Array) +-------------------------------- + +The ``Tensor`` message types provides a way to write a +multidimensional array of fixed-size values (such as a NumPy ndarray). + +When writing a standalone encapsulated tensor message, we use the +encapsulated IPC format defined in the :ref:`Columnar Specification +<format_columnar>`, but additionally align the starting offset of the +tensor body to be a multiple of 64 bytes: :: + + <metadata prefix and metadata> + <PADDING> + <tensor body> + +Sparse Tensor +------------- + +``SparseTensor`` represents a multidimensional array whose elements +are generally almost all zeros. + +When writing a standalone encapsulated sparse tensor message, we use +the encapsulated IPC format defined in the :ref:`Columnar Specification +<format_columnar>`, but additionally align the starting offsets of the +sparse index and the sparse tensor body (if writing to a shared memory +region) to be multiples of 64 bytes: :: + + <metadata prefix and metadata> + <PADDING> + <sparse index> + <PADDING> + <sparse tensor body> + +The contents of the sparse tensor index depends on what kind of sparse +format is used. diff --git a/src/arrow/docs/source/format/README.md b/src/arrow/docs/source/format/README.md new file mode 100644 index 000000000..68a2d72b5 --- /dev/null +++ b/src/arrow/docs/source/format/README.md @@ -0,0 +1,24 @@ +<!--- + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. +--> + +# Apache Arrow Format Documentation + +These documents go together with the Flatbuffers and Protocol Buffers +protocol definition files to provide sufficient detail necessary to +build a new Arrow implementation.
\ No newline at end of file diff --git a/src/arrow/docs/source/format/Versioning.rst b/src/arrow/docs/source/format/Versioning.rst new file mode 100644 index 000000000..b70656987 --- /dev/null +++ b/src/arrow/docs/source/format/Versioning.rst @@ -0,0 +1,70 @@ +.. Licensed to the Apache Software Foundation (ASF) under one +.. or more contributor license agreements. See the NOTICE file +.. distributed with this work for additional information +.. regarding copyright ownership. The ASF licenses this file +.. to you under the Apache License, Version 2.0 (the +.. "License"); you may not use this file except in compliance +.. with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, +.. software distributed under the License is distributed on an +.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +.. KIND, either express or implied. See the License for the +.. specific language governing permissions and limitations +.. under the License. + +Format Versioning and Stability +=============================== + +Starting with version 1.0.0, Apache Arrow utilizes +**two versions** to describe each release of the project: +the **Format Version** and the **Library Version**. Each Library +Version has a corresponding Format Version, and multiple versions of +the library may have the same format version. For example, library +versions 2.0.0 and 3.0.0 may both track format version 1.0.0. + +For library versions prior to 1.0.0, major releases may contain API +changes. From 1.0.0 onward, we follow `Semantic Versioning +<https://semver.org/>`_ with regards to communicating API changes. We +expect most releases to be major library releases. + +Backward Compatibility +---------------------- + +A newer versioned client library will be able to read any data and +metadata produced by an older client library. + +So long as the **major** format version is not changed, a newer +library is backward compatible with an older library. + +Forward Compatibility +--------------------- + +An older client library must be able to either read data generated +from a new client library or detect that it cannot properly read the +data. + +An increase in the **minor** version of the format version, such as +1.0.0 to 1.1.0, indicates that 1.1.0 contains new features not +available in 1.0.0. So long as these features are not used (such as a +new logical data type), forward compatibility is preserved. + +Long-Term Stability +------------------- + +A change in the format major version (e.g. from 1.0.0 to 2.0.0) +indicates a disruption to these compatibility guarantees in some way. +We **do not expect** this to be a frequent occurrence. +This would be an exceptional +event and, should this come to pass, we would exercise caution in +ensuring that production applications are not harmed. + +Pre-1.0.0 Versions +------------------ + +We made no forward or backward compatibility guarantees for +versions prior to 1.0.0. However, we made every effort to ensure +that new clients can read serialized data produced by library version +0.8.0 and onward. 
diff --git a/src/arrow/docs/source/format/integration_json_examples/simple.json b/src/arrow/docs/source/format/integration_json_examples/simple.json new file mode 100644 index 000000000..663472919 --- /dev/null +++ b/src/arrow/docs/source/format/integration_json_examples/simple.json @@ -0,0 +1,98 @@ +{ + "schema": { + "fields": [ + { + "name": "foo", + "type": {"name": "int", "isSigned": true, "bitWidth": 32}, + "nullable": true, + "children": [] + }, + { + "name": "bar", + "type": {"name": "floatingpoint", "precision": "DOUBLE"}, + "nullable": true, + "children": [] + }, + { + "name": "baz", + "type": {"name": "utf8"}, + "nullable": true, + "children": [] + } + ] + }, + "batches": [ + { + "count": 5, + "columns": [ + { + "name": "foo", + "count": 5, + "VALIDITY": [1, 0, 1, 1, 1], + "DATA": [1, 2, 3, 4, 5] + }, + { + "name": "bar", + "count": 5, + "VALIDITY": [1, 0, 0, 1, 1], + "DATA": [1.0, 2.0, 3.0, 4.0, 5.0] + }, + { + "name": "baz", + "count": 5, + "VALIDITY": [1, 0, 0, 1, 1], + "OFFSET": [0, 2, 2, 2, 5, 9], + "DATA": ["aa", "", "", "bbb", "cccc"] + } + ] + }, + { + "count": 5, + "columns": [ + { + "name": "foo", + "count": 5, + "VALIDITY": [1, 1, 1, 1, 1], + "DATA": [1, 2, 3, 4, 5] + }, + { + "name": "bar", + "count": 5, + "VALIDITY": [1, 1, 1, 1, 1], + "DATA": [1.0, 2.0, 3.0, 4.0, 5.0] + }, + { + "name": "baz", + "count": 5, + "VALIDITY": [1, 1, 1, 1, 1], + "OFFSET": [0, 2, 3, 4, 7, 11], + "DATA": ["aa", "b", "c", "ddd", "eeee"] + } + ] + }, + { + "count": 5, + "columns": [ + { + "name": "foo", + "count": 5, + "VALIDITY": [0, 0, 0, 0, 0], + "DATA": [1, 2, 3, 4, 5] + }, + { + "name": "bar", + "count": 5, + "VALIDITY": [0, 0, 0, 0, 0], + "DATA": [1.0, 2.0, 3.0, 4.0, 5.0] + }, + { + "name": "baz", + "count": 5, + "VALIDITY": [0, 0, 0, 0, 0], + "OFFSET": [0, 0, 0, 0, 0, 0], + "DATA": ["", "", "", "", ""] + } + ] + } + ] +} diff --git a/src/arrow/docs/source/format/integration_json_examples/struct.json b/src/arrow/docs/source/format/integration_json_examples/struct.json new file mode 100644 index 000000000..4e6cc774e --- /dev/null +++ b/src/arrow/docs/source/format/integration_json_examples/struct.json @@ -0,0 +1,201 @@ +{ + "schema": { + "fields": [ + { + "name": "struct_nullable", + "type": { + "name": "struct" + }, + "nullable": true, + "children": [ + { + "name": "f1", + "type": { + "name": "int", + "isSigned": true, + "bitWidth": 32 + }, + "nullable": true, + "children": [] + }, + { + "name": "f2", + "type": { + "name": "utf8" + }, + "nullable": true, + "children": [] + } + ] + } + ] + }, + "batches": [ + { + "count": 7, + "columns": [ + { + "name": "struct_nullable", + "count": 7, + "VALIDITY": [ + 0, + 1, + 1, + 1, + 0, + 1, + 0 + ], + "children": [ + { + "name": "f1", + "count": 7, + "VALIDITY": [ + 1, + 0, + 1, + 1, + 1, + 0, + 0 + ], + "DATA": [ + 1402032511, + 290876774, + 137773603, + 410361374, + 1959836418, + 1995074679, + -163525262 + ] + }, + { + "name": "f2", + "count": 7, + "VALIDITY": [ + 0, + 1, + 1, + 1, + 0, + 1, + 0 + ], + "OFFSET": [ + 0, + 0, + 7, + 14, + 21, + 21, + 28, + 28 + ], + "DATA": [ + "", + "MhRNxD4", + "3F9HBxK", + "aVd88fp", + "", + "3loZrRf", + "" + ] + } + ] + } + ] + }, + { + "count": 10, + "columns": [ + { + "name": "struct_nullable", + "count": 10, + "VALIDITY": [ + 0, + 1, + 1, + 0, + 1, + 0, + 0, + 1, + 1, + 1 + ], + "children": [ + { + "name": "f1", + "count": 10, + "VALIDITY": [ + 0, + 0, + 0, + 0, + 0, + 0, + 1, + 0, + 0, + 0 + ], + "DATA": [ + -2041500147, + 1715692943, + -35444996, + 1425496657, + 112765084, + 1760754983, + 413888857, + 
2039738337, + -1924327700, + 670528518 + ] + }, + { + "name": "f2", + "count": 10, + "VALIDITY": [ + 1, + 0, + 0, + 1, + 1, + 1, + 1, + 1, + 1, + 0 + ], + "OFFSET": [ + 0, + 7, + 7, + 7, + 14, + 21, + 28, + 35, + 42, + 49, + 49 + ], + "DATA": [ + "AS5oARE", + "", + "", + "JGdagcX", + "78SLiRw", + "vbGf7OY", + "5uh5fTs", + "0ilsf82", + "LjS9MbU", + "" + ] + } + ] + } + ] + } + ] +} diff --git a/src/arrow/docs/source/index.rst b/src/arrow/docs/source/index.rst new file mode 100644 index 000000000..90d6ac09b --- /dev/null +++ b/src/arrow/docs/source/index.rst @@ -0,0 +1,96 @@ +.. Licensed to the Apache Software Foundation (ASF) under one +.. or more contributor license agreements. See the NOTICE file +.. distributed with this work for additional information +.. regarding copyright ownership. The ASF licenses this file +.. to you under the Apache License, Version 2.0 (the +.. "License"); you may not use this file except in compliance +.. with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, +.. software distributed under the License is distributed on an +.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +.. KIND, either express or implied. See the License for the +.. specific language governing permissions and limitations +.. under the License. + +Apache Arrow +============ + +Apache Arrow is a development platform for in-memory analytics. It contains a +set of technologies that enable big data systems to process and move data +fast. It specifies a standardized language-independent columnar memory format +for flat and hierarchical data, organized for efficient analytic operations on +modern hardware. + +The project is developing a multi-language collection of libraries for solving +systems problems related to in-memory analytical data processing. This includes +such topics as: + +* Zero-copy shared memory and RPC-based data movement +* Reading and writing file formats (like CSV, Apache ORC, and Apache Parquet) +* In-memory analytics and query processing + +**To learn how to use Arrow refer to the documentation specific to your +target environment.** + +.. _toc.usage: + +.. toctree:: + :maxdepth: 1 + :caption: Supported Environments + + C/GLib <c_glib/index> + C++ <cpp/index> + C# <https://github.com/apache/arrow/blob/master/csharp/README.md> + Go <https://godoc.org/github.com/apache/arrow/go/arrow> + Java <java/index> + JavaScript <js/index> + Julia <https://github.com/apache/arrow/blob/master/julia/Arrow/README.md> + MATLAB <https://github.com/apache/arrow/blob/master/matlab/README.md> + Python <python/index> + R <r/index> + Ruby <https://github.com/apache/arrow/blob/master/ruby/README.md> + Rust <https://docs.rs/crate/arrow/> + status + +.. _toc.cookbook: + +.. toctree:: + :maxdepth: 1 + :caption: Cookbooks + + C++ <https://arrow.apache.org/cookbook/cpp/> + Python <https://arrow.apache.org/cookbook/py/> + R <https://arrow.apache.org/cookbook/r/> + +.. _toc.columnar: + +.. toctree:: + :maxdepth: 2 + :caption: Specifications and Protocols + + format/Versioning + format/Columnar + format/Flight + format/Integration + format/CDataInterface + format/CStreamInterface + format/Other + +.. _toc.development: + +.. 
toctree::
+   :maxdepth: 2
+   :caption: Development
+
+   developers/contributing
+   developers/cpp/index
+   developers/python
+   developers/archery
+   developers/crossbow
+   developers/docker
+   developers/benchmarks
+   developers/documentation
+   developers/computeir
diff --git a/src/arrow/docs/source/java/algorithm.rst b/src/arrow/docs/source/java/algorithm.rst
new file mode 100644
index 000000000..f838398af
--- /dev/null
+++ b/src/arrow/docs/source/java/algorithm.rst
@@ -0,0 +1,92 @@
+.. Licensed to the Apache Software Foundation (ASF) under one
+.. or more contributor license agreements. See the NOTICE file
+.. distributed with this work for additional information
+.. regarding copyright ownership. The ASF licenses this file
+.. to you under the Apache License, Version 2.0 (the
+.. "License"); you may not use this file except in compliance
+.. with the License. You may obtain a copy of the License at
+
+.. http://www.apache.org/licenses/LICENSE-2.0
+
+.. Unless required by applicable law or agreed to in writing,
+.. software distributed under the License is distributed on an
+.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+.. KIND, either express or implied. See the License for the
+.. specific language governing permissions and limitations
+.. under the License.
+
+Java Algorithms
+===============
+
+Arrow's Java library provides algorithms for some commonly-used
+functionalities. The algorithms are provided in the ``org.apache.arrow.algorithm``
+package of the ``algorithm`` module.
+
+Comparing Vector Elements
+-------------------------
+
+Comparing vector elements is the basis for many algorithms. Vector
+elements can be compared in one of two ways:
+
+1. **Equality comparison**: there are two possible results for this type of comparison: ``equal`` and ``unequal``.
+Currently, this type of comparison is supported through the ``org.apache.arrow.vector.compare.VectorValueEqualizer``
+interface.
+
+2. **Ordering comparison**: there are three possible results for this type of comparison: ``less than``, ``equal to``
+and ``greater than``. This comparison is supported by the abstract class ``org.apache.arrow.algorithm.sort.VectorValueComparator``.
+
+We provide default implementations to compare vector elements. However, users can also define
+customized comparisons.
+
+Vector Element Search
+---------------------
+
+A search algorithm tries to find a particular value in a vector. When successful, a vector index is
+returned; otherwise, ``-1`` is returned. The following search algorithms are provided:
+
+1. **Linear search**: this algorithm simply traverses the vector from the beginning, until a match is
+found, or the end of the vector is reached. So it takes ``O(n)`` time, where ``n`` is the number of elements
+in the vector. This algorithm is implemented in ``org.apache.arrow.algorithm.search.VectorSearcher#linearSearch``.
+
+2. **Binary search**: this is a more efficient search algorithm, as it runs in ``O(log(n))`` time.
+However, it is only applicable to sorted vectors. To get a sorted vector,
+one can use one of our sorting algorithms, which will be discussed in the next section. This algorithm
+is implemented in ``org.apache.arrow.algorithm.search.VectorSearcher#binarySearch``.
+
+3. **Parallel search**: when the vector is large, it takes a long time to traverse the elements to search
+for a value. To make this process faster, one can split the vector into multiple partitions, and perform the
+search for each partition in parallel. 
This is supported by ``org.apache.arrow.algorithm.search.ParallelSearcher``. + +4. **Range search**: for many scenarios, there can be multiple matching values in the vector. +If the vector is sorted, the matching values reside in a contiguous region in the vector. The +range search algorithm tries to find the upper/lower bound of the region in ``O(log(n))`` time. +An implementation is provided in ``org.apache.arrow.algorithm.search.VectorRangeSearcher``. + +Vector Sorting +-------------- + +Given a vector, a sorting algorithm turns it into a sorted one. The sorting criteria must +be specified by some ordering comparison operation. The sorting algorithms can be +classified into the following categories: + +1. **In-place sorter**: an in-place sorter performs the sorting by manipulating the original +vector, without creating any new vector. So it just returns the original vector after the sorting operations. +Currently, we have ``org.apache.arrow.algorithm.sort.FixedWidthInPlaceVectorSorter`` for in-place +sorting in ``O(nlog(n))`` time. As the name suggests, it only supports fixed width vectors. + +2. **Out-of-place sorter**: an out-of-place sorter does not mutate the original vector. Instead, +it copies vector elements to a new vector in sorted order, and returns the new vector. +We have ``org.apache.arrow.algorithm.sort.FixedWidthInPlaceVectorSorter.FixedWidthOutOfPlaceVectorSorter`` +and ``org.apache.arrow.algorithm.sort.FixedWidthInPlaceVectorSorter.VariableWidthOutOfPlaceVectorSorter`` +for fixed width and variable width vectors, respectively. Both algorithms run in ``O(nlog(n))`` time. + +3. **Index sorter**: this sorter does not actually sort the vector. Instead, it returns an integer +vector, which correspond to indices of vector elements in sorted order. With the index vector, one can +easily construct a sorted vector. In addition, some other tasks can be easily achieved, like finding the ``k``th +smallest value in the vector. Index sorting is supported by ``org.apache.arrow.algorithm.sort.IndexSorter``, +which runs in ``O(nlog(n))`` time. It is applicable to vectors of any type. + +Other Algorithms +---------------- + +Other algorithms include vector deduplication, dictionary encoding, etc., in the ``algorithm`` module. diff --git a/src/arrow/docs/source/java/index.rst b/src/arrow/docs/source/java/index.rst new file mode 100644 index 000000000..65a7a3a4f --- /dev/null +++ b/src/arrow/docs/source/java/index.rst @@ -0,0 +1,31 @@ +.. Licensed to the Apache Software Foundation (ASF) under one +.. or more contributor license agreements. See the NOTICE file +.. distributed with this work for additional information +.. regarding copyright ownership. The ASF licenses this file +.. to you under the Apache License, Version 2.0 (the +.. "License"); you may not use this file except in compliance +.. with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, +.. software distributed under the License is distributed on an +.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +.. KIND, either express or implied. See the License for the +.. specific language governing permissions and limitations +.. under the License. + +Java Implementation +=================== + +This is the documentation of the Java API of Apache Arrow. For more details +on the Arrow format and other language bindings see the :doc:`parent documentation <../index>`. + +.. 
toctree:: + :maxdepth: 2 + + vector + vector_schema_root + ipc + algorithm + Reference (javadoc) <reference/index> diff --git a/src/arrow/docs/source/java/ipc.rst b/src/arrow/docs/source/java/ipc.rst new file mode 100644 index 000000000..7cab480c4 --- /dev/null +++ b/src/arrow/docs/source/java/ipc.rst @@ -0,0 +1,187 @@ +.. Licensed to the Apache Software Foundation (ASF) under one +.. or more contributor license agreements. See the NOTICE file +.. distributed with this work for additional information +.. regarding copyright ownership. The ASF licenses this file +.. to you under the Apache License, Version 2.0 (the +.. "License"); you may not use this file except in compliance +.. with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, +.. software distributed under the License is distributed on an +.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +.. KIND, either express or implied. See the License for the +.. specific language governing permissions and limitations +.. under the License. + +=========================== +Reading/Writing IPC formats +=========================== +Arrow defines two types of binary formats for serializing record batches: + +* **Streaming format**: for sending an arbitrary number of record + batches. The format must be processed from start to end, and does not support + random access + +* **File or Random Access format**: for serializing a fixed number of record + batches. It supports random access, and thus is very useful when used with + memory maps + +Writing and Reading Streaming Format +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +First, let's populate a :class:`VectorSchemaRoot` with a small batch of records + +.. code-block:: Java + + BitVector bitVector = new BitVector("boolean", allocator); + VarCharVector varCharVector = new VarCharVector("varchar", allocator); + for (int i = 0; i < 10; i++) { + bitVector.setSafe(i, i % 2 == 0 ? 0 : 1); + varCharVector.setSafe(i, ("test" + i).getBytes(StandardCharsets.UTF_8)); + } + bitVector.setValueCount(10); + varCharVector.setValueCount(10); + + List<Field> fields = Arrays.asList(bitVector.getField(), varCharVector.getField()); + List<FieldVector> vectors = Arrays.asList(bitVector, varCharVector); + VectorSchemaRoot root = new VectorSchemaRoot(fields, vectors); + +Now, we can begin writing a stream containing some number of these batches. For this we use :class:`ArrowStreamWriter` +(DictionaryProvider used for any vectors that are dictionary encoded is optional and can be null)):: + + ByteArrayOutputStream out = new ByteArrayOutputStream(); + ArrowStreamWriter writer = new ArrowStreamWriter(root, /*DictionaryProvider=*/null, Channels.newChannel(out)); + + +Here we used an in-memory stream, but this could have been a socket or some other IO stream. Then we can do + +.. code-block:: Java + + writer.start(); + // write the first batch + writer.writeBatch(); + + // write another four batches. + for (int i = 0; i < 4; i++) { + // populate VectorSchemaRoot data and write the second batch + BitVector childVector1 = (BitVector)root.getVector(0); + VarCharVector childVector2 = (VarCharVector)root.getVector(1); + childVector1.reset(); + childVector2.reset(); + ... 
do some populate work here, could be different for each batch + writer.writeBatch(); + } + + // end + writer.end(); + +Note since the :class:`VectorSchemaRoot` in writer is a container that can hold batches, batches flow through +:class:`VectorSchemaRoot` as part of a pipeline, so we need to populate data before `writeBatch` so that later batches +could overwrite previous ones. + +Now the :class:`ByteArrayOutputStream` contains the complete stream which contains 5 record batches. +We can read such a stream with :class:`ArrowStreamReader`, note that :class:`VectorSchemaRoot` within +reader will be loaded with new values on every call to :class:`loadNextBatch()` + +.. code-block:: Java + + try (ArrowStreamReader reader = new ArrowStreamReader(new ByteArrayInputStream(out.toByteArray()), allocator)) { + Schema schema = reader.getVectorSchemaRoot().getSchema(); + for (int i = 0; i < 5; i++) { + // This will be loaded with new values on every call to loadNextBatch + VectorSchemaRoot readBatch = reader.getVectorSchemaRoot(); + reader.loadNextBatch(); + ... do something with readBatch + } + + } + +Here we also give a simple example with dictionary encoded vectors + +.. code-block:: Java + + DictionaryProvider.MapDictionaryProvider provider = new DictionaryProvider.MapDictionaryProvider(); + // create dictionary and provider + final VarCharVector dictVector = new VarCharVector("dict", allocator); + dictVector.allocateNewSafe(); + dictVector.setSafe(0, "aa".getBytes()); + dictVector.setSafe(1, "bb".getBytes()); + dictVector.setSafe(2, "cc".getBytes()); + dictVector.setValueCount(3); + + Dictionary dictionary = + new Dictionary(dictVector, new DictionaryEncoding(1L, false, /*indexType=*/null)); + provider.put(dictionary); + + // create vector and encode it + final VarCharVector vector = new VarCharVector("vector", allocator); + vector.allocateNewSafe(); + vector.setSafe(0, "bb".getBytes()); + vector.setSafe(1, "bb".getBytes()); + vector.setSafe(2, "cc".getBytes()); + vector.setSafe(3, "aa".getBytes()); + vector.setValueCount(4); + + // get the encoded vector + IntVector encodedVector = (IntVector) DictionaryEncoder.encode(vector, dictionary); + + // create VectorSchemaRoot + List<Field> fields = Arrays.asList(encodedVector.getField()); + List<FieldVector> vectors = Arrays.asList(encodedVector); + VectorSchemaRoot root = new VectorSchemaRoot(fields, vectors); + + // write data + ByteArrayOutputStream out = new ByteArrayOutputStream(); + ArrowStreamWriter writer = new ArrowStreamWriter(root, provider, Channels.newChannel(out)); + writer.start(); + writer.writeBatch(); + writer.end(); + + // read data + try (ArrowStreamReader reader = new ArrowStreamReader(new ByteArrayInputStream(out.toByteArray()), allocator)) { + reader.loadNextBatch(); + VectorSchemaRoot readRoot = reader.getVectorSchemaRoot(); + // get the encoded vector + IntVector intVector = (IntVector) readRoot.getVector(0); + + // get dictionaries and decode the vector + Map<Long, Dictionary> dictionaryMap = reader.getDictionaryVectors(); + long dictionaryId = intVector.getField().getDictionary().getId(); + VarCharVector varCharVector = + (VarCharVector) DictionaryEncoder.decode(intVector, dictionaryMap.get(dictionaryId)); + + } + +Writing and Reading Random Access Files +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +The :class:`ArrowFileWriter` has the same API as :class:`ArrowStreamWriter` + +.. 
code-block:: Java + + ByteArrayOutputStream out = new ByteArrayOutputStream(); + ArrowFileWriter writer = new ArrowFileWriter(root, null, Channels.newChannel(out)); + writer.start(); + // write the first batch + writer.writeBatch(); + // write another four batches. + for (int i = 0; i < 4; i++) { + ... do populate work + writer.writeBatch(); + } + writer.end(); + +The difference between :class:`ArrowFileReader` and :class:`ArrowStreamReader` is that the input source +must have a ``seek`` method for random access. Because we have access to the entire payload, we know the +number of record batches in the file, and can read any at random + +.. code-block:: Java + + try (ArrowFileReader reader = new ArrowFileReader( + new ByteArrayReadableSeekableByteChannel(out.toByteArray()), allocator)) { + + // read the 4-th batch + ArrowBlock block = reader.getRecordBlocks().get(3); + reader.loadRecordBatch(block); + VectorSchemaRoot readBatch = reader.getVectorSchemaRoot(); + } diff --git a/src/arrow/docs/source/java/reference/index.rst b/src/arrow/docs/source/java/reference/index.rst new file mode 100644 index 000000000..523ac0c7f --- /dev/null +++ b/src/arrow/docs/source/java/reference/index.rst @@ -0,0 +1,21 @@ +.. Licensed to the Apache Software Foundation (ASF) under one +.. or more contributor license agreements. See the NOTICE file +.. distributed with this work for additional information +.. regarding copyright ownership. The ASF licenses this file +.. to you under the Apache License, Version 2.0 (the +.. "License"); you may not use this file except in compliance +.. with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, +.. software distributed under the License is distributed on an +.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +.. KIND, either express or implied. See the License for the +.. specific language governing permissions and limitations +.. under the License. + +Java Reference (javadoc) +======================== + +Stub page for the Java reference docs; actual source is located in the java/ directory. diff --git a/src/arrow/docs/source/java/vector.rst b/src/arrow/docs/source/java/vector.rst new file mode 100644 index 000000000..ece07d0a7 --- /dev/null +++ b/src/arrow/docs/source/java/vector.rst @@ -0,0 +1,288 @@ +.. Licensed to the Apache Software Foundation (ASF) under one +.. or more contributor license agreements. See the NOTICE file +.. distributed with this work for additional information +.. regarding copyright ownership. The ASF licenses this file +.. to you under the Apache License, Version 2.0 (the +.. "License"); you may not use this file except in compliance +.. with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, +.. software distributed under the License is distributed on an +.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +.. KIND, either express or implied. See the License for the +.. specific language governing permissions and limitations +.. under the License. + +=========== +ValueVector +=========== + +:class:`ValueVector` interface (which called Array in C++ implementation and +the :doc:`the specification <../format/Columnar>`) is an abstraction that is used to store a +sequence of values having the same type in an individual column. 
Internally, those values are
+represented by one or several buffers, the number and meaning of which depend on the vector’s data type.
+
+There are concrete subclasses of :class:`ValueVector` for each primitive data type
+and nested type described in the specification. There are a few differences between these
+class names and the type names used in the specification; for example, the vector of
+64-bit integers is called ``BigIntVector``.
+
+It is important that a vector is allocated before attempting to read or write to it.
+Code using a :class:`ValueVector` should strive to follow this order of operations:
+create > allocate > mutate > set value count > access > clear (or allocate again to start the process over).
+We will go through a concrete example to demonstrate each operation in the next section.
+
+Vector Life Cycle
+=================
+
+As discussed above, each vector goes through several steps in its life cycle,
+and each step is triggered by a vector operation. In particular, we have the following vector operations:
+
+1. **Vector creation**: we create a new vector object by, for example, calling the vector constructor.
+The following code creates a new ``IntVector`` via the constructor:
+
+.. code-block:: Java
+
+    RootAllocator allocator = new RootAllocator(Long.MAX_VALUE);
+    ...
+    IntVector vector = new IntVector("int vector", allocator);
+
+At this point, a vector object has been created. However, no underlying memory has been allocated yet,
+so we need the following step.
+
+2. **Vector allocation**: in this step, we allocate memory for the vector. For most vectors, we
+have two options: 1) if we know the maximum vector capacity, we can specify it by calling the
+``allocateNew(int)`` method; 2) otherwise, we should call the ``allocateNew()`` method, and a default
+capacity will be allocated. For our running example, we assume that the vector capacity never
+exceeds 10:
+
+.. code-block:: Java
+
+    vector.allocateNew(10);
+
+3. **Vector mutation**: now we can populate the vector with the values we desire. For all vectors, we can populate
+vector values through vector writers (an example will be given in the next section). For primitive types,
+we can also mutate the vector through the set methods. There are two classes of set methods: 1) if we can
+be sure the vector has enough capacity, we can call the ``set(index, value)`` method; 2) if we are not sure
+about the vector capacity, we should call the ``setSafe(index, value)`` method, which automatically
+takes care of vector reallocation if the capacity is not sufficient. For our running example, we know the
+vector has enough capacity, so we can call
+
+.. code-block:: Java
+
+    vector.set(/*index*/5, /*value*/25);
+
+4. **Set value count**: for this step, we set the value count of the vector by calling the
+``setValueCount(int)`` method:
+
+.. code-block:: Java
+
+    vector.setValueCount(10);
+
+After this step, the vector enters an immutable state. In other words, we should no longer mutate it.
+(Unless we reuse the vector by allocating it again. This will be discussed shortly.)
+
+5. **Vector access**: it is time to access vector values. Similarly, we have two options to access values:
+1) get methods and 2) vector readers. Vector readers work for all types of vectors, while get methods are
+only available for primitive vectors. A concrete example of using a vector reader will be given in the next
+section. Below is an example of vector access through the get method:
+
+.. code-block:: Java
+
+    int value = vector.get(5); // value == 25
+
+6. **Vector clear**: when we are done with the vector, we should clear it to release its memory. This is done by
+calling the ``close()`` method:
+
+.. code-block:: Java
+
+    vector.close();
+
+Some points to note about the steps above:
+
+* The steps are not necessarily performed in a linear sequence. Instead, they can be in a loop. For example,
+  when a vector enters the access step, we can also go back to the vector mutation step, and then set the value
+  count, access the vector, and so on.
+
+* We should try to make sure the above steps are carried out in order. Otherwise, the vector
+  may be in an undefined state, and some unexpected behavior may occur. However, this restriction
+  is not strict. That means it is possible that we violate the order above, but still get
+  correct results.
+
+* When mutating vector values through set methods, we should prefer ``set(index, value)`` methods to
+  ``setSafe(index, value)`` methods whenever possible, to avoid the unnecessary performance overhead of handling
+  vector capacity.
+
+* All vectors implement the ``AutoCloseable`` interface. So they must be closed explicitly when they are
+  no longer used, to avoid resource leaks. To make sure of this, it is recommended to place vector related
+  operations into a try-with-resources block (a complete sketch is given at the end of this section).
+
+* For fixed width vectors (e.g. IntVector), we can set values at different indices in arbitrary order.
+  For variable width vectors (e.g. VarCharVector), however, we must set values in non-decreasing order of the
+  indices. Otherwise, the values after the set position will become invalid. For example, suppose we use the
+  following statements to populate a variable width vector:
+
+.. code-block:: Java
+
+    VarCharVector vector = new VarCharVector("vector", allocator);
+    vector.allocateNew();
+    vector.setSafe(0, "zero".getBytes(StandardCharsets.UTF_8));
+    vector.setSafe(1, "one".getBytes(StandardCharsets.UTF_8));
+    ...
+    vector.setSafe(9, "nine".getBytes(StandardCharsets.UTF_8));
+
+Then we set the value at position 5 again:
+
+.. code-block:: Java
+
+    vector.setSafe(5, "5".getBytes(StandardCharsets.UTF_8));
+
+After that, the values at positions 6, 7, 8, and 9 of the vector will become invalid.
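+
+To tie the steps together, below is a minimal sketch of the full life cycle using only the
+APIs shown above (the vector name and the values are arbitrary). The try-with-resources block
+performs the final clear step automatically:
+
+.. code-block:: Java
+
+    try (BufferAllocator allocator = new RootAllocator(Long.MAX_VALUE);
+         IntVector vector = new IntVector("int vector", allocator)) {   // create
+      vector.allocateNew(10);                                           // allocate
+      for (int i = 0; i < 10; i++) {
+        vector.set(i, i * 2);                                           // mutate
+      }
+      vector.setValueCount(10);                                         // set value count
+      for (int i = 0; i < 10; i++) {
+        if (!vector.isNull(i)) {
+          System.out.println(vector.get(i));                            // access
+        }
+      }
+    }                                                                   // clear (via close())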
+
+Building ValueVector
+====================
+
+Note that the current implementation doesn't enforce the rule that Arrow objects are immutable.
+:class:`ValueVector` instances can be created directly by using the new keyword; there are
+set/setSafe APIs and concrete subclasses of FieldWriter for populating values.
+
+For example, the code below shows how to build a :class:`BigIntVector`. In this case, we build a
+vector of the range 0 to 7 where the element that should hold the fourth value is nulled:
+
+.. code-block:: Java
+
+    try (BufferAllocator allocator = new RootAllocator(Long.MAX_VALUE);
+         BigIntVector vector = new BigIntVector("vector", allocator)) {
+      vector.allocateNew(8);
+      vector.set(0, 1);
+      vector.set(1, 2);
+      vector.set(2, 3);
+      vector.setNull(3);
+      vector.set(4, 5);
+      vector.set(5, 6);
+      vector.set(6, 7);
+      vector.set(7, 8);
+      vector.setValueCount(8); // this will finalize the vector by convention.
+      ...
+    }
+
+The :class:`BigIntVector` holds two ArrowBufs. The first buffer holds the null bitmap, which consists
+here of a single byte with the bits 1|1|1|1|0|1|1|1 (the bit is 1 if the value is non-null).
+The second buffer contains all the above values. As the fourth entry is null, the value at that position
+in the buffer is undefined. Note that, compared with the set API, the setSafe API checks the value capacity
+before setting values and reallocates buffers if necessary.
+
+Here is how to build a vector using a writer
+
+.. 
code-block:: Java + + try (BigIntVector vector = new BigIntVector("vector", allocator); + BigIntWriter writer = new BigIntWriterImpl(vector)) { + writer.setPosition(0); + writer.writeBigInt(1); + writer.setPosition(1); + writer.writeBigInt(2); + writer.setPosition(2); + writer.writeBigInt(3); + // writer.setPosition(3) is not called which means the forth value is null. + writer.setPosition(4); + writer.writeBigInt(5); + writer.setPosition(5); + writer.writeBigInt(6); + writer.setPosition(6); + writer.writeBigInt(7); + writer.setPosition(7); + writer.writeBigInt(8); + } + +There are get API and concrete subclasses of :class:`FieldReader` for accessing vector values, what needs +to be declared is that writer/reader is not as efficient as direct access + +.. code-block:: Java + + // access via get API + for (int i = 0; i < vector.getValueCount(); i++) { + if (!vector.isNull(i)) { + System.out.println(vector.get(i)); + } + } + + // access via reader + BigIntReader reader = vector.getReader(); + for (int i = 0; i < vector.getValueCount(); i++) { + reader.setPosition(i); + if (reader.isSet()) { + System.out.println(reader.readLong()); + } + } + +Building ListVector +=================== + +A :class:`ListVector` is a vector that holds a list of values for each index. Working with one you need to handle the same steps as mentioned above (create > allocate > mutate > set value count > access > clear), but the details of how you accomplish this are slightly different since you need to both create the vector and set the list of values for each index. + +For example, the code below shows how to build a :class:`ListVector` of int's using the writer :class:`UnionListWriter`. We build a vector from 0 to 9 and each index contains a list with values [[0, 0, 0, 0, 0], [0, 1, 2, 3, 4], [0, 2, 4, 6, 8], …, [0, 9, 18, 27, 36]]. List values can be added in any order so writing a list such as [3, 1, 2] would be just as valid. + +.. code-block:: Java + + try (BufferAllocator allocator = new RootAllocator(Long.MAX_VALUE); + ListVector listVector = ListVector.empty("vector", allocator)) { + UnionListWriter writer = listVector.getWriter(); + for (int i = 0; i < 10; i++) { + writer.startList(); + writer.setPosition(i); + for (int j = 0; j < 5; j++) { + writer.writeInt(j * i); + } + writer.setValueCount(5); + writer.endList(); + } + listVector.setValueCount(10); + } + +:class:`ListVector` values can be accessed either through the get API or through the reader class :class:`UnionListReader`. To read all the values, first enumerate through the indexes, and then enumerate through the inner list values. + +.. code-block:: Java + + // access via get API + for (int i = 0; i < listVector.getValueCount(); i++) { + if (!listVector.isNull(i)) { + ArrayList<Integer> elements = (ArrayList<Integer>) listVector.getObject(i); + for (Integer element : elements) { + System.out.println(element); + } + } + } + + // access via reader + UnionListReader reader = listVector.getReader(); + for (int i = 0; i < listVector.getValueCount(); i++) { + reader.setPosition(i); + while (reader.next()) { + IntReader intReader = reader.reader(); + if (intReader.isSet()) { + System.out.println(intReader.readInteger()); + } + } + } + +Slicing +======= + +Similar with C++ implementation, it is possible to make zero-copy slices of vectors to obtain a vector +referring to some logical sub-sequence of the data through :class:`TransferPair` + +.. 
code-block:: Java + + IntVector vector = new IntVector("intVector", allocator); + for (int i = 0; i < 10; i++) { + vector.setSafe(i, i); + } + vector.setValueCount(10); + + TransferPair tp = vector.getTransferPair(allocator); + tp.splitAndTransfer(0, 5); + IntVector sliced = (IntVector) tp.getTo(); + // In this case, the vector values are [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] and the sliceVector values are [0, 1, 2, 3, 4]. diff --git a/src/arrow/docs/source/java/vector_schema_root.rst b/src/arrow/docs/source/java/vector_schema_root.rst new file mode 100644 index 000000000..7f787d9d5 --- /dev/null +++ b/src/arrow/docs/source/java/vector_schema_root.rst @@ -0,0 +1,74 @@ +.. Licensed to the Apache Software Foundation (ASF) under one +.. or more contributor license agreements. See the NOTICE file +.. distributed with this work for additional information +.. regarding copyright ownership. The ASF licenses this file +.. to you under the Apache License, Version 2.0 (the +.. "License"); you may not use this file except in compliance +.. with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, +.. software distributed under the License is distributed on an +.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +.. KIND, either express or implied. See the License for the +.. specific language governing permissions and limitations +.. under the License. + +================ +VectorSchemaRoot +================ +A :class:`VectorSchemaRoot` is a container that can hold batches, batches flow through :class:`VectorSchemaRoot` +as part of a pipeline. Note this is different from other implementations (i.e. in C++ and Python, +a :class:`RecordBatch` is a collection of equal-length vector instances and was created each time for a new batch). + +The recommended usage for :class:`VectorSchemaRoot` is creating a single :class:`VectorSchemaRoot` +based on the known schema and populated data over and over into the same VectorSchemaRoot in a stream +of batches rather than creating a new :class:`VectorSchemaRoot` instance each time +(see `Numba <https://github.com/apache/arrow/tree/master/java/flight/src/main/java/org/apache/arrow/flight>`_ or +``ArrowFileWriter`` for better understanding). Thus at any one point a VectorSchemaRoot may have data or +may have no data (say it was transferred downstream or not yet populated). + + +Here is the example of building a :class:`VectorSchemaRoot` + +.. code-block:: Java + + BitVector bitVector = new BitVector("boolean", allocator); + VarCharVector varCharVector = new VarCharVector("varchar", allocator); + bitVector.allocateNew(); + varCharVector.allocateNew(); + for (int i = 0; i < 10; i++) { + bitVector.setSafe(i, i % 2 == 0 ? 0 : 1); + varCharVector.setSafe(i, ("test" + i).getBytes(StandardCharsets.UTF_8)); + } + bitVector.setValueCount(10); + varCharVector.setValueCount(10); + + List<Field> fields = Arrays.asList(bitVector.getField(), varCharVector.getField()); + List<FieldVector> vectors = Arrays.asList(bitVector, varCharVector); + VectorSchemaRoot vectorSchemaRoot = new VectorSchemaRoot(fields, vectors); + +The vectors within a :class:`VectorSchemaRoot` could be loaded/unloaded via :class:`VectorLoader` and :class:`VectorUnloader`. +:class:`VectorLoader` and :class:`VectorUnloader` handles converting between :class:`VectorSchemaRoot` and :class:`ArrowRecordBatch`( +representation of a RecordBatch :doc:`IPC <../format/IPC.rst>` message). Examples as below + +.. 
code-block:: Java + + // create a VectorSchemaRoot root1 and convert its data into recordBatch + VectorSchemaRoot root1 = new VectorSchemaRoot(fields, vectors); + VectorUnloader unloader = new VectorUnloader(root1); + ArrowRecordBatch recordBatch = unloader.getRecordBatch(); + + // create a VectorSchemaRoot root2 and load the recordBatch + VectorSchemaRoot root2 = VectorSchemaRoot.create(root1.getSchema(), allocator); + VectorLoader loader = new VectorLoader(root2); + loader.load(recordBatch); + +A new :class:`VectorSchemaRoot` could be sliced from an existing instance with zero-copy + +.. code-block:: Java + + // 0 indicates start index (inclusive) and 5 indicated length (exclusive). + VectorSchemaRoot newRoot = vectorSchemaRoot.slice(0, 5); + diff --git a/src/arrow/docs/source/js/index.rst b/src/arrow/docs/source/js/index.rst new file mode 100644 index 000000000..77813c137 --- /dev/null +++ b/src/arrow/docs/source/js/index.rst @@ -0,0 +1,21 @@ +.. Licensed to the Apache Software Foundation (ASF) under one +.. or more contributor license agreements. See the NOTICE file +.. distributed with this work for additional information +.. regarding copyright ownership. The ASF licenses this file +.. to you under the Apache License, Version 2.0 (the +.. "License"); you may not use this file except in compliance +.. with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, +.. software distributed under the License is distributed on an +.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +.. KIND, either express or implied. See the License for the +.. specific language governing permissions and limitations +.. under the License. + +JavaScript docs +=============== + +Stub page for the JavaScript docs; actual source is located in js/ sub-directory. diff --git a/src/arrow/docs/source/python/api.rst b/src/arrow/docs/source/python/api.rst new file mode 100644 index 000000000..12cf4e068 --- /dev/null +++ b/src/arrow/docs/source/python/api.rst @@ -0,0 +1,40 @@ +.. Licensed to the Apache Software Foundation (ASF) under one +.. or more contributor license agreements. See the NOTICE file +.. distributed with this work for additional information +.. regarding copyright ownership. The ASF licenses this file +.. to you under the Apache License, Version 2.0 (the +.. "License"); you may not use this file except in compliance +.. with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, +.. software distributed under the License is distributed on an +.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +.. KIND, either express or implied. See the License for the +.. specific language governing permissions and limitations +.. under the License. + +.. _api: + +************* +API Reference +************* + +.. toctree:: + :maxdepth: 2 + + api/datatypes + api/arrays + api/memory + api/compute + api/files + api/tables + api/ipc + api/flight + api/formats + api/filesystems + api/dataset + api/plasma + api/cuda + api/misc diff --git a/src/arrow/docs/source/python/api/arrays.rst b/src/arrow/docs/source/python/api/arrays.rst new file mode 100644 index 000000000..dbc4c0bd1 --- /dev/null +++ b/src/arrow/docs/source/python/api/arrays.rst @@ -0,0 +1,127 @@ +.. Licensed to the Apache Software Foundation (ASF) under one +.. or more contributor license agreements. See the NOTICE file +.. 
distributed with this work for additional information +.. regarding copyright ownership. The ASF licenses this file +.. to you under the Apache License, Version 2.0 (the +.. "License"); you may not use this file except in compliance +.. with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, +.. software distributed under the License is distributed on an +.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +.. KIND, either express or implied. See the License for the +.. specific language governing permissions and limitations +.. under the License. + +.. _api.array: +.. currentmodule:: pyarrow + +Arrays and Scalars +================== + +Factory Functions +----------------- + +These functions create new Arrow arrays: + +.. autosummary:: + :toctree: ../generated/ + + array + nulls + +Array Types +----------- + +An array's Python class depends on its data type. Concrete array classes +may expose data type-specific methods or properties. + +.. autosummary:: + :toctree: ../generated/ + + Array + BooleanArray + FloatingPointArray + IntegerArray + Int8Array + Int16Array + Int32Array + Int64Array + NullArray + NumericArray + UInt8Array + UInt16Array + UInt32Array + UInt64Array + BinaryArray + StringArray + FixedSizeBinaryArray + LargeBinaryArray + LargeStringArray + Time32Array + Time64Array + Date32Array + Date64Array + TimestampArray + DurationArray + MonthDayNanoIntervalArray + Decimal128Array + DictionaryArray + ListArray + FixedSizeListArray + LargeListArray + StructArray + UnionArray + ExtensionArray + +.. _api.scalar: + +Scalars +------- + +This function constructs a new Arrow scalar: + +.. autosummary:: + :toctree: ../generated/ + + scalar + +A scalar's python class depends on its data type. Concrete scalar +classes may expose data type-specific methods or properties. + +.. autosummary:: + :toctree: ../generated/ + + NA + Scalar + BooleanScalar + Int8Scalar + Int16Scalar + Int32Scalar + Int64Scalar + UInt8Scalar + UInt16Scalar + UInt32Scalar + UInt64Scalar + FloatScalar + DoubleScalar + BinaryScalar + StringScalar + FixedSizeBinaryScalar + LargeBinaryScalar + LargeStringScalar + Time32Scalar + Time64Scalar + Date32Scalar + Date64Scalar + TimestampScalar + DurationScalar + MonthDayNanoIntervalScalar + Decimal128Scalar + DictionaryScalar + ListScalar + LargeListScalar + StructScalar + UnionScalar diff --git a/src/arrow/docs/source/python/api/compute.rst b/src/arrow/docs/source/python/api/compute.rst new file mode 100644 index 000000000..521182f8a --- /dev/null +++ b/src/arrow/docs/source/python/api/compute.rst @@ -0,0 +1,498 @@ +.. Licensed to the Apache Software Foundation (ASF) under one +.. or more contributor license agreements. See the NOTICE file +.. distributed with this work for additional information +.. regarding copyright ownership. The ASF licenses this file +.. to you under the Apache License, Version 2.0 (the +.. "License"); you may not use this file except in compliance +.. with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, +.. software distributed under the License is distributed on an +.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +.. KIND, either express or implied. See the License for the +.. specific language governing permissions and limitations +.. under the License. + +.. _api.compute: +.. 
currentmodule:: pyarrow.compute + +Compute Functions +================= + +Aggregations +------------ + +.. autosummary:: + :toctree: ../generated/ + + all + any + approximate_median + count + count_distinct + index + max + mean + min + min_max + mode + product + quantile + stddev + sum + tdigest + variance + +Grouped Aggregations +-------------------- + +.. autosummary:: + :toctree: ../generated/ + + hash_all + hash_any + hash_approximate_median + hash_count + hash_count_distinct + hash_distinct + hash_max + hash_mean + hash_min + hash_min_max + hash_product + hash_stddev + hash_sum + hash_tdigest + hash_variance + +Arithmetic Functions +-------------------- + +By default these functions do not detect overflow. Most functions are also +available in an overflow-checking variant, suffixed ``_checked``, which +throws an ``ArrowInvalid`` exception when overflow is detected. + +.. autosummary:: + :toctree: ../generated/ + + abs + abs_checked + add + add_checked + divide + divide_checked + multiply + multiply_checked + negate + negate_checked + power + power_checked + sign + subtract + subtract_checked + +Bit-wise Functions +------------------ + +.. autosummary:: + :toctree: ../generated/ + + bit_wise_and + bit_wise_not + bit_wise_or + bit_wise_xor + shift_left + shift_left_checked + shift_right + shift_right_checked + +Rounding Functions +------------------ + +Rounding functions displace numeric inputs to an approximate value with a simpler +representation based on the rounding criterion. + +.. autosummary:: + :toctree: ../generated/ + + ceil + floor + round + round_to_multiple + trunc + +Logarithmic Functions +--------------------- + +Logarithmic functions are also supported, and also offer ``_checked`` +variants which detect domain errors. + +.. autosummary:: + :toctree: ../generated/ + + ln + ln_checked + log10 + log10_checked + log1p + log1p_checked + log2 + log2_checked + logb + logb_checked + +Trigonometric Functions +----------------------- + +Trigonometric functions are also supported, and also offer ``_checked`` +variants which detect domain errors where appropriate. + +.. autosummary:: + :toctree: ../generated/ + + acos + acos_checked + asin + asin_checked + atan + atan2 + cos + cos_checked + sin + sin_checked + tan + tan_checked + +Comparisons +----------- + +These functions expect two inputs of the same type. If one of the inputs is `null` +they return ``null``. + +.. autosummary:: + :toctree: ../generated/ + + equal + greater + greater_equal + less + less_equal + not_equal + +These functions take any number of arguments of a numeric or temporal type. + +.. autosummary:: + :toctree: ../generated/ + + max_element_wise + min_element_wise + +Logical Functions +----------------- + +These functions normally emit a null when one of the inputs is null. However, Kleene +logic variants are provided (suffixed ``_kleene``). See User Guide for details. + +.. autosummary:: + :toctree: ../generated/ + + and_ + and_kleene + and_not + and_not_kleene + invert + or_ + or_kleene + xor + +String Predicates +----------------- + +In these functions an empty string emits false in the output. For ASCII +variants (prefixed ``ascii_``) a string element with non-ASCII characters +emits false in the output. + +The first set of functions emit true if the input contains only +characters of a given class. + +.. 
autosummary:: + :toctree: ../generated/ + + ascii_is_alnum + ascii_is_alpha + ascii_is_decimal + ascii_is_lower + ascii_is_printable + ascii_is_space + ascii_is_upper + utf8_is_alnum + utf8_is_alpha + utf8_is_decimal + utf8_is_digit + utf8_is_lower + utf8_is_numeric + utf8_is_printable + utf8_is_space + utf8_is_upper + +The second set of functions also consider the order of characters +in the string element. + +.. autosummary:: + :toctree: ../generated/ + + ascii_is_title + utf8_is_title + +The third set of functions examines string elements on +a byte-by-byte basis. + +.. autosummary:: + :toctree: ../generated/ + + string_is_ascii + +String Transforms +----------------- + +.. autosummary:: + :toctree: ../generated/ + + ascii_capitalize + ascii_lower + ascii_reverse + ascii_swapcase + ascii_title + ascii_upper + binary_length + binary_replace_slice + replace_substring + replace_substring_regex + utf8_capitalize + utf8_length + utf8_lower + utf8_replace_slice + utf8_reverse + utf8_swapcase + utf8_title + utf8_upper + +String Padding +-------------- + +.. autosummary:: + :toctree: ../generated/ + + ascii_center + ascii_lpad + ascii_rpad + utf8_center + utf8_lpad + utf8_rpad + +String Trimming +--------------- + +.. autosummary:: + :toctree: ../generated/ + + ascii_ltrim + ascii_ltrim_whitespace + ascii_rtrim + ascii_rtrim_whitespace + ascii_trim + ascii_trim_whitespace + utf8_ltrim + utf8_ltrim_whitespace + utf8_rtrim + utf8_rtrim_whitespace + utf8_trim + utf8_trim_whitespace + +String Splitting +---------------- + +.. autosummary:: + :toctree: ../generated/ + + ascii_split_whitespace + split_pattern + split_pattern_regex + utf8_split_whitespace + +String Component Extraction +--------------------------- + +.. autosummary:: + :toctree: ../generated/ + + extract_regex + +String Joining +-------------- + +.. autosummary:: + :toctree: ../generated/ + + binary_join + binary_join_element_wise + +String Slicing +-------------- + +.. autosummary:: + :toctree: ../generated/ + + utf8_slice_codeunits + +Containment Tests +----------------- + +.. autosummary:: + :toctree: ../generated/ + + count_substring + count_substring_regex + ends_with + find_substring + find_substring_regex + index_in + is_in + match_like + match_substring + match_substring_regex + starts_with + +Categorizations +--------------- + +.. autosummary:: + :toctree: ../generated/ + + is_finite + is_inf + is_nan + is_null + is_valid + +Selecting / Multiplexing +------------------------ + +.. autosummary:: + :toctree: ../generated/ + + case_when + choose + coalesce + if_else + +Conversions +----------- + +.. autosummary:: + :toctree: ../generated/ + + cast + strftime + strptime + +Temporal Component Extraction +----------------------------- + +.. autosummary:: + :toctree: ../generated/ + + day + day_of_week + day_of_year + hour + iso_week + iso_year + iso_calendar + microsecond + millisecond + minute + month + nanosecond + quarter + second + subsecond + us_week + week + year + +Temporal Difference +------------------- + +.. autosummary:: + :toctree: ../generated/ + + day_time_interval_between + days_between + hours_between + microseconds_between + milliseconds_between + minutes_between + month_day_nano_interval_between + month_interval_between + nanoseconds_between + quarters_between + seconds_between + weeks_between + years_between + +Timezone Handling +----------------- + +.. autosummary:: + :toctree: ../generated/ + + assume_timezone + +Associative Transforms +---------------------- + +.. 
autosummary:: + :toctree: ../generated/ + + dictionary_encode + unique + value_counts + +Selections +---------- + +.. autosummary:: + :toctree: ../generated/ + + array_filter + array_take + drop_null + filter + take + +Sorts and Partitions +-------------------- + +.. autosummary:: + :toctree: ../generated/ + + array_sort_indices + partition_nth_indices + select_k_unstable + sort_indices + +Structural Transforms +--------------------- + +.. autosummary:: + :toctree: ../generated/ + + list_element + list_flatten + list_parent_indices + list_value_length + make_struct + replace_with_mask diff --git a/src/arrow/docs/source/python/api/cuda.rst b/src/arrow/docs/source/python/api/cuda.rst new file mode 100644 index 000000000..364f03240 --- /dev/null +++ b/src/arrow/docs/source/python/api/cuda.rst @@ -0,0 +1,62 @@ +.. Licensed to the Apache Software Foundation (ASF) under one +.. or more contributor license agreements. See the NOTICE file +.. distributed with this work for additional information +.. regarding copyright ownership. The ASF licenses this file +.. to you under the Apache License, Version 2.0 (the +.. "License"); you may not use this file except in compliance +.. with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, +.. software distributed under the License is distributed on an +.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +.. KIND, either express or implied. See the License for the +.. specific language governing permissions and limitations +.. under the License. + +.. currentmodule:: pyarrow.cuda + +CUDA Integration +================ + +.. ifconfig:: not cuda_enabled + + .. error:: + This documentation was built without CUDA enabled. The CUDA + API docs are not available. + +.. NOTE We still generate those API docs (with empty docstrings) +.. when CUDA is disabled and `pyarrow.cuda` mocked (see conf.py). +.. Otherwise we'd get autodoc warnings, see https://github.com/sphinx-doc/sphinx/issues/4770 + +CUDA Contexts +------------- + +.. autosummary:: + :toctree: ../generated/ + + Context + +CUDA Buffers +------------ + +.. autosummary:: + :toctree: ../generated/ + + CudaBuffer + new_host_buffer + HostBuffer + BufferReader + BufferWriter + +Serialization and IPC +--------------------- + +.. autosummary:: + :toctree: ../generated/ + + serialize_record_batch + read_record_batch + read_message + IpcMemHandle diff --git a/src/arrow/docs/source/python/api/dataset.rst b/src/arrow/docs/source/python/api/dataset.rst new file mode 100644 index 000000000..9718006ab --- /dev/null +++ b/src/arrow/docs/source/python/api/dataset.rst @@ -0,0 +1,64 @@ +.. Licensed to the Apache Software Foundation (ASF) under one +.. or more contributor license agreements. See the NOTICE file +.. distributed with this work for additional information +.. regarding copyright ownership. The ASF licenses this file +.. to you under the Apache License, Version 2.0 (the +.. "License"); you may not use this file except in compliance +.. with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, +.. software distributed under the License is distributed on an +.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +.. KIND, either express or implied. See the License for the +.. specific language governing permissions and limitations +.. under the License. + +.. 
currentmodule:: pyarrow.dataset + +.. _api.dataset: + +Dataset +======= + +.. warning:: + + The ``pyarrow.dataset`` module is experimental (specifically the classes), + and a stable API is not yet guaranteed. + +Factory functions +----------------- + +.. autosummary:: + :toctree: ../generated/ + + dataset + parquet_dataset + partitioning + field + scalar + write_dataset + +Classes +------- + +.. autosummary:: + :toctree: ../generated/ + + FileFormat + ParquetFileFormat + ORCFileFormat + IpcFileFormat + CsvFileFormat + Partitioning + PartitioningFactory + DirectoryPartitioning + HivePartitioning + Dataset + FileSystemDataset + FileSystemFactoryOptions + FileSystemDatasetFactory + UnionDataset + Scanner + Expression diff --git a/src/arrow/docs/source/python/api/datatypes.rst b/src/arrow/docs/source/python/api/datatypes.rst new file mode 100644 index 000000000..48a254a00 --- /dev/null +++ b/src/arrow/docs/source/python/api/datatypes.rst @@ -0,0 +1,165 @@ +.. Licensed to the Apache Software Foundation (ASF) under one +.. or more contributor license agreements. See the NOTICE file +.. distributed with this work for additional information +.. regarding copyright ownership. The ASF licenses this file +.. to you under the Apache License, Version 2.0 (the +.. "License"); you may not use this file except in compliance +.. with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, +.. software distributed under the License is distributed on an +.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +.. KIND, either express or implied. See the License for the +.. specific language governing permissions and limitations +.. under the License. + +.. _api.types: +.. currentmodule:: pyarrow + +Data Types and Schemas +====================== + +Factory Functions +----------------- + +These should be used to create Arrow data types and schemas. + +.. autosummary:: + :toctree: ../generated/ + + null + bool_ + int8 + int16 + int32 + int64 + uint8 + uint16 + uint32 + uint64 + float16 + float32 + float64 + time32 + time64 + timestamp + date32 + date64 + duration + month_day_nano_interval + binary + string + utf8 + large_binary + large_string + large_utf8 + decimal128 + list_ + large_list + map_ + struct + dictionary + field + schema + from_numpy_dtype + +Utility Functions +----------------- + +.. autosummary:: + :toctree: ../generated/ + + unify_schemas + +.. _api.type_classes: +.. currentmodule:: pyarrow + +Type Classes +------------ + +Do not instantiate these classes directly. Instead, call one of the factory +functions above. + +.. autosummary:: + :toctree: ../generated/ + + DataType + DictionaryType + ListType + MapType + StructType + UnionType + TimestampType + Time32Type + Time64Type + FixedSizeBinaryType + Decimal128Type + Field + Schema + +Specific classes and functions for extension types. + +.. autosummary:: + :toctree: ../generated/ + + ExtensionType + PyExtensionType + register_extension_type + unregister_extension_type + + +.. _api.types.checking: +.. currentmodule:: pyarrow.types + +Type Checking +------------- + +These functions are predicates to check whether a :class:`DataType` instance +represents a given data type (such as ``int32``) or general category +(such as "is a signed integer"). + +.. 
autosummary:: + :toctree: ../generated/ + + is_boolean + is_integer + is_signed_integer + is_unsigned_integer + is_int8 + is_int16 + is_int32 + is_int64 + is_uint8 + is_uint16 + is_uint32 + is_uint64 + is_floating + is_float16 + is_float32 + is_float64 + is_decimal + is_list + is_large_list + is_struct + is_union + is_nested + is_temporal + is_timestamp + is_date + is_date32 + is_date64 + is_time + is_time32 + is_time64 + is_null + is_binary + is_unicode + is_string + is_large_binary + is_large_unicode + is_large_string + is_fixed_size_binary + is_map + is_dictionary diff --git a/src/arrow/docs/source/python/api/files.rst b/src/arrow/docs/source/python/api/files.rst new file mode 100644 index 000000000..106dfde8a --- /dev/null +++ b/src/arrow/docs/source/python/api/files.rst @@ -0,0 +1,65 @@ +.. Licensed to the Apache Software Foundation (ASF) under one +.. or more contributor license agreements. See the NOTICE file +.. distributed with this work for additional information +.. regarding copyright ownership. The ASF licenses this file +.. to you under the Apache License, Version 2.0 (the +.. "License"); you may not use this file except in compliance +.. with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, +.. software distributed under the License is distributed on an +.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +.. KIND, either express or implied. See the License for the +.. specific language governing permissions and limitations +.. under the License. + +.. currentmodule:: pyarrow + +Streams and File Access +======================= + +.. _api.io: + +Factory Functions +----------------- + +These factory functions are the recommended way to create a Arrow stream. +They accept various kinds of sources, such as in-memory buffers or on-disk files. + +.. autosummary:: + :toctree: ../generated/ + + input_stream + output_stream + memory_map + create_memory_map + +Stream Classes +-------------- + +.. autosummary:: + :toctree: ../generated/ + + NativeFile + OSFile + PythonFile + BufferReader + BufferOutputStream + FixedSizeBufferWriter + MemoryMappedFile + CompressedInputStream + CompressedOutputStream + +File Systems +------------ + +.. autosummary:: + :toctree: ../generated/ + + hdfs.connect + LocalFileSystem + +.. class:: HadoopFileSystem + :noindex: diff --git a/src/arrow/docs/source/python/api/filesystems.rst b/src/arrow/docs/source/python/api/filesystems.rst new file mode 100644 index 000000000..3e2ac29ee --- /dev/null +++ b/src/arrow/docs/source/python/api/filesystems.rst @@ -0,0 +1,53 @@ +.. Licensed to the Apache Software Foundation (ASF) under one +.. or more contributor license agreements. See the NOTICE file +.. distributed with this work for additional information +.. regarding copyright ownership. The ASF licenses this file +.. to you under the Apache License, Version 2.0 (the +.. "License"); you may not use this file except in compliance +.. with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, +.. software distributed under the License is distributed on an +.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +.. KIND, either express or implied. See the License for the +.. specific language governing permissions and limitations +.. under the License. + +.. currentmodule:: pyarrow.fs + +Filesystems +=========== + +.. 
_api.fs: + +Interface +--------- + +.. autosummary:: + :toctree: ../generated/ + + FileInfo + FileSelector + FileSystem + +Concrete Subclasses +------------------- + +.. autosummary:: + :toctree: ../generated/ + + LocalFileSystem + S3FileSystem + HadoopFileSystem + SubTreeFileSystem + +To define filesystems with behavior implemented in Python: + +.. autosummary:: + :toctree: ../generated/ + + PyFileSystem + FileSystemHandler + FSSpecHandler diff --git a/src/arrow/docs/source/python/api/flight.rst b/src/arrow/docs/source/python/api/flight.rst new file mode 100644 index 000000000..0cfbb6b4b --- /dev/null +++ b/src/arrow/docs/source/python/api/flight.rst @@ -0,0 +1,91 @@ +.. Licensed to the Apache Software Foundation (ASF) under one +.. or more contributor license agreements. See the NOTICE file +.. distributed with this work for additional information +.. regarding copyright ownership. The ASF licenses this file +.. to you under the Apache License, Version 2.0 (the +.. "License"); you may not use this file except in compliance +.. with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, +.. software distributed under the License is distributed on an +.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +.. KIND, either express or implied. See the License for the +.. specific language governing permissions and limitations +.. under the License. + +.. currentmodule:: pyarrow.flight + +Arrow Flight +============ + +.. ifconfig:: not flight_enabled + + .. error:: + This documentation was built without Flight enabled. The Flight + API docs are not available. + +.. NOTE We still generate those API docs (with empty docstrings) +.. when Flight is disabled and `pyarrow.flight` mocked (see conf.py). +.. Otherwise we'd get autodoc warnings, see https://github.com/sphinx-doc/sphinx/issues/4770 + +.. warning:: Flight is currently unstable. APIs are subject to change, + though we don't expect drastic changes. + +Common Types +------------ + +.. autosummary:: + :toctree: ../generated/ + + Action + ActionType + DescriptorType + FlightDescriptor + FlightEndpoint + FlightInfo + Location + Ticket + Result + +Flight Client +------------- + +.. autosummary:: + :toctree: ../generated/ + + FlightCallOptions + FlightClient + ClientMiddlewareFactory + ClientMiddleware + +Flight Server +------------- + +.. autosummary:: + :toctree: ../generated/ + + FlightServerBase + GeneratorStream + RecordBatchStream + ServerMiddlewareFactory + ServerMiddleware + +Authentication +-------------- + +.. autosummary:: + :toctree: ../generated/ + + ClientAuthHandler + ServerAuthHandler + +Middleware +---------- + +.. autosummary:: + :toctree: ../generated/ + + FlightMethod + CallInfo diff --git a/src/arrow/docs/source/python/api/formats.rst b/src/arrow/docs/source/python/api/formats.rst new file mode 100644 index 000000000..fdc28040a --- /dev/null +++ b/src/arrow/docs/source/python/api/formats.rst @@ -0,0 +1,101 @@ +.. Licensed to the Apache Software Foundation (ASF) under one +.. or more contributor license agreements. See the NOTICE file +.. distributed with this work for additional information +.. regarding copyright ownership. The ASF licenses this file +.. to you under the Apache License, Version 2.0 (the +.. "License"); you may not use this file except in compliance +.. with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. 
Unless required by applicable law or agreed to in writing, +.. software distributed under the License is distributed on an +.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +.. KIND, either express or implied. See the License for the +.. specific language governing permissions and limitations +.. under the License. + +Tabular File Formats +==================== + +.. _api.csv: + +CSV Files +--------- + +.. currentmodule:: pyarrow.csv + +.. autosummary:: + :toctree: ../generated/ + + ConvertOptions + CSVStreamingReader + CSVWriter + ISO8601 + ParseOptions + ReadOptions + WriteOptions + open_csv + read_csv + write_csv + +.. _api.feather: + +Feather Files +------------- + +.. currentmodule:: pyarrow.feather + +.. autosummary:: + :toctree: ../generated/ + + read_feather + read_table + write_feather + +.. _api.json: + +JSON Files +---------- + +.. currentmodule:: pyarrow.json + +.. autosummary:: + :toctree: ../generated/ + + ReadOptions + ParseOptions + read_json + +.. _api.parquet: + +Parquet Files +------------- + +.. currentmodule:: pyarrow.parquet + +.. autosummary:: + :toctree: ../generated/ + + ParquetDataset + ParquetFile + ParquetWriter + read_table + read_metadata + read_pandas + read_schema + write_metadata + write_table + write_to_dataset + +.. _api.orc: + +ORC Files +--------- + +.. currentmodule:: pyarrow.orc + +.. autosummary:: + :toctree: ../generated/ + + ORCFile diff --git a/src/arrow/docs/source/python/api/ipc.rst b/src/arrow/docs/source/python/api/ipc.rst new file mode 100644 index 000000000..83ff53de7 --- /dev/null +++ b/src/arrow/docs/source/python/api/ipc.rst @@ -0,0 +1,69 @@ +.. Licensed to the Apache Software Foundation (ASF) under one +.. or more contributor license agreements. See the NOTICE file +.. distributed with this work for additional information +.. regarding copyright ownership. The ASF licenses this file +.. to you under the Apache License, Version 2.0 (the +.. "License"); you may not use this file except in compliance +.. with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, +.. software distributed under the License is distributed on an +.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +.. KIND, either express or implied. See the License for the +.. specific language governing permissions and limitations +.. under the License. + +.. currentmodule:: pyarrow + +.. _api.ipc: + +Serialization and IPC +===================== + +Inter-Process Communication +--------------------------- + +.. autosummary:: + :toctree: ../generated/ + + ipc.new_file + ipc.open_file + ipc.new_stream + ipc.open_stream + ipc.read_message + ipc.read_record_batch + ipc.get_record_batch_size + ipc.read_tensor + ipc.write_tensor + ipc.get_tensor_size + ipc.IpcWriteOptions + ipc.Message + ipc.MessageReader + ipc.RecordBatchFileReader + ipc.RecordBatchFileWriter + ipc.RecordBatchStreamReader + ipc.RecordBatchStreamWriter + +Serialization +------------- + +.. warning:: + + The serialization functionality is deprecated in pyarrow 2.0, and will + be removed in a future version. Use the standard library ``pickle`` or + the IPC functionality of pyarrow (see :ref:`ipc`). + + +.. 
autosummary:: + :toctree: ../generated/ + + serialize + serialize_to + deserialize + deserialize_components + deserialize_from + read_serialized + SerializedPyObject + SerializationContext diff --git a/src/arrow/docs/source/python/api/memory.rst b/src/arrow/docs/source/python/api/memory.rst new file mode 100644 index 000000000..f4382ba23 --- /dev/null +++ b/src/arrow/docs/source/python/api/memory.rst @@ -0,0 +1,73 @@ +.. Licensed to the Apache Software Foundation (ASF) under one +.. or more contributor license agreements. See the NOTICE file +.. distributed with this work for additional information +.. regarding copyright ownership. The ASF licenses this file +.. to you under the Apache License, Version 2.0 (the +.. "License"); you may not use this file except in compliance +.. with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, +.. software distributed under the License is distributed on an +.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +.. KIND, either express or implied. See the License for the +.. specific language governing permissions and limitations +.. under the License. + +.. currentmodule:: pyarrow + +.. _api.memory: + +Buffers and Memory +================== + +In-Memory Buffers +----------------- + +Factory Functions +~~~~~~~~~~~~~~~~~ + +.. autosummary:: + :toctree: ../generated/ + + allocate_buffer + py_buffer + foreign_buffer + +Classes +~~~~~~~ + +.. autosummary:: + :toctree: ../generated/ + + Buffer + ResizableBuffer + +Miscellaneous +~~~~~~~~~~~~~ + +.. autosummary:: + :toctree: ../generated/ + + Codec + compress + decompress + +.. _api.memory_pool: + +Memory Pools +------------ + +.. autosummary:: + :toctree: ../generated/ + + MemoryPool + default_memory_pool + jemalloc_memory_pool + mimalloc_memory_pool + system_memory_pool + jemalloc_set_decay_ms + set_memory_pool + log_memory_allocations + total_allocated_bytes diff --git a/src/arrow/docs/source/python/api/misc.rst b/src/arrow/docs/source/python/api/misc.rst new file mode 100644 index 000000000..c13b80620 --- /dev/null +++ b/src/arrow/docs/source/python/api/misc.rst @@ -0,0 +1,40 @@ +.. Licensed to the Apache Software Foundation (ASF) under one +.. or more contributor license agreements. See the NOTICE file +.. distributed with this work for additional information +.. regarding copyright ownership. The ASF licenses this file +.. to you under the Apache License, Version 2.0 (the +.. "License"); you may not use this file except in compliance +.. with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, +.. software distributed under the License is distributed on an +.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +.. KIND, either express or implied. See the License for the +.. specific language governing permissions and limitations +.. under the License. + +.. currentmodule:: pyarrow + +Miscellaneous +============= + +Multi-Threading +--------------- + +.. autosummary:: + :toctree: ../generated/ + + cpu_count + set_cpu_count + +Using with C extensions +----------------------- + +.. 
autosummary:: + :toctree: ../generated/ + + get_include + get_libraries + get_library_dirs diff --git a/src/arrow/docs/source/python/api/plasma.rst b/src/arrow/docs/source/python/api/plasma.rst new file mode 100644 index 000000000..8df9e4e21 --- /dev/null +++ b/src/arrow/docs/source/python/api/plasma.rst @@ -0,0 +1,33 @@ +.. Licensed to the Apache Software Foundation (ASF) under one +.. or more contributor license agreements. See the NOTICE file +.. distributed with this work for additional information +.. regarding copyright ownership. The ASF licenses this file +.. to you under the Apache License, Version 2.0 (the +.. "License"); you may not use this file except in compliance +.. with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, +.. software distributed under the License is distributed on an +.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +.. KIND, either express or implied. See the License for the +.. specific language governing permissions and limitations +.. under the License. + +.. currentmodule:: pyarrow.plasma + +.. _api.plasma: + +Plasma In-Memory Object Store +============================= + +Classes +------- + +.. autosummary:: + :toctree: ../generated/ + + ObjectID + PlasmaClient + PlasmaBuffer diff --git a/src/arrow/docs/source/python/api/tables.rst b/src/arrow/docs/source/python/api/tables.rst new file mode 100644 index 000000000..6e7a3b6e1 --- /dev/null +++ b/src/arrow/docs/source/python/api/tables.rst @@ -0,0 +1,55 @@ +.. Licensed to the Apache Software Foundation (ASF) under one +.. or more contributor license agreements. See the NOTICE file +.. distributed with this work for additional information +.. regarding copyright ownership. The ASF licenses this file +.. to you under the Apache License, Version 2.0 (the +.. "License"); you may not use this file except in compliance +.. with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, +.. software distributed under the License is distributed on an +.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +.. KIND, either express or implied. See the License for the +.. specific language governing permissions and limitations +.. under the License. + +.. currentmodule:: pyarrow + +.. _api.table: + +Tables and Tensors +================== + +Factory Functions +----------------- + +.. autosummary:: + :toctree: ../generated/ + + chunked_array + concat_arrays + concat_tables + record_batch + table + +Classes +------- + +.. autosummary:: + :toctree: ../generated/ + + ChunkedArray + RecordBatch + Table + +.. _api.tensor: + +Tensors +------- + +.. autosummary:: + :toctree: ../generated/ + + Tensor diff --git a/src/arrow/docs/source/python/benchmarks.rst b/src/arrow/docs/source/python/benchmarks.rst new file mode 100644 index 000000000..aee83b778 --- /dev/null +++ b/src/arrow/docs/source/python/benchmarks.rst @@ -0,0 +1,56 @@ +.. Licensed to the Apache Software Foundation (ASF) under one +.. or more contributor license agreements. See the NOTICE file +.. distributed with this work for additional information +.. regarding copyright ownership. The ASF licenses this file +.. to you under the Apache License, Version 2.0 (the +.. "License"); you may not use this file except in compliance +.. with the License. You may obtain a copy of the License at + +.. 
http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, +.. software distributed under the License is distributed on an +.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +.. KIND, either express or implied. See the License for the +.. specific language governing permissions and limitations +.. under the License. + +.. _python-benchmarks: + +Benchmarks +========== + +The ``pyarrow`` package comes with a suite of benchmarks meant to +run with `ASV`_. You'll need to install the ``asv`` package first +(``pip install asv`` or ``conda install -c conda-forge asv``). + +Running the benchmarks +---------------------- + +To run the benchmarks for a locally-built Arrow, run ``asv dev`` or +``asv run --python=same``. + +We use conda environments as part of running the benchmarks. To use the ``asv`` +setup, you must set the ``$CONDA_HOME`` environment variable to point to the +root of your conda installation. + +Running for arbitrary Git revisions +----------------------------------- + +ASV allows to store results and generate graphs of the benchmarks over +the project's evolution. You need to have the latest development version of ASV: + +.. code:: + + pip install git+https://github.com/airspeed-velocity/asv + +Now you should be ready to run ``asv run`` or whatever other command +suits your needs. Note that this can be quite long, as each Arrow needs +to be rebuilt for each Git revision you're running the benchmarks for. + +Compatibility +------------- + +We only expect the benchmarking setup to work on a Unix-like system with bash. + +.. _asv: https://asv.readthedocs.org/ diff --git a/src/arrow/docs/source/python/compute.rst b/src/arrow/docs/source/python/compute.rst new file mode 100644 index 000000000..133520de9 --- /dev/null +++ b/src/arrow/docs/source/python/compute.rst @@ -0,0 +1,69 @@ +.. Licensed to the Apache Software Foundation (ASF) under one +.. or more contributor license agreements. See the NOTICE file +.. distributed with this work for additional information +.. regarding copyright ownership. The ASF licenses this file +.. to you under the Apache License, Version 2.0 (the +.. "License"); you may not use this file except in compliance +.. with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, +.. software distributed under the License is distributed on an +.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +.. KIND, either express or implied. See the License for the +.. specific language governing permissions and limitations +.. under the License. + +.. currentmodule:: pyarrow.compute +.. _compute: + +================= +Compute Functions +================= + +Arrow supports logical compute operations over inputs of possibly +varying types. Many compute functions support both array (chunked or not) +and scalar inputs, but some will mandate either. For example, +``sort_indices`` requires its first and only input to be an array. 
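+Array and scalar arguments can also be mixed in a single call, with the scalar
+broadcast against the array. A minimal sketch (the variable name ``nums`` is
+only illustrative)::
+
+    >>> import pyarrow as pa
+    >>> import pyarrow.compute as pc
+    >>> nums = pa.array([1, 2, 3])
+    >>> # the Python integer 10 is converted to an Arrow scalar and broadcast
+    >>> pc.add(nums, 10).to_pylist()
+    [11, 12, 13]
+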
+ +Below are a few simple examples: + + >>> import pyarrow as pa + >>> import pyarrow.compute as pc + >>> a = pa.array([1, 1, 2, 3]) + >>> pc.sum(a) + <pyarrow.Int64Scalar: 7> + >>> b = pa.array([4, 1, 2, 8]) + >>> pc.equal(a, b) + <pyarrow.lib.BooleanArray object at 0x7f686e4eef30> + [ + false, + true, + true, + false + ] + >>> x, y = pa.scalar(7.8), pa.scalar(9.3) + >>> pc.multiply(x, y) + <pyarrow.DoubleScalar: 72.54> + +These functions can do more than just element-by-element operations. +Here is an example of sorting a table: + + >>> import pyarrow as pa + >>> import pyarrow.compute as pc + >>> t = pa.table({'x':[1,2,3],'y':[3,2,1]}) + >>> i = pc.sort_indices(t, sort_keys=[('y', 'ascending')]) + >>> i + <pyarrow.lib.UInt64Array object at 0x7fcee5df75e8> + [ + 2, + 1, + 0 + ] + + + +.. seealso:: + + :ref:`Available compute functions (C++ documentation) <compute-function-list>`. diff --git a/src/arrow/docs/source/python/csv.rst b/src/arrow/docs/source/python/csv.rst new file mode 100644 index 000000000..1724c63f4 --- /dev/null +++ b/src/arrow/docs/source/python/csv.rst @@ -0,0 +1,170 @@ +.. Licensed to the Apache Software Foundation (ASF) under one +.. or more contributor license agreements. See the NOTICE file +.. distributed with this work for additional information +.. regarding copyright ownership. The ASF licenses this file +.. to you under the Apache License, Version 2.0 (the +.. "License"); you may not use this file except in compliance +.. with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, +.. software distributed under the License is distributed on an +.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +.. KIND, either express or implied. See the License for the +.. specific language governing permissions and limitations +.. under the License. + +.. currentmodule:: pyarrow.csv +.. _csv: + +Reading and Writing CSV files +============================= + +Arrow supports reading and writing columnar data from/to CSV files. +The features currently offered are the following: + +* multi-threaded or single-threaded reading +* automatic decompression of input files (based on the filename extension, + such as ``my_data.csv.gz``) +* fetching column names from the first row in the CSV file +* column-wise type inference and conversion to one of ``null``, ``int64``, + ``float64``, ``date32``, ``time32[s]``, ``timestamp[s]``, ``timestamp[ns]``, + ``string`` or ``binary`` data +* opportunistic dictionary encoding of ``string`` and ``binary`` columns + (disabled by default) +* detecting various spellings of null values such as ``NaN`` or ``#N/A`` +* writing CSV files with options to configure the exact output format + +Usage +----- + +CSV reading and writing functionality is available through the +:mod:`pyarrow.csv` module. 
In many cases, you will simply call the +:func:`read_csv` function with the file path you want to read from:: + + >>> from pyarrow import csv + >>> fn = 'tips.csv.gz' + >>> table = csv.read_csv(fn) + >>> table + pyarrow.Table + total_bill: double + tip: double + sex: string + smoker: string + day: string + time: string + size: int64 + >>> len(table) + 244 + >>> df = table.to_pandas() + >>> df.head() + total_bill tip sex smoker day time size + 0 16.99 1.01 Female No Sun Dinner 2 + 1 10.34 1.66 Male No Sun Dinner 3 + 2 21.01 3.50 Male No Sun Dinner 3 + 3 23.68 3.31 Male No Sun Dinner 2 + 4 24.59 3.61 Female No Sun Dinner 4 + +To write CSV files, just call :func:`write_csv` with a +:class:`pyarrow.RecordBatch` or :class:`pyarrow.Table` and a path or +file-like object:: + + >>> import pyarrow as pa + >>> import pyarrow.csv as csv + >>> csv.write_csv(table, "tips.csv") + >>> with pa.CompressedOutputStream("tips.csv.gz", "gzip") as out: + ... csv.write_csv(table, out) + +.. note:: The writer does not yet support all Arrow types. + +Customized parsing +------------------ + +To alter the default parsing settings in case of reading CSV files with an +unusual structure, you should create a :class:`ParseOptions` instance +and pass it to :func:`read_csv`. + +Customized conversion +--------------------- + +To alter how CSV data is converted to Arrow types and data, you should create +a :class:`ConvertOptions` instance and pass it to :func:`read_csv`:: + + import pyarrow as pa + import pyarrow.csv as csv + + table = csv.read_csv('tips.csv.gz', convert_options=pa.csv.ConvertOptions( + column_types={ + 'total_bill': pa.decimal128(precision=10, scale=2), + 'tip': pa.decimal128(precision=10, scale=2), + } + )) + + +Incremental reading +------------------- + +For memory-constrained environments, it is also possible to read a CSV file +one batch at a time, using :func:`open_csv`. + +There are a few caveats: + +1. For now, the incremental reader is always single-threaded (regardless of + :attr:`ReadOptions.use_threads`) + +2. Type inference is done on the first block and types are frozen afterwards; + to make sure the right data types are inferred, either set + :attr:`ReadOptions.block_size` to a large enough value, or use + :attr:`ConvertOptions.column_types` to set the desired data types explicitly. + +Character encoding +------------------ + +By default, CSV files are expected to be encoded in UTF8. Non-UTF8 data +is accepted for ``binary`` columns. The encoding can be changed using +the :class:`ReadOptions` class. + +Customized writing +------------------ + +To alter the default write settings in case of writing CSV files with +different conventions, you can create a :class:`WriteOptions` instance and +pass it to :func:`write_csv`:: + + >>> import pyarrow as pa + >>> import pyarrow.csv as csv + >>> # Omit the header row (include_header=True is the default) + >>> options = csv.WriteOptions(include_header=False) + >>> csv.write_csv(table, "data.csv", options) + +Incremental writing +------------------- + +To write CSV files one batch at a time, create a :class:`CSVWriter`. 
This +requires the output (a path or file-like object), the schema of the data to +be written, and optionally write options as described above:: + + >>> import pyarrow as pa + >>> import pyarrow.csv as csv + >>> with csv.CSVWriter("data.csv", table.schema) as writer: + ...     writer.write_table(table) + +Performance +----------- + +Due to the structure of CSV files, one cannot expect the same levels of +performance as when reading dedicated binary formats like +:ref:`Parquet <Parquet>`. Nevertheless, Arrow strives to reduce the +overhead of reading CSV files. A reasonable expectation is at least +100 MB/s per core on a performant desktop or laptop computer (measured +in source CSV bytes, not target Arrow data bytes). + +Performance options can be controlled through the :class:`ReadOptions` class. +Multi-threaded reading is the default for highest performance, distributing +the workload efficiently over all available cores. + +.. note:: + The number of concurrent threads is automatically inferred by Arrow. + You can inspect and change it using the :func:`~pyarrow.cpu_count()` + and :func:`~pyarrow.set_cpu_count()` functions, respectively. diff --git a/src/arrow/docs/source/python/cuda.rst b/src/arrow/docs/source/python/cuda.rst new file mode 100644 index 000000000..b0150c1c5 --- /dev/null +++ b/src/arrow/docs/source/python/cuda.rst @@ -0,0 +1,159 @@ +.. Licensed to the Apache Software Foundation (ASF) under one +.. or more contributor license agreements. See the NOTICE file +.. distributed with this work for additional information +.. regarding copyright ownership. The ASF licenses this file +.. to you under the Apache License, Version 2.0 (the +.. "License"); you may not use this file except in compliance +.. with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, +.. software distributed under the License is distributed on an +.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +.. KIND, either express or implied. See the License for the +.. specific language governing permissions and limitations +.. under the License. + +.. currentmodule:: pyarrow.cuda + +CUDA Integration +================ + +Arrow is not limited to CPU buffers (located in the computer's main memory, +also named "host memory"). It also has provisions for accessing buffers +located on a CUDA-capable GPU device (in "device memory"). + +.. note:: + This functionality is optional and must have been enabled at build time. + If this is not done by your package manager, you might have to build Arrow + yourself. + +CUDA Contexts +------------- + +A CUDA context represents access to a particular CUDA-capable device. +For example, the following creates a CUDA context accessing CUDA device number 0:: + + >>> from pyarrow import cuda + >>> ctx = cuda.Context(0) + +CUDA Buffers +------------ + +A CUDA buffer can be created by copying data from host memory to the memory +of a CUDA device, using the :meth:`Context.buffer_from_data` method.
+The source data can be any Python buffer-like object, including Arrow buffers:: + + >>> import numpy as np + >>> arr = np.arange(4, dtype=np.int32) + >>> arr.nbytes + 16 + >>> cuda_buf = ctx.buffer_from_data(arr) + >>> type(cuda_buf) + pyarrow._cuda.CudaBuffer + >>> cuda_buf.size # The buffer's size in bytes + 16 + >>> cuda_buf.address # The buffer's address in device memory + 30088364544 + >>> cuda_buf.context.device_number + 0 + +Conversely, you can copy a CUDA buffer back to host memory, getting a regular +CPU buffer:: + + >>> buf = cuda_buf.copy_to_host() + >>> type(buf) + pyarrow.lib.Buffer + >>> np.frombuffer(buf, dtype=np.int32) + array([0, 1, 2, 3], dtype=int32) + +.. warning:: + Many Arrow functions expect a CPU buffer but will not check the buffer's + actual type. You will get a crash if you pass a CUDA buffer to such a + function:: + + >>> pa.py_buffer(b"x" * 16).equals(cuda_buf) + Segmentation fault + +Numba Integration +----------------- + +There is not much you can do directly with Arrow CUDA buffers from Python, +but they support interoperation with `Numba <https://numba.pydata.org/>`_, +a JIT compiler which can turn Python code into optimized CUDA kernels. + +Arrow to Numba +~~~~~~~~~~~~~~ + +First let's define a Numba CUDA kernel operating on an ``int32`` array. Here, +we will simply increment each array element (assuming the array is writable):: + + import numba.cuda + + @numba.cuda.jit + def increment_by_one(an_array): + pos = numba.cuda.grid(1) + if pos < an_array.size: + an_array[pos] += 1 + +Then we need to wrap our CUDA buffer into a Numba "device array" with the right +array metadata (shape, strides and datatype). This is necessary so that Numba +can identify the array's characteristics and compile the kernel with the +appropriate type declarations. + +In this case the metadata can simply be obtained from the original NumPy array. +Note that the GPU data isn't copied, just pointed to:: + + >>> from numba.cuda.cudadrv.devicearray import DeviceNDArray + >>> device_arr = DeviceNDArray(arr.shape, arr.strides, arr.dtype, gpu_data=cuda_buf.to_numba()) + +(Ideally we could have defined an Arrow array in CPU memory, copied it to CUDA +memory without losing type information, and then invoked the Numba kernel on it +without constructing the DeviceNDArray by hand; this is not yet possible.) + +Finally we can run the Numba CUDA kernel on the Numba device array (here +with a 16x16 grid size):: + + >>> increment_by_one[16, 16](device_arr) + +And the results can be checked by copying the CUDA buffer back to CPU memory:: + + >>> np.frombuffer(cuda_buf.copy_to_host(), dtype=np.int32) + array([1, 2, 3, 4], dtype=int32) + +Numba to Arrow +~~~~~~~~~~~~~~ + +Conversely, a Numba-created device array can be viewed as an Arrow CUDA buffer, +using the :meth:`CudaBuffer.from_numba` factory method. + +For the sake of example, let's first create a Numba device array:: + + >>> arr = np.arange(10, 14, dtype=np.int32) + >>> arr + array([10, 11, 12, 13], dtype=int32) + >>> device_arr = numba.cuda.to_device(arr) + +Then we can create a CUDA buffer pointing to the device array's memory. +We don't need to pass a CUDA context explicitly this time: the appropriate +CUDA context is automatically retrieved and adapted from the Numba object.
+ +:: + + >>> cuda_buf = cuda.CudaBuffer.from_numba(device_arr.gpu_data) + >>> cuda_buf.size + 16 + >>> cuda_buf.address + 30088364032 + >>> cuda_buf.context.device_number + 0 + +Of course, we can copy the CUDA buffer back to host memory:: + + >>> np.frombuffer(cuda_buf.copy_to_host(), dtype=np.int32) + array([10, 11, 12, 13], dtype=int32) + +.. seealso:: + Documentation for Numba's `CUDA support <https://numba.pydata.org/numba-doc/latest/cuda/index.html>`_. diff --git a/src/arrow/docs/source/python/data.rst b/src/arrow/docs/source/python/data.rst new file mode 100644 index 000000000..b8a90039f --- /dev/null +++ b/src/arrow/docs/source/python/data.rst @@ -0,0 +1,434 @@ +.. Licensed to the Apache Software Foundation (ASF) under one +.. or more contributor license agreements. See the NOTICE file +.. distributed with this work for additional information +.. regarding copyright ownership. The ASF licenses this file +.. to you under the Apache License, Version 2.0 (the +.. "License"); you may not use this file except in compliance +.. with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, +.. software distributed under the License is distributed on an +.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +.. KIND, either express or implied. See the License for the +.. specific language governing permissions and limitations +.. under the License. + +.. currentmodule:: pyarrow +.. _data: + +Data Types and In-Memory Data Model +=================================== + +Apache Arrow defines columnar array data structures by composing type metadata +with memory buffers, like the ones explained in the documentation on +:ref:`Memory and IO <io>`. These data structures are exposed in Python through +a series of interrelated classes: + +* **Type Metadata**: Instances of ``pyarrow.DataType``, which describe a logical + array type +* **Schemas**: Instances of ``pyarrow.Schema``, which describe a named + collection of types. These can be thought of as the column types in a + table-like object. +* **Arrays**: Instances of ``pyarrow.Array``, which are atomic, contiguous + columnar data structures composed from Arrow Buffer objects +* **Record Batches**: Instances of ``pyarrow.RecordBatch``, which are a + collection of Array objects with a particular Schema +* **Tables**: Instances of ``pyarrow.Table``, a logical table data structure in + which each column consists of one or more ``pyarrow.Array`` objects of the + same type. + +We will examine these in the sections below in a series of examples. + +.. _data.types: + +Type Metadata +------------- + +Apache Arrow defines language agnostic column-oriented data structures for +array data. These include: + +* **Fixed-length primitive types**: numbers, booleans, date and times, fixed + size binary, decimals, and other values that fit into a given number +* **Variable-length primitive types**: binary, string +* **Nested types**: list, struct, and union +* **Dictionary type**: An encoded categorical type (more on this later) + +Each logical data type in Arrow has a corresponding factory function for +creating an instance of that type object in Python: + +.. ipython:: python + + import pyarrow as pa + t1 = pa.int32() + t2 = pa.string() + t3 = pa.binary() + t4 = pa.binary(10) + t5 = pa.timestamp('ms') + + t1 + print(t1) + print(t4) + print(t5) + +We use the name **logical type** because the **physical** storage may be the +same for one or more types. 
For example, ``int64``, ``float64``, and +``timestamp[ms]`` all occupy 64 bits per value. + +These objects are `metadata`; they are used for describing the data in arrays, +schemas, and record batches. In Python, they can be used in functions where the +input data (e.g. Python objects) may be coerced to more than one Arrow type. + +The :class:`~pyarrow.Field` type is a type plus a name and optional +user-defined metadata: + +.. ipython:: python + + f0 = pa.field('int32_field', t1) + f0 + f0.name + f0.type + +Arrow supports **nested value types** like list, struct, and union. When +creating these, you must pass types or fields to indicate the data types of the +types' children. For example, we can define a list of int32 values with: + +.. ipython:: python + + t6 = pa.list_(t1) + t6 + +A `struct` is a collection of named fields: + +.. ipython:: python + + fields = [ + pa.field('s0', t1), + pa.field('s1', t2), + pa.field('s2', t4), + pa.field('s3', t6), + ] + + t7 = pa.struct(fields) + print(t7) + +For convenience, you can pass ``(name, type)`` tuples directly instead of +:class:`~pyarrow.Field` instances: + +.. ipython:: python + + t8 = pa.struct([('s0', t1), ('s1', t2), ('s2', t4), ('s3', t6)]) + print(t8) + t8 == t7 + + +See :ref:`Data Types API <api.types>` for a full listing of data type +functions. + +.. _data.schema: + +Schemas +------- + +The :class:`~pyarrow.Schema` type is similar to the ``struct`` array type; it +defines the column names and types in a record batch or table data +structure. The :func:`pyarrow.schema` factory function makes new Schema objects in +Python: + +.. ipython:: python + + my_schema = pa.schema([('field0', t1), + ('field1', t2), + ('field2', t4), + ('field3', t6)]) + my_schema + +In some applications, you may not create schemas directly, only using the ones +that are embedded in :ref:`IPC messages <ipc>`. + +.. _data.array: + +Arrays +------ + +For each data type, there is an accompanying array data structure for holding +memory buffers that define a single contiguous chunk of columnar array +data. When you are using PyArrow, this data may come from IPC tools, though it +can also be created from various types of Python sequences (lists, NumPy +arrays, pandas data). + +A simple way to create arrays is with ``pyarrow.array``, which is similar to +the ``numpy.array`` function. By default PyArrow will infer the data type +for you: + +.. ipython:: python + + arr = pa.array([1, 2, None, 3]) + arr + +But you may also pass a specific data type to override type inference: + +.. ipython:: python + + pa.array([1, 2], type=pa.uint16()) + +The array's ``type`` attribute is the corresponding piece of type metadata: + +.. ipython:: python + + arr.type + +Each in-memory array has a known length and null count (which will be 0 if +there are no null values): + +.. ipython:: python + + len(arr) + arr.null_count + +Scalar values can be selected with normal indexing. ``pyarrow.array`` converts +``None`` values to Arrow nulls; we return the special ``pyarrow.NA`` value for +nulls: + +.. ipython:: python + + arr[0] + arr[2] + +Arrow data is immutable, so values can be selected but not assigned. + +Arrays can be sliced without copying: + +.. ipython:: python + + arr[1:3] + +None values and NAN handling +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +As mentioned in the above section, the Python object ``None`` is always +converted to an Arrow null element on the conversion to ``pyarrow.Array``. 
The float NaN value, which can be represented by either the Python object +``float('nan')`` or ``numpy.nan``, is normally converted to a *valid* float +value during the conversion. If an integer input that contains ``np.nan`` is supplied to +``pyarrow.array``, a ``ValueError`` is raised. + +For better compatibility with pandas, we support interpreting NaN values as +null elements. This is enabled automatically for all ``from_pandas`` functions and +can be enabled for the other conversion functions by passing ``from_pandas=True`` +as a function parameter. + +List arrays +~~~~~~~~~~~ + +``pyarrow.array`` is able to infer the type of simple nested data structures +like lists: + +.. ipython:: python + + nested_arr = pa.array([[], None, [1, 2], [None, 1]]) + print(nested_arr.type) + +Struct arrays +~~~~~~~~~~~~~ + +For other kinds of nested arrays, such as struct arrays, you currently need +to pass the type explicitly. Struct arrays can be initialized from a +sequence of Python dicts or tuples: + +.. ipython:: python + + ty = pa.struct([('x', pa.int8()), + ('y', pa.bool_())]) + pa.array([{'x': 1, 'y': True}, {'x': 2, 'y': False}], type=ty) + pa.array([(3, True), (4, False)], type=ty) + +When initializing a struct array, nulls are allowed both at the struct +level and at the individual field level. If initializing from a sequence +of Python dicts, a missing dict key is handled as a null value: + +.. ipython:: python + + pa.array([{'x': 1}, None, {'y': None}], type=ty) + +You can also construct a struct array from existing arrays for each of the +struct's components. In this case, data storage will be shared with the +individual arrays, and no copy is involved: + +.. ipython:: python + + xs = pa.array([5, 6, 7], type=pa.int16()) + ys = pa.array([False, True, True]) + arr = pa.StructArray.from_arrays((xs, ys), names=('x', 'y')) + arr.type + arr + +Union arrays +~~~~~~~~~~~~ + +The union type represents a nested array type where each value can be one +(and only one) of a set of possible types. There are two possible +storage types for union arrays: sparse and dense. + +In a sparse union array, each of the child arrays has the same length +as the resulting union array. They are accompanied by an ``int8`` "types" +array that tells, for each value, from which child array it must be +selected: + +.. ipython:: python + + xs = pa.array([5, 6, 7]) + ys = pa.array([False, False, True]) + types = pa.array([0, 1, 1], type=pa.int8()) + union_arr = pa.UnionArray.from_sparse(types, [xs, ys]) + union_arr.type + union_arr + +In a dense union array, you also pass, in addition to the ``int8`` "types" +array, an ``int32`` "offsets" array that tells, for each value, at +which offset in the selected child array it can be found: + +.. ipython:: python + + xs = pa.array([5, 6, 7]) + ys = pa.array([False, True]) + types = pa.array([0, 1, 1, 0, 0], type=pa.int8()) + offsets = pa.array([0, 0, 1, 1, 2], type=pa.int32()) + union_arr = pa.UnionArray.from_dense(types, offsets, [xs, ys]) + union_arr.type + union_arr + +.. _data.dictionary: + +Dictionary Arrays +~~~~~~~~~~~~~~~~~ + +The **Dictionary** type in PyArrow is a special array type that is similar to a +factor in R or a ``pandas.Categorical``. It enables one or more record batches +in a file or stream to transmit integer *indices* referencing a shared +**dictionary** containing the distinct values in the logical array. This is +particularly common with strings, to save memory and improve performance.
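+A quick way to obtain a dictionary-encoded version of an existing array is the
+``dictionary_encode()`` method. A small sketch (the values here are arbitrary);
+it produces the same kind of :class:`~.DictionaryArray` as the explicit
+construction shown next:
+
+.. ipython:: python
+
+    # each distinct string is stored once in the dictionary; the array itself
+    # only stores small integer indices into that dictionary
+    pa.array(['foo', 'bar', 'foo', 'baz']).dictionary_encode()
+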
+ +The way that dictionaries are handled in the Apache Arrow format and the way +they appear in C++ and Python is slightly different. We define a special +:class:`~.DictionaryArray` type with a corresponding dictionary type. Let's +consider an example: + +.. ipython:: python + + indices = pa.array([0, 1, 0, 1, 2, 0, None, 2]) + dictionary = pa.array(['foo', 'bar', 'baz']) + + dict_array = pa.DictionaryArray.from_arrays(indices, dictionary) + dict_array + +Here we have: + +.. ipython:: python + + print(dict_array.type) + dict_array.indices + dict_array.dictionary + +When using :class:`~.DictionaryArray` with pandas, the analogue is +``pandas.Categorical`` (more on this later): + +.. ipython:: python + + dict_array.to_pandas() + +.. _data.record_batch: + +Record Batches +-------------- + +A **Record Batch** in Apache Arrow is a collection of equal-length array +instances. Let's consider a collection of arrays: + +.. ipython:: python + + data = [ + pa.array([1, 2, 3, 4]), + pa.array(['foo', 'bar', 'baz', None]), + pa.array([True, None, False, True]) + ] + +A record batch can be created from this list of arrays using +``RecordBatch.from_arrays``: + +.. ipython:: python + + batch = pa.RecordBatch.from_arrays(data, ['f0', 'f1', 'f2']) + batch.num_columns + batch.num_rows + batch.schema + + batch[1] + +A record batch can be sliced without copying memory like an array: + +.. ipython:: python + + batch2 = batch.slice(1, 3) + batch2[1] + +.. _data.table: + +Tables +------ + +The PyArrow :class:`~.Table` type is not part of the Apache Arrow +specification, but is rather a tool to help with wrangling multiple record +batches and array pieces as a single logical dataset. As a relevant example, we +may receive multiple small record batches in a socket stream, then need to +concatenate them into contiguous memory for use in NumPy or pandas. The Table +object makes this efficient without requiring additional memory copying. + +Considering the record batch we created above, we can create a Table containing +one or more copies of the batch using ``Table.from_batches``: + +.. ipython:: python + + batches = [batch] * 5 + table = pa.Table.from_batches(batches) + table + table.num_rows + +The table's columns are instances of :class:`~.ChunkedArray`, which is a +container for one or more arrays of the same type. + +.. ipython:: python + + c = table[0] + c + c.num_chunks + c.chunk(0) + +As you'll see in the :ref:`pandas section <pandas_interop>`, we can convert +these objects to contiguous NumPy arrays for use in pandas: + +.. ipython:: python + + c.to_pandas() + +Multiple tables can also be concatenated together to form a single table using +``pyarrow.concat_tables``, if the schemas are equal: + +.. ipython:: python + + tables = [table] * 2 + table_all = pa.concat_tables(tables) + table_all.num_rows + c = table_all[0] + c.num_chunks + +This is similar to ``Table.from_batches``, but uses tables as input instead of +record batches. Record batches can be made into tables, but not the other way +around, so if your data is already in table form, then use +``pyarrow.concat_tables``. + +Custom Schema and Field Metadata +-------------------------------- + +TODO diff --git a/src/arrow/docs/source/python/dataset.rst b/src/arrow/docs/source/python/dataset.rst new file mode 100644 index 000000000..e2d8c900b --- /dev/null +++ b/src/arrow/docs/source/python/dataset.rst @@ -0,0 +1,626 @@ +.. Licensed to the Apache Software Foundation (ASF) under one +.. or more contributor license agreements. See the NOTICE file +.. 
distributed with this work for additional information +.. regarding copyright ownership. The ASF licenses this file +.. to you under the Apache License, Version 2.0 (the +.. "License"); you may not use this file except in compliance +.. with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, +.. software distributed under the License is distributed on an +.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +.. KIND, either express or implied. See the License for the +.. specific language governing permissions and limitations +.. under the License. + +.. currentmodule:: pyarrow.dataset + +.. _dataset: + +Tabular Datasets +================ + +.. warning:: + + The ``pyarrow.dataset`` module is experimental (specifically the classes), + and a stable API is not yet guaranteed. + +The ``pyarrow.dataset`` module provides functionality to efficiently work with +tabular, potentially larger than memory, and multi-file datasets. This includes: + +* A unified interface that supports different sources and file formats + (Parquet, ORC, Feather / Arrow IPC, and CSV files) and different file systems + (local, cloud). +* Discovery of sources (crawling directories, handle directory-based partitioned + datasets, basic schema normalization, ..) +* Optimized reading with predicate pushdown (filtering rows), projection + (selecting and deriving columns), and optionally parallel reading. + +Currently, only Parquet, ORC, Feather / Arrow IPC, and CSV files are +supported. The goal is to expand this in the future to other file formats and +data sources (e.g. database connections). + +For those familiar with the existing :class:`pyarrow.parquet.ParquetDataset` for +reading Parquet datasets: ``pyarrow.dataset``'s goal is similar but not specific +to the Parquet format and not tied to Python: the same datasets API is exposed +in the R bindings or Arrow. In addition ``pyarrow.dataset`` boasts improved +performance and new features (e.g. filtering within files rather than only on +partition keys). + + +Reading Datasets +---------------- + +.. TODO Full blown example with NYC taxi data to show off, afterwards explain all parts: + +For the examples below, let's create a small dataset consisting +of a directory with two parquet files: + +.. ipython:: python + + import tempfile + import pathlib + import pyarrow as pa + import pyarrow.parquet as pq + import numpy as np + + base = pathlib.Path(tempfile.gettempdir()) + (base / "parquet_dataset").mkdir(exist_ok=True) + + # creating an Arrow Table + table = pa.table({'a': range(10), 'b': np.random.randn(10), 'c': [1, 2] * 5}) + + # writing it into two parquet files + pq.write_table(table.slice(0, 5), base / "parquet_dataset/data1.parquet") + pq.write_table(table.slice(5, 10), base / "parquet_dataset/data2.parquet") + +Dataset discovery +~~~~~~~~~~~~~~~~~ + +A :class:`Dataset` object can be created with the :func:`dataset` function. We +can pass it the path to the directory containing the data files: + +.. ipython:: python + + import pyarrow.dataset as ds + dataset = ds.dataset(base / "parquet_dataset", format="parquet") + dataset + +In addition to searching a base directory, :func:`dataset` accepts a path to a +single file or a list of file paths. + +Creating a :class:`Dataset` object does not begin reading the data itself. If +needed, it only crawls the directory to find all the files: + +.. ipython:: python + + dataset.files + +... 
and infers the dataset's schema (by default from the first file): + +.. ipython:: python + + print(dataset.schema.to_string(show_field_metadata=False)) + +Using the :meth:`Dataset.to_table` method we can read the dataset (or a portion +of it) into a pyarrow Table (note that depending on the size of your dataset +this can require a lot of memory, see below on filtering / iterative loading): + +.. ipython:: python + + dataset.to_table() + # converting to pandas to see the contents of the scanned table + dataset.to_table().to_pandas() + +Reading different file formats +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The above examples use Parquet files as dataset sources but the Dataset API +provides a consistent interface across multiple file formats and filesystems. +Currently, Parquet, ORC, Feather / Arrow IPC, and CSV file formats are +supported; more formats are planned in the future. + +If we save the table as Feather files instead of Parquet files: + +.. ipython:: python + + import pyarrow.feather as feather + + feather.write_feather(table, base / "data.feather") + +…then we can read the Feather file using the same functions, but with specifying +``format="feather"``: + +.. ipython:: python + + dataset = ds.dataset(base / "data.feather", format="feather") + dataset.to_table().to_pandas().head() + +Customizing file formats +~~~~~~~~~~~~~~~~~~~~~~~~ + +The format name as a string, like:: + + ds.dataset(..., format="parquet") + +is short hand for a default constructed :class:`ParquetFileFormat`:: + + ds.dataset(..., format=ds.ParquetFileFormat()) + +The :class:`FileFormat` objects can be customized using keywords. For example:: + + parquet_format = ds.ParquetFileFormat(read_options={'dictionary_columns': ['a']}) + ds.dataset(..., format=parquet_format) + +Will configure column ``"a"`` to be dictionary encoded on scan. + +Filtering data +-------------- + +To avoid reading all data when only needing a subset, the ``columns`` and +``filter`` keywords can be used. + +The ``columns`` keyword can be used to only read the specified columns: + +.. ipython:: python + + dataset = ds.dataset(base / "parquet_dataset", format="parquet") + dataset.to_table(columns=['a', 'b']).to_pandas() + +With the ``filter`` keyword, rows which do not match the filter predicate will +not be included in the returned table. The keyword expects a boolean +:class:`Expression` referencing at least one of the columns: + +.. ipython:: python + + dataset.to_table(filter=ds.field('a') >= 7).to_pandas() + dataset.to_table(filter=ds.field('c') == 2).to_pandas() + +The easiest way to construct those :class:`Expression` objects is by using the +:func:`field` helper function. Any column - not just partition columns - can be +referenced using the :func:`field` function (which creates a +:class:`FieldExpression`). Operator overloads are provided to compose filters +including the comparisons (equal, larger/less than, etc), set membership +testing, and boolean combinations (``&``, ``|``, ``~``): + +.. ipython:: python + + ds.field('a') != 3 + ds.field('a').isin([1, 2, 3]) + (ds.field('a') > ds.field('b')) & (ds.field('b') > 1) + +Note that :class:`Expression` objects can **not** be combined by python logical +operators ``and``, ``or`` and ``not``. + +Projecting columns +------------------ + +The ``columns`` keyword can be used to read a subset of the columns of the +dataset by passing it a list of column names. The keyword can also be used +for more complex projections in combination with expressions. 
+ +In this case, we pass it a dictionary with the keys being the resulting +column names and the values the expression that is used to construct the column +values: + +.. ipython:: python + + projection = { + "a_renamed": ds.field("a"), + "b_as_float32": ds.field("b").cast("float32"), + "c_1": ds.field("c") == 1, + } + dataset.to_table(columns=projection).to_pandas().head() + +The dictionary also determines the column selection (only the keys in the +dictionary will be present as columns in the resulting table). If you want +to include a derived column in *addition* to the existing columns, you can +build up the dictionary from the dataset schema: + +.. ipython:: python + + projection = {col: ds.field(col) for col in dataset.schema.names} + projection.update({"b_large": ds.field("b") > 1}) + dataset.to_table(columns=projection).to_pandas().head() + + +Reading partitioned data +------------------------ + +Above, a dataset consisting of a flat directory with files was shown. However, a +dataset can exploit a nested directory structure defining a partitioned dataset, +where the sub-directory names hold information about which subset of the data is +stored in that directory. + +For example, a dataset partitioned by year and month may look like on disk: + +.. code-block:: text + + dataset_name/ + year=2007/ + month=01/ + data0.parquet + data1.parquet + ... + month=02/ + data0.parquet + data1.parquet + ... + month=03/ + ... + year=2008/ + month=01/ + ... + ... + +The above partitioning scheme is using "/key=value/" directory names, as found +in Apache Hive. + +Let's create a small partitioned dataset. The :func:`~pyarrow.parquet.write_to_dataset` +function can write such hive-like partitioned datasets. + +.. ipython:: python + + table = pa.table({'a': range(10), 'b': np.random.randn(10), 'c': [1, 2] * 5, + 'part': ['a'] * 5 + ['b'] * 5}) + pq.write_to_dataset(table, str(base / "parquet_dataset_partitioned"), + partition_cols=['part']) + +The above created a directory with two subdirectories ("part=a" and "part=b"), +and the Parquet files written in those directories no longer include the "part" +column. + +Reading this dataset with :func:`dataset`, we now specify that the dataset +should use a hive-like partitioning scheme with the ``partitioning`` keyword: + +.. ipython:: python + + dataset = ds.dataset(str(base / "parquet_dataset_partitioned"), format="parquet", + partitioning="hive") + dataset.files + +Although the partition fields are not included in the actual Parquet files, +they will be added back to the resulting table when scanning this dataset: + +.. ipython:: python + + dataset.to_table().to_pandas().head(3) + +We can now filter on the partition keys, which avoids loading files +altogether if they do not match the filter: + +.. ipython:: python + + dataset.to_table(filter=ds.field("part") == "b").to_pandas() + + +Different partitioning schemes +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The above example uses a hive-like directory scheme, such as "/year=2009/month=11/day=15". +We specified this passing the ``partitioning="hive"`` keyword. In this case, +the types of the partition keys are inferred from the file paths. + +It is also possible to explicitly define the schema of the partition keys +using the :func:`partitioning` function. For example: + +.. 
code-block:: python + + part = ds.partitioning( + pa.schema([("year", pa.int16()), ("month", pa.int8()), ("day", pa.int32())]), + flavor="hive" + ) + dataset = ds.dataset(..., partitioning=part) + +"Directory partitioning" is also supported, where the segments in the file path +represent the values of the partition keys without including the name (the +field name are implicit in the segment's index). For example, given field names +"year", "month", and "day", one path might be "/2019/11/15". + +Since the names are not included in the file paths, these must be specified +when constructing a directory partitioning: + +.. code-block:: python + + part = ds.partitioning(field_names=["year", "month", "day"]) + +Directory partitioning also supports providing a full schema rather than inferring +types from file paths. + + +Reading from cloud storage +-------------------------- + +In addition to local files, pyarrow also supports reading from cloud storage. +Currently, :class:`HDFS <pyarrow.fs.HadoopFileSystem>` and +:class:`Amazon S3-compatible storage <pyarrow.fs.S3FileSystem>` are supported. + +When passing a file URI, the file system will be inferred. For example, +specifying a S3 path: + +.. code-block:: python + + dataset = ds.dataset("s3://ursa-labs-taxi-data/", partitioning=["year", "month"]) + +Typically, you will want to customize the connection parameters, and then +a file system object can be created and passed to the ``filesystem`` keyword: + +.. code-block:: python + + from pyarrow import fs + + s3 = fs.S3FileSystem(region="us-east-2") + dataset = ds.dataset("ursa-labs-taxi-data/", filesystem=s3, + partitioning=["year", "month"]) + +The currently available classes are :class:`~pyarrow.fs.S3FileSystem` and +:class:`~pyarrow.fs.HadoopFileSystem`. See the :ref:`filesystem` docs for more +details. + + +Reading from Minio +------------------ + +In addition to cloud storage, pyarrow also supports reading from a +`MinIO <https://github.com/minio/minio>`_ object storage instance emulating S3 +APIs. Paired with `toxiproxy <https://github.com/shopify/toxiproxy>`_, this is +useful for testing or benchmarking. + +.. code-block:: python + + from pyarrow import fs + + # By default, MinIO will listen for unencrypted HTTP traffic. + minio = fs.S3FileSystem(scheme="http", endpoint="localhost:9000") + dataset = ds.dataset("ursa-labs-taxi-data/", filesystem=minio, + partitioning=["year", "month"]) + + +Working with Parquet Datasets +----------------------------- + +While the Datasets API provides a unified interface to different file formats, +some specific methods exist for Parquet Datasets. + +Some processing frameworks such as Dask (optionally) use a ``_metadata`` file +with partitioned datasets which includes information about the schema and the +row group metadata of the full dataset. Using such a file can give a more +efficient creation of a parquet Dataset, since it does not need to infer the +schema and crawl the directories for all Parquet files (this is especially the +case for filesystems where accessing files is expensive). The +:func:`parquet_dataset` function allows us to create a Dataset from a partitioned +dataset with a ``_metadata`` file: + +.. code-block:: python + + dataset = ds.parquet_dataset("/path/to/dir/_metadata") + +By default, the constructed :class:`Dataset` object for Parquet datasets maps +each fragment to a single Parquet file. If you want fragments mapping to each +row group of a Parquet file, you can use the ``split_by_row_group()`` method of +the fragments: + +.. 
code-block:: python + + fragments = list(dataset.get_fragments()) + fragments[0].split_by_row_group() + +This method returns a list of new Fragments mapping to each row group of +the original Fragment (Parquet file). Both ``get_fragments()`` and +``split_by_row_group()`` accept an optional filter expression to get a +filtered list of fragments. + + +Manual specification of the Dataset +----------------------------------- + +The :func:`dataset` function allows easy creation of a Dataset viewing a directory, +crawling all subdirectories for files and partitioning information. However +sometimes discovery is not required and the dataset's files and partitions +are already known (for example, when this information is stored in metadata). +In this case it is possible to create a Dataset explicitly without any +automatic discovery or inference. + +For the example here, we are going to use a dataset where the file names contain +additional partitioning information: + +.. ipython:: python + + # creating a dummy dataset: directory with two files + table = pa.table({'col1': range(3), 'col2': np.random.randn(3)}) + (base / "parquet_dataset_manual").mkdir(exist_ok=True) + pq.write_table(table, base / "parquet_dataset_manual" / "data_2018.parquet") + pq.write_table(table, base / "parquet_dataset_manual" / "data_2019.parquet") + +To create a Dataset from a list of files, we need to specify the paths, schema, +format, filesystem, and partition expressions manually: + +.. ipython:: python + + from pyarrow import fs + + schema = pa.schema([("year", pa.int64()), ("col1", pa.int64()), ("col2", pa.float64())]) + + dataset = ds.FileSystemDataset.from_paths( + ["data_2018.parquet", "data_2019.parquet"], schema=schema, format=ds.ParquetFileFormat(), + filesystem=fs.SubTreeFileSystem(str(base / "parquet_dataset_manual"), fs.LocalFileSystem()), + partitions=[ds.field('year') == 2018, ds.field('year') == 2019]) + +Since we specified the "partition expressions" for our files, this information +is materialized as columns when reading the data and can be used for filtering: + +.. ipython:: python + + dataset.to_table().to_pandas() + dataset.to_table(filter=ds.field('year') == 2019).to_pandas() + +Another benefit of manually listing the files is that the order of the files +controls the order of the data. When performing an ordered read (or a read to +a table) then the rows returned will match the order of the files given. This +only applies when the dataset is constructed with a list of files. There +are no order guarantees given when the files are instead discovered by scanning +a directory. + +Iterative (out of core or streaming) reads +------------------------------------------ + +The previous examples have demonstrated how to read the data into a table using :func:`~Dataset.to_table`. This is +useful if the dataset is small or there is only a small amount of data that needs to +be read. The dataset API contains additional methods to read and process large amounts +of data in a streaming fashion. + +The easiest way to do this is to use the method :meth:`Dataset.to_batches`. This +method returns an iterator of record batches. For example, we can use this method to +calculate the average of a column without loading the entire column into memory: + +.. 
ipython:: python + + import pyarrow.compute as pc + + col2_sum = 0 + count = 0 + for batch in dataset.to_batches(columns=["col2"], filter=~ds.field("col2").is_null()): + col2_sum += pc.sum(batch.column("col2")).as_py() + count += batch.num_rows + mean_a = col2_sum/count + +Customizing the batch size +~~~~~~~~~~~~~~~~~~~~~~~~~~ + +An iterative read of a dataset is often called a "scan" of the dataset and pyarrow +uses an object called a :class:`Scanner` to do this. A Scanner is created for you +automatically by the :func:`~Dataset.to_table` and :func:`~Dataset.to_batches` method of the dataset. +Any arguments you pass to these methods will be passed on to the Scanner constructor. + +One of those parameters is the ``batch_size``. This controls the maximum size of the +batches returned by the scanner. Batches can still be smaller than the ``batch_size`` +if the dataset consists of small files or those files themselves consist of small +row groups. For example, a parquet file with 10,000 rows per row group will yield +batches with, at most, 10,000 rows unless the ``batch_size`` is set to a smaller value. + +The default batch size is one million rows and this is typically a good default but +you may want to customize it if you are reading a large number of columns. + +Writing Datasets +---------------- + +The dataset API also simplifies writing data to a dataset using :func:`write_dataset` . This can be useful when +you want to partition your data or you need to write a large amount of data. A +basic dataset write is similar to writing a table except that you specify a directory +instead of a filename. + +.. ipython:: python + + base = pathlib.Path(tempfile.gettempdir()) + dataset_root = base / "sample_dataset" + dataset_root.mkdir(exist_ok=True) + + table = pa.table({"a": range(10), "b": np.random.randn(10), "c": [1, 2] * 5}) + ds.write_dataset(table, dataset_root, format="parquet") + +The above example will create a single file named part-0.parquet in our sample_dataset +directory. + +.. warning:: + + If you run the example again it will replace the existing part-0.parquet file. + Appending files to an existing dataset requires specifying a new + ``basename_template`` for each call to ``ds.write_dataset`` + to avoid overwrite. + +Writing partitioned data +~~~~~~~~~~~~~~~~~~~~~~~~ + +A partitioning object can be used to specify how your output data should be partitioned. +This uses the same kind of partitioning objects we used for reading datasets. To write +our above data out to a partitioned directory we only need to specify how we want the +dataset to be partitioned. For example: + +.. ipython:: python + + part = ds.partitioning( + pa.schema([("c", pa.int16())]), flavor="hive" + ) + ds.write_dataset(table, dataset_root, format="parquet", partitioning=part) + +This will create two files. Half our data will be in the dataset_root/c=1 directory and +the other half will be in the dataset_root/c=2 directory. + +Writing large amounts of data +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The above examples wrote data from a table. If you are writing a large amount of data +you may not be able to load everything into a single in-memory table. Fortunately, the +:func:`~Dataset.write_dataset` method also accepts an iterable of record batches. This makes it really +simple, for example, to repartition a large dataset without loading the entire dataset +into memory: + +.. 
ipython:: python + + old_part = ds.partitioning( + pa.schema([("c", pa.int16())]), flavor="hive" + ) + new_part = ds.partitioning( + pa.schema([("c", pa.int16())]), flavor=None + ) + input_dataset = ds.dataset(dataset_root, partitioning=old_part) + new_root = base / "repartitioned_dataset" + # A scanner can act as an iterator of record batches but you could also receive + # data from the network (e.g. via flight), from your own scanning, or from any + # other method that yields record batches. In addition, you can pass a dataset + # into write_dataset directly but this method is useful if you want to customize + # the scanner (e.g. to filter the input dataset or set a maximum batch size) + scanner = input_dataset.scanner(use_async=True) + + ds.write_dataset(scanner, new_root, format="parquet", partitioning=new_part) + +After the above example runs our data will be in dataset_root/1 and dataset_root/2 +directories. In this simple example we are not changing the structure of the data +(only the directory naming schema) but you could also use this mechnaism to change +which columns are used to partition the dataset. This is useful when you expect to +query your data in specific ways and you can utilize partitioning to reduce the +amount of data you need to read. + +Customizing & inspecting written files +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +By default the dataset API will create files named "part-i.format" where "i" is a integer +generated during the write and "format" is the file format specified in the write_dataset +call. For simple datasets it may be possible to know which files will be created but for +larger or partitioned datasets it is not so easy. The ``file_visitor`` keyword can be used +to supply a visitor that will be called as each file is created: + +.. ipython:: python + + def file_visitor(written_file): + print(f"path={written_file.path}") + print(f"metadata={written_file.metadata}") + +.. ipython:: python + + ds.write_dataset(table, base / "dataset_visited", format="parquet", partitioning=part, + file_visitor=file_visitor) + +This will allow you to collect the filenames that belong to the dataset and store them elsewhere +which can be useful when you want to avoid scanning directories the next time you need to read +the data. It can also be used to generate the _metadata index file used by other tools such as +dask or spark to create an index of the dataset. + +Configuring format-specific parameters during a write +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +In addition to the common options shared by all formats there are also format specific options +that are unique to a particular format. For example, to allow truncated timestamps while writing +Parquet files: + +.. ipython:: python + + dataset_root = base / "sample_dataset2" + dataset_root.mkdir(exist_ok=True) + + parquet_format = ds.ParquetFileFormat() + write_options = parquet_format.make_write_options(allow_truncated_timestamps=True) + ds.write_dataset(table, dataset_root, format="parquet", partitioning=part, + file_options=write_options) diff --git a/src/arrow/docs/source/python/extending.rst b/src/arrow/docs/source/python/extending.rst new file mode 100644 index 000000000..5e00e7905 --- /dev/null +++ b/src/arrow/docs/source/python/extending.rst @@ -0,0 +1,483 @@ +.. Licensed to the Apache Software Foundation (ASF) under one +.. or more contributor license agreements. See the NOTICE file +.. distributed with this work for additional information +.. regarding copyright ownership. 
The ASF licenses this file +.. to you under the Apache License, Version 2.0 (the +.. "License"); you may not use this file except in compliance +.. with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, +.. software distributed under the License is distributed on an +.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +.. KIND, either express or implied. See the License for the +.. specific language governing permissions and limitations +.. under the License. + +.. currentmodule:: pyarrow +.. cpp:namespace:: arrow + +.. _extending: + +Using pyarrow from C++ and Cython Code +====================================== + +pyarrow provides both a Cython and C++ API, allowing your own native code +to interact with pyarrow objects. + +C++ API +------- + +.. default-domain:: cpp + +The Arrow C++ header files are bundled with a pyarrow installation. +To get the absolute path to this directory (like ``numpy.get_include()``), use: + +.. code-block:: python + + import pyarrow as pa + pa.get_include() + +Assuming the path above is on your compiler's include path, the pyarrow API +can be included using the following directive: + +.. code-block:: cpp + + #include <arrow/python/pyarrow.h> + +This will not include other parts of the Arrow API, which you will need +to include yourself (for example ``arrow/api.h``). + +When building C extensions that use the Arrow C++ libraries, you must add +appropriate linker flags. We have provided functions ``pyarrow.get_libraries`` +and ``pyarrow.get_library_dirs`` which return a list of library names and +likely library install locations (if you installed pyarrow with pip or +conda). These must be included when declaring your C extensions with +setuptools (see below). + +Initializing the API +~~~~~~~~~~~~~~~~~~~~ + +.. function:: int import_pyarrow() + + Initialize inner pointers of the pyarrow API. On success, 0 is + returned. Otherwise, -1 is returned and a Python exception is set. + + It is mandatory to call this function before calling any other function + in the pyarrow C++ API. Failing to do so will likely lead to crashes. + +Wrapping and Unwrapping +~~~~~~~~~~~~~~~~~~~~~~~ + +pyarrow provides the following functions to go back and forth between +Python wrappers (as exposed by the pyarrow Python API) and the underlying +C++ objects. + +.. function:: bool arrow::py::is_array(PyObject* obj) + + Return whether *obj* wraps an Arrow C++ :class:`Array` pointer; + in other words, whether *obj* is a :py:class:`pyarrow.Array` instance. + +.. function:: bool arrow::py::is_batch(PyObject* obj) + + Return whether *obj* wraps an Arrow C++ :class:`RecordBatch` pointer; + in other words, whether *obj* is a :py:class:`pyarrow.RecordBatch` instance. + +.. function:: bool arrow::py::is_buffer(PyObject* obj) + + Return whether *obj* wraps an Arrow C++ :class:`Buffer` pointer; + in other words, whether *obj* is a :py:class:`pyarrow.Buffer` instance. + +.. function:: bool arrow::py::is_data_type(PyObject* obj) + + Return whether *obj* wraps an Arrow C++ :class:`DataType` pointer; + in other words, whether *obj* is a :py:class:`pyarrow.DataType` instance. + +.. function:: bool arrow::py::is_field(PyObject* obj) + + Return whether *obj* wraps an Arrow C++ :class:`Field` pointer; + in other words, whether *obj* is a :py:class:`pyarrow.Field` instance. + +.. 
function:: bool arrow::py::is_scalar(PyObject* obj) + + Return whether *obj* wraps an Arrow C++ :class:`Scalar` pointer; + in other words, whether *obj* is a :py:class:`pyarrow.Scalar` instance. + +.. function:: bool arrow::py::is_schema(PyObject* obj) + + Return whether *obj* wraps an Arrow C++ :class:`Schema` pointer; + in other words, whether *obj* is a :py:class:`pyarrow.Schema` instance. + +.. function:: bool arrow::py::is_table(PyObject* obj) + + Return whether *obj* wraps an Arrow C++ :class:`Table` pointer; + in other words, whether *obj* is a :py:class:`pyarrow.Table` instance. + +.. function:: bool arrow::py::is_tensor(PyObject* obj) + + Return whether *obj* wraps an Arrow C++ :class:`Tensor` pointer; + in other words, whether *obj* is a :py:class:`pyarrow.Tensor` instance. + +.. function:: bool arrow::py::is_sparse_coo_tensor(PyObject* obj) + + Return whether *obj* wraps an Arrow C++ :type:`SparseCOOTensor` pointer; + in other words, whether *obj* is a :py:class:`pyarrow.SparseCOOTensor` instance. + +.. function:: bool arrow::py::is_sparse_csc_matrix(PyObject* obj) + + Return whether *obj* wraps an Arrow C++ :type:`SparseCSCMatrix` pointer; + in other words, whether *obj* is a :py:class:`pyarrow.SparseCSCMatrix` instance. + +.. function:: bool arrow::py::is_sparse_csf_tensor(PyObject* obj) + + Return whether *obj* wraps an Arrow C++ :type:`SparseCSFTensor` pointer; + in other words, whether *obj* is a :py:class:`pyarrow.SparseCSFTensor` instance. + +.. function:: bool arrow::py::is_sparse_csr_matrix(PyObject* obj) + + Return whether *obj* wraps an Arrow C++ :type:`SparseCSRMatrix` pointer; + in other words, whether *obj* is a :py:class:`pyarrow.SparseCSRMatrix` instance. + + +The following functions expect a pyarrow object, unwrap the underlying +Arrow C++ API pointer, and return it as a :class:`Result` object. An error +may be returned if the input object doesn't have the expected type. + +.. function:: Result<std::shared_ptr<Array>> arrow::py::unwrap_array(PyObject* obj) + + Unwrap and return the Arrow C++ :class:`Array` pointer from *obj*. + +.. function:: Result<std::shared_ptr<RecordBatch>> arrow::py::unwrap_batch(PyObject* obj) + + Unwrap and return the Arrow C++ :class:`RecordBatch` pointer from *obj*. + +.. function:: Result<std::shared_ptr<Buffer>> arrow::py::unwrap_buffer(PyObject* obj) + + Unwrap and return the Arrow C++ :class:`Buffer` pointer from *obj*. + +.. function:: Result<std::shared_ptr<DataType>> arrow::py::unwrap_data_type(PyObject* obj) + + Unwrap and return the Arrow C++ :class:`DataType` pointer from *obj*. + +.. function:: Result<std::shared_ptr<Field>> arrow::py::unwrap_field(PyObject* obj) + + Unwrap and return the Arrow C++ :class:`Field` pointer from *obj*. + +.. function:: Result<std::shared_ptr<Scalar>> arrow::py::unwrap_scalar(PyObject* obj) + + Unwrap and return the Arrow C++ :class:`Scalar` pointer from *obj*. + +.. function:: Result<std::shared_ptr<Schema>> arrow::py::unwrap_schema(PyObject* obj) + + Unwrap and return the Arrow C++ :class:`Schema` pointer from *obj*. + +.. function:: Result<std::shared_ptr<Table>> arrow::py::unwrap_table(PyObject* obj) + + Unwrap and return the Arrow C++ :class:`Table` pointer from *obj*. + +.. function:: Result<std::shared_ptr<Tensor>> arrow::py::unwrap_tensor(PyObject* obj) + + Unwrap and return the Arrow C++ :class:`Tensor` pointer from *obj*. + +.. 
function:: Result<std::shared_ptr<SparseCOOTensor>> arrow::py::unwrap_sparse_coo_tensor(PyObject* obj) + + Unwrap and return the Arrow C++ :type:`SparseCOOTensor` pointer from *obj*. + +.. function:: Result<std::shared_ptr<SparseCSCMatrix>> arrow::py::unwrap_sparse_csc_matrix(PyObject* obj) + + Unwrap and return the Arrow C++ :type:`SparseCSCMatrix` pointer from *obj*. + +.. function:: Result<std::shared_ptr<SparseCSFTensor>> arrow::py::unwrap_sparse_csf_tensor(PyObject* obj) + + Unwrap and return the Arrow C++ :type:`SparseCSFTensor` pointer from *obj*. + +.. function:: Result<std::shared_ptr<SparseCSRMatrix>> arrow::py::unwrap_sparse_csr_matrix(PyObject* obj) + + Unwrap and return the Arrow C++ :type:`SparseCSRMatrix` pointer from *obj*. + + +The following functions take an Arrow C++ API pointer and wrap it in a +pyarray object of the corresponding type. A new reference is returned. +On error, NULL is returned and a Python exception is set. + +.. function:: PyObject* arrow::py::wrap_array(const std::shared_ptr<Array>& array) + + Wrap the Arrow C++ *array* in a :py:class:`pyarrow.Array` instance. + +.. function:: PyObject* arrow::py::wrap_batch(const std::shared_ptr<RecordBatch>& batch) + + Wrap the Arrow C++ record *batch* in a :py:class:`pyarrow.RecordBatch` instance. + +.. function:: PyObject* arrow::py::wrap_buffer(const std::shared_ptr<Buffer>& buffer) + + Wrap the Arrow C++ *buffer* in a :py:class:`pyarrow.Buffer` instance. + +.. function:: PyObject* arrow::py::wrap_data_type(const std::shared_ptr<DataType>& data_type) + + Wrap the Arrow C++ *data_type* in a :py:class:`pyarrow.DataType` instance. + +.. function:: PyObject* arrow::py::wrap_field(const std::shared_ptr<Field>& field) + + Wrap the Arrow C++ *field* in a :py:class:`pyarrow.Field` instance. + +.. function:: PyObject* arrow::py::wrap_scalar(const std::shared_ptr<Scalar>& scalar) + + Wrap the Arrow C++ *scalar* in a :py:class:`pyarrow.Scalar` instance. + +.. function:: PyObject* arrow::py::wrap_schema(const std::shared_ptr<Schema>& schema) + + Wrap the Arrow C++ *schema* in a :py:class:`pyarrow.Schema` instance. + +.. function:: PyObject* arrow::py::wrap_table(const std::shared_ptr<Table>& table) + + Wrap the Arrow C++ *table* in a :py:class:`pyarrow.Table` instance. + +.. function:: PyObject* arrow::py::wrap_tensor(const std::shared_ptr<Tensor>& tensor) + + Wrap the Arrow C++ *tensor* in a :py:class:`pyarrow.Tensor` instance. + +.. function:: PyObject* arrow::py::wrap_sparse_coo_tensor(const std::shared_ptr<SparseCOOTensor>& sparse_tensor) + + Wrap the Arrow C++ *sparse_tensor* in a :py:class:`pyarrow.SparseCOOTensor` instance. + +.. function:: PyObject* arrow::py::wrap_sparse_csc_matrix(const std::shared_ptr<SparseCSCMatrix>& sparse_tensor) + + Wrap the Arrow C++ *sparse_tensor* in a :py:class:`pyarrow.SparseCSCMatrix` instance. + +.. function:: PyObject* arrow::py::wrap_sparse_csf_tensor(const std::shared_ptr<SparseCSFTensor>& sparse_tensor) + + Wrap the Arrow C++ *sparse_tensor* in a :py:class:`pyarrow.SparseCSFTensor` instance. + +.. function:: PyObject* arrow::py::wrap_sparse_csr_matrix(const std::shared_ptr<SparseCSRMatrix>& sparse_tensor) + + Wrap the Arrow C++ *sparse_tensor* in a :py:class:`pyarrow.SparseCSRMatrix` instance. + + +Cython API +---------- + +.. default-domain:: py + +The Cython API more or less mirrors the C++ API, but the calling convention +can be different as required by Cython. In Cython, you don't need to +initialize the API as that will be handled automatically by the ``cimport`` +directive. 
+ +.. note:: + Classes from the Arrow C++ API are renamed when exposed in Cython, to + avoid named clashes with the corresponding Python classes. For example, + C++ Arrow arrays have the ``CArray`` type and ``Array`` is the + corresponding Python wrapper class. + +Wrapping and Unwrapping +~~~~~~~~~~~~~~~~~~~~~~~ + +The following functions expect a pyarrow object, unwrap the underlying +Arrow C++ API pointer, and return it. NULL is returned (without setting +an exception) if the input is not of the right type. + +.. function:: pyarrow_unwrap_array(obj) -> shared_ptr[CArray] + + Unwrap the Arrow C++ :cpp:class:`Array` pointer from *obj*. + +.. function:: pyarrow_unwrap_batch(obj) -> shared_ptr[CRecordBatch] + + Unwrap the Arrow C++ :cpp:class:`RecordBatch` pointer from *obj*. + +.. function:: pyarrow_unwrap_buffer(obj) -> shared_ptr[CBuffer] + + Unwrap the Arrow C++ :cpp:class:`Buffer` pointer from *obj*. + +.. function:: pyarrow_unwrap_data_type(obj) -> shared_ptr[CDataType] + + Unwrap the Arrow C++ :cpp:class:`CDataType` pointer from *obj*. + +.. function:: pyarrow_unwrap_field(obj) -> shared_ptr[CField] + + Unwrap the Arrow C++ :cpp:class:`Field` pointer from *obj*. + +.. function:: pyarrow_unwrap_scalar(obj) -> shared_ptr[CScalar] + + Unwrap the Arrow C++ :cpp:class:`Scalar` pointer from *obj*. + +.. function:: pyarrow_unwrap_schema(obj) -> shared_ptr[CSchema] + + Unwrap the Arrow C++ :cpp:class:`Schema` pointer from *obj*. + +.. function:: pyarrow_unwrap_table(obj) -> shared_ptr[CTable] + + Unwrap the Arrow C++ :cpp:class:`Table` pointer from *obj*. + +.. function:: pyarrow_unwrap_tensor(obj) -> shared_ptr[CTensor] + + Unwrap the Arrow C++ :cpp:class:`Tensor` pointer from *obj*. + +.. function:: pyarrow_unwrap_sparse_coo_tensor(obj) -> shared_ptr[CSparseCOOTensor] + + Unwrap the Arrow C++ :cpp:type:`SparseCOOTensor` pointer from *obj*. + +.. function:: pyarrow_unwrap_sparse_csc_matrix(obj) -> shared_ptr[CSparseCSCMatrix] + + Unwrap the Arrow C++ :cpp:type:`SparseCSCMatrix` pointer from *obj*. + +.. function:: pyarrow_unwrap_sparse_csf_tensor(obj) -> shared_ptr[CSparseCSFTensor] + + Unwrap the Arrow C++ :cpp:type:`SparseCSFTensor` pointer from *obj*. + +.. function:: pyarrow_unwrap_sparse_csr_matrix(obj) -> shared_ptr[CSparseCSRMatrix] + + Unwrap the Arrow C++ :cpp:type:`SparseCSRMatrix` pointer from *obj*. + + +The following functions take a Arrow C++ API pointer and wrap it in a +pyarray object of the corresponding type. An exception is raised on error. + +.. function:: pyarrow_wrap_array(const shared_ptr[CArray]& array) -> object + + Wrap the Arrow C++ *array* in a Python :class:`pyarrow.Array` instance. + +.. function:: pyarrow_wrap_batch(const shared_ptr[CRecordBatch]& batch) -> object + + Wrap the Arrow C++ record *batch* in a Python :class:`pyarrow.RecordBatch` instance. + +.. function:: pyarrow_wrap_buffer(const shared_ptr[CBuffer]& buffer) -> object + + Wrap the Arrow C++ *buffer* in a Python :class:`pyarrow.Buffer` instance. + +.. function:: pyarrow_wrap_data_type(const shared_ptr[CDataType]& data_type) -> object + + Wrap the Arrow C++ *data_type* in a Python :class:`pyarrow.DataType` instance. + +.. function:: pyarrow_wrap_field(const shared_ptr[CField]& field) -> object + + Wrap the Arrow C++ *field* in a Python :class:`pyarrow.Field` instance. + +.. function:: pyarrow_wrap_resizable_buffer(const shared_ptr[CResizableBuffer]& buffer) -> object + + Wrap the Arrow C++ resizable *buffer* in a Python :class:`pyarrow.ResizableBuffer` instance. + +.. 
function:: pyarrow_wrap_scalar(const shared_ptr[CScalar]& scalar) -> object + + Wrap the Arrow C++ *scalar* in a Python :class:`pyarrow.Scalar` instance. + +.. function:: pyarrow_wrap_schema(const shared_ptr[CSchema]& schema) -> object + + Wrap the Arrow C++ *schema* in a Python :class:`pyarrow.Schema` instance. + +.. function:: pyarrow_wrap_table(const shared_ptr[CTable]& table) -> object + + Wrap the Arrow C++ *table* in a Python :class:`pyarrow.Table` instance. + +.. function:: pyarrow_wrap_tensor(const shared_ptr[CTensor]& tensor) -> object + + Wrap the Arrow C++ *tensor* in a Python :class:`pyarrow.Tensor` instance. + +.. function:: pyarrow_wrap_sparse_coo_tensor(const shared_ptr[CSparseCOOTensor]& sparse_tensor) -> object + + Wrap the Arrow C++ *COO sparse tensor* in a Python :class:`pyarrow.SparseCOOTensor` instance. + +.. function:: pyarrow_wrap_sparse_csc_matrix(const shared_ptr[CSparseCSCMatrix]& sparse_tensor) -> object + + Wrap the Arrow C++ *CSC sparse tensor* in a Python :class:`pyarrow.SparseCSCMatrix` instance. + +.. function:: pyarrow_wrap_sparse_csf_tensor(const shared_ptr[CSparseCSFTensor]& sparse_tensor) -> object + + Wrap the Arrow C++ *COO sparse tensor* in a Python :class:`pyarrow.SparseCSFTensor` instance. + +.. function:: pyarrow_wrap_sparse_csr_matrix(const shared_ptr[CSparseCSRMatrix]& sparse_tensor) -> object + + Wrap the Arrow C++ *CSR sparse tensor* in a Python :class:`pyarrow.SparseCSRMatrix` instance. + + +Example +~~~~~~~ + +The following Cython module shows how to unwrap a Python object and call +the underlying C++ object's API. + +.. code-block:: python + + # distutils: language=c++ + + from pyarrow.lib cimport * + + + def get_array_length(obj): + # Just an example function accessing both the pyarrow Cython API + # and the Arrow C++ API + cdef shared_ptr[CArray] arr = pyarrow_unwrap_array(obj) + if arr.get() == NULL: + raise TypeError("not an array") + return arr.get().length() + +To build this module, you will need a slightly customized ``setup.py`` file +(this is assuming the file above is named ``example.pyx``): + +.. code-block:: python + + from setuptools import setup + from Cython.Build import cythonize + + import os + import numpy as np + import pyarrow as pa + + + ext_modules = cythonize("example.pyx") + + for ext in ext_modules: + # The Numpy C headers are currently required + ext.include_dirs.append(np.get_include()) + ext.include_dirs.append(pa.get_include()) + ext.libraries.extend(pa.get_libraries()) + ext.library_dirs.extend(pa.get_library_dirs()) + + if os.name == 'posix': + ext.extra_compile_args.append('-std=c++11') + + # Try uncommenting the following line on Linux + # if you get weird linker errors or runtime crashes + # ext.define_macros.append(("_GLIBCXX_USE_CXX11_ABI", "0")) + + + setup(ext_modules=ext_modules) + + +Compile the extension: + +.. code-block:: bash + + python setup.py build_ext --inplace + +Building Extensions against PyPI Wheels +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The Python wheels have the Arrow C++ libraries bundled in the top level +``pyarrow/`` install directory. On Linux and macOS, these libraries have an ABI +tag like ``libarrow.so.17`` which means that linking with ``-larrow`` using the +linker path provided by ``pyarrow.get_library_dirs()`` will not work right out +of the box. To fix this, you must run ``pyarrow.create_library_symlinks()`` +once as a user with write access to the directory where pyarrow is +installed. This function will attempt to create symlinks like +``pyarrow/libarrow.so``. 
For example: + +.. code-block:: bash + + pip install pyarrow + python -c "import pyarrow; pyarrow.create_library_symlinks()" + +Toolchain Compatibility (Linux) +""""""""""""""""""""""""""""""" + +The Python wheels for Linux are built using the +`PyPA manylinux images <https://quay.io/organization/pypa>`_ which use +the CentOS `devtoolset-8` or `devtoolset-9` depending on which manylinux +wheel version (2010 or 2014) is being used. In addition to the other notes +above, if you are compiling C++ using these shared libraries, you will need +to make sure you use a compatible toolchain as well or you might see a +segfault during runtime. + +Also, if you encounter errors when linking or loading the library, consider +setting the ``_GLIBCXX_USE_CXX11_ABI`` preprocessor macro to ``0`` +(for example by adding ``-D_GLIBCXX_USE_CXX11_ABI=0`` to ``CFLAGS``). diff --git a/src/arrow/docs/source/python/extending_types.rst b/src/arrow/docs/source/python/extending_types.rst new file mode 100644 index 000000000..689724a4a --- /dev/null +++ b/src/arrow/docs/source/python/extending_types.rst @@ -0,0 +1,324 @@ +.. Licensed to the Apache Software Foundation (ASF) under one +.. or more contributor license agreements. See the NOTICE file +.. distributed with this work for additional information +.. regarding copyright ownership. The ASF licenses this file +.. to you under the Apache License, Version 2.0 (the +.. "License"); you may not use this file except in compliance +.. with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, +.. software distributed under the License is distributed on an +.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +.. KIND, either express or implied. See the License for the +.. specific language governing permissions and limitations +.. under the License. + +.. currentmodule:: pyarrow +.. _extending_types: + +Extending pyarrow +================= + +Controlling conversion to pyarrow.Array with the ``__arrow_array__`` protocol +----------------------------------------------------------------------------- + +The :func:`pyarrow.array` function has built-in support for Python sequences, +numpy arrays and pandas 1D objects (Series, Index, Categorical, ..) to convert +those to Arrow arrays. This can be extended for other array-like objects +by implementing the ``__arrow_array__`` method (similar to numpy's ``__array__`` +protocol). + +For example, to support conversion of your duck array class to an Arrow array, +define the ``__arrow_array__`` method to return an Arrow array:: + + class MyDuckArray: + + ... + + def __arrow_array__(self, type=None): + # convert the underlying array values to a pyarrow Array + import pyarrow + return pyarrow.array(..., type=type) + +The ``__arrow_array__`` method takes an optional `type` keyword which is passed +through from :func:`pyarrow.array`. The method is allowed to return either +a :class:`~pyarrow.Array` or a :class:`~pyarrow.ChunkedArray`. + + +Defining extension types ("user-defined types") +----------------------------------------------- + +Arrow has the notion of extension types in the metadata specification as a +possibility to extend the built-in types. This is done by annotating any of the +built-in Arrow logical types (the "storage type") with a custom type name and +optional serialized representation ("ARROW:extension:name" and +"ARROW:extension:metadata" keys in the Field’s custom_metadata of an IPC +message). 
+See the :ref:`format_metadata_extension_types` section of the metadata +specification for more details. + +Pyarrow allows you to define such extension types from Python. + +There are currently two ways: + +* Subclassing :class:`PyExtensionType`: the (de)serialization is based on pickle. + This is a good option for an extension type that is only used from Python. +* Subclassing :class:`ExtensionType`: this allows to give a custom + Python-independent name and serialized metadata, that can potentially be + recognized by other (non-Python) Arrow implementations such as PySpark. + +For example, we could define a custom UUID type for 128-bit numbers which can +be represented as ``FixedSizeBinary`` type with 16 bytes. +Using the first approach, we create a ``UuidType`` subclass, and implement the +``__reduce__`` method to ensure the class can be properly pickled:: + + class UuidType(pa.PyExtensionType): + + def __init__(self): + pa.PyExtensionType.__init__(self, pa.binary(16)) + + def __reduce__(self): + return UuidType, () + +This can now be used to create arrays and tables holding the extension type:: + + >>> uuid_type = UuidType() + >>> uuid_type.extension_name + 'arrow.py_extension_type' + >>> uuid_type.storage_type + FixedSizeBinaryType(fixed_size_binary[16]) + + >>> import uuid + >>> storage_array = pa.array([uuid.uuid4().bytes for _ in range(4)], pa.binary(16)) + >>> arr = pa.ExtensionArray.from_storage(uuid_type, storage_array) + >>> arr + <pyarrow.lib.ExtensionArray object at 0x7f75c2f300a0> + [ + A6861959108644B797664AEEE686B682, + 718747F48E5F4058A7261E2B6B228BE8, + 7FE201227D624D96A5CD8639DEF2A68B, + C6CA8C7F95744BFD9462A40B3F57A86C + ] + +This array can be included in RecordBatches, sent over IPC and received in +another Python process. The custom UUID type will be preserved there, as long +as the definition of the class is available (the type can be unpickled). + +For example, creating a RecordBatch and writing it to a stream using the +IPC protocol:: + + >>> batch = pa.RecordBatch.from_arrays([arr], ["ext"]) + >>> sink = pa.BufferOutputStream() + >>> with pa.RecordBatchStreamWriter(sink, batch.schema) as writer: + ... writer.write_batch(batch) + >>> buf = sink.getvalue() + +and then reading it back yields the proper type:: + + >>> with pa.ipc.open_stream(buf) as reader: + ... result = reader.read_all() + >>> result.column('ext').type + UuidType(extension<arrow.py_extension_type>) + +We can define the same type using the other option:: + + class UuidType(pa.ExtensionType): + + def __init__(self): + pa.ExtensionType.__init__(self, pa.binary(16), "my_package.uuid") + + def __arrow_ext_serialize__(self): + # since we don't have a parameterized type, we don't need extra + # metadata to be deserialized + return b'' + + @classmethod + def __arrow_ext_deserialize__(self, storage_type, serialized): + # return an instance of this subclass given the serialized + # metadata. + return UuidType() + +This is a slightly longer implementation (you need to implement the special +methods ``__arrow_ext_serialize__`` and ``__arrow_ext_deserialize__``), and the +extension type needs to be registered to be received through IPC (using +:func:`register_extension_type`), but it has +now a unique name:: + + >>> uuid_type = UuidType() + >>> uuid_type.extension_name + 'my_package.uuid' + + >>> pa.register_extension_type(uuid_type) + +The receiving application doesn't need to be Python but can still recognize +the extension type as a "uuid" type, if it has implemented its own extension +type to receive it. 
+If the type is not registered in the receiving application, it will fall back +to the storage type. + +Parameterized extension type +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The above example used a fixed storage type with no further metadata. But +more flexible, parameterized extension types are also possible. + +The example given here implements an extension type for the `pandas "period" +data type <https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#time-span-representation>`__, +representing time spans (e.g., a frequency of a day, a month, a quarter, etc). +It is stored as an int64 array which is interpreted as the number of time spans +of the given frequency since 1970. + +:: + + class PeriodType(pa.ExtensionType): + + def __init__(self, freq): + # attributes need to be set first before calling + # super init (as that calls serialize) + self._freq = freq + pa.ExtensionType.__init__(self, pa.int64(), 'my_package.period') + + @property + def freq(self): + return self._freq + + def __arrow_ext_serialize__(self): + return "freq={}".format(self.freq).encode() + + @classmethod + def __arrow_ext_deserialize__(cls, storage_type, serialized): + # return an instance of this subclass given the serialized + # metadata. + serialized = serialized.decode() + assert serialized.startswith("freq=") + freq = serialized.split('=')[1] + return PeriodType(freq) + +Here, we ensure to store all information in the serialized metadata that is +needed to reconstruct the instance (in the ``__arrow_ext_deserialize__`` class +method), in this case the frequency string. + +Note that, once created, the data type instance is considered immutable. If, +in the example above, the ``freq`` parameter would change after instantiation, +the reconstruction of the type instance after IPC will be incorrect. +In the example above, the ``freq`` parameter is therefore stored in a private +attribute with a public read-only property to access it. + +Parameterized extension types are also possible using the pickle-based type +subclassing :class:`PyExtensionType`. The equivalent example for the period +data type from above would look like:: + + class PeriodType(pa.PyExtensionType): + + def __init__(self, freq): + self._freq = freq + pa.PyExtensionType.__init__(self, pa.int64()) + + @property + def freq(self): + return self._freq + + def __reduce__(self): + return PeriodType, (self.freq,) + +Also the storage type does not need to be fixed but can be parameterized. + +Custom extension array class +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +By default, all arrays with an extension type are constructed or deserialized into +a built-in :class:`ExtensionArray` object. Nevertheless, one could want to subclass +:class:`ExtensionArray` in order to add some custom logic specific to the extension +type. Arrow allows to do so by adding a special method ``__arrow_ext_class__`` to the +definition of the extension type. + +For instance, let us consider the example from the `Numpy Quickstart <https://docs.scipy.org/doc/numpy-1.13.0/user/quickstart.html>`_ of points in 3D space. 
+We can store these as a fixed-size list, where we wish to be able to extract +the data as a 2-D Numpy array ``(N, 3)`` without any copy:: + + class Point3DArray(pa.ExtensionArray): + def to_numpy_array(self): + return self.storage.flatten().to_numpy().reshape((-1, 3)) + + + class Point3DType(pa.PyExtensionType): + def __init__(self): + pa.PyExtensionType.__init__(self, pa.list_(pa.float32(), 3)) + + def __reduce__(self): + return Point3DType, () + + def __arrow_ext_class__(self): + return Point3DArray + +Arrays built using this extension type now have the expected custom array class:: + + >>> storage = pa.array([[1, 2, 3], [4, 5, 6]], pa.list_(pa.float32(), 3)) + >>> arr = pa.ExtensionArray.from_storage(Point3DType(), storage) + >>> arr + <__main__.Point3DArray object at 0x7f40dea80670> + [ + [ + 1, + 2, + 3 + ], + [ + 4, + 5, + 6 + ] + ] + +The additional methods in the extension class are then available to the user:: + + >>> arr.to_numpy_array() + array([[1., 2., 3.], + [4., 5., 6.]], dtype=float32) + + +This array can be sent over IPC, received in another Python process, and the custom +extension array class will be preserved (as long as the definitions of the classes above +are available). + +The same ``__arrow_ext_class__`` specialization can be used with custom types defined +by subclassing :class:`ExtensionType`. + + +Conversion to pandas +~~~~~~~~~~~~~~~~~~~~ + +The conversion to pandas (in :meth:`Table.to_pandas`) of columns with an +extension type can controlled in case there is a corresponding +`pandas extension array <https://pandas.pydata.org/pandas-docs/stable/development/extending.html#extension-types>`__ +for your extension type. + +For this, the :meth:`ExtensionType.to_pandas_dtype` method needs to be +implemented, and should return a ``pandas.api.extensions.ExtensionDtype`` +subclass instance. + +Using the pandas period type from above as example, this would look like:: + + class PeriodType(pa.ExtensionType): + ... + + def to_pandas_dtype(self): + import pandas as pd + return pd.PeriodDtype(freq=self.freq) + +Secondly, the pandas ``ExtensionDtype`` on its turn needs to have the +``__from_arrow__`` method implemented: a method that given a pyarrow Array +or ChunkedArray of the extension type can construct the corresponding +pandas ``ExtensionArray``. This method should have the following signature:: + + + class MyExtensionDtype(pd.api.extensions.ExtensionDtype): + ... + + def __from_arrow__(self, array: pyarrow.Array/ChunkedArray) -> pandas.ExtensionArray: + ... + +This way, you can control the conversion of a pyarrow ``Array`` of your pyarrow +extension type to a pandas ``ExtensionArray`` that can be stored in a DataFrame. diff --git a/src/arrow/docs/source/python/feather.rst b/src/arrow/docs/source/python/feather.rst new file mode 100644 index 000000000..026ea987a --- /dev/null +++ b/src/arrow/docs/source/python/feather.rst @@ -0,0 +1,109 @@ +.. Licensed to the Apache Software Foundation (ASF) under one +.. or more contributor license agreements. See the NOTICE file +.. distributed with this work for additional information +.. regarding copyright ownership. The ASF licenses this file +.. to you under the Apache License, Version 2.0 (the +.. "License"); you may not use this file except in compliance +.. with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, +.. software distributed under the License is distributed on an +.. 
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +.. KIND, either express or implied. See the License for the +.. specific language governing permissions and limitations +.. under the License. + +.. currentmodule:: pyarrow + +.. _feather: + +Feather File Format +=================== + +Feather is a portable file format for storing Arrow tables or data frames (from +languages like Python or R) that utilizes the :ref:`Arrow IPC format <ipc>` +internally. Feather was created early in the Arrow project as a proof of +concept for fast, language-agnostic data frame storage for Python (pandas) and +R. There are two file format versions for Feather: + +* Version 2 (V2), the default version, which is exactly represented as the + Arrow IPC file format on disk. V2 files support storing all Arrow data types + as well as compression with LZ4 or ZSTD. V2 was first made available in + Apache Arrow 0.17.0. +* Version 1 (V1), a legacy version available starting in 2016, replaced by + V2. V1 files are distinct from Arrow IPC files and lack many features, such + as the ability to store all Arrow data types. V1 files also lack compression + support. We intend to maintain read support for V1 for the foreseeable + future. + +The ``pyarrow.feather`` module contains the read and write functions for the +format. :func:`~pyarrow.feather.write_feather` accepts either a +:class:`~pyarrow.Table` or ``pandas.DataFrame`` object: + +.. code-block:: python + + import pyarrow.feather as feather + feather.write_feather(df, '/path/to/file') + +:func:`~pyarrow.feather.read_feather` reads a Feather file as a +``pandas.DataFrame``. :func:`~pyarrow.feather.read_table` reads a Feather file +as a :class:`~pyarrow.Table`. Internally, :func:`~pyarrow.feather.read_feather` +simply calls :func:`~pyarrow.feather.read_table` and the result is converted to +pandas: + +.. code-block:: python + + # Result is pandas.DataFrame + read_df = feather.read_feather('/path/to/file') + + # Result is pyarrow.Table + read_arrow = feather.read_table('/path/to/file') + +These functions can read and write with file-paths or file-like objects. For +example: + +.. code-block:: python + + with open('/path/to/file', 'wb') as f: + feather.write_feather(df, f) + + with open('/path/to/file', 'rb') as f: + read_df = feather.read_feather(f) + +A file input to ``read_feather`` must support seeking. + +Using Compression +----------------- + +As of Apache Arrow version 0.17.0, Feather V2 files (the default version) +support two fast compression libraries, LZ4 (using the frame format) and +ZSTD. LZ4 is used by default if it is available (which it should be if you +obtained pyarrow through a normal package manager): + +.. code-block:: python + + # Uses LZ4 by default + feather.write_feather(df, file_path) + + # Use LZ4 explicitly + feather.write_feather(df, file_path, compression='lz4') + + # Use ZSTD + feather.write_feather(df, file_path, compression='zstd') + + # Do not compress + feather.write_feather(df, file_path, compression='uncompressed') + +Note that the default LZ4 compression generally yields much smaller files +without sacrificing much read or write performance. In some instances, +LZ4-compressed files may be faster to read and write than uncompressed due to +reduced disk IO requirements. + +Writing Version 1 (V1) Files +---------------------------- + +For compatibility with libraries without support for Version 2 files, you can +write the version 1 format by passing ``version=1`` to ``write_feather``. 
We +intend to maintain read support for V1 for the foreseeable future. diff --git a/src/arrow/docs/source/python/filesystems.rst b/src/arrow/docs/source/python/filesystems.rst new file mode 100644 index 000000000..1ddb4dfa2 --- /dev/null +++ b/src/arrow/docs/source/python/filesystems.rst @@ -0,0 +1,305 @@ +.. Licensed to the Apache Software Foundation (ASF) under one +.. or more contributor license agreements. See the NOTICE file +.. distributed with this work for additional information +.. regarding copyright ownership. The ASF licenses this file +.. to you under the Apache License, Version 2.0 (the +.. "License"); you may not use this file except in compliance +.. with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, +.. software distributed under the License is distributed on an +.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +.. KIND, either express or implied. See the License for the +.. specific language governing permissions and limitations +.. under the License. + +.. _filesystem: + +.. currentmodule:: pyarrow.fs + +Filesystem Interface +==================== + +PyArrow comes with an abstract filesystem interface, as well as concrete +implementations for various storage types. + +The filesystem interface provides input and output streams as well as +directory operations. A simplified view of the underlying data +storage is exposed. Data paths are represented as *abstract paths*, which +are ``/``-separated, even on Windows, and shouldn't include special path +components such as ``.`` and ``..``. Symbolic links, if supported by the +underlying storage, are automatically dereferenced. Only basic +:class:`metadata <FileInfo>` about file entries, such as the file size +and modification time, is made available. + +The core interface is represented by the base class :class:`FileSystem`. + +Pyarrow implements natively the following filesystem subclasses: + +* :ref:`filesystem-localfs` (:class:`LocalFileSystem`) +* :ref:`filesystem-s3` (:class:`S3FileSystem`) +* :ref:`filesystem-hdfs` (:class:`HadoopFileSystem`) + +It is also possible to use your own fsspec-compliant filesystem with pyarrow functionalities as described in the section :ref:`filesystem-fsspec`. + + +.. _filesystem-usage: + +Usage +----- + +Instantiating a filesystem +~~~~~~~~~~~~~~~~~~~~~~~~~~ + +A FileSystem object can be created with one of the constructors (and check the +respective constructor for its options):: + + >>> from pyarrow import fs + >>> local = fs.LocalFileSystem() + +or alternatively inferred from a URI:: + + >>> s3, path = fs.FileSystem.from_uri("s3://my-bucket") + >>> s3 + <pyarrow._s3fs.S3FileSystem at 0x7f6760cbf4f0> + >>> path + 'my-bucket' + + +Reading and writing files +~~~~~~~~~~~~~~~~~~~~~~~~~ + +Several of the IO-related functions in PyArrow accept either a URI (and infer +the filesystem) or an explicit ``filesystem`` argument to specify the filesystem +to read or write from. For example, the :meth:`pyarrow.parquet.read_table` +function can be used in the following ways:: + + import pyarrow.parquet as pq + + # using a URI -> filesystem is inferred + pq.read_table("s3://my-bucket/data.parquet") + # using a path and filesystem + s3 = fs.S3FileSystem(..) 
+ pq.read_table("my-bucket/data.parquet", filesystem=s3) + +The filesystem interface further allows to open files for reading (input) or +writing (output) directly, which can be combined with functions that work with +file-like objects. For example:: + + import pyarrow as pa + + local = fs.LocalFileSystem() + + with local.open_output_stream("test.arrow") as file: + with pa.RecordBatchFileWriter(file, table.schema) as writer: + writer.write_table(table) + + +Listing files +~~~~~~~~~~~~~ + +Inspecting the directories and files on a filesystem can be done with the +:meth:`FileSystem.get_file_info` method. To list the contents of a directory, +use the :class:`FileSelector` object to specify the selection:: + + >>> local.get_file_info(fs.FileSelector("dataset/", recursive=True)) + [<FileInfo for 'dataset/part=B': type=FileType.Directory>, + <FileInfo for 'dataset/part=B/data0.parquet': type=FileType.File, size=1564>, + <FileInfo for 'dataset/part=A': type=FileType.Directory>, + <FileInfo for 'dataset/part=A/data0.parquet': type=FileType.File, size=1564>] + +This returns a list of :class:`FileInfo` objects, containing information about +the type (file or directory), the size, the date last modified, etc. + +You can also get this information for a single explicit path (or list of +paths):: + + >>> local.get_file_info('test.arrow') + <FileInfo for 'test.arrow': type=FileType.File, size=3250> + + >>> local.get_file_info('non_existent') + <FileInfo for 'non_existent': type=FileType.NotFound> + + +.. _filesystem-localfs: + +Local FS +-------- + +The :class:`LocalFileSystem` allows you to access files on the local machine. + +Example how to write to disk and read it back:: + + >>> from pyarrow import fs + >>> local = fs.LocalFileSystem() + >>> with local.open_output_stream('/tmp/pyarrowtest.dat') as stream: + stream.write(b'data') + 4 + >>> with local.open_input_stream('/tmp/pyarrowtest.dat') as stream: + print(stream.readall()) + b'data' + + +.. _filesystem-s3: + +S3 +-- + +PyArrow implements natively a S3 filesystem for S3 compatible storage. + +The :class:`S3FileSystem` constructor has several options to configure the S3 +connection (e.g. credentials, the region, an endpoint override, etc). In +addition, the constructor will also inspect configured S3 credentials as +supported by AWS (for example the ``AWS_ACCESS_KEY_ID`` and +``AWS_SECRET_ACCESS_KEY`` environment variables). + +Example how you can read contents from a S3 bucket:: + + >>> from pyarrow import fs + >>> s3 = fs.S3FileSystem(region='eu-west-3') + + # List all contents in a bucket, recursively + >>> s3.get_file_info(fs.FileSelector('my-test-bucket', recursive=True)) + [<FileInfo for 'my-test-bucket/File1': type=FileType.File, size=10>, + <FileInfo for 'my-test-bucket/File5': type=FileType.File, size=10>, + <FileInfo for 'my-test-bucket/Dir1': type=FileType.Directory>, + <FileInfo for 'my-test-bucket/Dir2': type=FileType.Directory>, + <FileInfo for 'my-test-bucket/EmptyDir': type=FileType.Directory>, + <FileInfo for 'my-test-bucket/Dir1/File2': type=FileType.File, size=11>, + <FileInfo for 'my-test-bucket/Dir1/Subdir': type=FileType.Directory>, + <FileInfo for 'my-test-bucket/Dir2/Subdir': type=FileType.Directory>, + <FileInfo for 'my-test-bucket/Dir2/Subdir/File3': type=FileType.File, size=10>] + + # Open a file for reading and download its contents + >>> f = s3.open_input_stream('my-test-bucket/Dir1/File2') + >>> f.readall() + b'some data' + +.. 
seealso:: + + See the `AWS docs <https://docs.aws.amazon.com/sdk-for-cpp/v1/developer-guide/credentials.html>`__ + for the different ways to configure the AWS credentials. + + +.. _filesystem-hdfs: + +Hadoop Distributed File System (HDFS) +------------------------------------- + +PyArrow comes with bindings to the Hadoop File System (based on C++ bindings +using ``libhdfs``, a JNI-based interface to the Java Hadoop client). You connect +using the :class:`HadoopFileSystem` constructor: + +.. code-block:: python + + from pyarrow import fs + hdfs = fs.HadoopFileSystem(host, port, user=user, kerb_ticket=ticket_cache_path) + +The ``libhdfs`` library is loaded **at runtime** (rather than at link / library +load time, since the library may not be in your LD_LIBRARY_PATH), and relies on +some environment variables. + +* ``HADOOP_HOME``: the root of your installed Hadoop distribution. Often has + `lib/native/libhdfs.so`. + +* ``JAVA_HOME``: the location of your Java SDK installation. + +* ``ARROW_LIBHDFS_DIR`` (optional): explicit location of ``libhdfs.so`` if it is + installed somewhere other than ``$HADOOP_HOME/lib/native``. + +* ``CLASSPATH``: must contain the Hadoop jars. You can set these using: + + .. code-block:: shell + + export CLASSPATH=`$HADOOP_HOME/bin/hadoop classpath --glob` + # or on Windows + %HADOOP_HOME%/bin/hadoop classpath --glob > %CLASSPATH% + + In contrast to the legacy HDFS filesystem with ``pa.hdfs.connect``, setting + ``CLASSPATH`` is not optional (pyarrow will not attempt to infer it). + +.. _filesystem-fsspec: + +Using fsspec-compatible filesystems with Arrow +---------------------------------------------- + +The filesystems mentioned above are natively supported by Arrow C++ / PyArrow. +The Python ecosystem, however, also has several filesystem packages. Those +packages following the `fsspec`_ interface can be used in PyArrow as well. + +Functions accepting a filesystem object will also accept an fsspec subclass. +For example:: + + # creating an fsspec-based filesystem object for Google Cloud Storage + import gcsfs + fs = gcsfs.GCSFileSystem(project='my-google-project') + + # using this to read a partitioned dataset + import pyarrow.dataset as ds + ds.dataset("data/", filesystem=fs) + +Similarly for Azure Blob Storage:: + + import adlfs + # ... load your credentials and configure the filesystem + fs = adlfs.AzureBlobFileSystem(account_name=account_name, account_key=account_key) + + import pyarrow.dataset as ds + ds.dataset("mycontainer/data/", filesystem=fs) + +Under the hood, the fsspec filesystem object is wrapped into a python-based +PyArrow filesystem (:class:`PyFileSystem`) using :class:`FSSpecHandler`. +You can also manually do this to get an object with the PyArrow FileSystem +interface:: + + from pyarrow.fs import PyFileSystem, FSSpecHandler + pa_fs = PyFileSystem(FSSpecHandler(fs)) + +Then all the functionalities of :class:`FileSystem` are accessible:: + + # write data + with pa_fs.open_output_stream('mycontainer/pyarrowtest.dat') as stream: + stream.write(b'data') + + # read data + with pa_fs.open_input_stream('mycontainer/pyarrowtest.dat') as stream: + print(stream.readall()) + #b'data' + + # read a partitioned dataset + ds.dataset("data/", filesystem=pa_fs) + + +Using Arrow filesystems with fsspec +----------------------------------- + +The Arrow FileSystem interface has a limited, developer-oriented API surface. +This is sufficient for basic interactions and for using this with +Arrow's IO functionality. 
On the other hand, the `fsspec`_ interface provides +a very large API with many helper methods. If you want to use those, or if you +need to interact with a package that expects fsspec-compatible filesystem +objects, you can wrap an Arrow FileSystem object with fsspec. + +Starting with ``fsspec`` version 2021.09, the ``ArrowFSWrapper`` can be used +for this:: + + >>> from pyarrow import fs + >>> local = fs.LocalFileSystem() + >>> from fsspec.implementations.arrow import ArrowFSWrapper + >>> local_fsspec = ArrowFSWrapper(local) + +The resulting object now has an fsspec-compatible interface, while being backed +by the Arrow FileSystem under the hood. +Example usage to create a directory and file, and list the content:: + + >>> local_fsspec.mkdir("./test") + >>> local_fsspec.touch("./test/file.txt") + >>> local_fsspec.ls("./test/") + ['./test/file.txt'] + +For more information, see the `fsspec`_ documentation. + + +.. _fsspec: https://filesystem-spec.readthedocs.io/en/latest/ diff --git a/src/arrow/docs/source/python/filesystems_deprecated.rst b/src/arrow/docs/source/python/filesystems_deprecated.rst new file mode 100644 index 000000000..04887e977 --- /dev/null +++ b/src/arrow/docs/source/python/filesystems_deprecated.rst @@ -0,0 +1,95 @@ +.. Licensed to the Apache Software Foundation (ASF) under one +.. or more contributor license agreements. See the NOTICE file +.. distributed with this work for additional information +.. regarding copyright ownership. The ASF licenses this file +.. to you under the Apache License, Version 2.0 (the +.. "License"); you may not use this file except in compliance +.. with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, +.. software distributed under the License is distributed on an +.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +.. KIND, either express or implied. See the License for the +.. specific language governing permissions and limitations +.. under the License. + +Filesystem Interface (legacy) +============================= + +.. warning:: + This section documents the deprecated filesystem layer. You should + use the :ref:`new filesystem layer <filesystem>` instead. + +.. _hdfs: + +Hadoop File System (HDFS) +------------------------- + +PyArrow comes with bindings to a C++-based interface to the Hadoop File +System. You connect like so: + +.. code-block:: python + + import pyarrow as pa + fs = pa.hdfs.connect(host, port, user=user, kerb_ticket=ticket_cache_path) + with fs.open(path, 'rb') as f: + # Do something with f + +By default, ``pyarrow.hdfs.HadoopFileSystem`` uses libhdfs, a JNI-based +interface to the Java Hadoop client. This library is loaded **at runtime** +(rather than at link / library load time, since the library may not be in your +LD_LIBRARY_PATH), and relies on some environment variables. + +* ``HADOOP_HOME``: the root of your installed Hadoop distribution. Often has + `lib/native/libhdfs.so`. + +* ``JAVA_HOME``: the location of your Java SDK installation. + +* ``ARROW_LIBHDFS_DIR`` (optional): explicit location of ``libhdfs.so`` if it is + installed somewhere other than ``$HADOOP_HOME/lib/native``. + +* ``CLASSPATH``: must contain the Hadoop jars. You can set these using: + +.. code-block:: shell + + export CLASSPATH=`$HADOOP_HOME/bin/hdfs classpath --glob` + +If ``CLASSPATH`` is not set, then it will be set automatically if the +``hadoop`` executable is in your system path, or if ``HADOOP_HOME`` is set. 
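If you prefer to configure these environment variables from Python itself, a
minimal sketch could look like the following (the paths, host and user below
are placeholders for your own installation, and this assumes the ``hadoop``
executable is on your path so that ``CLASSPATH`` is filled in automatically as
described above):

.. code-block:: python

    import os
    import pyarrow as pa

    # Placeholder locations; adjust to your Hadoop / Java installation
    os.environ['HADOOP_HOME'] = '/opt/hadoop'
    os.environ['JAVA_HOME'] = '/usr/lib/jvm/default-java'
    os.environ['ARROW_LIBHDFS_DIR'] = '/opt/hadoop/lib/native'

    # libhdfs is loaded when the first connection is made, so the
    # variables must be set before this call
    fs = pa.hdfs.connect('namenode-host', 8020, user='hadoop-user')
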
+ +You can also use libhdfs3, a thirdparty C++ library for HDFS from Pivotal Labs: + +.. code-block:: python + + fs = pa.hdfs.connect(host, port, user=user, kerb_ticket=ticket_cache_path, + driver='libhdfs3') + +HDFS API +~~~~~~~~ + +.. currentmodule:: pyarrow + +.. autosummary:: + :toctree: generated/ + + hdfs.connect + HadoopFileSystem.cat + HadoopFileSystem.chmod + HadoopFileSystem.chown + HadoopFileSystem.delete + HadoopFileSystem.df + HadoopFileSystem.disk_usage + HadoopFileSystem.download + HadoopFileSystem.exists + HadoopFileSystem.get_capacity + HadoopFileSystem.get_space_used + HadoopFileSystem.info + HadoopFileSystem.ls + HadoopFileSystem.mkdir + HadoopFileSystem.open + HadoopFileSystem.rename + HadoopFileSystem.rm + HadoopFileSystem.upload + HdfsFile diff --git a/src/arrow/docs/source/python/getstarted.rst b/src/arrow/docs/source/python/getstarted.rst new file mode 100644 index 000000000..36e4707ad --- /dev/null +++ b/src/arrow/docs/source/python/getstarted.rst @@ -0,0 +1,145 @@ +.. Licensed to the Apache Software Foundation (ASF) under one +.. or more contributor license agreements. See the NOTICE file +.. distributed with this work for additional information +.. regarding copyright ownership. The ASF licenses this file +.. to you under the Apache License, Version 2.0 (the +.. "License"); you may not use this file except in compliance +.. with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, +.. software distributed under the License is distributed on an +.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +.. KIND, either express or implied. See the License for the +.. specific language governing permissions and limitations +.. under the License. + +.. _getstarted: + +Getting Started +=============== + +Arrow manages data in arrays (:class:`pyarrow.Array`), which can be +grouped in tables (:class:`pyarrow.Table`) to represent columns of data +in tabular data. + +Arrow also provides support for various formats to get those tabular +data in and out of disk and networks. Most commonly used formats are +Parquet (:ref:`parquet`) and the IPC format (:ref:`ipc`). + +Creating Arrays and Tables +-------------------------- + +Arrays in Arrow are collections of data of uniform type. That allows +Arrow to use the best performing implementation to store the data and +perform computations on it. So each array is meant to have data and +a type + +.. ipython:: python + + import pyarrow as pa + + days = pa.array([1, 12, 17, 23, 28], type=pa.int8()) + +Multiple arrays can be combined in tables to form the columns +in tabular data when attached to a column name + +.. ipython:: python + + months = pa.array([1, 3, 5, 7, 1], type=pa.int8()) + years = pa.array([1990, 2000, 1995, 2000, 1995], type=pa.int16()) + + birthdays_table = pa.table([days, months, years], + names=["days", "months", "years"]) + + birthdays_table + +See :ref:`data` for more details. + +Saving and Loading Tables +------------------------- + +Once you have tabular data, Arrow provides out of the box +the features to save and restore that data for common formats +like Parquet: + +.. ipython:: python + + import pyarrow.parquet as pq + + pq.write_table(birthdays_table, 'birthdays.parquet') + +Once you have your data on disk, loading it back is a single function call, +and Arrow is heavily optimized for memory and speed so loading +data will be as quick as possible + +.. 
ipython:: python + + reloaded_birthdays = pq.read_table('birthdays.parquet') + + reloaded_birthdays + +Saving and loading back data in arrow is usually done through +:ref:`Parquet <parquet>`, :ref:`IPC format <ipc>` (:ref:`feather`), +:ref:`CSV <csv>` or :ref:`Line-Delimited JSON <json>` formats. + +Performing Computations +----------------------- + +Arrow ships with a bunch of compute functions that can be applied +to its arrays and tables, so through the compute functions +it's possible to apply transformations to the data + +.. ipython:: python + + import pyarrow.compute as pc + + pc.value_counts(birthdays_table["years"]) + +See :ref:`compute` for a list of available compute functions and +how to use them. + +Working with large data +----------------------- + +Arrow also provides the :class:`pyarrow.dataset` API to work with +large data, which will handle for you partitioning of your data in +smaller chunks + +.. ipython:: python + + import pyarrow.dataset as ds + + ds.write_dataset(birthdays_table, "savedir", format="parquet", + partitioning=ds.partitioning( + pa.schema([birthdays_table.schema.field("years")]) + )) + +Loading back the partitioned dataset will detect the chunks + +.. ipython:: python + + birthdays_dataset = ds.dataset("savedir", format="parquet", partitioning=["years"]) + + birthdays_dataset.files + +and will lazily load chunks of data only when iterating over them + +.. ipython:: python + + import datetime + + current_year = datetime.datetime.utcnow().year + for table_chunk in birthdays_dataset.to_batches(): + print("AGES", pc.subtract(current_year, table_chunk["years"])) + +For further details on how to work with big datasets, how to filter them, +how to project them, etc., refer to :ref:`dataset` documentation. + +Continuining from here +---------------------- + +For digging further into Arrow, you might want to read the +:doc:`PyArrow Documentation <./index>` itself or the +`Arrow Python Cookbook <https://arrow.apache.org/cookbook/py/>`_ diff --git a/src/arrow/docs/source/python/getting_involved.rst b/src/arrow/docs/source/python/getting_involved.rst new file mode 100644 index 000000000..7159bdfb0 --- /dev/null +++ b/src/arrow/docs/source/python/getting_involved.rst @@ -0,0 +1,35 @@ +.. Licensed to the Apache Software Foundation (ASF) under one +.. or more contributor license agreements. See the NOTICE file +.. distributed with this work for additional information +.. regarding copyright ownership. The ASF licenses this file +.. to you under the Apache License, Version 2.0 (the +.. "License"); you may not use this file except in compliance +.. with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, +.. software distributed under the License is distributed on an +.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +.. KIND, either express or implied. See the License for the +.. specific language governing permissions and limitations +.. under the License. + +Getting Involved +================ + +Right now the primary audience for Apache Arrow are the developers of data +systems; most people will use Apache Arrow indirectly through systems that use +it for internal data handling and interoperating with other Arrow-enabled +systems. 
+ +Even if you do not plan to contribute to Apache Arrow itself or Arrow +integrations in other projects, we'd be happy to have you involved: + + * Join the mailing list: send an email to + `dev-subscribe@arrow.apache.org <mailto:dev-subscribe@arrow.apache.org>`_. + Share your ideas and use cases for the project or read through the + `Archive <http://mail-archives.apache.org/mod_mbox/arrow-dev/>`_. + * Follow our activity on `JIRA <https://issues.apache.org/jira/browse/ARROW>`_ + * Learn the `Format / Specification + <https://github.com/apache/arrow/tree/master/format>`_ diff --git a/src/arrow/docs/source/python/index.rst b/src/arrow/docs/source/python/index.rst new file mode 100644 index 000000000..0ffa40545 --- /dev/null +++ b/src/arrow/docs/source/python/index.rst @@ -0,0 +1,62 @@ +.. Licensed to the Apache Software Foundation (ASF) under one +.. or more contributor license agreements. See the NOTICE file +.. distributed with this work for additional information +.. regarding copyright ownership. The ASF licenses this file +.. to you under the Apache License, Version 2.0 (the +.. "License"); you may not use this file except in compliance +.. with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, +.. software distributed under the License is distributed on an +.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +.. KIND, either express or implied. See the License for the +.. specific language governing permissions and limitations +.. under the License. + +PyArrow - Apache Arrow Python bindings +====================================== + +This is the documentation of the Python API of Apache Arrow. + +Apache Arrow is a development platform for in-memory analytics. +It contains a set of technologies that enable big data systems to store, process and move data fast. + +See the :doc:`parent documentation <../index>` for additional details on +the Arrow Project itself, on the Arrow format and the other language bindings. + +The Arrow Python bindings (also named "PyArrow") have first-class integration +with NumPy, pandas, and built-in Python objects. They are based on the C++ +implementation of Arrow. + +Here will we detail the usage of the Python API for Arrow and the leaf +libraries that add additional functionality such as reading Apache Parquet +files into Arrow structures. + +.. toctree:: + :maxdepth: 2 + + install + getstarted + data + compute + memory + ipc + filesystems + filesystems_deprecated + plasma + numpy + pandas + timestamps + csv + feather + json + parquet + dataset + cuda + extending_types + extending + api + getting_involved + benchmarks diff --git a/src/arrow/docs/source/python/install.rst b/src/arrow/docs/source/python/install.rst new file mode 100644 index 000000000..3c23d8a0f --- /dev/null +++ b/src/arrow/docs/source/python/install.rst @@ -0,0 +1,90 @@ +.. Licensed to the Apache Software Foundation (ASF) under one +.. or more contributor license agreements. See the NOTICE file +.. distributed with this work for additional information +.. regarding copyright ownership. The ASF licenses this file +.. to you under the Apache License, Version 2.0 (the +.. "License"); you may not use this file except in compliance +.. with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, +.. software distributed under the License is distributed on an +.. 
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +.. KIND, either express or implied. See the License for the +.. specific language governing permissions and limitations +.. under the License. + +Installing PyArrow +================== + +System Compatibility +-------------------- + +PyArrow is regularly built and tested on Windows, macOS and various Linux +distributions (including Ubuntu 16.04, Ubuntu 18.04). We strongly recommend +using a 64-bit system. + +Python Compatibility +-------------------- + +PyArrow is currently compatible with Python 3.6, 3.7, 3.8, and 3.9. + +Using Conda +----------- + +Install the latest version of PyArrow from +`conda-forge <https://conda-forge.org/>`_ using `Conda <https://conda.io>`_: + +.. code-block:: bash + + conda install -c conda-forge pyarrow + +Using Pip +--------- + +Install the latest version from `PyPI <https://pypi.org/>`_ (Windows, Linux, +and macOS): + +.. code-block:: bash + + pip install pyarrow + +If you encounter any importing issues of the pip wheels on Windows, you may +need to install the `Visual C++ Redistributable for Visual Studio 2015 +<https://www.microsoft.com/en-us/download/details.aspx?id=48145>`_. + +.. warning:: + On Linux, you will need pip >= 19.0 to detect the prebuilt binary packages. + +Installing from source +---------------------- + +See :ref:`python-development`. + +Installing Nightly Packages +--------------------------- + +.. warning:: + These packages are not official releases. Use them at your own risk. + +PyArrow has nightly wheels and conda packages for testing purposes. + +These may be suitable for downstream libraries in their continuous integration +setup to maintain compatibility with the upcoming PyArrow features, +deprecations and/or feature removals. + +Install the development version of PyArrow from `arrow-nightlies +<https://anaconda.org/arrow-nightlies/pyarrow>`_ conda channel: + +.. code-block:: bash + + conda install -c arrow-nightlies pyarrow + +Install the development version from an `alternative PyPI +<https://gemfury.com/arrow-nightlies>`_ index: + +.. code-block:: bash + + pip install --extra-index-url https://pypi.fury.io/arrow-nightlies/ \ + --prefer-binary --pre pyarrow diff --git a/src/arrow/docs/source/python/ipc.rst b/src/arrow/docs/source/python/ipc.rst new file mode 100644 index 000000000..0ba557b64 --- /dev/null +++ b/src/arrow/docs/source/python/ipc.rst @@ -0,0 +1,385 @@ +.. Licensed to the Apache Software Foundation (ASF) under one +.. or more contributor license agreements. See the NOTICE file +.. distributed with this work for additional information +.. regarding copyright ownership. The ASF licenses this file +.. to you under the Apache License, Version 2.0 (the +.. "License"); you may not use this file except in compliance +.. with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, +.. software distributed under the License is distributed on an +.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +.. KIND, either express or implied. See the License for the +.. specific language governing permissions and limitations +.. under the License. + +.. currentmodule:: pyarrow + +.. 
_ipc: + +Streaming, Serialization, and IPC +================================= + +Writing and Reading Streams +--------------------------- + +Arrow defines two types of binary formats for serializing record batches: + +* **Streaming format**: for sending an arbitrary length sequence of record + batches. The format must be processed from start to end, and does not support + random access + +* **File or Random Access format**: for serializing a fixed number of record + batches. Supports random access, and thus is very useful when used with + memory maps + +To follow this section, make sure to first read the section on :ref:`Memory and +IO <io>`. + +Using streams +~~~~~~~~~~~~~ + +First, let's create a small record batch: + +.. ipython:: python + + import pyarrow as pa + + data = [ + pa.array([1, 2, 3, 4]), + pa.array(['foo', 'bar', 'baz', None]), + pa.array([True, None, False, True]) + ] + + batch = pa.record_batch(data, names=['f0', 'f1', 'f2']) + batch.num_rows + batch.num_columns + +Now, we can begin writing a stream containing some number of these batches. For +this we use :class:`~pyarrow.RecordBatchStreamWriter`, which can write to a +writeable ``NativeFile`` object or a writeable Python object. For convenience, +this one can be created with :func:`~pyarrow.ipc.new_stream`: + +.. ipython:: python + + sink = pa.BufferOutputStream() + + with pa.ipc.new_stream(sink, batch.schema) as writer: + for i in range(5): + writer.write_batch(batch) + +Here we used an in-memory Arrow buffer stream (``sink``), +but this could have been a socket or some other IO sink. + +When creating the ``StreamWriter``, we pass the schema, since the schema +(column names and types) must be the same for all of the batches sent in this +particular stream. Now we can do: + +.. ipython:: python + + buf = sink.getvalue() + buf.size + +Now ``buf`` contains the complete stream as an in-memory byte buffer. We can +read such a stream with :class:`~pyarrow.RecordBatchStreamReader` or the +convenience function ``pyarrow.ipc.open_stream``: + +.. ipython:: python + + with pa.ipc.open_stream(buf) as reader: + schema = reader.schema + batches = [b for b in reader] + + schema + len(batches) + +We can check the returned batches are the same as the original input: + +.. ipython:: python + + batches[0].equals(batch) + +An important point is that if the input source supports zero-copy reads +(e.g. like a memory map, or ``pyarrow.BufferReader``), then the returned +batches are also zero-copy and do not allocate any new memory on read. + +Writing and Reading Random Access Files +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The :class:`~pyarrow.RecordBatchFileWriter` has the same API as +:class:`~pyarrow.RecordBatchStreamWriter`. You can create one with +:func:`~pyarrow.ipc.new_file`: + +.. ipython:: python + + sink = pa.BufferOutputStream() + + with pa.ipc.new_file(sink, batch.schema) as writer: + for i in range(10): + writer.write_batch(batch) + + buf = sink.getvalue() + buf.size + +The difference between :class:`~pyarrow.RecordBatchFileReader` and +:class:`~pyarrow.RecordBatchStreamReader` is that the input source must have a +``seek`` method for random access. The stream reader only requires read +operations. We can also use the :func:`~pyarrow.ipc.open_file` method to open a file: + +.. 
ipython:: python + + with pa.ipc.open_file(buf) as reader: + num_record_batches = reader.num_record_batches + b = reader.get_batch(3) + +Because we have access to the entire payload, we know the number of record +batches in the file, and can read any at random. + +.. ipython:: python + + num_record_batches + b.equals(batch) + +Reading from Stream and File Format for pandas +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The stream and file reader classes have a special ``read_pandas`` method to +simplify reading multiple record batches and converting them to a single +DataFrame output: + +.. ipython:: python + + with pa.ipc.open_file(buf) as reader: + df = reader.read_pandas() + + df[:5] + +Efficiently Writing and Reading Arrow Data +------------------------------------------ + +Being optimized for zero copy and memory mapped data, Arrow allows to easily +read and write arrays consuming the minimum amount of resident memory. + +When writing and reading raw Arrow data, we can use the Arrow File Format +or the Arrow Streaming Format. + +To dump an array to file, you can use the :meth:`~pyarrow.ipc.new_file` +which will provide a new :class:`~pyarrow.ipc.RecordBatchFileWriter` instance +that can be used to write batches of data to that file. + +For example to write an array of 10M integers, we could write it in 1000 chunks +of 10000 entries: + +.. ipython:: python + + BATCH_SIZE = 10000 + NUM_BATCHES = 1000 + + schema = pa.schema([pa.field('nums', pa.int32())]) + + with pa.OSFile('bigfile.arrow', 'wb') as sink: + with pa.ipc.new_file(sink, schema) as writer: + for row in range(NUM_BATCHES): + batch = pa.record_batch([pa.array(range(BATCH_SIZE), type=pa.int32())], schema) + writer.write(batch) + +record batches support multiple columns, so in practice we always write the +equivalent of a :class:`~pyarrow.Table`. + +Writing in batches is effective because we in theory need to keep in memory only +the current batch we are writing. But when reading back, we can be even more effective +by directly mapping the data from disk and avoid allocating any new memory on read. + +Under normal conditions, reading back our file will consume a few hundred megabytes +of memory: + +.. ipython:: python + + with pa.OSFile('bigfile.arrow', 'rb') as source: + loaded_array = pa.ipc.open_file(source).read_all() + + print("LEN:", len(loaded_array)) + print("RSS: {}MB".format(pa.total_allocated_bytes() >> 20)) + +To more efficiently read big data from disk, we can memory map the file, so that +Arrow can directly reference the data mapped from disk and avoid having to +allocate its own memory. +In such case the operating system will be able to page in the mapped memory +lazily and page it out without any write back cost when under pressure, +allowing to more easily read arrays bigger than the total memory. + +.. ipython:: python + + with pa.memory_map('bigfile.arrow', 'rb') as source: + loaded_array = pa.ipc.open_file(source).read_all() + print("LEN:", len(loaded_array)) + print("RSS: {}MB".format(pa.total_allocated_bytes() >> 20)) + +.. note:: + + Other high level APIs like :meth:`~pyarrow.parquet.read_table` also provide a + ``memory_map`` option. But in those cases, the memory mapping can't help with + reducing resident memory consumption. See :ref:`parquet_mmap` for details. + +Arbitrary Object Serialization +------------------------------ + +.. warning:: + + The custom serialization functionality is deprecated in pyarrow 2.0, and + will be removed in a future version. 
+ + While the serialization functions in this section utilize the Arrow stream + protocol internally, they do not produce data that is compatible with the + above ``ipc.open_file`` and ``ipc.open_stream`` functions. + + For arbitrary objects, you can use the standard library ``pickle`` + functionality instead. For pyarrow objects, you can use the IPC + serialization format through the ``pyarrow.ipc`` module, as explained + above. + + PyArrow serialization was originally meant to provide a higher-performance + alternative to ``pickle`` thanks to zero-copy semantics. However, + ``pickle`` protocol 5 gained support for zero-copy using out-of-band + buffers, and can be used instead for similar benefits. + +In ``pyarrow`` we are able to serialize and deserialize many kinds of Python +objects. As an example, consider a dictionary containing NumPy arrays: + +.. ipython:: python + + import numpy as np + + data = { + i: np.random.randn(500, 500) + for i in range(100) + } + +We use the ``pyarrow.serialize`` function to convert this data to a byte +buffer: + +.. ipython:: python + :okwarning: + + buf = pa.serialize(data).to_buffer() + type(buf) + buf.size + +``pyarrow.serialize`` creates an intermediate object which can be converted to +a buffer (the ``to_buffer`` method) or written directly to an output stream. + +``pyarrow.deserialize`` converts a buffer-like object back to the original +Python object: + +.. ipython:: python + :okwarning: + + restored_data = pa.deserialize(buf) + restored_data[0] + + +Serializing Custom Data Types +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +If an unrecognized data type is encountered when serializing an object, +``pyarrow`` will fall back on using ``pickle`` for converting that type to a +byte string. There may be a more efficient way, though. + +Consider a class with two members, one of which is a NumPy array: + +.. code-block:: python + + class MyData: + def __init__(self, name, data): + self.name = name + self.data = data + +We write functions to convert this to and from a dictionary with simpler types: + +.. code-block:: python + + def _serialize_MyData(val): + return {'name': val.name, 'data': val.data} + + def _deserialize_MyData(data): + return MyData(data['name'], data['data'] + +then, we must register these functions in a ``SerializationContext`` so that +``MyData`` can be recognized: + +.. code-block:: python + + context = pa.SerializationContext() + context.register_type(MyData, 'MyData', + custom_serializer=_serialize_MyData, + custom_deserializer=_deserialize_MyData) + +Lastly, we use this context as an additional argument to ``pyarrow.serialize``: + +.. code-block:: python + + buf = pa.serialize(val, context=context).to_buffer() + restored_val = pa.deserialize(buf, context=context) + +The ``SerializationContext`` also has convenience methods ``serialize`` and +``deserialize``, so these are equivalent statements: + +.. code-block:: python + + buf = context.serialize(val).to_buffer() + restored_val = context.deserialize(buf) + +Component-based Serialization +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +For serializing Python objects containing some number of NumPy arrays, Arrow +buffers, or other data types, it may be desirable to transport their serialized +representation without having to produce an intermediate copy using the +``to_buffer`` method. To motivate this, suppose we have a list of NumPy arrays: + +.. 
ipython:: python + + import numpy as np + data = [np.random.randn(10, 10) for i in range(5)] + +The call ``pa.serialize(data)`` does not copy the memory inside each of these +NumPy arrays. This serialized representation can be then decomposed into a +dictionary containing a sequence of ``pyarrow.Buffer`` objects containing +metadata for each array and references to the memory inside the arrays. To do +this, use the ``to_components`` method: + +.. ipython:: python + :okwarning: + + serialized = pa.serialize(data) + components = serialized.to_components() + +The particular details of the output of ``to_components`` are not too +important. The objects in the ``'data'`` field are ``pyarrow.Buffer`` objects, +which are zero-copy convertible to Python ``memoryview`` objects: + +.. ipython:: python + + memoryview(components['data'][0]) + +A memoryview can be converted back to a Arrow ``Buffer`` with +``pyarrow.py_buffer``: + +.. ipython:: python + + mv = memoryview(components['data'][0]) + buf = pa.py_buffer(mv) + +An object can be reconstructed from its component-based representation using +``deserialize_components``: + +.. ipython:: python + :okwarning: + + restored_data = pa.deserialize_components(components) + restored_data[0] + +``deserialize_components`` is also available as a method on +``SerializationContext`` objects. diff --git a/src/arrow/docs/source/python/json.rst b/src/arrow/docs/source/python/json.rst new file mode 100644 index 000000000..99ecbc19a --- /dev/null +++ b/src/arrow/docs/source/python/json.rst @@ -0,0 +1,117 @@ +.. Licensed to the Apache Software Foundation (ASF) under one +.. or more contributor license agreements. See the NOTICE file +.. distributed with this work for additional information +.. regarding copyright ownership. The ASF licenses this file +.. to you under the Apache License, Version 2.0 (the +.. "License"); you may not use this file except in compliance +.. with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, +.. software distributed under the License is distributed on an +.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +.. KIND, either express or implied. See the License for the +.. specific language governing permissions and limitations +.. under the License. + +.. currentmodule:: pyarrow.json +.. _json: + +Reading JSON files +================== + +Arrow supports reading columnar data from line-delimited JSON files. +In this context, a JSON file consists of multiple JSON objects, one per line, +representing individual data rows. For example, this file represents +two rows of data with four columns "a", "b", "c", "d": + +.. code-block:: json + + {"a": 1, "b": 2.0, "c": "foo", "d": false} + {"a": 4, "b": -5.5, "c": null, "d": true} + +The features currently offered are the following: + +* multi-threaded or single-threaded reading +* automatic decompression of input files (based on the filename extension, + such as ``my_data.json.gz``) +* sophisticated type inference (see below) + +.. note:: + Currently only the line-delimited JSON format is supported. + + +Usage +----- + +JSON reading functionality is available through the :mod:`pyarrow.json` module. 
+In many cases, you will simply call the :func:`read_json` function +with the file path you want to read from:: + + >>> from pyarrow import json + >>> fn = 'my_data.json' + >>> table = json.read_json(fn) + >>> table + pyarrow.Table + a: int64 + b: double + c: string + d: bool + >>> table.to_pandas() + a b c d + 0 1 2.0 foo False + 1 4 -5.5 None True + + +Automatic Type Inference +------------------------ + +Arrow :ref:`data types <data.types>` are inferred from the JSON types and +values of each column: + +* JSON null values convert to the ``null`` type, but can fall back to any + other type. +* JSON booleans convert to ``bool_``. +* JSON numbers convert to ``int64``, falling back to ``float64`` if a + non-integer is encountered. +* JSON strings of the kind "YYYY-MM-DD" and "YYYY-MM-DD hh:mm:ss" convert + to ``timestamp[s]``, falling back to ``utf8`` if a conversion error occurs. +* JSON arrays convert to a ``list`` type, and inference proceeds recursively + on the JSON arrays' values. +* Nested JSON objects convert to a ``struct`` type, and inference proceeds + recursively on the JSON objects' values. + +Thus, reading this JSON file: + +.. code-block:: json + + {"a": [1, 2], "b": {"c": true, "d": "1991-02-03"}} + {"a": [3, 4, 5], "b": {"c": false, "d": "2019-04-01"}} + +returns the following data:: + + >>> table = json.read_json("my_data.json") + >>> table + pyarrow.Table + a: list<item: int64> + child 0, item: int64 + b: struct<c: bool, d: timestamp[s]> + child 0, c: bool + child 1, d: timestamp[s] + >>> table.to_pandas() + a b + 0 [1, 2] {'c': True, 'd': 1991-02-03 00:00:00} + 1 [3, 4, 5] {'c': False, 'd': 2019-04-01 00:00:00} + + +Customized parsing +------------------ + +To alter the default parsing settings in case of reading JSON files with an +unusual structure, you should create a :class:`ParseOptions` instance +and pass it to :func:`read_json`. For example, you can pass an explicit +:ref:`schema <data.schema>` in order to bypass automatic type inference. + +Similarly, you can choose performance settings by passing a +:class:`ReadOptions` instance to :func:`read_json`. diff --git a/src/arrow/docs/source/python/memory.rst b/src/arrow/docs/source/python/memory.rst new file mode 100644 index 000000000..4febc668c --- /dev/null +++ b/src/arrow/docs/source/python/memory.rst @@ -0,0 +1,298 @@ +.. Licensed to the Apache Software Foundation (ASF) under one +.. or more contributor license agreements. See the NOTICE file +.. distributed with this work for additional information +.. regarding copyright ownership. The ASF licenses this file +.. to you under the Apache License, Version 2.0 (the +.. "License"); you may not use this file except in compliance +.. with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, +.. software distributed under the License is distributed on an +.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +.. KIND, either express or implied. See the License for the +.. specific language governing permissions and limitations +.. under the License. + +.. currentmodule:: pyarrow +.. highlight:: python + +.. 
_io: + +======================== +Memory and IO Interfaces +======================== + +This section will introduce you to the major concepts in PyArrow's memory +management and IO systems: + +* Buffers +* Memory pools +* File-like and stream-like objects + +Referencing and Allocating Memory +================================= + +pyarrow.Buffer +-------------- + +The :class:`Buffer` object wraps the C++ :cpp:class:`arrow::Buffer` type +which is the primary tool for memory management in Apache Arrow in C++. It permits +higher-level array classes to safely interact with memory which they may or may +not own. ``arrow::Buffer`` can be zero-copy sliced to permit Buffers to cheaply +reference other Buffers, while preserving memory lifetime and clean +parent-child relationships. + +There are many implementations of ``arrow::Buffer``, but they all provide a +standard interface: a data pointer and length. This is similar to Python's +built-in `buffer protocol` and ``memoryview`` objects. + +A :class:`Buffer` can be created from any Python object implementing +the buffer protocol by calling the :func:`py_buffer` function. Let's consider +a bytes object: + +.. ipython:: python + + import pyarrow as pa + + data = b'abcdefghijklmnopqrstuvwxyz' + buf = pa.py_buffer(data) + buf + buf.size + +Creating a Buffer in this way does not allocate any memory; it is a zero-copy +view on the memory exported from the ``data`` bytes object. + +External memory, under the form of a raw pointer and size, can also be +referenced using the :func:`foreign_buffer` function. + +Buffers can be used in circumstances where a Python buffer or memoryview is +required, and such conversions are zero-copy: + +.. ipython:: python + + memoryview(buf) + +The Buffer's :meth:`~Buffer.to_pybytes` method converts the Buffer's data to a +Python bytestring (thus making a copy of the data): + +.. ipython:: python + + buf.to_pybytes() + +Memory Pools +------------ + +All memory allocations and deallocations (like ``malloc`` and ``free`` in C) +are tracked in an instance of :class:`MemoryPool`. This means that we can +then precisely track amount of memory that has been allocated: + +.. ipython:: python + + pa.total_allocated_bytes() + +Let's allocate a resizable :class:`Buffer` from the default pool: + +.. ipython:: python + + buf = pa.allocate_buffer(1024, resizable=True) + pa.total_allocated_bytes() + buf.resize(2048) + pa.total_allocated_bytes() + +The default allocator requests memory in a minimum increment of 64 bytes. If +the buffer is garbaged-collected, all of the memory is freed: + +.. ipython:: python + + buf = None + pa.total_allocated_bytes() + +Besides the default built-in memory pool, there may be additional memory pools +to choose (such as `mimalloc <https://github.com/microsoft/mimalloc>`_) +from depending on how Arrow was built. One can get the backend +name for a memory pool:: + + >>> pa.default_memory_pool().backend_name + 'jemalloc' + +.. seealso:: + :ref:`API documentation for memory pools <api.memory_pool>`. + +.. seealso:: + On-GPU buffers using Arrow's optional :doc:`CUDA integration <cuda>`. + + +Input and Output +================ + +.. 
_io.native_file: + +The Arrow C++ libraries have several abstract interfaces for different kinds of +IO objects: + +* Read-only streams +* Read-only files supporting random access +* Write-only streams +* Write-only files supporting random access +* File supporting reads, writes, and random access + +In the interest of making these objects behave more like Python's built-in +``file`` objects, we have defined a :class:`~pyarrow.NativeFile` base class +which implements the same API as regular Python file objects. + +:class:`~pyarrow.NativeFile` has some important features which make it +preferable to using Python files with PyArrow where possible: + +* Other Arrow classes can access the internal C++ IO objects natively, and do + not need to acquire the Python GIL +* Native C++ IO may be able to do zero-copy IO, such as with memory maps + +There are several kinds of :class:`~pyarrow.NativeFile` options available: + +* :class:`~pyarrow.OSFile`, a native file that uses your operating system's + file descriptors +* :class:`~pyarrow.MemoryMappedFile`, for reading (zero-copy) and writing with + memory maps +* :class:`~pyarrow.BufferReader`, for reading :class:`~pyarrow.Buffer` objects + as a file +* :class:`~pyarrow.BufferOutputStream`, for writing data in-memory, producing a + Buffer at the end +* :class:`~pyarrow.FixedSizeBufferWriter`, for writing data into an already + allocated Buffer +* :class:`~pyarrow.HdfsFile`, for reading and writing data to the Hadoop Filesystem +* :class:`~pyarrow.PythonFile`, for interfacing with Python file objects in C++ +* :class:`~pyarrow.CompressedInputStream` and + :class:`~pyarrow.CompressedOutputStream`, for on-the-fly compression or + decompression to/from another stream + +There are also high-level APIs to make instantiating common kinds of streams +easier. + +High-Level API +-------------- + +Input Streams +~~~~~~~~~~~~~ + +The :func:`~pyarrow.input_stream` function allows creating a readable +:class:`~pyarrow.NativeFile` from various kinds of sources. + +* If passed a :class:`~pyarrow.Buffer` or a ``memoryview`` object, a + :class:`~pyarrow.BufferReader` will be returned: + + .. ipython:: python + + buf = memoryview(b"some data") + stream = pa.input_stream(buf) + stream.read(4) + +* If passed a string or file path, it will open the given file on disk + for reading, creating a :class:`~pyarrow.OSFile`. Optionally, the file + can be compressed: if its filename ends with a recognized extension + such as ``.gz``, its contents will automatically be decompressed on + reading. + + .. ipython:: python + + import gzip + with gzip.open('example.gz', 'wb') as f: + f.write(b'some data\n' * 3) + + stream = pa.input_stream('example.gz') + stream.read() + +* If passed a Python file object, it will wrapped in a :class:`PythonFile` + such that the Arrow C++ libraries can read data from it (at the expense + of a slight overhead). + +Output Streams +~~~~~~~~~~~~~~ + +:func:`~pyarrow.output_stream` is the equivalent function for output streams +and allows creating a writable :class:`~pyarrow.NativeFile`. It has the same +features as explained above for :func:`~pyarrow.input_stream`, such as being +able to write to buffers or do on-the-fly compression. + +.. 
ipython:: python + + with pa.output_stream('example1.dat') as stream: + stream.write(b'some data') + + f = open('example1.dat', 'rb') + f.read() + + +On-Disk and Memory Mapped Files +------------------------------- + +PyArrow includes two ways to interact with data on disk: standard operating +system-level file APIs, and memory-mapped files. In regular Python we can +write: + +.. ipython:: python + + with open('example2.dat', 'wb') as f: + f.write(b'some example data') + +Using pyarrow's :class:`~pyarrow.OSFile` class, you can write: + +.. ipython:: python + + with pa.OSFile('example3.dat', 'wb') as f: + f.write(b'some example data') + +For reading files, you can use :class:`~pyarrow.OSFile` or +:class:`~pyarrow.MemoryMappedFile`. The difference between these is that +:class:`~pyarrow.OSFile` allocates new memory on each read, like Python file +objects. In reads from memory maps, the library constructs a buffer referencing +the mapped memory without any memory allocation or copying: + +.. ipython:: python + + file_obj = pa.OSFile('example2.dat') + mmap = pa.memory_map('example3.dat') + file_obj.read(4) + mmap.read(4) + +The ``read`` method implements the standard Python file ``read`` API. To read +into Arrow Buffer objects, use ``read_buffer``: + +.. ipython:: python + + mmap.seek(0) + buf = mmap.read_buffer(4) + print(buf) + buf.to_pybytes() + +Many tools in PyArrow, particular the Apache Parquet interface and the file and +stream messaging tools, are more efficient when used with these ``NativeFile`` +types than with normal Python file objects. + +.. ipython:: python + :suppress: + + buf = mmap = file_obj = None + !rm example.dat + !rm example2.dat + +In-Memory Reading and Writing +----------------------------- + +To assist with serialization and deserialization of in-memory data, we have +file interfaces that can read and write to Arrow Buffers. + +.. ipython:: python + + writer = pa.BufferOutputStream() + writer.write(b'hello, friends') + + buf = writer.getvalue() + buf + buf.size + reader = pa.BufferReader(buf) + reader.seek(7) + reader.read(7) + +These have similar semantics to Python's built-in ``io.BytesIO``. diff --git a/src/arrow/docs/source/python/numpy.rst b/src/arrow/docs/source/python/numpy.rst new file mode 100644 index 000000000..870f9cb73 --- /dev/null +++ b/src/arrow/docs/source/python/numpy.rst @@ -0,0 +1,75 @@ +.. Licensed to the Apache Software Foundation (ASF) under one +.. or more contributor license agreements. See the NOTICE file +.. distributed with this work for additional information +.. regarding copyright ownership. The ASF licenses this file +.. to you under the Apache License, Version 2.0 (the +.. "License"); you may not use this file except in compliance +.. with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, +.. software distributed under the License is distributed on an +.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +.. KIND, either express or implied. See the License for the +.. specific language governing permissions and limitations +.. under the License. + +.. _numpy_interop: + +NumPy Integration +================= + +PyArrow allows converting back and forth from +`NumPy <https://www.numpy.org/>`_ arrays to Arrow :ref:`Arrays <data.array>`. + +NumPy to Arrow +-------------- + +To convert a NumPy array to Arrow, one can simply call the :func:`pyarrow.array` +factory function. + +.. 
code-block:: pycon + + >>> import numpy as np + >>> import pyarrow as pa + >>> data = np.arange(10, dtype='int16') + >>> arr = pa.array(data) + >>> arr + <pyarrow.lib.Int16Array object at 0x7fb1d1e6ae58> + [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9 + ] + +Converting from NumPy supports a wide range of input dtypes, including +structured dtypes or strings. + +Arrow to NumPy +-------------- + +In the reverse direction, it is possible to produce a view of an Arrow Array +for use with NumPy using the :meth:`~pyarrow.Array.to_numpy` method. +This is limited to primitive types for which NumPy has the same physical +representation as Arrow, and assuming the Arrow data has no nulls. + +.. code-block:: pycon + + >>> import numpy as np + >>> import pyarrow as pa + >>> arr = pa.array([4, 5, 6], type=pa.int32()) + >>> view = arr.to_numpy() + >>> view + array([4, 5, 6], dtype=int32) + +For more complex data types, you have to use the :meth:`~pyarrow.Array.to_pandas` +method (which will construct a Numpy array with Pandas semantics for, e.g., +representation of null values). diff --git a/src/arrow/docs/source/python/pandas.rst b/src/arrow/docs/source/python/pandas.rst new file mode 100644 index 000000000..aa030cfff --- /dev/null +++ b/src/arrow/docs/source/python/pandas.rst @@ -0,0 +1,480 @@ +.. Licensed to the Apache Software Foundation (ASF) under one +.. or more contributor license agreements. See the NOTICE file +.. distributed with this work for additional information +.. regarding copyright ownership. The ASF licenses this file +.. to you under the Apache License, Version 2.0 (the +.. "License"); you may not use this file except in compliance +.. with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, +.. software distributed under the License is distributed on an +.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +.. KIND, either express or implied. See the License for the +.. specific language governing permissions and limitations +.. under the License. + +.. _pandas_interop: + +Pandas Integration +================== + +To interface with `pandas <https://pandas.pydata.org/>`_, PyArrow provides +various conversion routines to consume pandas structures and convert back +to them. + +.. note:: + While pandas uses NumPy as a backend, it has enough peculiarities + (such as a different type system, and support for null values) that this + is a separate topic from :ref:`numpy_interop`. + +To follow examples in this document, make sure to run: + +.. ipython:: python + + import pandas as pd + import pyarrow as pa + +DataFrames +---------- + +The equivalent to a pandas DataFrame in Arrow is a :ref:`Table <data.table>`. +Both consist of a set of named columns of equal length. While pandas only +supports flat columns, the Table also provides nested columns, thus it can +represent more data than a DataFrame, so a full conversion is not always possible. + +Conversion from a Table to a DataFrame is done by calling +:meth:`pyarrow.Table.to_pandas`. The inverse is then achieved by using +:meth:`pyarrow.Table.from_pandas`. + +.. 
code-block:: python
+
+    import pyarrow as pa
+    import pandas as pd
+
+    df = pd.DataFrame({"a": [1, 2, 3]})
+    # Convert from pandas to Arrow
+    table = pa.Table.from_pandas(df)
+    # Convert back to pandas
+    df_new = table.to_pandas()
+
+    # Infer Arrow schema from pandas
+    schema = pa.Schema.from_pandas(df)
+
+By default ``pyarrow`` tries to preserve and restore the ``.index``
+data as accurately as possible. See the section below for more about
+this, and how to disable this logic.
+
+Series
+------
+
+In Arrow, the most similar structure to a pandas Series is an Array.
+It is a vector that contains data of a single type, stored in contiguous
+(linear) memory. You can convert a pandas Series to an Arrow Array using
+:meth:`pyarrow.Array.from_pandas`. As Arrow Arrays are always nullable, you can
+supply an optional mask using the ``mask`` parameter to mark all null entries.
+
+Handling pandas Indexes
+-----------------------
+
+Methods like :meth:`pyarrow.Table.from_pandas` have a
+``preserve_index`` option which defines whether the data in the ``index``
+member of the corresponding pandas object is preserved (stored) or not.
+This data is tracked using schema-level metadata in the internal
+``arrow::Schema`` object.
+
+The default of ``preserve_index`` is ``None``, which behaves as
+follows:
+
+* ``RangeIndex`` is stored as metadata-only, not requiring any extra
+  storage.
+* Other index types are stored as one or more physical data columns in
+  the resulting :class:`Table`.
+
+To not store the index at all, pass ``preserve_index=False``. Since
+storing a ``RangeIndex`` can cause issues in some limited scenarios
+(such as storing multiple DataFrame objects in a Parquet file), to
+force all index data to be serialized in the resulting table, pass
+``preserve_index=True``.
+
+Type differences
+----------------
+
+With the current design of pandas and Arrow, it is not possible to convert all
+column types unmodified. One of the main issues here is that pandas has no
+support for nullable columns of arbitrary type. In addition, ``datetime64`` is
+currently fixed to nanosecond resolution. On the other hand, Arrow might still
+be missing support for some types.
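+
+For example, the lack of nullable integer columns in default pandas shows up
+directly when round-tripping an integer Series containing a null: Arrow stores
+the null natively, but the conversion back to pandas upcasts the column to
+``float64``. A minimal sketch using the ``mask`` parameter mentioned above
+(the values are made up for illustration):
+
+.. code-block:: python
+
+    import numpy as np
+    import pandas as pd
+    import pyarrow as pa
+
+    s = pd.Series([1, 2, 3])
+    # Entries where the mask is True are treated as null in Arrow
+    arr = pa.Array.from_pandas(s, mask=np.array([False, True, False]))
+    arr.null_count    # 1; the array is an Int64Array of [1, null, 3]
+
+    # Converting back to pandas upcasts to float64, since the default
+    # pandas integer dtype cannot represent missing values
+    arr.to_pandas()   # values 1.0, NaN, 3.0 with dtype float64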
+ +pandas -> Arrow Conversion +~~~~~~~~~~~~~~~~~~~~~~~~~~ + ++------------------------+--------------------------+ +| Source Type (pandas) | Destination Type (Arrow) | ++========================+==========================+ +| ``bool`` | ``BOOL`` | ++------------------------+--------------------------+ +| ``(u)int{8,16,32,64}`` | ``(U)INT{8,16,32,64}`` | ++------------------------+--------------------------+ +| ``float32`` | ``FLOAT`` | ++------------------------+--------------------------+ +| ``float64`` | ``DOUBLE`` | ++------------------------+--------------------------+ +| ``str`` / ``unicode`` | ``STRING`` | ++------------------------+--------------------------+ +| ``pd.Categorical`` | ``DICTIONARY`` | ++------------------------+--------------------------+ +| ``pd.Timestamp`` | ``TIMESTAMP(unit=ns)`` | ++------------------------+--------------------------+ +| ``datetime.date`` | ``DATE`` | ++------------------------+--------------------------+ + +Arrow -> pandas Conversion +~~~~~~~~~~~~~~~~~~~~~~~~~~ + ++-------------------------------------+--------------------------------------------------------+ +| Source Type (Arrow) | Destination Type (pandas) | ++=====================================+========================================================+ +| ``BOOL`` | ``bool`` | ++-------------------------------------+--------------------------------------------------------+ +| ``BOOL`` *with nulls* | ``object`` (with values ``True``, ``False``, ``None``) | ++-------------------------------------+--------------------------------------------------------+ +| ``(U)INT{8,16,32,64}`` | ``(u)int{8,16,32,64}`` | ++-------------------------------------+--------------------------------------------------------+ +| ``(U)INT{8,16,32,64}`` *with nulls* | ``float64`` | ++-------------------------------------+--------------------------------------------------------+ +| ``FLOAT`` | ``float32`` | ++-------------------------------------+--------------------------------------------------------+ +| ``DOUBLE`` | ``float64`` | ++-------------------------------------+--------------------------------------------------------+ +| ``STRING`` | ``str`` | ++-------------------------------------+--------------------------------------------------------+ +| ``DICTIONARY`` | ``pd.Categorical`` | ++-------------------------------------+--------------------------------------------------------+ +| ``TIMESTAMP(unit=*)`` | ``pd.Timestamp`` (``np.datetime64[ns]``) | ++-------------------------------------+--------------------------------------------------------+ +| ``DATE`` | ``object``(with ``datetime.date`` objects) | ++-------------------------------------+--------------------------------------------------------+ + +Categorical types +~~~~~~~~~~~~~~~~~ + +`Pandas categorical <https://pandas.pydata.org/pandas-docs/stable/user_guide/categorical.html>`_ +columns are converted to :ref:`Arrow dictionary arrays <data.dictionary>`, +a special array type optimized to handle repeated and limited +number of possible values. + +.. ipython:: python + + df = pd.DataFrame({"cat": pd.Categorical(["a", "b", "c", "a", "b", "c"])}) + df.cat.dtype.categories + df + + table = pa.Table.from_pandas(df) + table + +We can inspect the :class:`~.ChunkedArray` of the created table and see the +same categories of the Pandas DataFrame. + +.. 
ipython:: python + + column = table[0] + chunk = column.chunk(0) + chunk.dictionary + chunk.indices + +Datetime (Timestamp) types +~~~~~~~~~~~~~~~~~~~~~~~~~~ + +`Pandas Timestamps <https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html>`_ +use the ``datetime64[ns]`` type in Pandas and are converted to an Arrow +:class:`~.TimestampArray`. + +.. ipython:: python + + df = pd.DataFrame({"datetime": pd.date_range("2020-01-01T00:00:00Z", freq="H", periods=3)}) + df.dtypes + df + + table = pa.Table.from_pandas(df) + table + +In this example the Pandas Timestamp is time zone aware +(``UTC`` on this case), and this information is used to create the Arrow +:class:`~.TimestampArray`. + +Date types +~~~~~~~~~~ + +While dates can be handled using the ``datetime64[ns]`` type in +pandas, some systems work with object arrays of Python's built-in +``datetime.date`` object: + +.. ipython:: python + + from datetime import date + s = pd.Series([date(2018, 12, 31), None, date(2000, 1, 1)]) + s + +When converting to an Arrow array, the ``date32`` type will be used by +default: + +.. ipython:: python + + arr = pa.array(s) + arr.type + arr[0] + +To use the 64-bit ``date64``, specify this explicitly: + +.. ipython:: python + + arr = pa.array(s, type='date64') + arr.type + +When converting back with ``to_pandas``, object arrays of +``datetime.date`` objects are returned: + +.. ipython:: python + + arr.to_pandas() + +If you want to use NumPy's ``datetime64`` dtype instead, pass +``date_as_object=False``: + +.. ipython:: python + + s2 = pd.Series(arr.to_pandas(date_as_object=False)) + s2.dtype + +.. warning:: + + As of Arrow ``0.13`` the parameter ``date_as_object`` is ``True`` + by default. Older versions must pass ``date_as_object=True`` to + obtain this behavior + +Time types +~~~~~~~~~~ + +The builtin ``datetime.time`` objects inside Pandas data structures will be +converted to an Arrow ``time64`` and :class:`~.Time64Array` respectively. + +.. ipython:: python + + from datetime import time + s = pd.Series([time(1, 1, 1), time(2, 2, 2)]) + s + + arr = pa.array(s) + arr.type + arr + +When converting to pandas, arrays of ``datetime.time`` objects are returned: + +.. ipython:: python + + arr.to_pandas() + +Nullable types +-------------- + +In Arrow all data types are nullable, meaning they support storing missing +values. In pandas, however, not all data types have support for missing data. +Most notably, the default integer data types do not, and will get casted +to float when missing values are introduced. Therefore, when an Arrow array +or table gets converted to pandas, integer columns will become float when +missing values are present: + +.. code-block:: python + + >>> arr = pa.array([1, 2, None]) + >>> arr + <pyarrow.lib.Int64Array object at 0x7f07d467c640> + [ + 1, + 2, + null + ] + >>> arr.to_pandas() + 0 1.0 + 1 2.0 + 2 NaN + dtype: float64 + +Pandas has experimental nullable data types +(https://pandas.pydata.org/docs/user_guide/integer_na.html). Arrows supports +round trip conversion for those: + +.. code-block:: python + + >>> df = pd.DataFrame({'a': pd.Series([1, 2, None], dtype="Int64")}) + >>> df + a + 0 1 + 1 2 + 2 <NA> + + >>> table = pa.table(df) + >>> table + Out[32]: + pyarrow.Table + a: int64 + ---- + a: [[1,2,null]] + + >>> table.to_pandas() + a + 0 1 + 1 2 + 2 <NA> + + >>> table.to_pandas().dtypes + a Int64 + dtype: object + +This roundtrip conversion works because metadata about the original pandas +DataFrame gets stored in the Arrow table. 
However, if you have Arrow data (or +e.g. a Parquet file) not originating from a pandas DataFrame with nullable +data types, the default conversion to pandas will not use those nullable +dtypes. + +The :meth:`pyarrow.Table.to_pandas` method has a ``types_mapper`` keyword +that can be used to override the default data type used for the resulting +pandas DataFrame. This way, you can instruct Arrow to create a pandas +DataFrame using nullable dtypes. + +.. code-block:: python + + >>> table = pa.table({"a": [1, 2, None]}) + >>> table.to_pandas() + a + 0 1.0 + 1 2.0 + 2 NaN + >>> table.to_pandas(types_mapper={pa.int64(): pd.Int64Dtype()}.get) + a + 0 1 + 1 2 + 2 <NA> + +The ``types_mapper`` keyword expects a function that will return the pandas +data type to use given a pyarrow data type. By using the ``dict.get`` method, +we can create such a function using a dictionary. + +If you want to use all currently supported nullable dtypes by pandas, this +dictionary becomes: + +.. code-block:: python + + dtype_mapping = { + pa.int8(): pd.Int8Dtype(), + pa.int16(): pd.Int16Dtype(), + pa.int32(): pd.Int32Dtype(), + pa.int64(): pd.Int64Dtype(), + pa.uint8(): pd.UInt8Dtype(), + pa.uint16(): pd.UInt16Dtype(), + pa.uint32(): pd.UInt32Dtype(), + pa.uint64(): pd.UInt64Dtype(), + pa.bool_(): pd.BooleanDtype(), + pa.float32(): pd.Float32Dtype(), + pa.float64(): pd.Float64Dtype(), + pa.string(): pd.StringDtype(), + } + + df = table.to_pandas(types_mapper=dtype_mapping.get) + + +When using the pandas API for reading Parquet files (``pd.read_parquet(..)``), +this can also be achieved by passing ``use_nullable_dtypes``: + +.. code-block:: python + + df = pd.read_parquet(path, use_nullable_dtypes=True) + + +Memory Usage and Zero Copy +-------------------------- + +When converting from Arrow data structures to pandas objects using various +``to_pandas`` methods, one must occasionally be mindful of issues related to +performance and memory usage. + +Since pandas's internal data representation is generally different from the +Arrow columnar format, zero copy conversions (where no memory allocation or +computation is required) are only possible in certain limited cases. + +In the worst case scenario, calling ``to_pandas`` will result in two versions +of the data in memory, one for Arrow and one for pandas, yielding approximately +twice the memory footprint. We have implement some mitigations for this case, +particularly when creating large ``DataFrame`` objects, that we describe below. + +Zero Copy Series Conversions +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Zero copy conversions from ``Array`` or ``ChunkedArray`` to NumPy arrays or +pandas Series are possible in certain narrow cases: + +* The Arrow data is stored in an integer (signed or unsigned ``int8`` through + ``int64``) or floating point type (``float16`` through ``float64``). This + includes many numeric types as well as timestamps. +* The Arrow data has no null values (since these are represented using bitmaps + which are not supported by pandas). +* For ``ChunkedArray``, the data consists of a single chunk, + i.e. ``arr.num_chunks == 1``. Multiple chunks will always require a copy + because of pandas's contiguousness requirement. + +In these scenarios, ``to_pandas`` or ``to_numpy`` will be zero copy. In all +other scenarios, a copy will be required. 
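+
+If you want to be certain that no hidden copy takes place, the
+``zero_copy_only`` flag of ``to_numpy`` turns a silent copy into an error.
+A small sketch (the data here is made up; ``ArrowInvalid`` is the error
+raised when a copy would be needed):
+
+.. code-block:: python
+
+    import pyarrow as pa
+
+    # Primitive type, no nulls, single chunk: a zero copy view is possible
+    arr = pa.array([1.0, 2.0, 3.0])
+    view = arr.to_numpy(zero_copy_only=True)   # no data is copied
+
+    # With a null present, a copy would be required, so this raises
+    arr_with_null = pa.array([1.0, None, 3.0])
+    try:
+        arr_with_null.to_numpy(zero_copy_only=True)
+    except pa.ArrowInvalid:
+        # explicitly allow the copy instead
+        copied = arr_with_null.to_numpy(zero_copy_only=False)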
+ +Reducing Memory Use in ``Table.to_pandas`` +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +As of this writing, pandas applies a data management strategy called +"consolidation" to collect like-typed DataFrame columns in two-dimensional +NumPy arrays, referred to internally as "blocks". We have gone to great effort +to construct the precise "consolidated" blocks so that pandas will not perform +any further allocation or copies after we hand off the data to +``pandas.DataFrame``. The obvious downside of this consolidation strategy is +that it forces a "memory doubling". + +To try to limit the potential effects of "memory doubling" during +``Table.to_pandas``, we provide a couple of options: + +* ``split_blocks=True``, when enabled ``Table.to_pandas`` produces one internal + DataFrame "block" for each column, skipping the "consolidation" step. Note + that many pandas operations will trigger consolidation anyway, but the peak + memory use may be less than the worst case scenario of a full memory + doubling. As a result of this option, we are able to do zero copy conversions + of columns in the same cases where we can do zero copy with ``Array`` and + ``ChunkedArray``. +* ``self_destruct=True``, this destroys the internal Arrow memory buffers in + each column ``Table`` object as they are converted to the pandas-compatible + representation, potentially releasing memory to the operating system as soon + as a column is converted. Note that this renders the calling ``Table`` object + unsafe for further use, and any further methods called will cause your Python + process to crash. + +Used together, the call + +.. code-block:: python + + df = table.to_pandas(split_blocks=True, self_destruct=True) + del table # not necessary, but a good practice + +will yield significantly lower memory usage in some scenarios. Without these +options, ``to_pandas`` will always double memory. + +Note that ``self_destruct=True`` is not guaranteed to save memory. Since the +conversion happens column by column, memory is also freed column by column. But +if multiple columns share an underlying buffer, then no memory will be freed +until all of those columns are converted. In particular, due to implementation +details, data that comes from IPC or Flight is prone to this, as memory will be +laid out as follows:: + + Record Batch 0: Allocation 0: array 0 chunk 0, array 1 chunk 0, ... + Record Batch 1: Allocation 1: array 0 chunk 1, array 1 chunk 1, ... + ... + +In this case, no memory can be freed until the entire table is converted, even +with ``self_destruct=True``. diff --git a/src/arrow/docs/source/python/parquet.rst b/src/arrow/docs/source/python/parquet.rst new file mode 100644 index 000000000..82461ec5d --- /dev/null +++ b/src/arrow/docs/source/python/parquet.rst @@ -0,0 +1,597 @@ +.. Licensed to the Apache Software Foundation (ASF) under one +.. or more contributor license agreements. See the NOTICE file +.. distributed with this work for additional information +.. regarding copyright ownership. The ASF licenses this file +.. to you under the Apache License, Version 2.0 (the +.. "License"); you may not use this file except in compliance +.. with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, +.. software distributed under the License is distributed on an +.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +.. KIND, either express or implied. See the License for the +.. 
specific language governing permissions and limitations +.. under the License. + +.. currentmodule:: pyarrow +.. _parquet: + +Reading and Writing the Apache Parquet Format +============================================= + +The `Apache Parquet <http://parquet.apache.org/>`_ project provides a +standardized open-source columnar storage format for use in data analysis +systems. It was created originally for use in `Apache Hadoop +<http://hadoop.apache.org/>`_ with systems like `Apache Drill +<http://drill.apache.org>`_, `Apache Hive <http://hive.apache.org>`_, `Apache +Impala (incubating) <http://impala.apache.org>`_, and `Apache Spark +<http://spark.apache.org>`_ adopting it as a shared standard for high +performance data IO. + +Apache Arrow is an ideal in-memory transport layer for data that is being read +or written with Parquet files. We have been concurrently developing the `C++ +implementation of Apache Parquet <http://github.com/apache/parquet-cpp>`_, +which includes a native, multithreaded C++ adapter to and from in-memory Arrow +data. PyArrow includes Python bindings to this code, which thus enables reading +and writing Parquet files with pandas as well. + +Obtaining pyarrow with Parquet Support +-------------------------------------- + +If you installed ``pyarrow`` with pip or conda, it should be built with Parquet +support bundled: + +.. ipython:: python + + import pyarrow.parquet as pq + +If you are building ``pyarrow`` from source, you must use +``-DARROW_PARQUET=ON`` when compiling the C++ libraries and enable the Parquet +extensions when building ``pyarrow``. See the :ref:`Python Development +<python-development>` page for more details. + +Reading and Writing Single Files +-------------------------------- + +The functions :func:`~.parquet.read_table` and :func:`~.parquet.write_table` +read and write the :ref:`pyarrow.Table <data.table>` object, respectively. + +Let's look at a simple table: + +.. ipython:: python + + import numpy as np + import pandas as pd + import pyarrow as pa + + df = pd.DataFrame({'one': [-1, np.nan, 2.5], + 'two': ['foo', 'bar', 'baz'], + 'three': [True, False, True]}, + index=list('abc')) + table = pa.Table.from_pandas(df) + +We write this to Parquet format with ``write_table``: + +.. ipython:: python + + import pyarrow.parquet as pq + pq.write_table(table, 'example.parquet') + +This creates a single Parquet file. In practice, a Parquet dataset may consist +of many files in many directories. We can read a single file back with +``read_table``: + +.. ipython:: python + + table2 = pq.read_table('example.parquet') + table2.to_pandas() + +You can pass a subset of columns to read, which can be much faster than reading +the whole file (due to the columnar layout): + +.. ipython:: python + + pq.read_table('example.parquet', columns=['one', 'three']) + +When reading a subset of columns from a file that used a Pandas dataframe as the +source, we use ``read_pandas`` to maintain any additional index column data: + +.. ipython:: python + + pq.read_pandas('example.parquet', columns=['two']).to_pandas() + +We need not use a string to specify the origin of the file. It can be any of: + +* A file path as a string +* A :ref:`NativeFile <io.native_file>` from PyArrow +* A Python file object + +In general, a Python file object will have the worst read performance, while a +string file path or an instance of :class:`~.NativeFile` (especially memory +maps) will perform the best. + +.. 
_parquet_mmap: + +Reading Parquet and Memory Mapping +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Because Parquet data needs to be decoded from the Parquet format +and compression, it can't be directly mapped from disk. +Thus the ``memory_map`` option might perform better on some systems +but won't help much with resident memory consumption. + +.. code-block:: python + + >>> pq_array = pa.parquet.read_table("area1.parquet", memory_map=True) + >>> print("RSS: {}MB".format(pa.total_allocated_bytes() >> 20)) + RSS: 4299MB + + >>> pq_array = pa.parquet.read_table("area1.parquet", memory_map=False) + >>> print("RSS: {}MB".format(pa.total_allocated_bytes() >> 20)) + RSS: 4299MB + +If you need to deal with Parquet data bigger than memory, +the :ref:`dataset` and partitioning is probably what you are looking for. + +Parquet file writing options +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +:func:`~pyarrow.parquet.write_table()` has a number of options to +control various settings when writing a Parquet file. + +* ``version``, the Parquet format version to use. ``'1.0'`` ensures + compatibility with older readers, while ``'2.4'`` and greater values + enable more Parquet types and encodings. +* ``data_page_size``, to control the approximate size of encoded data + pages within a column chunk. This currently defaults to 1MB. +* ``flavor``, to set compatibility options particular to a Parquet + consumer like ``'spark'`` for Apache Spark. + +See the :func:`~pyarrow.parquet.write_table()` docstring for more details. + +There are some additional data type handling-specific options +described below. + +Omitting the DataFrame index +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +When using ``pa.Table.from_pandas`` to convert to an Arrow table, by default +one or more special columns are added to keep track of the index (row +labels). Storing the index takes extra space, so if your index is not valuable, +you may choose to omit it by passing ``preserve_index=False`` + +.. ipython:: python + + df = pd.DataFrame({'one': [-1, np.nan, 2.5], + 'two': ['foo', 'bar', 'baz'], + 'three': [True, False, True]}, + index=list('abc')) + df + table = pa.Table.from_pandas(df, preserve_index=False) + +Then we have: + +.. ipython:: python + + pq.write_table(table, 'example_noindex.parquet') + t = pq.read_table('example_noindex.parquet') + t.to_pandas() + +Here you see the index did not survive the round trip. + +Finer-grained Reading and Writing +--------------------------------- + +``read_table`` uses the :class:`~.ParquetFile` class, which has other features: + +.. ipython:: python + + parquet_file = pq.ParquetFile('example.parquet') + parquet_file.metadata + parquet_file.schema + +As you can learn more in the `Apache Parquet format +<https://github.com/apache/parquet-format>`_, a Parquet file consists of +multiple row groups. ``read_table`` will read all of the row groups and +concatenate them into a single table. You can read individual row groups with +``read_row_group``: + +.. ipython:: python + + parquet_file.num_row_groups + parquet_file.read_row_group(0) + +We can similarly write a Parquet file with multiple row groups by using +``ParquetWriter``: + +.. ipython:: python + + with pq.ParquetWriter('example2.parquet', table.schema) as writer: + for i in range(3): + writer.write_table(table) + + pf2 = pq.ParquetFile('example2.parquet') + pf2.num_row_groups + +Inspecting the Parquet File Metadata +------------------------------------ + +The ``FileMetaData`` of a Parquet file can be accessed through +:class:`~.ParquetFile` as shown above: + +.. 
ipython:: python + + parquet_file = pq.ParquetFile('example.parquet') + metadata = parquet_file.metadata + +or can also be read directly using :func:`~parquet.read_metadata`: + +.. ipython:: python + + metadata = pq.read_metadata('example.parquet') + metadata + +The returned ``FileMetaData`` object allows to inspect the +`Parquet file metadata <https://github.com/apache/parquet-format#metadata>`__, +such as the row groups and column chunk metadata and statistics: + +.. ipython:: python + + metadata.row_group(0) + metadata.row_group(0).column(0) + +.. ipython:: python + :suppress: + + !rm example.parquet + !rm example_noindex.parquet + !rm example2.parquet + !rm example3.parquet + +Data Type Handling +------------------ + +Reading types as DictionaryArray +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The ``read_dictionary`` option in ``read_table`` and ``ParquetDataset`` will +cause columns to be read as ``DictionaryArray``, which will become +``pandas.Categorical`` when converted to pandas. This option is only valid for +string and binary column types, and it can yield significantly lower memory use +and improved performance for columns with many repeated string values. + +.. code-block:: python + + pq.read_table(table, where, read_dictionary=['binary_c0', 'stringb_c2']) + +Storing timestamps +~~~~~~~~~~~~~~~~~~ + +Some Parquet readers may only support timestamps stored in millisecond +(``'ms'``) or microsecond (``'us'``) resolution. Since pandas uses nanoseconds +to represent timestamps, this can occasionally be a nuisance. By default +(when writing version 1.0 Parquet files), the nanoseconds will be cast to +microseconds ('us'). + +In addition, We provide the ``coerce_timestamps`` option to allow you to select +the desired resolution: + +.. code-block:: python + + pq.write_table(table, where, coerce_timestamps='ms') + +If a cast to a lower resolution value may result in a loss of data, by default +an exception will be raised. This can be suppressed by passing +``allow_truncated_timestamps=True``: + +.. code-block:: python + + pq.write_table(table, where, coerce_timestamps='ms', + allow_truncated_timestamps=True) + +Timestamps with nanoseconds can be stored without casting when using the +more recent Parquet format version 2.6: + +.. code-block:: python + + pq.write_table(table, where, version='2.6') + +However, many Parquet readers do not yet support this newer format version, and +therefore the default is to write version 1.0 files. When compatibility across +different processing frameworks is required, it is recommended to use the +default version 1.0. + +Older Parquet implementations use ``INT96`` based storage of +timestamps, but this is now deprecated. This includes some older +versions of Apache Impala and Apache Spark. To write timestamps in +this format, set the ``use_deprecated_int96_timestamps`` option to +``True`` in ``write_table``. + +.. code-block:: python + + pq.write_table(table, where, use_deprecated_int96_timestamps=True) + +Compression, Encoding, and File Compatibility +--------------------------------------------- + +The most commonly used Parquet implementations use dictionary encoding when +writing files; if the dictionaries grow too large, then they "fall back" to +plain encoding. Whether dictionary encoding is used can be toggled using the +``use_dictionary`` option: + +.. code-block:: python + + pq.write_table(table, where, use_dictionary=False) + +The data pages within a column in a row group can be compressed after the +encoding passes (dictionary, RLE encoding). 
In PyArrow we use Snappy
+compression by default, but Brotli, Gzip, and uncompressed are also supported:
+
+.. code-block:: python
+
+   pq.write_table(table, where, compression='snappy')
+   pq.write_table(table, where, compression='gzip')
+   pq.write_table(table, where, compression='brotli')
+   pq.write_table(table, where, compression='none')
+
+Snappy generally results in better performance, while Gzip may yield smaller
+files.
+
+These settings can also be set on a per-column basis:
+
+.. code-block:: python
+
+   pq.write_table(table, where, compression={'foo': 'snappy', 'bar': 'gzip'},
+                  use_dictionary=['foo', 'bar'])
+
+Partitioned Datasets (Multiple Files)
+-------------------------------------
+
+Multiple Parquet files constitute a Parquet *dataset*. These may be presented
+in a number of ways:
+
+* A list of absolute Parquet file paths
+* A directory name containing nested directories defining a partitioned dataset
+
+A dataset partitioned by year and month may look like this on disk:
+
+.. code-block:: text
+
+   dataset_name/
+     year=2007/
+       month=01/
+          0.parq
+          1.parq
+          ...
+       month=02/
+          0.parq
+          1.parq
+          ...
+       month=03/
+       ...
+     year=2008/
+       month=01/
+       ...
+     ...
+
+Writing to Partitioned Datasets
+-------------------------------
+
+You can write a partitioned dataset for any ``pyarrow`` file system that is a
+file-store (e.g. local, HDFS, S3). The default behaviour when no filesystem is
+given is to use the local filesystem.
+
+.. code-block:: python
+
+   # Local dataset write
+   pq.write_to_dataset(table, root_path='dataset_name',
+                       partition_cols=['one', 'two'])
+
+The root path in this case specifies the parent directory to which data will be
+saved. The partition columns are the column names by which to partition the
+dataset. Columns are partitioned in the order they are given. The partition
+splits are determined by the unique values in the partition columns.
+
+To use another filesystem you only need to add the ``filesystem`` parameter;
+the individual table writes are already wrapped using ``with`` statements, so
+the ``pq.write_to_dataset`` call does not need to be.
+
+.. code-block:: python
+
+   # Remote file-system example
+   from pyarrow.fs import HadoopFileSystem
+   fs = HadoopFileSystem(host, port, user=user, kerb_ticket=ticket_cache_path)
+   pq.write_to_dataset(table, root_path='dataset_name',
+                       partition_cols=['one', 'two'], filesystem=fs)
+
+Compatibility Note: if using ``pq.write_to_dataset`` to create a table that
+will then be used by Hive, partition column values must be compatible with
+the allowed character set of the Hive version you are running.
+
+Writing ``_metadata`` and ``_common_metadata`` files
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Some processing frameworks such as Spark or Dask (optionally) use ``_metadata``
+and ``_common_metadata`` files with partitioned datasets.
+
+Those files include information about the schema of the full dataset (for
+``_common_metadata``) and potentially all row group metadata of all files in the
+partitioned dataset as well (for ``_metadata``). The actual files are
+metadata-only Parquet files. Note this is not a Parquet standard, but a
+convention set in practice by those frameworks.
+
+Using those files can give a more efficient creation of a Parquet dataset,
+since it can use the stored schema and file paths of all row groups,
+instead of inferring the schema and crawling the directories for all Parquet
+files (this is especially the case for filesystems where accessing files
+is expensive).
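+
+Once a ``_metadata`` file has been written (the following paragraphs show how
+to do that), it is an ordinary metadata-only Parquet footer and can be
+inspected with ``pq.read_metadata``. A small sketch, assuming a hypothetical
+``dataset_root/`` directory that already contains such a file:
+
+.. code-block:: python
+
+   import pyarrow.parquet as pq
+
+   # One FileMetaData object describing the row groups of all data files
+   # in the partitioned dataset
+   dataset_meta = pq.read_metadata('dataset_root/_metadata')
+   dataset_meta.num_row_groups            # row groups across all files
+   dataset_meta.row_group(0).column(0)    # per-column statistics and file path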
+ +The :func:`~pyarrow.parquet.write_to_dataset` function does not automatically +write such metadata files, but you can use it to gather the metadata and +combine and write them manually: + +.. code-block:: python + + # Write a dataset and collect metadata information of all written files + metadata_collector = [] + pq.write_to_dataset(table, root_path, metadata_collector=metadata_collector) + + # Write the ``_common_metadata`` parquet file without row groups statistics + pq.write_metadata(table.schema, root_path / '_common_metadata') + + # Write the ``_metadata`` parquet file with row groups statistics of all files + pq.write_metadata( + table.schema, root_path / '_metadata', + metadata_collector=metadata_collector + ) + +When not using the :func:`~pyarrow.parquet.write_to_dataset` function, but +writing the individual files of the partitioned dataset using +:func:`~pyarrow.parquet.write_table` or :class:`~pyarrow.parquet.ParquetWriter`, +the ``metadata_collector`` keyword can also be used to collect the FileMetaData +of the written files. In this case, you need to ensure to set the file path +contained in the row group metadata yourself before combining the metadata, and +the schemas of all different files and collected FileMetaData objects should be +the same: + +.. code-block:: python + + metadata_collector = [] + pq.write_table( + table1, root_path / "year=2017/data1.parquet", + metadata_collector=metadata_collector + ) + + # set the file path relative to the root of the partitioned dataset + metadata_collector[-1].set_file_path("year=2017/data1.parquet") + + # combine and write the metadata + metadata = metadata_collector[0] + for _meta in metadata_collector[1:]: + metadata.append_row_groups(_meta) + metadata.write_metadata_file(root_path / "_metadata") + + # or use pq.write_metadata to combine and write in a single step + pq.write_metadata( + table1.schema, root_path / "_metadata", + metadata_collector=metadata_collector + ) + +Reading from Partitioned Datasets +------------------------------------------------ + +The :class:`~.ParquetDataset` class accepts either a directory name or a list +of file paths, and can discover and infer some common partition structures, +such as those produced by Hive: + +.. code-block:: python + + dataset = pq.ParquetDataset('dataset_name/') + table = dataset.read() + +You can also use the convenience function ``read_table`` exposed by +``pyarrow.parquet`` that avoids the need for an additional Dataset object +creation step. + +.. code-block:: python + + table = pq.read_table('dataset_name') + +Note: the partition columns in the original table will have their types +converted to Arrow dictionary types (pandas categorical) on load. Ordering of +partition columns is not preserved through the save/load process. If reading +from a remote filesystem into a pandas dataframe you may need to run +``sort_index`` to maintain row ordering (as long as the ``preserve_index`` +option was enabled on write). + +.. note:: + + The ParquetDataset is being reimplemented based on the new generic Dataset + API (see the :ref:`dataset` docs for an overview). This is not yet the + default, but can already be enabled by passing the ``use_legacy_dataset=False`` + keyword to :class:`ParquetDataset` or :func:`read_table`:: + + pq.ParquetDataset('dataset_name/', use_legacy_dataset=False) + + Enabling this gives the following new features: + + - Filtering on all columns (using row group statistics) instead of only on + the partition keys. 
+ - More fine-grained partitioning: support for a directory partitioning scheme + in addition to the Hive-like partitioning (e.g. "/2019/11/15/" instead of + "/year=2019/month=11/day=15/"), and the ability to specify a schema for + the partition keys. + - General performance improvement and bug fixes. + + It also has the following changes in behaviour: + + - The partition keys need to be explicitly included in the ``columns`` + keyword when you want to include them in the result while reading a + subset of the columns + + This new implementation is already enabled in ``read_table``, and in the + future, this will be turned on by default for ``ParquetDataset``. The new + implementation does not yet cover all existing ParquetDataset features (e.g. + specifying the ``metadata``, or the ``pieces`` property API). Feedback is + very welcome. + + +Using with Spark +---------------- + +Spark places some constraints on the types of Parquet files it will read. The +option ``flavor='spark'`` will set these options automatically and also +sanitize field characters unsupported by Spark SQL. + +Multithreaded Reads +------------------- + +Each of the reading functions by default use multi-threading for reading +columns in parallel. Depending on the speed of IO +and how expensive it is to decode the columns in a particular file +(particularly with GZIP compression), this can yield significantly higher data +throughput. + +This can be disabled by specifying ``use_threads=False``. + +.. note:: + The number of threads to use concurrently is automatically inferred by Arrow + and can be inspected using the :func:`~pyarrow.cpu_count()` function. + +Reading from cloud storage +-------------------------- + +In addition to local files, pyarrow supports other filesystems, such as cloud +filesystems, through the ``filesystem`` keyword: + +.. code-block:: python + + from pyarrow import fs + + s3 = fs.S3FileSystem(region="us-east-2") + table = pq.read_table("bucket/object/key/prefix", filesystem=s3) + +Currently, :class:`HDFS <pyarrow.fs.HadoopFileSystem>` and +:class:`Amazon S3-compatible storage <pyarrow.fs.S3FileSystem>` are +supported. See the :ref:`filesystem` docs for more details. For those +built-in filesystems, the filesystem can also be inferred from the file path, +if specified as a URI: + +.. code-block:: python + + table = pq.read_table("s3://bucket/object/key/prefix") + +Other filesystems can still be supported if there is an +`fsspec <https://filesystem-spec.readthedocs.io/en/latest/>`__-compatible +implementation available. See :ref:`filesystem-fsspec` for more details. +One example is Azure Blob storage, which can be interfaced through the +`adlfs <https://github.com/dask/adlfs>`__ package. + +.. code-block:: python + + from adlfs import AzureBlobFileSystem + + abfs = AzureBlobFileSystem(account_name="XXXX", account_key="XXXX", container_name="XXXX") + table = pq.read_table("file.parquet", filesystem=abfs) diff --git a/src/arrow/docs/source/python/plasma.rst b/src/arrow/docs/source/python/plasma.rst new file mode 100644 index 000000000..51c7b6eaf --- /dev/null +++ b/src/arrow/docs/source/python/plasma.rst @@ -0,0 +1,462 @@ +.. Licensed to the Apache Software Foundation (ASF) under one +.. or more contributor license agreements. See the NOTICE file +.. distributed with this work for additional information +.. regarding copyright ownership. The ASF licenses this file +.. to you under the Apache License, Version 2.0 (the +.. "License"); you may not use this file except in compliance +.. with the License. 
You may obtain a copy of the License at
+
+.. http://www.apache.org/licenses/LICENSE-2.0
+
+.. Unless required by applicable law or agreed to in writing,
+.. software distributed under the License is distributed on an
+.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+.. KIND, either express or implied. See the License for the
+.. specific language governing permissions and limitations
+.. under the License.
+
+.. currentmodule:: pyarrow
+.. _plasma:
+
+The Plasma In-Memory Object Store
+=================================
+
+.. note::
+
+   At present, Plasma is only supported for use on Linux and macOS.
+
+The Plasma API
+--------------
+
+Starting the Plasma store
+^^^^^^^^^^^^^^^^^^^^^^^^^
+
+You can start the Plasma store by issuing a terminal command similar to the
+following:
+
+.. code-block:: bash
+
+  plasma_store -m 1000000000 -s /tmp/plasma
+
+The ``-m`` flag specifies the size of the store in bytes, and the ``-s`` flag
+specifies the socket that the store will listen on. Thus, the above command
+allows the Plasma store to use up to 1GB of memory, and sets the socket to
+``/tmp/plasma``.
+
+Leave the current terminal window open for as long as the Plasma store should
+keep running. Messages, such as notifications about disconnecting clients, may
+occasionally be printed to the screen. To stop the Plasma store, press
+``Ctrl-C`` in the terminal.
+
+Creating a Plasma client
+^^^^^^^^^^^^^^^^^^^^^^^^
+
+To start a Plasma client from Python, call ``plasma.connect`` using the same
+socket name:
+
+.. code-block:: python
+
+  import pyarrow.plasma as plasma
+  client = plasma.connect("/tmp/plasma")
+
+If running the above Python code produces the following error, then either the
+given socket path is incorrect or the ``plasma_store`` process is not currently
+running. Check to see if the Plasma store is still running.
+
+.. code-block:: shell
+
+  >>> client = plasma.connect("/tmp/plasma")
+  Connection to socket failed for pathname /tmp/plasma
+  Could not connect to socket /tmp/plasma
+
+
+Object IDs
+^^^^^^^^^^
+
+Each object in the Plasma store should be associated with a unique ID. The
+Object ID then serves as a key that any client can use to retrieve that object
+from the Plasma store. You can form an ``ObjectID`` object from a byte string of
+length 20.
+
+.. code-block:: shell
+
+  # Create an ObjectID.
+  >>> id = plasma.ObjectID(20 * b"a")
+
+  # The character "a" is encoded as 61 in hex.
+  >>> id
+  ObjectID(6161616161616161616161616161616161616161)
+
+The random generation of Object IDs is often good enough to ensure unique IDs.
+You can easily create a helper function that randomly generates object IDs as
+follows:
+
+.. code-block:: python
+
+  import numpy as np
+
+  def random_object_id():
+      return plasma.ObjectID(np.random.bytes(20))
+
+Putting and Getting Python Objects
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Plasma supports two APIs for creating and accessing objects: a high-level
+API that allows storing and retrieving Python objects, and a low-level
+API that allows creating, writing and sealing buffers and operating on
+the binary data directly. In this section we describe the high-level API.
+
+This is how you can put and get a Python object:
+
+.. code-block:: python
+
+  # Create a python object.
+  object_id = client.put("hello, world")
+
+  # Get the object.
+  client.get(object_id)
+
+This works with all Python objects supported by the Arrow Python object
+serialization.
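+
+Because an ``ObjectID`` is just 20 bytes, it can be passed to other processes;
+any client connected to the same store can then fetch the object. A small
+sketch, assuming the store at ``/tmp/plasma`` from above is still running and
+``object_id`` is the ID returned by ``client.put``:
+
+.. code-block:: python
+
+  # A second client, e.g. in another Python process, connected to the
+  # same store socket can fetch the object by its ID
+  client2 = plasma.connect("/tmp/plasma")
+  client2.get(object_id)   # -> "hello, world"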
+ +You can also get multiple objects at the same time (which can be more +efficient since it avoids IPC round trips): + +.. code-block:: python + + # Create multiple python objects. + object_id1 = client.put(1) + object_id2 = client.put(2) + object_id3 = client.put(3) + + # Get the objects. + client.get([object_id1, object_id2, object_id3]) + +Furthermore, it is possible to provide a timeout for the get call. If the +object is not available within the timeout, the special object +`pyarrow.ObjectNotAvailable` will be returned. + +Creating an Object Buffer +^^^^^^^^^^^^^^^^^^^^^^^^^ + +Objects are created in Plasma in two stages. First, they are **created**, which +allocates a buffer for the object. At this point, the client can write to the +buffer and construct the object within the allocated buffer. + +To create an object for Plasma, you need to create an object ID, as well as +give the object's maximum size in bytes. + +.. code-block:: python + + # Create an object buffer. + object_id = plasma.ObjectID(20 * b"a") + object_size = 1000 + buffer = memoryview(client.create(object_id, object_size)) + + # Write to the buffer. + for i in range(1000): + buffer[i] = i % 128 + +When the client is done, the client **seals** the buffer, making the object +immutable, and making it available to other Plasma clients. + +.. code-block:: python + + # Seal the object. This makes the object immutable and available to other clients. + client.seal(object_id) + + +Getting an Object Buffer +^^^^^^^^^^^^^^^^^^^^^^^^ + +After an object has been sealed, any client who knows the object ID can get +the object buffer. + +.. code-block:: python + + # Create a different client. Note that this second client could be + # created in the same or in a separate, concurrent Python session. + client2 = plasma.connect("/tmp/plasma") + + # Get the object in the second client. This blocks until the object has been sealed. + object_id2 = plasma.ObjectID(20 * b"a") + [buffer2] = client2.get_buffers([object_id]) + +If the object has not been sealed yet, then the call to client.get_buffers will +block until the object has been sealed by the client constructing the object. +Using the ``timeout_ms`` argument to get, you can specify a timeout for this (in +milliseconds). After the timeout, the interpreter will yield control back. + +.. code-block:: shell + + >>> buffer + <memory at 0x7fdbdc96e708> + >>> buffer[1] + 1 + >>> buffer2 + <plasma.plasma.PlasmaBuffer object at 0x7fdbf2770e88> + >>> view2 = memoryview(buffer2) + >>> view2[1] + 1 + >>> view2[129] + 1 + >>> bytes(buffer[1:4]) + b'\x01\x02\x03' + >>> bytes(view2[1:4]) + b'\x01\x02\x03' + + +Listing objects in the store +^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The objects in the store can be listed in the following way (note that +this functionality is currently experimental and the concrete representation +of the object info might change in the future): + +.. 
code-block:: python + + import pyarrow.plasma as plasma + import time + + client = plasma.connect("/tmp/plasma") + + client.put("hello, world") + # Sleep a little so we get different creation times + time.sleep(2) + client.put("another object") + # Create an object that is not sealed yet + object_id = plasma.ObjectID.from_random() + client.create(object_id, 100) + print(client.list()) + + >>> {ObjectID(4cba8f80c54c6d265b46c2cdfcee6e32348b12be): {'construct_duration': 0, + >>> 'create_time': 1535223642, + >>> 'data_size': 460, + >>> 'metadata_size': 0, + >>> 'ref_count': 0, + >>> 'state': 'sealed'}, + >>> ObjectID(a7598230b0c26464c9d9c99ae14773ee81485428): {'construct_duration': 0, + >>> 'create_time': 1535223644, + >>> 'data_size': 460, + >>> 'metadata_size': 0, + >>> 'ref_count': 0, + >>> 'state': 'sealed'}, + >>> ObjectID(e603ab0c92098ebf08f90bfcea33ff98f6476870): {'construct_duration': -1, + >>> 'create_time': 1535223644, + >>> 'data_size': 100, + >>> 'metadata_size': 0, + >>> 'ref_count': 1, + >>> 'state': 'created'}} + + +Using Arrow and Pandas with Plasma +---------------------------------- + +Storing Arrow Objects in Plasma +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +To store an Arrow object in Plasma, we must first **create** the object and then +**seal** it. However, Arrow objects such as ``Tensors`` may be more complicated +to write than simple binary data. + +To create the object in Plasma, you still need an ``ObjectID`` and a size to +pass in. To find out the size of your Arrow object, you can use pyarrow +API such as ``pyarrow.ipc.get_tensor_size``. + +.. code-block:: python + + import numpy as np + import pyarrow as pa + + # Create a pyarrow.Tensor object from a numpy random 2-dimensional array + data = np.random.randn(10, 4) + tensor = pa.Tensor.from_numpy(data) + + # Create the object in Plasma + object_id = plasma.ObjectID(np.random.bytes(20)) + data_size = pa.ipc.get_tensor_size(tensor) + buf = client.create(object_id, data_size) + +To write the Arrow ``Tensor`` object into the buffer, you can use Plasma to +convert the ``memoryview`` buffer into a ``pyarrow.FixedSizeBufferWriter`` +object. A ``pyarrow.FixedSizeBufferWriter`` is a format suitable for Arrow's +``pyarrow.ipc.write_tensor``: + +.. code-block:: python + + # Write the tensor into the Plasma-allocated buffer + stream = pa.FixedSizeBufferWriter(buf) + pa.ipc.write_tensor(tensor, stream) # Writes tensor's 552 bytes to Plasma stream + +To finish storing the Arrow object in Plasma, call ``seal``: + +.. code-block:: python + + # Seal the Plasma object + client.seal(object_id) + +Getting Arrow Objects from Plasma +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +To read the object, first retrieve it as a ``PlasmaBuffer`` using its object ID. + +.. code-block:: python + + # Get the arrow object by ObjectID. + [buf2] = client.get_buffers([object_id]) + +To convert the ``PlasmaBuffer`` back into an Arrow ``Tensor``, first create a +pyarrow ``BufferReader`` object from it. You can then pass the ``BufferReader`` +into ``pyarrow.ipc.read_tensor`` to reconstruct the Arrow ``Tensor`` object: + +.. code-block:: python + + # Reconstruct the Arrow tensor object. + reader = pa.BufferReader(buf2) + tensor2 = pa.ipc.read_tensor(reader) + +Finally, you can use ``pyarrow.ipc.read_tensor`` to convert the Arrow object +back into numpy data: + +.. 
code-block:: python + + # Convert back to numpy + array = tensor2.to_numpy() + +Storing Pandas DataFrames in Plasma +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Storing a Pandas ``DataFrame`` still follows the **create** then **seal** +process of storing an object in the Plasma store, however one cannot directly +write the ``DataFrame`` to Plasma with Pandas alone. Plasma also needs to know +the size of the ``DataFrame`` to allocate a buffer for. + +See :ref:`pandas_interop` for more information on using Arrow with Pandas. + +You can create the pyarrow equivalent of a Pandas ``DataFrame`` by using +``pyarrow.from_pandas`` to convert it to a ``RecordBatch``. + +.. code-block:: python + + import pyarrow as pa + import pandas as pd + + # Create a Pandas DataFrame + d = {'one' : pd.Series([1., 2., 3.], index=['a', 'b', 'c']), + 'two' : pd.Series([1., 2., 3., 4.], index=['a', 'b', 'c', 'd'])} + df = pd.DataFrame(d) + + # Convert the Pandas DataFrame into a PyArrow RecordBatch + record_batch = pa.RecordBatch.from_pandas(df) + +Creating the Plasma object requires an ``ObjectID`` and the size of the +data. Now that we have converted the Pandas ``DataFrame`` into a PyArrow +``RecordBatch``, use the ``MockOutputStream`` to determine the +size of the Plasma object. + +.. code-block:: python + + # Create the Plasma object from the PyArrow RecordBatch. Most of the work here + # is done to determine the size of buffer to request from the object store. + object_id = plasma.ObjectID(np.random.bytes(20)) + mock_sink = pa.MockOutputStream() + with pa.RecordBatchStreamWriter(mock_sink, record_batch.schema) as stream_writer: + stream_writer.write_batch(record_batch) + data_size = mock_sink.size() + buf = client.create(object_id, data_size) + +The DataFrame can now be written to the buffer as follows. + +.. code-block:: python + + # Write the PyArrow RecordBatch to Plasma + stream = pa.FixedSizeBufferWriter(buf) + with pa.RecordBatchStreamWriter(stream, record_batch.schema) as stream_writer: + stream_writer.write_batch(record_batch) + +Finally, seal the finished object for use by all clients: + +.. code-block:: python + + # Seal the Plasma object + client.seal(object_id) + +Getting Pandas DataFrames from Plasma +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Since we store the Pandas DataFrame as a PyArrow ``RecordBatch`` object, +to get the object back from the Plasma store, we follow similar steps +to those specified in `Getting Arrow Objects from Plasma`_. + +We first have to convert the ``PlasmaBuffer`` returned from +``client.get_buffers`` into an Arrow ``BufferReader`` object. + +.. code-block:: python + + # Fetch the Plasma object + [data] = client.get_buffers([object_id]) # Get PlasmaBuffer from ObjectID + buffer = pa.BufferReader(data) + +From the ``BufferReader``, we can create a specific ``RecordBatchStreamReader`` +in Arrow to reconstruct the stored PyArrow ``RecordBatch`` object. + +.. code-block:: python + + # Convert object back into an Arrow RecordBatch + reader = pa.RecordBatchStreamReader(buffer) + record_batch = reader.read_next_batch() + +The last step is to convert the PyArrow ``RecordBatch`` object back into +the original Pandas ``DataFrame`` structure. + +.. code-block:: python + + # Convert back into Pandas + result = record_batch.to_pandas() + +Using Plasma with Huge Pages +---------------------------- + +On Linux it is possible to use the Plasma store with huge pages for increased +throughput. You first need to create a file system and activate huge pages with + +.. 
code-block:: shell + + sudo mkdir -p /mnt/hugepages + gid=`id -g` + uid=`id -u` + sudo mount -t hugetlbfs -o uid=$uid -o gid=$gid none /mnt/hugepages + sudo bash -c "echo $gid > /proc/sys/vm/hugetlb_shm_group" + sudo bash -c "echo 20000 > /proc/sys/vm/nr_hugepages" + +Note that you only need root access to create the file system, not for +running the object store. You can then start the Plasma store with the ``-d`` +flag for the mount point of the huge page file system and the ``-h`` flag +which indicates that huge pages are activated: + +.. code-block:: shell + + plasma_store -s /tmp/plasma -m 10000000000 -d /mnt/hugepages -h + +You can test this with the following script: + +.. code-block:: python + + import numpy as np + import pyarrow as pa + import pyarrow.plasma as plasma + import time + + client = plasma.connect("/tmp/plasma") + + data = np.random.randn(100000000) + tensor = pa.Tensor.from_numpy(data) + + object_id = plasma.ObjectID(np.random.bytes(20)) + buf = client.create(object_id, pa.ipc.get_tensor_size(tensor)) + + stream = pa.FixedSizeBufferWriter(buf) + stream.set_memcopy_threads(4) + a = time.time() + pa.ipc.write_tensor(tensor, stream) + print("Writing took ", time.time() - a) diff --git a/src/arrow/docs/source/python/timestamps.rst b/src/arrow/docs/source/python/timestamps.rst new file mode 100644 index 000000000..fb4da5cc0 --- /dev/null +++ b/src/arrow/docs/source/python/timestamps.rst @@ -0,0 +1,198 @@ +.. Licensed to the Apache Software Foundation (ASF) under one +.. or more contributor license agreements. See the NOTICE file +.. distributed with this work for additional information +.. regarding copyright ownership. The ASF licenses this file +.. to you under the Apache License, Version 2.0 (the +.. "License"); you may not use this file except in compliance +.. with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, +.. software distributed under the License is distributed on an +.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +.. KIND, either express or implied. See the License for the +.. specific language governing permissions and limitations +.. under the License. + +********** +Timestamps +********** + +Arrow/Pandas Timestamps +======================= + +Arrow timestamps are stored as a 64-bit integer with column metadata to +associate a time unit (e.g. milliseconds, microseconds, or nanoseconds), and an +optional time zone. Pandas (`Timestamp`) uses a 64-bit integer representing +nanoseconds and an optional time zone. +Python/Pandas timestamp types without a associated time zone are referred to as +"Time Zone Naive". Python/Pandas timestamp types with an associated time zone are +referred to as "Time Zone Aware". + + +Timestamp Conversions +===================== + +Pandas/Arrow ⇄ Spark +-------------------- + +Spark stores timestamps as 64-bit integers representing microseconds since +the UNIX epoch. It does not store any metadata about time zones with its +timestamps. + +Spark interprets timestamps with the *session local time zone*, (i.e. +``spark.sql.session.timeZone``). If that time zone is undefined, Spark turns to +the default system time zone. For simplicity's sake below, the session +local time zone is always defined. + +This implies a few things when round-tripping timestamps: + +#. Timezone information is lost (all timestamps that result from + converting from spark to arrow/pandas are "time zone naive"). +#. 
+Pandas to Spark (through Apache Arrow)
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The following cases assume the Spark configuration
+``spark.sql.execution.arrow.enabled`` is set to ``"true"``.
+
+::
+
+   >>> pdf = pd.DataFrame({'naive': [datetime(2019, 1, 1, 0)],
+   ...                     'aware': [Timestamp(year=2019, month=1, day=1,
+   ...                               nanosecond=500, tz=timezone(timedelta(hours=-8)))]})
+   >>> pdf
+          naive                               aware
+   0 2019-01-01 2019-01-01 00:00:00.000000500-08:00
+
+   >>> spark.conf.set("spark.sql.session.timeZone", "UTC")
+   >>> utc_df = sqlContext.createDataFrame(pdf)
+   >>> utc_df.show()
+   +-------------------+-------------------+
+   |              naive|              aware|
+   +-------------------+-------------------+
+   |2019-01-01 00:00:00|2019-01-01 08:00:00|
+   +-------------------+-------------------+
+
+Note that the aware timestamp is shifted to reflect the time assuming UTC
+(it still represents the same instant in time). Naive timestamps are treated by
+Spark as being in the system local time zone and are converted to UTC. Recall
+that, internally, the schema of a Spark DataFrame does not store any time zone
+information with its timestamps.
+
+Now, if the session time zone is set to US Pacific Time (PST), we don't
+see any shift in the display of the aware timestamp (it
+still represents the same instant in time):
+
+::
+
+   >>> spark.conf.set("spark.sql.session.timeZone", "US/Pacific")
+   >>> pst_df = sqlContext.createDataFrame(pdf)
+   >>> pst_df.show()
+   +-------------------+-------------------+
+   |              naive|              aware|
+   +-------------------+-------------------+
+   |2019-01-01 00:00:00|2019-01-01 00:00:00|
+   +-------------------+-------------------+
+
+Looking again at ``utc_df.show()``, we see one of the impacts of the session
+time zone. Because the naive timestamp was initially converted assuming UTC,
+the instant it reflects is actually earlier than the naive timestamp from the
+PST-converted data frame:
+
+::
+
+   >>> utc_df.show()
+   +-------------------+-------------------+
+   |              naive|              aware|
+   +-------------------+-------------------+
+   |2018-12-31 16:00:00|2019-01-01 00:00:00|
+   +-------------------+-------------------+
+
+Spark to Pandas
+~~~~~~~~~~~~~~~
+
+We can observe what happens when converting back to Arrow/Pandas. Assuming the
+session time zone is still PST:
+
+::
+
+   >>> pst_df.show()
+   +-------------------+-------------------+
+   |              naive|              aware|
+   +-------------------+-------------------+
+   |2019-01-01 00:00:00|2019-01-01 00:00:00|
+   +-------------------+-------------------+
+
+   >>> pst_df.toPandas()
+          naive      aware
+   0 2019-01-01 2019-01-01
+   >>> pst_df.toPandas().info()
+   <class 'pandas.core.frame.DataFrame'>
+   RangeIndex: 1 entries, 0 to 0
+   Data columns (total 2 columns):
+   naive    1 non-null datetime64[ns]
+   aware    1 non-null datetime64[ns]
+   dtypes: datetime64[ns](2)
+   memory usage: 96.0 bytes
+
+Notice that, in addition to being "time zone naive", the 'aware' value now
+yields a different epoch offset. Spark does the conversion by first converting
+to the session time zone (or the system local time zone if the session time
+zone isn't set) and then localizing to remove the time zone information. This
+results in the timestamp being 8 hours before the original time:
+
+::
+
+   >>> pst_df.toPandas()['aware'][0]
+   Timestamp('2019-01-01 00:00:00')
+   >>> pdf['aware'][0]
+   Timestamp('2019-01-01 00:00:00.000000500-0800', tz='UTC-08:00')
+   >>> (pst_df.toPandas()['aware'][0].timestamp()-pdf['aware'][0].timestamp())/3600
+   -8.0
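+If the session time zone is known, this shift can be undone after the fact by
+re-localizing the naive result. The following is a minimal pandas-only sketch, with
+``'US/Pacific'`` hard-coded as a stand-in for the session time zone; note that the
+nanoseconds already lost to truncation cannot be recovered this way.
+
+.. code-block:: python
+
+   import pandas as pd
+
+   # Stand-in for the naive value returned by pst_df.toPandas()['aware'][0]
+   naive = pd.Timestamp('2019-01-01 00:00:00')
+
+   # Re-attach the session time zone that Spark localized away.
+   restored = naive.tz_localize('US/Pacific')
+
+   print(restored)              # 2019-01-01 00:00:00-08:00
+   print(restored.timestamp())  # 1546329600.0, matching the original aware instant
+                                # (up to the truncated 500 nanoseconds)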
+The same type of conversion happens with the data frame converted while
+the session time zone was UTC. In this case, both the naive and aware values
+differ from the original Pandas values (the difference in the naive value comes
+from the session time zone having changed between when ``utc_df`` was created
+and when it was converted back):
+
+::
+
+   >>> utc_df.show()
+   +-------------------+-------------------+
+   |              naive|              aware|
+   +-------------------+-------------------+
+   |2018-12-31 16:00:00|2019-01-01 00:00:00|
+   +-------------------+-------------------+
+
+   >>> utc_df.toPandas()
+                   naive      aware
+   0 2018-12-31 16:00:00 2019-01-01
+
+Note that the surprising shift for the aware value doesn't happen
+when the session time zone is UTC (but the timestamps
+still become "time zone naive"):
+
+::
+
+   >>> spark.conf.set("spark.sql.session.timeZone", "UTC")
+   >>> pst_df.show()
+   +-------------------+-------------------+
+   |              naive|              aware|
+   +-------------------+-------------------+
+   |2019-01-01 08:00:00|2019-01-01 08:00:00|
+   +-------------------+-------------------+
+
+   >>> pst_df.toPandas()['aware'][0]
+   Timestamp('2019-01-01 08:00:00')
+   >>> pdf['aware'][0]
+   Timestamp('2019-01-01 00:00:00.000000500-0800', tz='UTC-08:00')
+   >>> (pst_df.toPandas()['aware'][0].timestamp()-pdf['aware'][0].timestamp())/3600
+   0.0
diff --git a/src/arrow/docs/source/r/index.rst b/src/arrow/docs/source/r/index.rst new file mode 100644 index 000000000..b799544bb --- /dev/null +++ b/src/arrow/docs/source/r/index.rst @@ -0,0 +1,21 @@
+.. Licensed to the Apache Software Foundation (ASF) under one
+.. or more contributor license agreements. See the NOTICE file
+.. distributed with this work for additional information
+.. regarding copyright ownership. The ASF licenses this file
+.. to you under the Apache License, Version 2.0 (the
+.. "License"); you may not use this file except in compliance
+.. with the License. You may obtain a copy of the License at
+
+.. http://www.apache.org/licenses/LICENSE-2.0
+
+.. Unless required by applicable law or agreed to in writing,
+.. software distributed under the License is distributed on an
+.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+.. KIND, either express or implied. See the License for the
+.. specific language governing permissions and limitations
+.. under the License.
+
+R docs
+======
+
+Stub page for the R docs; the actual source is located in the r/ sub-directory.
diff --git a/src/arrow/docs/source/status.rst b/src/arrow/docs/source/status.rst new file mode 100644 index 000000000..8e3e998df --- /dev/null +++ b/src/arrow/docs/source/status.rst @@ -0,0 +1,239 @@
+.. Licensed to the Apache Software Foundation (ASF) under one
+.. or more contributor license agreements. See the NOTICE file
+.. distributed with this work for additional information
+.. regarding copyright ownership. The ASF licenses this file
+.. to you under the Apache License, Version 2.0 (the
+.. "License"); you may not use this file except in compliance
+.. with the License. You may obtain a copy of the License at
+
+.. http://www.apache.org/licenses/LICENSE-2.0
+
+.. Unless required by applicable law or agreed to in writing,
+.. software distributed under the License is distributed on an
+.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+.. 
KIND, either express or implied. See the License for the +.. specific language governing permissions and limitations +.. under the License. + +===================== +Implementation Status +===================== + +The following tables summarize the features available in the various official +Arrow libraries. Unless otherwise stated, the Python, R, Ruby and C/GLib +libraries follow the C++ Arrow library. + +Data Types +========== + ++-------------------+-------+-------+-------+------------+-------+-------+-------+ +| Data type | C++ | Java | Go | JavaScript | C# | Rust | Julia | +| (primitive) | | | | | | | | ++===================+=======+=======+=======+============+=======+=======+=======+ +| Null | ✓ | ✓ | ✓ | | | ✓ | ✓ | ++-------------------+-------+-------+-------+------------+-------+-------+-------+ +| Boolean | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ++-------------------+-------+-------+-------+------------+-------+-------+-------+ +| Int8/16/32/64 | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ++-------------------+-------+-------+-------+------------+-------+-------+-------+ +| UInt8/16/32/64 | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ++-------------------+-------+-------+-------+------------+-------+-------+-------+ +| Float16 | | | ✓ | | | | ✓ | ++-------------------+-------+-------+-------+------------+-------+-------+-------+ +| Float32/64 | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ++-------------------+-------+-------+-------+------------+-------+-------+-------+ +| Decimal128 | ✓ | ✓ | ✓ | | ✓ | ✓ | ✓ | ++-------------------+-------+-------+-------+------------+-------+-------+-------+ +| Decimal256 | ✓ | ✓ | | | ✓ | | ✓ | ++-------------------+-------+-------+-------+------------+-------+-------+-------+ +| Date32/64 | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ++-------------------+-------+-------+-------+------------+-------+-------+-------+ +| Time32/64 | ✓ | ✓ | ✓ | ✓ | | ✓ | ✓ | ++-------------------+-------+-------+-------+------------+-------+-------+-------+ +| Timestamp | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ++-------------------+-------+-------+-------+------------+-------+-------+-------+ +| Duration | ✓ | ✓ | ✓ | | | ✓ | ✓ | ++-------------------+-------+-------+-------+------------+-------+-------+-------+ +| Interval | ✓ | ✓ | ✓ | | | ✓ | ✓ | ++-------------------+-------+-------+-------+------------+-------+-------+-------+ +| Fixed Size Binary | ✓ | ✓ | ✓ | ✓ | | ✓ | ✓ | ++-------------------+-------+-------+-------+------------+-------+-------+-------+ +| Binary | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ++-------------------+-------+-------+-------+------------+-------+-------+-------+ +| Large Binary | ✓ | ✓ | ✓ | ✓ | | ✓ | ✓ | ++-------------------+-------+-------+-------+------------+-------+-------+-------+ +| Utf8 | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ++-------------------+-------+-------+-------+------------+-------+-------+-------+ +| Large Utf8 | ✓ | ✓ | ✓ | ✓ | | ✓ | ✓ | ++-------------------+-------+-------+-------+------------+-------+-------+-------+ + ++-------------------+-------+-------+-------+------------+-------+-------+-------+ +| Data type | C++ | Java | Go | JavaScript | C# | Rust | Julia | +| (nested) | | | | | | | | ++===================+=======+=======+=======+============+=======+=======+=======+ +| Fixed Size List | ✓ | ✓ | ✓ | ✓ | | ✓ | ✓ | ++-------------------+-------+-------+-------+------------+-------+-------+-------+ +| List | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ++-------------------+-------+-------+-------+------------+-------+-------+-------+ +| Large List | ✓ | ✓ | | | | ✓ | ✓ | 
++-------------------+-------+-------+-------+------------+-------+-------+-------+ +| Struct | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ++-------------------+-------+-------+-------+------------+-------+-------+-------+ +| Map | ✓ | ✓ | ✓ | ✓ | | | ✓ | ++-------------------+-------+-------+-------+------------+-------+-------+-------+ +| Dense Union | ✓ | ✓ | | | | | ✓ | ++-------------------+-------+-------+-------+------------+-------+-------+-------+ +| Sparse Union | ✓ | ✓ | | | | | ✓ | ++-------------------+-------+-------+-------+------------+-------+-------+-------+ + ++-------------------+-------+-------+-------+------------+-------+-------+-------+ +| Data type | C++ | Java | Go | JavaScript | C# | Rust | Julia | +| (special) | | | | | | | | ++===================+=======+=======+=======+============+=======+=======+=======+ +| Dictionary | ✓ | ✓ (1) | | ✓ (1) | | ✓ (1) | ✓ | ++-------------------+-------+-------+-------+------------+-------+-------+-------+ +| Extension | ✓ | ✓ | ✓ | | | | ✓ | ++-------------------+-------+-------+-------+------------+-------+-------+-------+ + +Notes: + +* \(1) Nested dictionaries not supported + +.. seealso:: + The :ref:`format_columnar` specification. + + +IPC Format +========== + ++-----------------------------+-------+-------+-------+------------+-------+-------+-------+ +| IPC Feature | C++ | Java | Go | JavaScript | C# | Rust | Julia | +| | | | | | | | | ++=============================+=======+=======+=======+============+=======+=======+=======+ +| Arrow stream format | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ++-----------------------------+-------+-------+-------+------------+-------+-------+-------+ +| Arrow file format | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ++-----------------------------+-------+-------+-------+------------+-------+-------+-------+ +| Record batches | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ++-----------------------------+-------+-------+-------+------------+-------+-------+-------+ +| Dictionaries | ✓ | ✓ | ✓ | ✓ | | ✓ | ✓ | ++-----------------------------+-------+-------+-------+------------+-------+-------+-------+ +| Replacement dictionaries | ✓ | ✓ | | | | | ✓ | ++-----------------------------+-------+-------+-------+------------+-------+-------+-------+ +| Delta dictionaries | ✓ (1) | | | | | | ✓ | ++-----------------------------+-------+-------+-------+------------+-------+-------+-------+ +| Tensors | ✓ | | | | | | | ++-----------------------------+-------+-------+-------+------------+-------+-------+-------+ +| Sparse tensors | ✓ | | | | | | | ++-----------------------------+-------+-------+-------+------------+-------+-------+-------+ +| Buffer compression | ✓ | ✓ (3) | ✓ | | | | ✓ | ++-----------------------------+-------+-------+-------+------------+-------+-------+-------+ +| Endianness conversion | ✓ (2) | | | | | | | ++-----------------------------+-------+-------+-------+------------+-------+-------+-------+ +| Custom schema metadata | ✓ | ✓ | ✓ | | | ✓ | ✓ | ++-----------------------------+-------+-------+-------+------------+-------+-------+-------+ + +Notes: + +* \(1) Delta dictionaries not supported on nested dictionaries + +* \(2) Data with non-native endianness can be byte-swapped automatically when reading. + +* \(3) LZ4 Codec currently is quite inefficient. ARROW-11901 tracks improving performance. + +.. seealso:: + The :ref:`format-ipc` specification. 
+ + +Flight RPC +========== + ++-----------------------------+-------+-------+-------+------------+-------+-------+-------+ +| Flight RPC Feature | C++ | Java | Go | JavaScript | C# | Rust | Julia | +| | | | | | | | | ++=============================+=======+=======+=======+============+=======+=======+=======+ +| gRPC transport | ✓ | ✓ | ✓ | | ✓ (1) | | | ++-----------------------------+-------+-------+-------+------------+-------+-------+-------+ +| gRPC + TLS transport | ✓ | ✓ | ✓ | | ✓ | | | ++-----------------------------+-------+-------+-------+------------+-------+-------+-------+ +| RPC error codes | ✓ | ✓ | ✓ | | ✓ | | | ++-----------------------------+-------+-------+-------+------------+-------+-------+-------+ +| Authentication handlers | ✓ | ✓ | ✓ | | ✓ (2) | | | ++-----------------------------+-------+-------+-------+------------+-------+-------+-------+ +| Custom client middleware | ✓ | ✓ | ✓ | | | | | ++-----------------------------+-------+-------+-------+------------+-------+-------+-------+ +| Custom server middleware | ✓ | ✓ | ✓ | | | | | ++-----------------------------+-------+-------+-------+------------+-------+-------+-------+ + +Notes: + +* \(1) No support for handshake or DoExchange. +* \(2) Support using AspNetCore authentication handlers. + +.. seealso:: + The :ref:`flight-rpc` specification. + + +C Data Interface +================ + ++-----------------------------+-----+--------+---+------+----+------+--------+------+ +| Feature | C++ | Python | R | Rust | Go | Java | C/GLib | Ruby | +| | | | | | | | | | ++=============================+=====+========+===+======+====+======+========+======+ +| Schema export | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ++-----------------------------+-----+--------+---+------+----+------+--------+------+ +| Array export | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ++-----------------------------+-----+--------+---+------+----+------+--------+------+ +| Schema import | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ++-----------------------------+-----+--------+---+------+----+------+--------+------+ +| Array import | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ++-----------------------------+-----+--------+---+------+----+------+--------+------+ + +.. seealso:: + The :ref:`C Data Interface <c-data-interface>` specification. + + +C Stream Interface (experimental) +================================= + ++-----------------------------+-----+--------+----+--------+------+ +| Feature | C++ | Python | Go | C/GLib | Ruby | +| | | | | | | ++=============================+=====+========+====+========+======+ +| Stream export | ✓ | ✓ | | ✓ | ✓ | ++-----------------------------+-----+--------+----+--------+------+ +| Stream import | ✓ | ✓ | ✓ | ✓ | ✓ | ++-----------------------------+-----+--------+----+--------+------+ + +.. seealso:: + The :ref:`C Stream Interface <c-stream-interface>` specification. 
+ + +Third-Party Data Formats +======================== + ++-----------------------------+---------+---------+-------+------------+-------+---------+-------+ +| Format | C++ | Java | Go | JavaScript | C# | Rust | Julia | +| | | | | | | | | ++=============================+=========+=========+=======+============+=======+=========+=======+ +| Avro | | R | | | | | | ++-----------------------------+---------+---------+-------+------------+-------+---------+-------+ +| CSV | R | | R/W | | | R/W | R/W | ++-----------------------------+---------+---------+-------+------------+-------+---------+-------+ +| ORC | R/W | R (2) | | | | | | ++-----------------------------+---------+---------+-------+------------+-------+---------+-------+ +| Parquet | R/W | R (3) | | | | R/W (1) | | ++-----------------------------+---------+---------+-------+------------+-------+---------+-------+ + +Notes: + +* *R* = Read supported + +* *W* = Write supported + +* \(1) Nested read/write not supported. + +* \(2) Through JNI bindings. (Provided by ``org.apache.arrow.orc:arrow-orc``) + +* \(3) Through JNI bindings to Arrow C++ Datasets. (Provided by ``org.apache.arrow:arrow-dataset``) |