From e6918187568dbd01842d8d1d2c808ce16a894239 Mon Sep 17 00:00:00 2001
From: Daniel Baumann
Date: Sun, 21 Apr 2024 13:54:28 +0200
Subject: Adding upstream version 18.2.2.

Signed-off-by: Daniel Baumann
---
 src/arrow/python/pyarrow/array.pxi | 2541 ++++++++++++++++++++++++++++++++++++
 1 file changed, 2541 insertions(+)
 create mode 100644 src/arrow/python/pyarrow/array.pxi

diff --git a/src/arrow/python/pyarrow/array.pxi b/src/arrow/python/pyarrow/array.pxi
new file mode 100644
index 000000000..97cbce759
--- /dev/null
+++ b/src/arrow/python/pyarrow/array.pxi
@@ -0,0 +1,2541 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import os
+import warnings
+
+
+cdef _sequence_to_array(object sequence, object mask, object size,
+                        DataType type, CMemoryPool* pool, c_bool from_pandas):
+    cdef:
+        int64_t c_size
+        PyConversionOptions options
+        shared_ptr[CChunkedArray] chunked
+
+    if type is not None:
+        options.type = type.sp_type
+
+    if size is not None:
+        options.size = size
+
+    options.from_pandas = from_pandas
+    options.ignore_timezone = os.environ.get('PYARROW_IGNORE_TIMEZONE', False)
+
+    with nogil:
+        chunked = GetResultValue(
+            ConvertPySequence(sequence, mask, options, pool)
+        )
+
+    if chunked.get().num_chunks() == 1:
+        return pyarrow_wrap_array(chunked.get().chunk(0))
+    else:
+        return pyarrow_wrap_chunked_array(chunked)
+
+
+cdef inline _is_array_like(obj):
+    if isinstance(obj, np.ndarray):
+        return True
+    return pandas_api._have_pandas_internal() and pandas_api.is_array_like(obj)
+
+
+def _ndarray_to_arrow_type(object values, DataType type):
+    return pyarrow_wrap_data_type(_ndarray_to_type(values, type))
+
+
+cdef shared_ptr[CDataType] _ndarray_to_type(object values,
+                                            DataType type) except *:
+    cdef shared_ptr[CDataType] c_type
+
+    dtype = values.dtype
+
+    if type is None and dtype != object:
+        with nogil:
+            check_status(NumPyDtypeToArrow(dtype, &c_type))
+
+    if type is not None:
+        c_type = type.sp_type
+
+    return c_type
+
+
+cdef _ndarray_to_array(object values, object mask, DataType type,
+                       c_bool from_pandas, c_bool safe, CMemoryPool* pool):
+    cdef:
+        shared_ptr[CChunkedArray] chunked_out
+        shared_ptr[CDataType] c_type = _ndarray_to_type(values, type)
+        CCastOptions cast_options = CCastOptions(safe)
+
+    with nogil:
+        check_status(NdarrayToArrow(pool, values, mask, from_pandas,
+                                    c_type, cast_options, &chunked_out))
+
+    if chunked_out.get().num_chunks() > 1:
+        return pyarrow_wrap_chunked_array(chunked_out)
+    else:
+        return pyarrow_wrap_array(chunked_out.get().chunk(0))
+
+
+cdef _codes_to_indices(object codes, object mask, DataType type,
+                       MemoryPool memory_pool):
+    """
+    Convert the codes of a pandas Categorical to indices for a pyarrow
+    DictionaryArray, taking into account missing values + mask
+    """
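+    # Illustrative sketch (values hypothetical): pandas encodes a missing
+    # category as code -1, so codes [0, -1, 2] with mask=None yield
+    # mask [False, True, False], and the -1 slots become nulls in the
+    # resulting index array.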
+    if mask is None:
+        mask = codes == -1
+    else:
+        mask = mask | (codes == -1)
+    return array(codes, mask=mask, type=type, memory_pool=memory_pool)
+
+
+def _handle_arrow_array_protocol(obj, type, mask, size):
+    if mask is not None or size is not None:
+        raise ValueError(
+            "Cannot specify a mask or a size when passing an object that is "
+            "converted with the __arrow_array__ protocol.")
+    res = obj.__arrow_array__(type=type)
+    if not isinstance(res, (Array, ChunkedArray)):
+        raise TypeError("The object's __arrow_array__ method does not "
+                        "return a pyarrow Array or ChunkedArray.")
+    return res
+
+
+def array(object obj, type=None, mask=None, size=None, from_pandas=None,
+          bint safe=True, MemoryPool memory_pool=None):
+    """
+    Create pyarrow.Array instance from a Python object.
+
+    Parameters
+    ----------
+    obj : sequence, iterable, ndarray or Series
+        If both type and size are specified, may be a single-use iterable.
+        If not strongly-typed, the Arrow type will be inferred for the
+        resulting array.
+    type : pyarrow.DataType
+        Explicit type to attempt to coerce to, otherwise will be inferred
+        from the data.
+    mask : array[bool], optional
+        Indicate which values are null (True) or not null (False).
+    size : int64, optional
+        Number of elements to convert. If the input is larger than size,
+        conversion stops at this length. For iterators, if size is larger
+        than the input iterator this will be treated as a "max size", but
+        will involve an initial allocation of size followed by a resize to
+        the actual size (so if you know the exact size, specifying it
+        correctly will give you better performance).
+    from_pandas : bool, default None
+        Use pandas's semantics for inferring nulls from values in
+        ndarray-like data. If passed, the mask takes precedence, but
+        if a value is unmasked (not-null), but still null according to
+        pandas semantics, then it is null. Defaults to False if not
+        passed explicitly by user, or True if a pandas object is
+        passed in.
+    safe : bool, default True
+        Check for overflows or other unsafe conversions.
+    memory_pool : pyarrow.MemoryPool, optional
+        If not passed, will allocate memory from the currently-set default
+        memory pool.
+
+    Returns
+    -------
+    array : pyarrow.Array or pyarrow.ChunkedArray
+        A ChunkedArray instead of an Array is returned if:
+
+        - the object data overflowed binary storage.
+        - the object's ``__arrow_array__`` protocol method returned a chunked
+          array.
+
+    Notes
+    -----
+    Localized timestamps will currently be returned as UTC (pandas's native
+    representation). Timezone-naive data will be implicitly interpreted as
+    UTC.
+
+    Pandas's DateOffsets and dateutil.relativedelta.relativedelta are by
+    default converted as MonthDayNanoIntervalArray. relativedelta leapdays
+    are ignored, as are all absolute fields on both objects.
+    datetime.timedelta can also be converted to MonthDayNanoIntervalArray,
+    but this requires passing MonthDayNanoIntervalType explicitly.
+
+    Converting to dictionary array will promote to a wider integer type for
+    indices if the number of distinct values cannot be represented, even if
+    the index type was explicitly set. This means that if there are more than
+    127 values the returned dictionary array's index type will be at least
+    pa.int16() even if pa.int8() was passed to the function. Note that an
+    explicit index type will not be demoted even if it is wider than required.
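+
+    As a quick sketch of the ``from_pandas`` null semantics (output
+    abbreviated):
+
+    >>> import numpy as np
+    >>> pa.array([1.0, np.nan], from_pandas=True).null_count
+    1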
+ + Examples + -------- + >>> import pandas as pd + >>> import pyarrow as pa + >>> pa.array(pd.Series([1, 2])) + + [ + 1, + 2 + ] + + >>> pa.array(["a", "b", "a"], type=pa.dictionary(pa.int8(), pa.string())) + + -- dictionary: + [ + "a", + "b" + ] + -- indices: + [ + 0, + 1, + 0 + ] + + >>> import numpy as np + >>> pa.array(pd.Series([1, 2]), mask=np.array([0, 1], dtype=bool)) + + [ + 1, + null + ] + + >>> arr = pa.array(range(1024), type=pa.dictionary(pa.int8(), pa.int64())) + >>> arr.type.index_type + DataType(int16) + """ + cdef: + CMemoryPool* pool = maybe_unbox_memory_pool(memory_pool) + bint is_pandas_object = False + bint c_from_pandas + + type = ensure_type(type, allow_none=True) + + if from_pandas is None: + c_from_pandas = False + else: + c_from_pandas = from_pandas + + if hasattr(obj, '__arrow_array__'): + return _handle_arrow_array_protocol(obj, type, mask, size) + elif _is_array_like(obj): + if mask is not None: + if _is_array_like(mask): + mask = get_values(mask, &is_pandas_object) + else: + raise TypeError("Mask must be a numpy array " + "when converting numpy arrays") + + values = get_values(obj, &is_pandas_object) + if is_pandas_object and from_pandas is None: + c_from_pandas = True + + if isinstance(values, np.ma.MaskedArray): + if mask is not None: + raise ValueError("Cannot pass a numpy masked array and " + "specify a mask at the same time") + else: + # don't use shrunken masks + mask = None if values.mask is np.ma.nomask else values.mask + values = values.data + + if mask is not None: + if mask.dtype != np.bool_: + raise TypeError("Mask must be boolean dtype") + if mask.ndim != 1: + raise ValueError("Mask must be 1D array") + if len(values) != len(mask): + raise ValueError( + "Mask is a different length from sequence being converted") + + if hasattr(values, '__arrow_array__'): + return _handle_arrow_array_protocol(values, type, mask, size) + elif pandas_api.is_categorical(values): + if type is not None: + if type.id != Type_DICTIONARY: + return _ndarray_to_array( + np.asarray(values), mask, type, c_from_pandas, safe, + pool) + index_type = type.index_type + value_type = type.value_type + if values.ordered != type.ordered: + warnings.warn( + "The 'ordered' flag of the passed categorical values " + "does not match the 'ordered' of the specified type. " + "Using the flag of the values, but in the future this " + "mismatch will raise a ValueError.", + FutureWarning, stacklevel=2) + else: + index_type = None + value_type = None + + indices = _codes_to_indices( + values.codes, mask, index_type, memory_pool) + try: + dictionary = array( + values.categories.values, type=value_type, + memory_pool=memory_pool) + except TypeError: + # TODO when removing the deprecation warning, this whole + # try/except can be removed (to bubble the TypeError of + # the first array(..) call) + if value_type is not None: + warnings.warn( + "The dtype of the 'categories' of the passed " + "categorical values ({0}) does not match the " + "specified type ({1}). 
For now ignoring the specified " + "type, but in the future this mismatch will raise a " + "TypeError".format( + values.categories.dtype, value_type), + FutureWarning, stacklevel=2) + dictionary = array( + values.categories.values, memory_pool=memory_pool) + else: + raise + + return DictionaryArray.from_arrays( + indices, dictionary, ordered=values.ordered, safe=safe) + else: + if pandas_api.have_pandas: + values, type = pandas_api.compat.get_datetimetz_type( + values, obj.dtype, type) + return _ndarray_to_array(values, mask, type, c_from_pandas, safe, + pool) + else: + # ConvertPySequence does strict conversion if type is explicitly passed + return _sequence_to_array(obj, mask, size, type, pool, c_from_pandas) + + +def asarray(values, type=None): + """ + Convert to pyarrow.Array, inferring type if not provided. + + Parameters + ---------- + values : array-like + This can be a sequence, numpy.ndarray, pyarrow.Array or + pyarrow.ChunkedArray. If a ChunkedArray is passed, the output will be + a ChunkedArray, otherwise the output will be a Array. + type : string or DataType + Explicitly construct the array with this type. Attempt to cast if + indicated type is different. + + Returns + ------- + arr : Array or ChunkedArray + """ + if isinstance(values, (Array, ChunkedArray)): + if type is not None and not values.type.equals(type): + values = values.cast(type) + return values + else: + return array(values, type=type) + + +def nulls(size, type=None, MemoryPool memory_pool=None): + """ + Create a strongly-typed Array instance with all elements null. + + Parameters + ---------- + size : int + Array length. + type : pyarrow.DataType, default None + Explicit type for the array. By default use NullType. + memory_pool : MemoryPool, default None + Arrow MemoryPool to use for allocations. Uses the default memory + pool is not passed. + + Returns + ------- + arr : Array + + Examples + -------- + >>> import pyarrow as pa + >>> pa.nulls(10) + + 10 nulls + + >>> pa.nulls(3, pa.uint32()) + + [ + null, + null, + null + ] + """ + cdef: + CMemoryPool* pool = maybe_unbox_memory_pool(memory_pool) + int64_t length = size + shared_ptr[CDataType] ty + shared_ptr[CArray] arr + + type = ensure_type(type, allow_none=True) + if type is None: + type = null() + + ty = pyarrow_unwrap_data_type(type) + with nogil: + arr = GetResultValue(MakeArrayOfNull(ty, length, pool)) + + return pyarrow_wrap_array(arr) + + +def repeat(value, size, MemoryPool memory_pool=None): + """ + Create an Array instance whose slots are the given scalar. + + Parameters + ---------- + value : Scalar-like object + Either a pyarrow.Scalar or any python object coercible to a Scalar. + size : int + Number of times to repeat the scalar in the output Array. + memory_pool : MemoryPool, default None + Arrow MemoryPool to use for allocations. Uses the default memory + pool is not passed. 
+ + Returns + ------- + arr : Array + + Examples + -------- + >>> import pyarrow as pa + >>> pa.repeat(10, 3) + + [ + 10, + 10, + 10 + ] + + >>> pa.repeat([1, 2], 2) + + [ + [ + 1, + 2 + ], + [ + 1, + 2 + ] + ] + + >>> pa.repeat("string", 3) + + [ + "string", + "string", + "string" + ] + + >>> pa.repeat(pa.scalar({'a': 1, 'b': [1, 2]}), 2) + + -- is_valid: all not null + -- child 0 type: int64 + [ + 1, + 1 + ] + -- child 1 type: list + [ + [ + 1, + 2 + ], + [ + 1, + 2 + ] + ] + """ + cdef: + CMemoryPool* pool = maybe_unbox_memory_pool(memory_pool) + int64_t length = size + shared_ptr[CArray] c_array + shared_ptr[CScalar] c_scalar + + if not isinstance(value, Scalar): + value = scalar(value, memory_pool=memory_pool) + + c_scalar = ( value).unwrap() + with nogil: + c_array = GetResultValue( + MakeArrayFromScalar(deref(c_scalar), length, pool) + ) + + return pyarrow_wrap_array(c_array) + + +def infer_type(values, mask=None, from_pandas=False): + """ + Attempt to infer Arrow data type that can hold the passed Python + sequence type in an Array object + + Parameters + ---------- + values : array-like + Sequence to infer type from. + mask : ndarray (bool type), optional + Optional exclusion mask where True marks null, False non-null. + from_pandas : bool, default False + Use pandas's NA/null sentinel values for type inference. + + Returns + ------- + type : DataType + """ + cdef: + shared_ptr[CDataType] out + c_bool use_pandas_sentinels = from_pandas + + if mask is not None and not isinstance(mask, np.ndarray): + mask = np.array(mask, dtype=bool) + + out = GetResultValue(InferArrowType(values, mask, use_pandas_sentinels)) + return pyarrow_wrap_data_type(out) + + +def _normalize_slice(object arrow_obj, slice key): + """ + Slices with step not equal to 1 (or None) will produce a copy + rather than a zero-copy view + """ + cdef: + Py_ssize_t start, stop, step + Py_ssize_t n = len(arrow_obj) + + start = key.start or 0 + if start < 0: + start += n + if start < 0: + start = 0 + elif start >= n: + start = n + + stop = key.stop if key.stop is not None else n + if stop < 0: + stop += n + if stop < 0: + stop = 0 + elif stop >= n: + stop = n + + step = key.step or 1 + if step != 1: + if step < 0: + # Negative steps require some special handling + if key.start is None: + start = n - 1 + + if key.stop is None: + stop = -1 + + indices = np.arange(start, stop, step) + return arrow_obj.take(indices) + else: + length = max(stop - start, 0) + return arrow_obj.slice(start, length) + + +cdef Py_ssize_t _normalize_index(Py_ssize_t index, + Py_ssize_t length) except -1: + if index < 0: + index += length + if index < 0: + raise IndexError("index out of bounds") + elif index >= length: + raise IndexError("index out of bounds") + return index + + +cdef wrap_datum(const CDatum& datum): + if datum.kind() == DatumType_ARRAY: + return pyarrow_wrap_array(MakeArray(datum.array())) + elif datum.kind() == DatumType_CHUNKED_ARRAY: + return pyarrow_wrap_chunked_array(datum.chunked_array()) + elif datum.kind() == DatumType_RECORD_BATCH: + return pyarrow_wrap_batch(datum.record_batch()) + elif datum.kind() == DatumType_TABLE: + return pyarrow_wrap_table(datum.table()) + elif datum.kind() == DatumType_SCALAR: + return pyarrow_wrap_scalar(datum.scalar()) + else: + raise ValueError("Unable to wrap Datum in a Python object") + + +cdef _append_array_buffers(const CArrayData* ad, list res): + """ + Recursively append Buffer wrappers from *ad* and its children. 
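+
+    Buffers that are null pointers in the C++ ArrayData are appended as
+    None, so positions stay aligned with the underlying buffer layout.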
+ """ + cdef size_t i, n + assert ad != NULL + n = ad.buffers.size() + for i in range(n): + buf = ad.buffers[i] + res.append(pyarrow_wrap_buffer(buf) + if buf.get() != NULL else None) + n = ad.child_data.size() + for i in range(n): + _append_array_buffers(ad.child_data[i].get(), res) + + +cdef _reduce_array_data(const CArrayData* ad): + """ + Recursively dissect ArrayData to (pickable) tuples. + """ + cdef size_t i, n + assert ad != NULL + + n = ad.buffers.size() + buffers = [] + for i in range(n): + buf = ad.buffers[i] + buffers.append(pyarrow_wrap_buffer(buf) + if buf.get() != NULL else None) + + children = [] + n = ad.child_data.size() + for i in range(n): + children.append(_reduce_array_data(ad.child_data[i].get())) + + if ad.dictionary.get() != NULL: + dictionary = _reduce_array_data(ad.dictionary.get()) + else: + dictionary = None + + return pyarrow_wrap_data_type(ad.type), ad.length, ad.null_count, \ + ad.offset, buffers, children, dictionary + + +cdef shared_ptr[CArrayData] _reconstruct_array_data(data): + """ + Reconstruct CArrayData objects from the tuple structure generated + by _reduce_array_data. + """ + cdef: + int64_t length, null_count, offset, i + DataType dtype + Buffer buf + vector[shared_ptr[CBuffer]] c_buffers + vector[shared_ptr[CArrayData]] c_children + shared_ptr[CArrayData] c_dictionary + + dtype, length, null_count, offset, buffers, children, dictionary = data + + for i in range(len(buffers)): + buf = buffers[i] + if buf is None: + c_buffers.push_back(shared_ptr[CBuffer]()) + else: + c_buffers.push_back(buf.buffer) + + for i in range(len(children)): + c_children.push_back(_reconstruct_array_data(children[i])) + + if dictionary is not None: + c_dictionary = _reconstruct_array_data(dictionary) + + return CArrayData.MakeWithChildrenAndDictionary( + dtype.sp_type, + length, + c_buffers, + c_children, + c_dictionary, + null_count, + offset) + + +def _restore_array(data): + """ + Reconstruct an Array from pickled ArrayData. + """ + cdef shared_ptr[CArrayData] ad = _reconstruct_array_data(data) + return pyarrow_wrap_array(MakeArray(ad)) + + +cdef class _PandasConvertible(_Weakrefable): + + def to_pandas( + self, + memory_pool=None, + categories=None, + bint strings_to_categorical=False, + bint zero_copy_only=False, + bint integer_object_nulls=False, + bint date_as_object=True, + bint timestamp_as_object=False, + bint use_threads=True, + bint deduplicate_objects=True, + bint ignore_metadata=False, + bint safe=True, + bint split_blocks=False, + bint self_destruct=False, + types_mapper=None + ): + """ + Convert to a pandas-compatible NumPy array or DataFrame, as appropriate + + Parameters + ---------- + memory_pool : MemoryPool, default None + Arrow MemoryPool to use for allocations. Uses the default memory + pool is not passed. + strings_to_categorical : bool, default False + Encode string (UTF8) and binary types to pandas.Categorical. + categories: list, default empty + List of fields that should be returned as pandas.Categorical. Only + applies to table-like data structures. + zero_copy_only : bool, default False + Raise an ArrowException if this function call would require copying + the underlying data. + integer_object_nulls : bool, default False + Cast integers with nulls to objects + date_as_object : bool, default True + Cast dates to objects. If False, convert to datetime64[ns] dtype. + timestamp_as_object : bool, default False + Cast non-nanosecond timestamps (np.datetime64) to objects. 
This is
+            useful if you have timestamps that don't fit in the normal date
+            range of nanosecond timestamps (1678 CE-2262 CE).
+            If False, all timestamps are converted to datetime64[ns] dtype.
+        use_threads : bool, default True
+            Whether to parallelize the conversion using multiple threads.
+        deduplicate_objects : bool, default True
+            Do not create multiple copies of equal Python objects during
+            conversion, to save on memory use. Conversion will be slower.
+        ignore_metadata : bool, default False
+            If True, do not use the 'pandas' metadata to reconstruct the
+            DataFrame index, if present.
+        safe : bool, default True
+            For certain data types, a cast is needed in order to store the
+            data in a pandas DataFrame or Series (e.g. timestamps are always
+            stored as nanoseconds in pandas). This option controls whether it
+            is a safe cast or not.
+        split_blocks : bool, default False
+            If True, generate one internal "block" for each column when
+            creating a pandas.DataFrame from a RecordBatch or Table. While
+            this can temporarily reduce memory, note that various pandas
+            operations can trigger "consolidation" which may balloon memory
+            use.
+        self_destruct : bool, default False
+            EXPERIMENTAL: If True, attempt to deallocate the originating Arrow
+            memory while converting the Arrow object to pandas. If you use the
+            object after calling to_pandas with this option, it will crash
+            your program.
+
+            Note that you may not always see memory usage improvements. For
+            example, if multiple columns share an underlying allocation,
+            memory can't be freed until all columns are converted.
+        types_mapper : function, default None
+            A function mapping a pyarrow DataType to a pandas ExtensionDtype.
+            This can be used to override the default pandas type for conversion
+            of built-in pyarrow types or in the absence of pandas_metadata in
+            the Table schema. The function receives a pyarrow DataType and is
+            expected to return a pandas ExtensionDtype or ``None`` if the
+            default conversion should be used for that type. If you have
+            a dictionary mapping, you can pass ``dict.get`` as function.
+
+        Returns
+        -------
+        pandas.Series or pandas.DataFrame depending on type of object
+        """
+        options = dict(
+            pool=memory_pool,
+            strings_to_categorical=strings_to_categorical,
+            zero_copy_only=zero_copy_only,
+            integer_object_nulls=integer_object_nulls,
+            date_as_object=date_as_object,
+            timestamp_as_object=timestamp_as_object,
+            use_threads=use_threads,
+            deduplicate_objects=deduplicate_objects,
+            safe=safe,
+            split_blocks=split_blocks,
+            self_destruct=self_destruct
+        )
+        return self._to_pandas(options, categories=categories,
+                               ignore_metadata=ignore_metadata,
+                               types_mapper=types_mapper)
+
+
+cdef PandasOptions _convert_pandas_options(dict options):
+    cdef PandasOptions result
+    result.pool = maybe_unbox_memory_pool(options['pool'])
+    result.strings_to_categorical = options['strings_to_categorical']
+    result.zero_copy_only = options['zero_copy_only']
+    result.integer_object_nulls = options['integer_object_nulls']
+    result.date_as_object = options['date_as_object']
+    result.timestamp_as_object = options['timestamp_as_object']
+    result.use_threads = options['use_threads']
+    result.deduplicate_objects = options['deduplicate_objects']
+    result.safe_cast = options['safe']
+    result.split_blocks = options['split_blocks']
+    result.self_destruct = options['self_destruct']
+    result.ignore_timezone = os.environ.get('PYARROW_IGNORE_TIMEZONE', False)
+    return result
+
+
+cdef class Array(_PandasConvertible):
+    """
+    The base class for all Arrow arrays.
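+
+    Use :func:`pyarrow.array` or one of the ``Array.from_*`` factory
+    functions to create instances; calling the ``Array`` constructor
+    directly raises a ``TypeError``.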
+ """ + + def __init__(self): + raise TypeError("Do not call {}'s constructor directly, use one of " + "the `pyarrow.Array.from_*` functions instead." + .format(self.__class__.__name__)) + + cdef void init(self, const shared_ptr[CArray]& sp_array) except *: + self.sp_array = sp_array + self.ap = sp_array.get() + self.type = pyarrow_wrap_data_type(self.sp_array.get().type()) + + def _debug_print(self): + with nogil: + check_status(DebugPrint(deref(self.ap), 0)) + + def diff(self, Array other): + """ + Compare contents of this array against another one. + + Return string containing the result of arrow::Diff comparing contents + of this array against the other array. + """ + cdef c_string result + with nogil: + result = self.ap.Diff(deref(other.ap)) + return frombytes(result, safe=True) + + def cast(self, object target_type, safe=True): + """ + Cast array values to another data type + + See pyarrow.compute.cast for usage + """ + return _pc().cast(self, target_type, safe=safe) + + def view(self, object target_type): + """ + Return zero-copy "view" of array as another data type. + + The data types must have compatible columnar buffer layouts + + Parameters + ---------- + target_type : DataType + Type to construct view as. + + Returns + ------- + view : Array + """ + cdef DataType type = ensure_type(target_type) + cdef shared_ptr[CArray] result + with nogil: + result = GetResultValue(self.ap.View(type.sp_type)) + return pyarrow_wrap_array(result) + + def sum(self, **kwargs): + """ + Sum the values in a numerical array. + """ + options = _pc().ScalarAggregateOptions(**kwargs) + return _pc().call_function('sum', [self], options) + + def unique(self): + """ + Compute distinct elements in array. + """ + return _pc().call_function('unique', [self]) + + def dictionary_encode(self, null_encoding='mask'): + """ + Compute dictionary-encoded representation of array. + """ + options = _pc().DictionaryEncodeOptions(null_encoding) + return _pc().call_function('dictionary_encode', [self], options) + + def value_counts(self): + """ + Compute counts of unique elements in array. + + Returns + ------- + An array of structs + """ + return _pc().call_function('value_counts', [self]) + + @staticmethod + def from_pandas(obj, mask=None, type=None, bint safe=True, + MemoryPool memory_pool=None): + """ + Convert pandas.Series to an Arrow Array. + + This method uses Pandas semantics about what values indicate + nulls. See pyarrow.array for more general conversion from arrays or + sequences to Arrow arrays. + + Parameters + ---------- + obj : ndarray, pandas.Series, array-like + mask : array (boolean), optional + Indicate which values are null (True) or not null (False). + type : pyarrow.DataType + Explicit type to attempt to coerce to, otherwise will be inferred + from the data. + safe : bool, default True + Check for overflows or other unsafe conversions. + memory_pool : pyarrow.MemoryPool, optional + If not passed, will allocate memory from the currently-set default + memory pool. + + Notes + ----- + Localized timestamps will currently be returned as UTC (pandas's native + representation). Timezone-naive data will be implicitly interpreted as + UTC. + + Returns + ------- + array : pyarrow.Array or pyarrow.ChunkedArray + ChunkedArray is returned if object data overflows binary buffer. 
+ """ + return array(obj, mask=mask, type=type, safe=safe, from_pandas=True, + memory_pool=memory_pool) + + def __reduce__(self): + return _restore_array, \ + (_reduce_array_data(self.sp_array.get().data().get()),) + + @staticmethod + def from_buffers(DataType type, length, buffers, null_count=-1, offset=0, + children=None): + """ + Construct an Array from a sequence of buffers. + + The concrete type returned depends on the datatype. + + Parameters + ---------- + type : DataType + The value type of the array. + length : int + The number of values in the array. + buffers : List[Buffer] + The buffers backing this array. + null_count : int, default -1 + The number of null entries in the array. Negative value means that + the null count is not known. + offset : int, default 0 + The array's logical offset (in values, not in bytes) from the + start of each buffer. + children : List[Array], default None + Nested type children with length matching type.num_fields. + + Returns + ------- + array : Array + """ + cdef: + Buffer buf + Array child + vector[shared_ptr[CBuffer]] c_buffers + vector[shared_ptr[CArrayData]] c_child_data + shared_ptr[CArrayData] array_data + + children = children or [] + + if type.num_fields != len(children): + raise ValueError("Type's expected number of children " + "({0}) did not match the passed number " + "({1}).".format(type.num_fields, len(children))) + + if type.num_buffers != len(buffers): + raise ValueError("Type's expected number of buffers " + "({0}) did not match the passed number " + "({1}).".format(type.num_buffers, len(buffers))) + + for buf in buffers: + # None will produce a null buffer pointer + c_buffers.push_back(pyarrow_unwrap_buffer(buf)) + + for child in children: + c_child_data.push_back(child.ap.data()) + + array_data = CArrayData.MakeWithChildren(type.sp_type, length, + c_buffers, c_child_data, + null_count, offset) + cdef Array result = pyarrow_wrap_array(MakeArray(array_data)) + result.validate() + return result + + @property + def null_count(self): + return self.sp_array.get().null_count() + + @property + def nbytes(self): + """ + Total number of bytes consumed by the elements of the array. + """ + size = 0 + for buf in self.buffers(): + if buf is not None: + size += buf.size + return size + + def __sizeof__(self): + return super(Array, self).__sizeof__() + self.nbytes + + def __iter__(self): + for i in range(len(self)): + yield self.getitem(i) + + def __repr__(self): + type_format = object.__repr__(self) + return '{0}\n{1}'.format(type_format, str(self)) + + def to_string(self, *, int indent=0, int window=10, + c_bool skip_new_lines=False): + """ + Render a "pretty-printed" string representation of the Array. + + Parameters + ---------- + indent : int + How much to indent right the content of the array, + by default ``0``. + window : int + How many items to preview at the begin and end + of the array when the arrays is bigger than the window. + The other elements will be ellipsed. + skip_new_lines : bool + If the array should be rendered as a single line of text + or if each element should be on its own line. 
+ """ + cdef: + c_string result + PrettyPrintOptions options + + with nogil: + options = PrettyPrintOptions(indent, window) + options.skip_new_lines = skip_new_lines + check_status( + PrettyPrint( + deref(self.ap), + options, + &result + ) + ) + + return frombytes(result, safe=True) + + def format(self, **kwargs): + import warnings + warnings.warn('Array.format is deprecated, use Array.to_string') + return self.to_string(**kwargs) + + def __str__(self): + return self.to_string() + + def __eq__(self, other): + try: + return self.equals(other) + except TypeError: + # This also handles comparing with None + # as Array.equals(None) raises a TypeError. + return NotImplemented + + def equals(Array self, Array other not None): + return self.ap.Equals(deref(other.ap)) + + def __len__(self): + return self.length() + + cdef int64_t length(self): + if self.sp_array.get(): + return self.sp_array.get().length() + else: + return 0 + + def is_null(self, *, nan_is_null=False): + """ + Return BooleanArray indicating the null values. + + Parameters + ---------- + nan_is_null : bool (optional, default False) + Whether floating-point NaN values should also be considered null. + + Returns + ------- + array : boolean Array + """ + options = _pc().NullOptions(nan_is_null=nan_is_null) + return _pc().call_function('is_null', [self], options) + + def is_valid(self): + """ + Return BooleanArray indicating the non-null values. + """ + return _pc().is_valid(self) + + def fill_null(self, fill_value): + """ + See pyarrow.compute.fill_null for usage. + """ + return _pc().fill_null(self, fill_value) + + def __getitem__(self, key): + """ + Slice or return value at given index + + Parameters + ---------- + key : integer or slice + Slices with step not equal to 1 (or None) will produce a copy + rather than a zero-copy view + + Returns + ------- + value : Scalar (index) or Array (slice) + """ + if PySlice_Check(key): + return _normalize_slice(self, key) + + return self.getitem(_normalize_index(key, self.length())) + + cdef getitem(self, int64_t i): + return Scalar.wrap(GetResultValue(self.ap.GetScalar(i))) + + def slice(self, offset=0, length=None): + """ + Compute zero-copy slice of this array. + + Parameters + ---------- + offset : int, default 0 + Offset from start of array to slice. + length : int, default None + Length of slice (default is until end of Array starting from + offset). + + Returns + ------- + sliced : RecordBatch + """ + cdef: + shared_ptr[CArray] result + + if offset < 0: + raise IndexError('Offset must be non-negative') + + offset = min(len(self), offset) + if length is None: + result = self.ap.Slice(offset) + else: + if length < 0: + raise ValueError('Length must be non-negative') + result = self.ap.Slice(offset, length) + + return pyarrow_wrap_array(result) + + def take(self, object indices): + """ + Select values from an array. See pyarrow.compute.take for full usage. + """ + return _pc().take(self, indices) + + def drop_null(self): + """ + Remove missing values from an array. + """ + return _pc().drop_null(self) + + def filter(self, Array mask, *, null_selection_behavior='drop'): + """ + Select values from an array. See pyarrow.compute.filter for full usage. + """ + return _pc().filter(self, mask, + null_selection_behavior=null_selection_behavior) + + def index(self, value, start=None, end=None, *, memory_pool=None): + """ + Find the first index of a value. + + See pyarrow.compute.index for full usage. 
+ """ + return _pc().index(self, value, start, end, memory_pool=memory_pool) + + def _to_pandas(self, options, **kwargs): + return _array_like_to_pandas(self, options) + + def __array__(self, dtype=None): + values = self.to_numpy(zero_copy_only=False) + if dtype is None: + return values + return values.astype(dtype) + + def to_numpy(self, zero_copy_only=True, writable=False): + """ + Return a NumPy view or copy of this array (experimental). + + By default, tries to return a view of this array. This is only + supported for primitive arrays with the same memory layout as NumPy + (i.e. integers, floating point, ..) and without any nulls. + + Parameters + ---------- + zero_copy_only : bool, default True + If True, an exception will be raised if the conversion to a numpy + array would require copying the underlying data (e.g. in presence + of nulls, or for non-primitive types). + writable : bool, default False + For numpy arrays created with zero copy (view on the Arrow data), + the resulting array is not writable (Arrow data is immutable). + By setting this to True, a copy of the array is made to ensure + it is writable. + + Returns + ------- + array : numpy.ndarray + """ + cdef: + PyObject* out + PandasOptions c_options + object values + + if zero_copy_only and writable: + raise ValueError( + "Cannot return a writable array if asking for zero-copy") + + # If there are nulls and the array is a DictionaryArray + # decoding the dictionary will make sure nulls are correctly handled. + # Decoding a dictionary does imply a copy by the way, + # so it can't be done if the user requested a zero_copy. + c_options.decode_dictionaries = not zero_copy_only + c_options.zero_copy_only = zero_copy_only + + with nogil: + check_status(ConvertArrayToPandas(c_options, self.sp_array, + self, &out)) + + # wrap_array_output uses pandas to convert to Categorical, here + # always convert to numpy array without pandas dependency + array = PyObject_to_object(out) + + if isinstance(array, dict): + array = np.take(array['dictionary'], array['indices']) + + if writable and not array.flags.writeable: + # if the conversion already needed to a copy, writeable is True + array = array.copy() + return array + + def to_pylist(self): + """ + Convert to a list of native Python objects. + + Returns + ------- + lst : list + """ + return [x.as_py() for x in self] + + def tolist(self): + """ + Alias of to_pylist for compatibility with NumPy. + """ + return self.to_pylist() + + def validate(self, *, full=False): + """ + Perform validation checks. An exception is raised if validation fails. + + By default only cheap validation checks are run. Pass `full=True` + for thorough validation checks (potentially O(n)). + + Parameters + ---------- + full: bool, default False + If True, run expensive checks, otherwise cheap checks only. + + Raises + ------ + ArrowInvalid + """ + if full: + with nogil: + check_status(self.ap.ValidateFull()) + else: + with nogil: + check_status(self.ap.Validate()) + + @property + def offset(self): + """ + A relative position into another array's data. + + The purpose is to enable zero-copy slicing. This value defaults to zero + but must be applied on all operations with the physical storage + buffers. + """ + return self.sp_array.get().offset() + + def buffers(self): + """ + Return a list of Buffer objects pointing to this array's physical + storage. + + To correctly interpret these buffers, you need to also apply the offset + multiplied with the size of the stored data type. 
+ """ + res = [] + _append_array_buffers(self.sp_array.get().data().get(), res) + return res + + def _export_to_c(self, uintptr_t out_ptr, uintptr_t out_schema_ptr=0): + """ + Export to a C ArrowArray struct, given its pointer. + + If a C ArrowSchema struct pointer is also given, the array type + is exported to it at the same time. + + Parameters + ---------- + out_ptr: int + The raw pointer to a C ArrowArray struct. + out_schema_ptr: int (optional) + The raw pointer to a C ArrowSchema struct. + + Be careful: if you don't pass the ArrowArray struct to a consumer, + array memory will leak. This is a low-level function intended for + expert users. + """ + with nogil: + check_status(ExportArray(deref(self.sp_array), + out_ptr, + out_schema_ptr)) + + @staticmethod + def _import_from_c(uintptr_t in_ptr, type): + """ + Import Array from a C ArrowArray struct, given its pointer + and the imported array type. + + Parameters + ---------- + in_ptr: int + The raw pointer to a C ArrowArray struct. + type: DataType or int + Either a DataType object, or the raw pointer to a C ArrowSchema + struct. + + This is a low-level function intended for expert users. + """ + cdef: + shared_ptr[CArray] c_array + + c_type = pyarrow_unwrap_data_type(type) + if c_type == nullptr: + # Not a DataType object, perhaps a raw ArrowSchema pointer + type_ptr = type + with nogil: + c_array = GetResultValue(ImportArray( in_ptr, + type_ptr)) + else: + with nogil: + c_array = GetResultValue(ImportArray( in_ptr, + c_type)) + return pyarrow_wrap_array(c_array) + + +cdef _array_like_to_pandas(obj, options): + cdef: + PyObject* out + PandasOptions c_options = _convert_pandas_options(options) + + original_type = obj.type + name = obj._name + + # ARROW-3789(wesm): Convert date/timestamp types to datetime64[ns] + c_options.coerce_temporal_nanoseconds = True + + if isinstance(obj, Array): + with nogil: + check_status(ConvertArrayToPandas(c_options, + ( obj).sp_array, + obj, &out)) + elif isinstance(obj, ChunkedArray): + with nogil: + check_status(libarrow.ConvertChunkedArrayToPandas( + c_options, + ( obj).sp_chunked_array, + obj, &out)) + + arr = wrap_array_output(out) + + if (isinstance(original_type, TimestampType) and + options["timestamp_as_object"]): + # ARROW-5359 - need to specify object dtype to avoid pandas to + # coerce back to ns resolution + dtype = "object" + else: + dtype = None + + result = pandas_api.series(arr, dtype=dtype, name=name) + + if (isinstance(original_type, TimestampType) and + original_type.tz is not None and + # can be object dtype for non-ns and timestamp_as_object=True + result.dtype.kind == "M"): + from pyarrow.pandas_compat import make_tz_aware + result = make_tz_aware(result, original_type.tz) + + return result + + +cdef wrap_array_output(PyObject* output): + cdef object obj = PyObject_to_object(output) + + if isinstance(obj, dict): + return pandas_api.categorical_type(obj['indices'], + categories=obj['dictionary'], + ordered=obj['ordered'], + fastpath=True) + else: + return obj + + +cdef class NullArray(Array): + """ + Concrete class for Arrow arrays of null data type. + """ + + +cdef class BooleanArray(Array): + """ + Concrete class for Arrow arrays of boolean data type. + """ + @property + def false_count(self): + return ( self.ap).false_count() + + @property + def true_count(self): + return ( self.ap).true_count() + + +cdef class NumericArray(Array): + """ + A base class for Arrow numeric arrays. + """ + + +cdef class IntegerArray(NumericArray): + """ + A base class for Arrow integer arrays. 
+ """ + + +cdef class FloatingPointArray(NumericArray): + """ + A base class for Arrow floating-point arrays. + """ + + +cdef class Int8Array(IntegerArray): + """ + Concrete class for Arrow arrays of int8 data type. + """ + + +cdef class UInt8Array(IntegerArray): + """ + Concrete class for Arrow arrays of uint8 data type. + """ + + +cdef class Int16Array(IntegerArray): + """ + Concrete class for Arrow arrays of int16 data type. + """ + + +cdef class UInt16Array(IntegerArray): + """ + Concrete class for Arrow arrays of uint16 data type. + """ + + +cdef class Int32Array(IntegerArray): + """ + Concrete class for Arrow arrays of int32 data type. + """ + + +cdef class UInt32Array(IntegerArray): + """ + Concrete class for Arrow arrays of uint32 data type. + """ + + +cdef class Int64Array(IntegerArray): + """ + Concrete class for Arrow arrays of int64 data type. + """ + + +cdef class UInt64Array(IntegerArray): + """ + Concrete class for Arrow arrays of uint64 data type. + """ + + +cdef class Date32Array(NumericArray): + """ + Concrete class for Arrow arrays of date32 data type. + """ + + +cdef class Date64Array(NumericArray): + """ + Concrete class for Arrow arrays of date64 data type. + """ + + +cdef class TimestampArray(NumericArray): + """ + Concrete class for Arrow arrays of timestamp data type. + """ + + +cdef class Time32Array(NumericArray): + """ + Concrete class for Arrow arrays of time32 data type. + """ + + +cdef class Time64Array(NumericArray): + """ + Concrete class for Arrow arrays of time64 data type. + """ + + +cdef class DurationArray(NumericArray): + """ + Concrete class for Arrow arrays of duration data type. + """ + + +cdef class MonthDayNanoIntervalArray(Array): + """ + Concrete class for Arrow arrays of interval[MonthDayNano] type. + """ + + def to_pylist(self): + """ + Convert to a list of native Python objects. + + pyarrow.MonthDayNano is used as the native representation. + + Returns + ------- + lst : list + """ + cdef: + CResult[PyObject*] maybe_py_list + PyObject* py_list + CMonthDayNanoIntervalArray* array + array = self.sp_array.get() + maybe_py_list = MonthDayNanoIntervalArrayToPyList(deref(array)) + py_list = GetResultValue(maybe_py_list) + return PyObject_to_object(py_list) + + +cdef class HalfFloatArray(FloatingPointArray): + """ + Concrete class for Arrow arrays of float16 data type. + """ + + +cdef class FloatArray(FloatingPointArray): + """ + Concrete class for Arrow arrays of float32 data type. + """ + + +cdef class DoubleArray(FloatingPointArray): + """ + Concrete class for Arrow arrays of float64 data type. + """ + + +cdef class FixedSizeBinaryArray(Array): + """ + Concrete class for Arrow arrays of a fixed-size binary data type. + """ + + +cdef class Decimal128Array(FixedSizeBinaryArray): + """ + Concrete class for Arrow arrays of decimal128 data type. + """ + + +cdef class Decimal256Array(FixedSizeBinaryArray): + """ + Concrete class for Arrow arrays of decimal256 data type. + """ + +cdef class BaseListArray(Array): + + def flatten(self): + """ + Unnest this ListArray/LargeListArray by one level. + + The returned Array is logically a concatenation of all the sub-lists + in this Array. + + Note that this method is different from ``self.values()`` in that + it takes care of the slicing offset as well as null elements backed + by non-empty sub-lists. 
+ + Returns + ------- + result : Array + """ + return _pc().list_flatten(self) + + def value_parent_indices(self): + """ + Return array of same length as list child values array where each + output value is the index of the parent list array slot containing each + child value. + + Examples + -------- + >>> arr = pa.array([[1, 2, 3], [], None, [4]], + ... type=pa.list_(pa.int32())) + >>> arr.value_parent_indices() + + [ + 0, + 0, + 0, + 3 + ] + """ + return _pc().list_parent_indices(self) + + def value_lengths(self): + """ + Return integers array with values equal to the respective length of + each list element. Null list values are null in the output. + + Examples + -------- + >>> arr = pa.array([[1, 2, 3], [], None, [4]], + ... type=pa.list_(pa.int32())) + >>> arr.value_lengths() + + [ + 3, + 0, + null, + 1 + ] + """ + return _pc().list_value_length(self) + + +cdef class ListArray(BaseListArray): + """ + Concrete class for Arrow arrays of a list data type. + """ + + @staticmethod + def from_arrays(offsets, values, MemoryPool pool=None): + """ + Construct ListArray from arrays of int32 offsets and values. + + Parameters + ---------- + offsets : Array (int32 type) + values : Array (any type) + pool : MemoryPool + + Returns + ------- + list_array : ListArray + + Examples + -------- + >>> values = pa.array([1, 2, 3, 4]) + >>> offsets = pa.array([0, 2, 4]) + >>> pa.ListArray.from_arrays(offsets, values) + + [ + [ + 0, + 1 + ], + [ + 2, + 3 + ] + ] + # nulls in the offsets array become null lists + >>> offsets = pa.array([0, None, 2, 4]) + >>> pa.ListArray.from_arrays(offsets, values) + + [ + [ + 0, + 1 + ], + null, + [ + 2, + 3 + ] + ] + """ + cdef: + Array _offsets, _values + shared_ptr[CArray] out + cdef CMemoryPool* cpool = maybe_unbox_memory_pool(pool) + + _offsets = asarray(offsets, type='int32') + _values = asarray(values) + + with nogil: + out = GetResultValue( + CListArray.FromArrays(_offsets.ap[0], _values.ap[0], cpool)) + cdef Array result = pyarrow_wrap_array(out) + result.validate() + return result + + @property + def values(self): + cdef CListArray* arr = self.ap + return pyarrow_wrap_array(arr.values()) + + @property + def offsets(self): + """ + Return the offsets as an int32 array. + """ + return pyarrow_wrap_array(( self.ap).offsets()) + + +cdef class LargeListArray(BaseListArray): + """ + Concrete class for Arrow arrays of a large list data type. + + Identical to ListArray, but 64-bit offsets. + """ + + @staticmethod + def from_arrays(offsets, values, MemoryPool pool=None): + """ + Construct LargeListArray from arrays of int64 offsets and values. + + Parameters + ---------- + offsets : Array (int64 type) + values : Array (any type) + pool : MemoryPool + + Returns + ------- + list_array : LargeListArray + """ + cdef: + Array _offsets, _values + shared_ptr[CArray] out + cdef CMemoryPool* cpool = maybe_unbox_memory_pool(pool) + + _offsets = asarray(offsets, type='int64') + _values = asarray(values) + + with nogil: + out = GetResultValue( + CLargeListArray.FromArrays(_offsets.ap[0], _values.ap[0], + cpool)) + cdef Array result = pyarrow_wrap_array(out) + result.validate() + return result + + @property + def values(self): + cdef CLargeListArray* arr = self.ap + return pyarrow_wrap_array(arr.values()) + + @property + def offsets(self): + """ + Return the offsets as an int64 array. + """ + return pyarrow_wrap_array(( self.ap).offsets()) + + +cdef class MapArray(Array): + """ + Concrete class for Arrow arrays of a map data type. 
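+
+    Each non-null value is a sequence of key/item pairs; ``to_pylist()``
+    returns them as lists of 2-tuples.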
+ """ + + @staticmethod + def from_arrays(offsets, keys, items, MemoryPool pool=None): + """ + Construct MapArray from arrays of int32 offsets and key, item arrays. + + Parameters + ---------- + offsets : array-like or sequence (int32 type) + keys : array-like or sequence (any type) + items : array-like or sequence (any type) + pool : MemoryPool + + Returns + ------- + map_array : MapArray + """ + cdef: + Array _offsets, _keys, _items + shared_ptr[CArray] out + cdef CMemoryPool* cpool = maybe_unbox_memory_pool(pool) + + _offsets = asarray(offsets, type='int32') + _keys = asarray(keys) + _items = asarray(items) + + with nogil: + out = GetResultValue( + CMapArray.FromArrays(_offsets.sp_array, + _keys.sp_array, + _items.sp_array, cpool)) + cdef Array result = pyarrow_wrap_array(out) + result.validate() + return result + + @property + def keys(self): + return pyarrow_wrap_array(( self.ap).keys()) + + @property + def items(self): + return pyarrow_wrap_array(( self.ap).items()) + + +cdef class FixedSizeListArray(Array): + """ + Concrete class for Arrow arrays of a fixed size list data type. + """ + + @staticmethod + def from_arrays(values, int32_t list_size): + """ + Construct FixedSizeListArray from array of values and a list length. + + Parameters + ---------- + values : Array (any type) + list_size : int + The fixed length of the lists. + + Returns + ------- + FixedSizeListArray + """ + cdef: + Array _values + CResult[shared_ptr[CArray]] c_result + + _values = asarray(values) + + with nogil: + c_result = CFixedSizeListArray.FromArrays( + _values.sp_array, list_size) + cdef Array result = pyarrow_wrap_array(GetResultValue(c_result)) + result.validate() + return result + + @property + def values(self): + return self.flatten() + + def flatten(self): + """ + Unnest this FixedSizeListArray by one level. + + Returns + ------- + result : Array + """ + cdef CFixedSizeListArray* arr = self.ap + return pyarrow_wrap_array(arr.values()) + + +cdef class UnionArray(Array): + """ + Concrete class for Arrow arrays of a Union data type. + """ + + def child(self, int pos): + import warnings + warnings.warn("child is deprecated, use field", FutureWarning) + return self.field(pos) + + def field(self, int pos): + """ + Return the given child field as an individual array. + + For sparse unions, the returned array has its offset, length, + and null count adjusted. + + For dense unions, the returned array is unchanged. + """ + cdef shared_ptr[CArray] result + result = ( self.ap).field(pos) + if result != NULL: + return pyarrow_wrap_array(result) + raise KeyError("UnionArray does not have child {}".format(pos)) + + @property + def type_codes(self): + """Get the type codes array.""" + buf = pyarrow_wrap_buffer(( self.ap).type_codes()) + return Array.from_buffers(int8(), len(self), [None, buf]) + + @property + def offsets(self): + """ + Get the value offsets array (dense arrays only). + + Does not account for any slice offset. 
+ """ + if self.type.mode != "dense": + raise ArrowTypeError("Can only get value offsets for dense arrays") + cdef CDenseUnionArray* dense = self.ap + buf = pyarrow_wrap_buffer(dense.value_offsets()) + return Array.from_buffers(int32(), len(self), [None, buf]) + + @staticmethod + def from_dense(Array types, Array value_offsets, list children, + list field_names=None, list type_codes=None): + """ + Construct dense UnionArray from arrays of int8 types, int32 offsets and + children arrays + + Parameters + ---------- + types : Array (int8 type) + value_offsets : Array (int32 type) + children : list + field_names : list + type_codes : list + + Returns + ------- + union_array : UnionArray + """ + cdef: + shared_ptr[CArray] out + vector[shared_ptr[CArray]] c + Array child + vector[c_string] c_field_names + vector[int8_t] c_type_codes + + for child in children: + c.push_back(child.sp_array) + if field_names is not None: + for x in field_names: + c_field_names.push_back(tobytes(x)) + if type_codes is not None: + for x in type_codes: + c_type_codes.push_back(x) + + with nogil: + out = GetResultValue(CDenseUnionArray.Make( + deref(types.ap), deref(value_offsets.ap), c, c_field_names, + c_type_codes)) + + cdef Array result = pyarrow_wrap_array(out) + result.validate() + return result + + @staticmethod + def from_sparse(Array types, list children, list field_names=None, + list type_codes=None): + """ + Construct sparse UnionArray from arrays of int8 types and children + arrays + + Parameters + ---------- + types : Array (int8 type) + children : list + field_names : list + type_codes : list + + Returns + ------- + union_array : UnionArray + """ + cdef: + shared_ptr[CArray] out + vector[shared_ptr[CArray]] c + Array child + vector[c_string] c_field_names + vector[int8_t] c_type_codes + + for child in children: + c.push_back(child.sp_array) + if field_names is not None: + for x in field_names: + c_field_names.push_back(tobytes(x)) + if type_codes is not None: + for x in type_codes: + c_type_codes.push_back(x) + + with nogil: + out = GetResultValue(CSparseUnionArray.Make( + deref(types.ap), c, c_field_names, c_type_codes)) + + cdef Array result = pyarrow_wrap_array(out) + result.validate() + return result + + +cdef class StringArray(Array): + """ + Concrete class for Arrow arrays of string (or utf8) data type. + """ + + @staticmethod + def from_buffers(int length, Buffer value_offsets, Buffer data, + Buffer null_bitmap=None, int null_count=-1, + int offset=0): + """ + Construct a StringArray from value_offsets and data buffers. + If there are nulls in the data, also a null_bitmap and the matching + null_count must be passed. + + Parameters + ---------- + length : int + value_offsets : Buffer + data : Buffer + null_bitmap : Buffer, optional + null_count : int, default 0 + offset : int, default 0 + + Returns + ------- + string_array : StringArray + """ + return Array.from_buffers(utf8(), length, + [null_bitmap, value_offsets, data], + null_count, offset) + + +cdef class LargeStringArray(Array): + """ + Concrete class for Arrow arrays of large string (or utf8) data type. + """ + + @staticmethod + def from_buffers(int length, Buffer value_offsets, Buffer data, + Buffer null_bitmap=None, int null_count=-1, + int offset=0): + """ + Construct a LargeStringArray from value_offsets and data buffers. + If there are nulls in the data, also a null_bitmap and the matching + null_count must be passed. 
+ + Parameters + ---------- + length : int + value_offsets : Buffer + data : Buffer + null_bitmap : Buffer, optional + null_count : int, default 0 + offset : int, default 0 + + Returns + ------- + string_array : StringArray + """ + return Array.from_buffers(large_utf8(), length, + [null_bitmap, value_offsets, data], + null_count, offset) + + +cdef class BinaryArray(Array): + """ + Concrete class for Arrow arrays of variable-sized binary data type. + """ + @property + def total_values_length(self): + """ + The number of bytes from beginning to end of the data buffer addressed + by the offsets of this BinaryArray. + """ + return ( self.ap).total_values_length() + + +cdef class LargeBinaryArray(Array): + """ + Concrete class for Arrow arrays of large variable-sized binary data type. + """ + @property + def total_values_length(self): + """ + The number of bytes from beginning to end of the data buffer addressed + by the offsets of this LargeBinaryArray. + """ + return ( self.ap).total_values_length() + + +cdef class DictionaryArray(Array): + """ + Concrete class for dictionary-encoded Arrow arrays. + """ + + def dictionary_encode(self): + return self + + def dictionary_decode(self): + """ + Decodes the DictionaryArray to an Array. + """ + return self.dictionary.take(self.indices) + + @property + def dictionary(self): + cdef CDictionaryArray* darr = (self.ap) + + if self._dictionary is None: + self._dictionary = pyarrow_wrap_array(darr.dictionary()) + + return self._dictionary + + @property + def indices(self): + cdef CDictionaryArray* darr = (self.ap) + + if self._indices is None: + self._indices = pyarrow_wrap_array(darr.indices()) + + return self._indices + + @staticmethod + def from_arrays(indices, dictionary, mask=None, bint ordered=False, + bint from_pandas=False, bint safe=True, + MemoryPool memory_pool=None): + """ + Construct a DictionaryArray from indices and values. + + Parameters + ---------- + indices : pyarrow.Array, numpy.ndarray or pandas.Series, int type + Non-negative integers referencing the dictionary values by zero + based index. + dictionary : pyarrow.Array, ndarray or pandas.Series + The array of values referenced by the indices. + mask : ndarray or pandas.Series, bool type + True values indicate that indices are actually null. + from_pandas : bool, default False + If True, the indices should be treated as though they originated in + a pandas.Categorical (null encoded as -1). + ordered : bool, default False + Set to True if the category values are ordered. + safe : bool, default True + If True, check that the dictionary indices are in range. + memory_pool : MemoryPool, default None + For memory allocations, if required, otherwise uses default pool. 
+ + Returns + ------- + dict_array : DictionaryArray + """ + cdef: + Array _indices, _dictionary + shared_ptr[CDataType] c_type + shared_ptr[CArray] c_result + + if isinstance(indices, Array): + if mask is not None: + raise NotImplementedError( + "mask not implemented with Arrow array inputs yet") + _indices = indices + else: + if from_pandas: + _indices = _codes_to_indices(indices, mask, None, memory_pool) + else: + _indices = array(indices, mask=mask, memory_pool=memory_pool) + + if isinstance(dictionary, Array): + _dictionary = dictionary + else: + _dictionary = array(dictionary, memory_pool=memory_pool) + + if not isinstance(_indices, IntegerArray): + raise ValueError('Indices must be integer type') + + cdef c_bool c_ordered = ordered + + c_type.reset(new CDictionaryType(_indices.type.sp_type, + _dictionary.sp_array.get().type(), + c_ordered)) + + if safe: + with nogil: + c_result = GetResultValue( + CDictionaryArray.FromArrays(c_type, _indices.sp_array, + _dictionary.sp_array)) + else: + c_result.reset(new CDictionaryArray(c_type, _indices.sp_array, + _dictionary.sp_array)) + + cdef Array result = pyarrow_wrap_array(c_result) + result.validate() + return result + + +cdef class StructArray(Array): + """ + Concrete class for Arrow arrays of a struct data type. + """ + + def field(self, index): + """ + Retrieves the child array belonging to field. + + Parameters + ---------- + index : Union[int, str] + Index / position or name of the field. + + Returns + ------- + result : Array + """ + cdef: + CStructArray* arr = self.ap + shared_ptr[CArray] child + + if isinstance(index, (bytes, str)): + child = arr.GetFieldByName(tobytes(index)) + if child == nullptr: + raise KeyError(index) + elif isinstance(index, int): + child = arr.field( + _normalize_index(index, self.ap.num_fields())) + else: + raise TypeError('Expected integer or string index') + + return pyarrow_wrap_array(child) + + def flatten(self, MemoryPool memory_pool=None): + """ + Return one individual array for each field in the struct. + + Parameters + ---------- + memory_pool : MemoryPool, default None + For memory allocations, if required, otherwise use default pool. + + Returns + ------- + result : List[Array] + """ + cdef: + vector[shared_ptr[CArray]] arrays + CMemoryPool* pool = maybe_unbox_memory_pool(memory_pool) + CStructArray* sarr = self.ap + + with nogil: + arrays = GetResultValue(sarr.Flatten(pool)) + + return [pyarrow_wrap_array(arr) for arr in arrays] + + @staticmethod + def from_arrays(arrays, names=None, fields=None, mask=None, + memory_pool=None): + """ + Construct StructArray from collection of arrays representing + each field in the struct. + + Either field names or field instances must be passed. + + Parameters + ---------- + arrays : sequence of Array + names : List[str] (optional) + Field names for each struct child. + fields : List[Field] (optional) + Field instances for each struct child. + mask : pyarrow.Array[bool] (optional) + Indicate which values are null (True) or not null (False). + memory_pool : MemoryPool (optional) + For memory allocations, if required, otherwise uses default pool. 
+
+
+cdef class ExtensionArray(Array):
+    """
+    Concrete class for Arrow extension arrays.
+    """
+
+    @property
+    def storage(self):
+        cdef:
+            CExtensionArray* ext_array = <CExtensionArray*>(self.ap)
+
+        return pyarrow_wrap_array(ext_array.storage())
+
+    @staticmethod
+    def from_storage(BaseExtensionType typ, Array storage):
+        """
+        Construct an ExtensionArray from an extension type and a storage
+        array.
+
+        Parameters
+        ----------
+        typ : DataType
+            The extension type for the result array.
+        storage : Array
+            The underlying storage for the result array.
+
+        Returns
+        -------
+        ext_array : ExtensionArray
+        """
+        cdef:
+            shared_ptr[CExtensionArray] ext_array
+
+        if storage.type != typ.storage_type:
+            raise TypeError("Incompatible storage type {0} "
+                            "for extension type {1}".format(storage.type, typ))
+
+        ext_array = make_shared[CExtensionArray](typ.sp_type, storage.sp_array)
+        cdef Array result = pyarrow_wrap_array(<shared_ptr[CArray]> ext_array)
+        result.validate()
+        return result
+
+    def _to_pandas(self, options, **kwargs):
+        pandas_dtype = None
+        try:
+            pandas_dtype = self.type.to_pandas_dtype()
+        except NotImplementedError:
+            pass
+
+        # pandas ExtensionDtype that implements conversion from pyarrow
+        if hasattr(pandas_dtype, '__from_arrow__'):
+            arr = pandas_dtype.__from_arrow__(self)
+            return pandas_api.series(arr)
+
+        # otherwise convert the storage array with the base implementation
+        return Array._to_pandas(self.storage, options, **kwargs)
+
+    def to_numpy(self, **kwargs):
+        """
+        Convert the extension array to a numpy ndarray.
+
+        See Also
+        --------
+        Array.to_numpy
+        """
+        return self.storage.to_numpy(**kwargs)
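+
+# A minimal usage sketch for ExtensionArray.from_storage (illustrative
+# comment only, not part of the upstream module; assumes a pyarrow vintage
+# in which subclassing ``pa.PyExtensionType`` is the simplest way to define
+# an extension type, and a hypothetical ``UuidType``):
+#
+#     import pyarrow as pa
+#
+#     class UuidType(pa.PyExtensionType):
+#         def __init__(self):
+#             # store each value as 16 fixed-size binary bytes
+#             pa.PyExtensionType.__init__(self, pa.binary(16))
+#
+#         def __reduce__(self):
+#             return UuidType, ()
+#
+#     storage = pa.array([b'0' * 16, b'1' * 16], pa.binary(16))
+#     uuid_arr = pa.ExtensionArray.from_storage(UuidType(), storage)
+#     # .storage gives back the underlying binary(16) array
+#     assert uuid_arr.storage.equals(storage)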
+
+
+cdef dict _array_classes = {
+    _Type_NA: NullArray,
+    _Type_BOOL: BooleanArray,
+    _Type_UINT8: UInt8Array,
+    _Type_UINT16: UInt16Array,
+    _Type_UINT32: UInt32Array,
+    _Type_UINT64: UInt64Array,
+    _Type_INT8: Int8Array,
+    _Type_INT16: Int16Array,
+    _Type_INT32: Int32Array,
+    _Type_INT64: Int64Array,
+    _Type_DATE32: Date32Array,
+    _Type_DATE64: Date64Array,
+    _Type_TIMESTAMP: TimestampArray,
+    _Type_TIME32: Time32Array,
+    _Type_TIME64: Time64Array,
+    _Type_DURATION: DurationArray,
+    _Type_INTERVAL_MONTH_DAY_NANO: MonthDayNanoIntervalArray,
+    _Type_HALF_FLOAT: HalfFloatArray,
+    _Type_FLOAT: FloatArray,
+    _Type_DOUBLE: DoubleArray,
+    _Type_LIST: ListArray,
+    _Type_LARGE_LIST: LargeListArray,
+    _Type_MAP: MapArray,
+    _Type_FIXED_SIZE_LIST: FixedSizeListArray,
+    _Type_SPARSE_UNION: UnionArray,
+    _Type_DENSE_UNION: UnionArray,
+    _Type_BINARY: BinaryArray,
+    _Type_STRING: StringArray,
+    _Type_LARGE_BINARY: LargeBinaryArray,
+    _Type_LARGE_STRING: LargeStringArray,
+    _Type_DICTIONARY: DictionaryArray,
+    _Type_FIXED_SIZE_BINARY: FixedSizeBinaryArray,
+    _Type_DECIMAL128: Decimal128Array,
+    _Type_DECIMAL256: Decimal256Array,
+    _Type_STRUCT: StructArray,
+    _Type_EXTENSION: ExtensionArray,
+}
+
+
+cdef object get_array_class_from_type(
+        const shared_ptr[CDataType]& sp_data_type):
+    cdef CDataType* data_type = sp_data_type.get()
+    if data_type == NULL:
+        raise ValueError('Array data type was NULL')
+
+    if data_type.id() == _Type_EXTENSION:
+        # extension types can supply their own Array subclass
+        py_ext_data_type = pyarrow_wrap_data_type(sp_data_type)
+        return py_ext_data_type.__arrow_ext_class__()
+    else:
+        return _array_classes[data_type.id()]
+
+
+cdef object get_values(object obj, bint* is_series):
+    if pandas_api.is_series(obj) or pandas_api.is_index(obj):
+        result = pandas_api.get_values(obj)
+        is_series[0] = True
+    elif isinstance(obj, np.ndarray):
+        result = obj
+        is_series[0] = False
+    else:
+        result = pandas_api.series(obj).values
+        is_series[0] = False
+
+    return result
+
+
+def concat_arrays(arrays, MemoryPool memory_pool=None):
+    """
+    Concatenate the given arrays.
+
+    The contents of the input arrays are copied into the returned array.
+
+    Parameters
+    ----------
+    arrays : iterable of pyarrow.Array
+        Arrays to concatenate, must be identically typed.
+    memory_pool : MemoryPool, default None
+        For memory allocations. If None, the default pool is used.
+
+    Returns
+    -------
+    array : Array
+        The concatenated array.
+
+    Raises
+    ------
+    ArrowInvalid
+        If not all of the arrays have the same type.
+    """
+    cdef:
+        vector[shared_ptr[CArray]] c_arrays
+        shared_ptr[CArray] c_concatenated
+        CMemoryPool* pool = maybe_unbox_memory_pool(memory_pool)
+
+    for array in arrays:
+        if not isinstance(array, Array):
+            raise TypeError("Iterable should contain Array objects, "
+                            "got {0} instead".format(type(array)))
+        c_arrays.push_back(pyarrow_unwrap_array(array))
+
+    with nogil:
+        c_concatenated = GetResultValue(Concatenate(c_arrays, pool))
+
+    return pyarrow_wrap_array(c_concatenated)
+
+
+def _empty_array(DataType type):
+    """
+    Create an empty array of the given type.
+    """
+    if type.id == Type_DICTIONARY:
+        arr = DictionaryArray.from_arrays(
+            _empty_array(type.index_type), _empty_array(type.value_type),
+            ordered=type.ordered)
+    else:
+        arr = array([], type=type)
+    return arr
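+
+# A minimal usage sketch for concat_arrays (illustrative comment only, not
+# part of the upstream module; assumes pyarrow is importable as ``pa``):
+#
+#     import pyarrow as pa
+#
+#     a = pa.array([1, 2])
+#     b = pa.array([3, 4])
+#     combined = pa.concat_arrays([a, b])  # copies both inputs
+#     assert combined.to_pylist() == [1, 2, 3, 4]
+#     # differently-typed inputs raise ArrowInvalid, e.g.:
+#     # pa.concat_arrays([a, pa.array(['x'])])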