1 files changed, 1748 insertions, 0 deletions
diff --git a/src/arrow/python/pyarrow/tests/test_table.py b/src/arrow/python/pyarrow/tests/test_table.py
new file mode 100644
index 000000000..ef41a733d
--- /dev/null
+++ b/src/arrow/python/pyarrow/tests/test_table.py
@@ -0,0 +1,1748 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from collections import OrderedDict
+from collections.abc import Iterable
+import pickle
+import sys
+import weakref
+
+import numpy as np
+import pytest
+import pyarrow as pa
+
+
+def test_chunked_array_basics():
+    """Check core ChunkedArray properties for empty and chunked input."""
+    empty_strings = pa.chunked_array([], type=pa.string())
+    assert empty_strings.type == pa.string()
+    assert empty_strings.to_pylist() == []
+    empty_strings.validate()
+
+    # The type may also be given by its string alias.
+    assert pa.chunked_array([], type='binary').type == pa.binary()
+
+    # Without chunks there is nothing to infer the type from.
+    with pytest.raises(ValueError):
+        pa.chunked_array([])
+
+    carr = pa.chunked_array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
+    assert isinstance(carr.chunks, list)
+    assert len(carr.chunks) == 3
+    assert all(isinstance(chunk, pa.lib.Int64Array) for chunk in carr.chunks)
+    assert all(isinstance(chunk, pa.lib.Int64Array)
+               for chunk in carr.iterchunks())
+    assert carr.nbytes == sum(chunk.nbytes for chunk in carr.iterchunks())
+    assert sys.getsizeof(carr) >= object.__sizeof__(carr) + carr.nbytes
+    carr.validate()
+
+    # Dropping the last strong reference must release the object.
+    ref = weakref.ref(carr)
+    assert ref() is not None
+    del carr
+    assert ref() is None
+
+
+def test_chunked_array_construction():
+    """Check the type and chunk layout of freshly built chunked arrays.
+
+    Covers inferred and explicit types, empty chunks and no chunks at all.
+    """
+    int_chunks = [[1, 2, 3], [4, 5, 6], [7, 8, 9]]
+    # Float values in the middle chunk do not change the expected type.
+    mixed_chunks = [
+        [1, 2, 3],
+        [4., 5., 6.],
+        [7, 8, 9],
+    ]
+
+    carr = pa.chunked_array(int_chunks)
+    assert carr.type == pa.int64()
+    assert len(carr) == 9
+    assert len(carr.chunks) == 3
+
+    carr = pa.chunked_array(mixed_chunks)
+    assert carr.type == pa.int64()
+    assert len(carr) == 9
+    assert len(carr.chunks) == 3
+
+    # An explicit type takes precedence.
+    carr = pa.chunked_array(mixed_chunks, type=pa.int8())
+    assert carr.type == pa.int8()
+    assert len(carr) == 9
+    assert len(carr.chunks) == 3
+
+    # An empty chunk is kept as a chunk of its own.
+    carr = pa.chunked_array([[1, 2, 3], []])
+    assert carr.type == pa.int64()
+    assert len(carr) == 3
+    assert len(carr.chunks) == 2
+
+    # The call itself must raise; its return value is never inspected.
+    expected_error = (
+        "When passing an empty collection of arrays you must also pass the "
+        "data type"
+    )
+    with pytest.raises(ValueError, match=expected_error):
+        pa.chunked_array([])
+
+    assert pa.chunked_array([], type=pa.string()).type == pa.string()
+    assert pa.chunked_array([[]]).type == pa.null()
+    assert pa.chunked_array([[]], type=pa.string()).type == pa.string()
+
+
+def test_combine_chunks():
+    """Check that combine_chunks() concatenates all chunks in order."""
+    # ARROW-7363 (written as "ARROW-77363" before; presumably a typo)
+    chunk = pa.array([1, 2])
+    combined = pa.chunked_array([chunk, chunk]).combine_chunks()
+    # Both chunks end up back to back in the combined result.
+    assert combined.equals(pa.array([1, 2, 1, 2]))
+
+
+def test_chunked_array_to_numpy():
+    """Check that to_numpy() and np.asarray() give the same flat array."""
+    carr = pa.chunked_array([[1, 2, 3], [4, 5, 6], []])
+
+    # Convert with np.asarray() first, then with the method.
+    via_asarray = np.asarray(carr)
+    via_method = carr.to_numpy()
+
+    # The trailing empty chunk contributes no elements.
+    assert isinstance(via_method, np.ndarray)
+    assert via_method.shape == (6,)
+    assert np.array_equal(via_asarray, via_method)
+
+
+def test_chunked_array_mismatch_types():
+    """Check that chunks of conflicting types are rejected."""
+    int_arr = pa.array([1, 2, 3])
+    float_arr = pa.array([1., 2., 3.])
+    # Given array types are different
+    with pytest.raises(TypeError):
+        pa.chunked_array([int_arr, float_arr])
+
+    # Given array type is different from explicit type argument
+    with pytest.raises(TypeError):
+        pa.chunked_array([int_arr], type=pa.float64())
+
+
+def test_chunked_array_str():
+    """Check the multi-line string rendering of a chunked array."""
+    first = pa.array([1, 2, 3])
+    second = pa.array([4, 5, 6])
+    rendered = str(pa.chunked_array([first, second]))
+    # Each chunk is printed as its own bracketed, indented group.
+    assert rendered == """[
+  [
+    1,
+    2,
+    3
+  ],
+  [
+    4,
+    5,
+    6
+  ]
+]"""
+
+
+def test_chunked_array_getitem():
+    """Check integer indexing and slicing across chunk boundaries."""
+    carr = pa.chunked_array([pa.array([1, 2, 3]), pa.array([4, 5, 6])])
+    # Positive and negative indices address the logical array.
+    assert carr[1].as_py() == 2
+    assert carr[-1].as_py() == 6
+    assert carr[-6].as_py() == 1
+
+    # One past either end is out of bounds.
+    with pytest.raises(IndexError):
+        carr[6]
+    with pytest.raises(IndexError):
+        carr[-7]
+
+    # Ensure this works with numpy scalars
+    assert carr[np.int32(1)].as_py() == 2
+
+    # Slices may span chunks and use negative bounds.
+    assert carr[2:4].to_pylist() == [3, 4]
+    assert carr[4:-1].to_pylist() == [5]
+
+    # An out-of-range slice is empty but keeps the type.
+    past_end = carr[99:99]
+    assert past_end.type == carr.type
+    assert past_end.to_pylist() == []
+
+
+def test_chunked_array_slice():
+    """Check that slicing at or beyond the end gives an empty result."""
+    carr = pa.chunked_array([pa.array([1, 2, 3]), pa.array([4, 5, 6])])
+    length = len(carr)
+
+    # Offset exactly at the end of the chunked array.
+    at_end = carr.slice(length)
+    assert at_end.type == carr.type
+    assert at_end.to_pylist() == []
+
+    # Offset well past the end of the chunked array.
+    beyond_end = carr.slice(length + 10)
+    assert beyond_end.type == carr.type
+    assert beyond_end.to_pylist() == []
+
+    # Same offsets, applied to a table wrapping the chunked array.
+    tbl = pa.Table.from_arrays([carr], names=["a"])
+    assert len(tbl.slice(len(tbl))) == 0
+
+    # A fresh table is built for the second offset as well.
+    tbl = pa.Table.from_arrays([carr], names=["a"])
+    assert len(tbl.slice(len(tbl) + 10)) == 0
+
+
+def test_chunked_array_iter():
+    """Check that iterating a chunked array visits every element in order.
+
+    The values are collected into a list before comparing: pairing them
+    with ``range(10)`` through ``zip`` would stop at the shorter input and
+    so could not notice an iteration that ends too early.
+    """
+    chunks = [[0], [1, 2, 3], [4, 5, 6], [7, 8, 9]]
+    arr = pa.chunked_array([pa.array(chunk) for chunk in chunks])
+
+    assert [value.as_py() for value in arr] == list(range(10))
+
+    assert isinstance(arr, Iterable)
+
+
+def test_chunked_array_equals():
+    """Check ChunkedArray equality via equals() and the ==/!= operators.
+
+    equals() is exercised in both directions for every pair.
+    """
+
+    def as_chunked(arrs):
+        # Chunked arrays pass through; anything else is wrapped into one.
+        if isinstance(arrs, pa.ChunkedArray):
+            return arrs
+        return pa.chunked_array(arrs)
+
+    def eq(xarrs, yarrs):
+        x, y = as_chunked(xarrs), as_chunked(yarrs)
+        assert x.equals(y)
+        assert y.equals(x)
+        assert x == y
+        # A chunked array is not equal to a plain string.
+        assert x != str(y)
+
+    def ne(xarrs, yarrs):
+        x, y = as_chunked(xarrs), as_chunked(yarrs)
+        assert not x.equals(y)
+        assert not y.equals(x)
+        assert x != y
+
+    # Empty chunked arrays compare by type.
+    empty_int32 = pa.chunked_array([], type=pa.int32())
+    eq(empty_int32, pa.chunked_array([], type=pa.int32()))
+    ne(empty_int32, pa.chunked_array([], type=pa.int64()))
+
+    # Same values as a, but int64 (b); other values (c); a followed by c (d).
+    a = pa.array([0, 2], type=pa.int32())
+    b = pa.array([0, 2], type=pa.int64())
+    c = pa.array([0, 3], type=pa.int32())
+    d = pa.array([0, 2, 0, 3], type=pa.int32())
+
+    eq([a], [a])
+    ne([a], [b])
+    eq([a, c], [a, c])
+    # The chunk layout is irrelevant, only the logical values count.
+    eq([a, c], [d])
+    ne([c, a], [a, c])
+
+    # ARROW-4822
+    assert not empty_int32.equals(None)
+
+
+@pytest.mark.parametrize(
+    ('data', 'typ'),
+    [
+        ([True, False, True, True], pa.bool_()),
+        ([1, 2, 4, 6], pa.int64()),
+        ([1.0, 2.5, None], pa.float64()),
+        (['a', None, 'b'], pa.string()),
+        ([], pa.list_(pa.uint8())),
+        ([[1, 2], [3]], pa.list_(pa.int64())),
+        ([['a'], None, ['b', 'c']], pa.list_(pa.string())),
+        ([(1, 'a'), (2, 'c'), None],
+            pa.struct([pa.field('a', pa.int64()), pa.field('b', pa.string())]))
+    ]
+)
+def test_chunked_array_pickle(data, typ):
+    """Check that a chunked array survives a pickle round trip."""
+    # Split the values into chunks of at most two elements each.
+    chunks = [pa.array(data[start:start + 2], type=typ)
+              for start in range(0, len(data), 2)]
+    original = pa.chunked_array(chunks, type=typ)
+    original.validate()
+    restored = pickle.loads(pickle.dumps(original))
+    restored.validate()
+    assert restored.equals(original)
+
+
+@pytest.mark.pandas
+def test_chunked_array_to_pandas():
+    """Check that a table column converts to a named pandas Series."""
+    import pandas as pd
+
+    values = pa.array([-10, -5, 0, 5, 10])
+    column = pa.table([values], names=['a']).column(0)
+    assert isinstance(column, pa.ChunkedArray)
+
+    # The Series takes its name from the column it came from.
+    converted = column.to_pandas()
+    assert isinstance(converted, pd.Series)
+    assert converted.shape == (5,)
+    assert converted[0] == -10
+    assert converted.name == 'a'
+
+
+@pytest.mark.pandas
+def test_chunked_array_to_pandas_preserve_name():
+    # The column name must survive to_pandas() (ARROW-7709).
+    import pandas as pd
+    import pandas.testing as tm
+
+    cases = [
+        pa.array([1, 2, 3]),
+        pa.array(pd.Categorical(["a", "b", "a"])),
+        pa.array(pd.date_range("2012", periods=3)),
+        pa.array(pd.date_range("2012", periods=3, tz="Europe/Brussels")),
+        pa.array([1, 2, 3], pa.timestamp("ms")),
+        pa.array([1, 2, 3], pa.timestamp("ms", "Europe/Brussels"))]
+    for arr in cases:
+        converted = pa.table({"name": arr}).column("name").to_pandas()
+        assert converted.name == "name"
+        wanted = pd.Series(arr.to_pandas(), name="name")
+        tm.assert_series_equal(converted, wanted)
+
+
+@pytest.mark.pandas
+@pytest.mark.nopandas
+def test_chunked_array_asarray():
+    """Check np.asarray() on chunked arrays of several kinds.
+
+    Both pytest markers are set so that the test is run whether or not
+    pandas is present (ARROW-6564).
+    """
+    no_nulls = pa.chunked_array([pa.array([0]), pa.array([1, 2, 3])])
+
+    # Without nulls the integer dtype is kept.
+    dense = np.asarray(no_nulls)
+    assert dense.dtype == np.dtype('int64')
+    assert dense.tolist() == [0, 1, 2, 3]
+
+    # An optional type can be specified when calling np.asarray
+    as_text = np.asarray(no_nulls, dtype='str')
+    assert as_text.tolist() == ['0', '1', '2', '3']
+
+    # Types are modified when there are nulls
+    with_nulls = pa.chunked_array([
+        pa.array([1, None]),
+        pa.array([1, 2, 3]),
+    ])
+
+    promoted = np.asarray(with_nulls)
+    assert promoted.dtype == np.dtype('float64')
+    first, missing, *rest = promoted.tolist()
+    assert first == 1.
+    # The null slot comes back as NaN.
+    assert np.isnan(missing)
+    assert rest == [1., 2., 3.]
+
+    # DictionaryType data will be converted to dense numpy array
+    indices = pa.array([0, 1, 2, 0, 1])
+    dictionary = pa.array(['a', 'b', 'c'])
+    encoded = pa.DictionaryArray.from_arrays(indices, dictionary)
+    decoded = np.asarray(pa.chunked_array([encoded, encoded]))
+    assert decoded.dtype == np.dtype('object')
+    assert decoded.tolist() == ['a', 'b', 'c', 'a', 'b'] * 2
+
+
+def test_chunked_array_flatten():
+    """Check that flatten() splits a struct column into its fields."""
+    struct_type = pa.struct([pa.field('x', pa.int16()),
+                             pa.field('y', pa.float32())])
+    # (struct rows, expected 'x', expected 'y'); the last case is empty.
+    cases = [
+        ([(1, 2.5), (3, 4.5), (5, 6.5)], [1, 3, 5], [2.5, 4.5, 6.5]),
+        ([], [], []),
+    ]
+    for rows, x_values, y_values in cases:
+        structs = pa.chunked_array(pa.array(rows, type=struct_type))
+        x, y = structs.flatten()
+        want_x = pa.chunked_array(pa.array(x_values, type=pa.int16()))
+        want_y = pa.chunked_array(pa.array(y_values, type=pa.float32()))
+        assert x.equals(want_x)
+        assert y.equals(want_y)
+
+
+def test_chunked_array_unify_dictionaries():
+    """Check that unify_dictionaries() gives all chunks one dictionary."""
+    first = ["foo", "bar", None, "foo"]
+    second = ["quux", None, "foo"]
+    encoded = pa.chunked_array(
+        [pa.array(values).dictionary_encode() for values in (first, second)])
+    assert encoded.chunk(0).dictionary.equals(pa.array(["foo", "bar"]))
+    assert encoded.chunk(1).dictionary.equals(pa.array(["quux", "foo"]))
+    unified = encoded.unify_dictionaries()
+    merged = pa.array(["foo", "bar", "quux"])
+    assert all(unified.chunk(i).dictionary.equals(merged) for i in range(2))
+    assert unified.to_pylist() == first + second
+
+
+def test_recordbatch_basics():
+    """Check sizes, dict conversion, schemas and printing of a batch."""
+    columns = [
+        pa.array(range(5), type='int16'),
+        pa.array([-10, -5, 0, None, 10], type='int32'),
+    ]
+
+    plain = pa.record_batch(columns, ['c0', 'c1'])
+    assert not plain.schema.metadata
+    assert (len(plain), plain.num_rows) == (5, 5)
+    assert plain.num_columns == len(columns)
+    # (only the second array has a null bitmap)
+    assert plain.nbytes == (5 * 2) + (5 * 4 + 1)
+    assert sys.getsizeof(plain) >= object.__sizeof__(plain) + plain.nbytes
+
+    as_dict = plain.to_pydict()
+    assert as_dict == OrderedDict([
+        ('c0', [0, 1, 2, 3, 4]),
+        ('c1', [-10, -5, 0, None, 10]),
+    ])
+    # Plain dicts are returned from Python 3.7 on, OrderedDict before.
+    dict_type = dict if sys.version_info >= (3, 7) else OrderedDict
+    assert type(as_dict) is dict_type
+
+    # bounds checking
+    with pytest.raises(IndexError):
+        plain[2]
+
+    # Schema passed explicitly
+    fields = [
+        pa.field('c0', pa.int16(), metadata={'key': 'value'}),
+        pa.field('c1', pa.int32()),
+    ]
+    schema = pa.schema(fields, metadata={b'foo': b'bar'})
+    by_keyword = pa.record_batch(columns, schema=schema)
+    assert by_keyword.schema == schema
+    # schema passed positionally
+    batch = pa.record_batch(columns, schema)
+    assert batch.schema == schema
+    assert str(batch) == """pyarrow.RecordBatch
+c0: int16
+c1: int32"""
+
+    assert batch.to_string(show_metadata=True) == """\
+pyarrow.RecordBatch
+c0: int16
+  -- field metadata --
+  key: 'value'
+c1: int32
+-- schema metadata --
+foo: 'bar'"""
+
+    ref = weakref.ref(batch)
+    assert ref() is not None
+    del batch
+    assert ref() is None
+
+
+def test_recordbatch_equals():
+    """Check RecordBatch equality, with and without schema metadata."""
+    names = ['c0', 'c1']
+    numbers = [
+        pa.array(range(5), type='int16'),
+        pa.array([-10, -5, 0, None, 10], type='int32'),
+    ]
+    strings = [pa.array(['a', 'b', 'c']), pa.array([['d'], ['e'], ['f']])]
+
+    batch = pa.record_batch(numbers, names)
+    assert batch == pa.record_batch(numbers, names)
+    assert batch.equals(pa.record_batch(numbers, names))
+
+    # A batch holding different columns is not equal.
+    assert batch != pa.record_batch(strings, names)
+    assert not batch.equals(pa.record_batch(strings, names))
+
+    # Metadata only matters when check_metadata is requested.
+    with_meta = pa.record_batch(numbers, names=names,
+                                metadata={'key': 'value'})
+    assert with_meta.equals(batch)
+    assert not with_meta.equals(batch, check_metadata=True)
+
+    # ARROW-8889
+    assert not batch.equals(None)
+    assert batch != "foo"
+
+
+def test_recordbatch_take():
+    """Check take() with plain indices and with a null index."""
+    names = ['f1', 'f2']
+    batch = pa.record_batch([pa.array([1, 2, 3, None, 5]),
+                             pa.array(['a', 'b', 'c', 'd', 'e'])], names)
+    assert batch.take(pa.array([2, 3])).equals(batch.slice(2, 2))
+    expected = [pa.array([3, None]), pa.array(['c', None])]
+    taken = batch.take(pa.array([2, None]))
+    assert taken.equals(pa.record_batch(expected, names))
+
+
+def test_recordbatch_column_sets_private_name():
+    """ARROW-6429: a column taken from a batch carries its field name."""
+    batch = pa.record_batch([pa.array([1, 2, 3, 4])], names=['a0'])
+    assert batch[0]._name == 'a0'
+
+
+def test_recordbatch_from_arrays_validate_schema():
+    """ARROW-6263: an array not matching the schema type is rejected."""
+    list_schema = pa.schema([pa.field('f0', pa.list_(pa.utf8()))])
+    int_arr = pa.array([1, 2])
+    with pytest.raises(NotImplementedError):
+        pa.record_batch([int_arr], schema=list_schema)
+
+
+def test_recordbatch_from_arrays_validate_lengths():
+    """ARROW-2820: columns of unequal length cannot form a batch."""
+    ids = pa.array([1])
+    tags = pa.array(["tokyo", "like", "happy"])
+    names = pa.array(["derek"])
+    with pytest.raises(ValueError):
+        pa.record_batch([ids, tags, names], ['id', 'tags', 'name'])
+
+
+def test_recordbatch_no_fields():
+    """Check that a batch without columns has no rows either."""
+    empty = pa.record_batch([], [])
+    assert empty.num_columns == 0
+    assert empty.num_rows == 0
+    assert len(empty) == 0
+
+
+def test_recordbatch_from_arrays_invalid_names():
+    """Check that the number of names must match the number of arrays."""
+    two_columns = [pa.array(range(5)), pa.array([-10, -5, 0, 5, 10])]
+
+    # One name too many.
+    with pytest.raises(ValueError):
+        pa.record_batch(two_columns, names=['a', 'b', 'c'])
+    # One name too few.
+    with pytest.raises(ValueError):
+        pa.record_batch(two_columns, names=['a'])
+
+
+def test_recordbatch_empty_metadata():
+    """Check that schema metadata is None when none was supplied."""
+    first = pa.array(range(5))
+    second = pa.array([-10, -5, 0, 5, 10])
+
+    # No metadata argument is passed here.
+    batch = pa.record_batch([first, second], ['c0', 'c1'])
+    assert batch.schema.metadata is None
+
+
+def test_recordbatch_pickle():
+    """Check that a batch and its schema survive a pickle round trip."""
+    ints = pa.array(range(5), type='int8')
+    floats = pa.array([-10, -5, 0, 5, 10], type='float32')
+    schema = pa.schema(
+        [pa.field('ints', pa.int8()), pa.field('floats', pa.float32())],
+        metadata={b'foo': b'bar'})
+    original = pa.record_batch([ints, floats], schema=schema)
+
+    # Serialize and load again in one go.
+    restored = pickle.loads(pickle.dumps(original))
+
+    # Both the data and the schema must come back unchanged.
+    assert restored.equals(original)
+    assert restored.schema == schema
+
+
+def test_recordbatch_get_field():
+    """Check field lookup on a batch by name and by position."""
+    columns = [
+        pa.array(range(5)),
+        pa.array([-10, -5, 0, 5, 10]),
+        pa.array(range(5, 10)),
+    ]
+    batch = pa.RecordBatch.from_arrays(columns, names=('a', 'b', 'c'))
+
+    # Both lookup styles must agree with the schema.
+    field_a = batch.schema.field('a')
+    assert batch.field('a').equals(field_a)
+    assert batch.field(0).equals(field_a)
+
+    # Unknown name, unsupported key type and out-of-range position.
+    bad_lookups = [(KeyError, 'd'), (TypeError, None), (IndexError, 4)]
+    for error, key in bad_lookups:
+        with pytest.raises(error):
+            batch.field(key)
+
+
+def test_recordbatch_select_column():
+    """Check column lookup on a batch by name and by position."""
+    columns = [
+        pa.array(range(5)),
+        pa.array([-10, -5, 0, 5, 10]),
+        pa.array(range(5, 10)),
+    ]
+    batch = pa.RecordBatch.from_arrays(columns, names=('a', 'b', 'c'))
+
+    # Name and position address the same column.
+    assert batch.column('a').equals(batch.column(0))
+
+    missing = 'Field "d" does not exist in record batch schema'
+    with pytest.raises(KeyError, match=missing):
+        batch.column('d')
+    with pytest.raises(TypeError):
+        batch.column(None)
+    with pytest.raises(IndexError):
+        batch.column(4)
+
+
+def test_recordbatch_from_struct_array_invalid():
+    with pytest.raises(TypeError):  # the input is not a struct array
+        pa.RecordBatch.from_struct_array(pa.array(range(5)))
+
+
+def test_recordbatch_from_struct_array():
+    """Check that each struct field becomes a column of the batch."""
+    struct_type = pa.struct([("ints", pa.int32()), ("floats", pa.float32())])
+    # Every row leaves one of the two fields unset.
+    rows = pa.array([{"ints": 1}, {"floats": 1.0}], type=struct_type)
+    converted = pa.RecordBatch.from_struct_array(rows)
+    # Unset fields show up as nulls in the matching column.
+    int_column = pa.array([1, None], type=pa.int32())
+    float_column = pa.array([None, 1.0], type=pa.float32())
+    wanted = pa.RecordBatch.from_arrays(
+        [int_column, float_column], ["ints", "floats"])
+    assert converted.equals(wanted)
+
+
+def _table_like_slice_tests(factory):
+    """Run the shared slicing checks on factory(arrays, names=...)."""
+    columns = [pa.array(range(5)), pa.array([-10, -5, 0, 5, 10])]
+    names = ['c0', 'c1']
+    obj = factory(columns, names=names)
+    size = len(obj)
+
+    # Offset only: everything from row 2 onwards.
+    tail = obj.slice(2)
+    assert tail.num_rows == 3
+    assert tail.equals(factory([col.slice(2) for col in columns],
+                               names=names))
+
+    # Offset and length.
+    window = obj.slice(2, 2)
+    assert window.equals(factory([col.slice(2, 2) for col in columns],
+                                 names=names))
+
+    # 0 offset
+    assert obj.slice(0).equals(obj)
+
+    # Slice past end of array
+    assert len(obj.slice(size)) == 0
+
+    # A negative offset is rejected.
+    with pytest.raises(IndexError):
+        obj.slice(-1)
+
+    # Check __getitem__-based slicing
+    assert obj.slice(0, 0).equals(obj[:0])
+    assert obj.slice(0, 2).equals(obj[:2])
+    assert obj.slice(2, 2).equals(obj[2:4])
+    assert obj.slice(2, size - 2).equals(obj[2:])
+    assert obj.slice(size - 2, 2).equals(obj[-2:])
+    assert obj.slice(size - 4, 2).equals(obj[-4:-2])
+
+
+def test_recordbatch_slice_getitem():
+    _table_like_slice_tests(pa.RecordBatch.from_arrays)
+
+
+def test_table_slice_getitem():
+    _table_like_slice_tests(pa.table)
+
+
+@pytest.mark.pandas
+def test_slice_zero_length_table():
+    """Check that zero-length tables convert to pandas without crashing."""
+    # ARROW-7907: a segfault on this code was fixed after 0.16.0
+    timestamps = pa.table({'a': pa.array([], type=pa.timestamp('us'))})
+    timestamps.slice(0, 0).to_pandas()
+
+    strings = pa.table({'a': pa.chunked_array([], type=pa.string())})
+    strings.to_pandas()
+
+
+def test_recordbatchlist_schema_equals():
+    """Check that batches with differing schemas cannot form a table."""
+    uints = pa.array(np.array([1], dtype='uint32'))
+    floats = pa.array(np.array([4.0, 5.0], dtype='float64'))
+    batches = [pa.record_batch([arr], ['c1']) for arr in (uints, floats)]
+    # Same column name, but the column types differ.
+    with pytest.raises(pa.ArrowInvalid):
+        pa.Table.from_batches(batches)
+
+
+def test_table_column_sets_private_name():
+    """ARROW-6429: a column taken from a table carries its field name."""
+    tbl = pa.table([pa.array([1, 2, 3, 4])], names=['a0'])
+    assert tbl[0]._name == 'a0'
+
+
+def test_table_equals():
+    """Check Table.equals() on empty tables, None and differing metadata."""
+    empty = pa.Table.from_arrays([], names=[])
+    with_meta = pa.Table.from_arrays([], names=[], metadata={'key': 'value'})
+
+    assert empty.equals(empty)
+    # ARROW-4822
+    assert not empty.equals(None)
+    assert empty.equals(with_meta)
+    assert not empty.equals(with_meta, check_metadata=True)
+
+
+def test_table_from_batches_and_schema():
+    """Check from_batches() with matching and mismatching schemas."""
+    field_a = pa.field('a', pa.int64())
+    field_b = pa.field('b', pa.float64())
+    full_schema = pa.schema([field_a, field_b])
+    short_schema = pa.schema([field_a])
+    full_batch = pa.record_batch([pa.array([1]), pa.array([3.14])],
+                                 names=['a', 'b'])
+    short_batch = pa.record_batch([pa.array([1])], ['a'])
+    tbl = pa.Table.from_batches([full_batch], full_schema)
+    assert tbl.schema.equals(full_schema)
+    assert tbl.column(0) == pa.chunked_array([[1]])
+    assert tbl.column(1) == pa.chunked_array([[3.14]])
+
+    # Neither the schema nor the batch may lack column 'b'.
+    for batch, schema in [(full_batch, short_schema),
+                          (short_batch, full_schema)]:
+        with pytest.raises(pa.ArrowInvalid):
+            pa.Table.from_batches([batch], schema)
+
+
+@pytest.mark.pandas
+def test_table_to_batches():
+    """Check splitting a table into batches and building it back."""
+    import pandas as pd
+    from pandas.testing import assert_frame_equal
+
+    small_df = pd.DataFrame({'a': list(range(10))})
+    large_df = pd.DataFrame({'a': list(range(10, 30))})
+    small = pa.RecordBatch.from_pandas(small_df, preserve_index=False)
+    large = pa.RecordBatch.from_pandas(large_df, preserve_index=False)
+    inputs = [small, large, small]
+
+    tbl = pa.Table.from_batches(inputs)
+    wanted_df = pd.concat([small_df, large_df, small_df], ignore_index=True)
+
+    # By default there is one batch per input batch.
+    as_given = tbl.to_batches()
+    assert len(as_given) == 3
+    rebuilt = pa.Table.from_batches(as_given)
+    assert_frame_equal(rebuilt.to_pandas(), wanted_df)
+
+    # The 20-row batch exceeds max_chunksize and is split in two.
+    capped = tbl.to_batches(max_chunksize=15)
+    assert [len(batch) for batch in capped] == [10, 15, 5, 10]
+    assert_frame_equal(tbl.to_pandas(), wanted_df)
+    rebuilt = pa.Table.from_batches(capped)
+    assert_frame_equal(rebuilt.to_pandas(), wanted_df)
+
+    # An iterator of batches is accepted as well.
+    assert tbl.equals(pa.Table.from_batches(iter(inputs)))
+
+
+def test_table_basics():
+    """Check sizes, dict conversion, columns and equality of a table."""
+    arrays = [
+        pa.array(range(5), type='int64'),
+        pa.array([-10, -5, 0, 5, 10], type='int64'),
+    ]
+    table = pa.table(arrays, names=('a', 'b'))
+    table.validate()
+
+    assert (len(table), table.num_rows, table.num_columns) == (5, 5, 2)
+    assert table.shape == (5, 2)
+    # Two int64 columns of five values, eight bytes per value.
+    assert table.nbytes == 2 * (5 * 8)
+    assert sys.getsizeof(table) >= object.__sizeof__(table) + table.nbytes
+
+    as_dict = table.to_pydict()
+    assert as_dict == OrderedDict([
+        ('a', [0, 1, 2, 3, 4]),
+        ('b', [-10, -5, 0, 5, 10]),
+    ])
+    # Plain dicts are returned from Python 3.7 on, OrderedDict before.
+    dict_type = dict if sys.version_info >= (3, 7) else OrderedDict
+    assert type(as_dict) is dict_type
+
+    seen = []
+    for column in table.itercolumns():
+        seen.append(column)
+        assert all(chunk is not None for chunk in column.iterchunks())
+
+        # Both -1 and num_chunks are out of range for chunk().
+        for bad_index in (-1, column.num_chunks):
+            with pytest.raises(IndexError):
+                column.chunk(bad_index)
+
+    assert table.columns == seen
+    assert table == pa.table(seen, names=table.column_names)
+    assert table != pa.table(seen[1:], names=table.column_names[1:])
+    assert table != seen
+
+    # Dropping the last strong reference must release the table.
+    ref = weakref.ref(table)
+    assert ref() is not None
+    del table
+    assert ref() is None
+
+
+def test_table_from_arrays_preserves_column_metadata():
+    # Added to test https://issues.apache.org/jira/browse/ARROW-3866
+    with_meta = pa.field('field1', pa.int64(), metadata=dict(a="A", b="B"))
+    required = pa.field('field2', pa.int64(), nullable=False)
+    schema = pa.schema([with_meta, required])
+    arrays = [pa.array([1, 2]), pa.array([3, 4])]
+    tbl = pa.Table.from_arrays(arrays, schema=schema)
+    # Metadata keys come back as bytes.
+    assert b"a" in tbl.field(0).metadata
+    assert tbl.field(1).nullable is False
+
+
+def test_table_from_arrays_invalid_names():
+    """Check that the number of names must match the number of arrays."""
+    arrays = [pa.array(range(5)), pa.array([-10, -5, 0, 5, 10])]
+
+    # More names than arrays.
+    with pytest.raises(ValueError):
+        pa.Table.from_arrays(arrays, names=['a', 'b', 'c'])
+    # Fewer names than arrays.
+    with pytest.raises(ValueError):
+        pa.Table.from_arrays(arrays, names=['a'])
+
+
+def test_table_from_lists():
+    """Check that pa.table() and Table.from_arrays() agree on lists."""
+    columns = [list(range(5)), [-10, -5, 0, 5, 10]]
+
+    # Types inferred from the values.
+    from_table = pa.table(columns, names=['a', 'b'])
+    from_arrays = pa.Table.from_arrays(columns, names=['a', 'b'])
+    assert from_table.equals(from_arrays)
+
+    # Types dictated by an explicit schema.
+    schema = pa.schema([
+        pa.field('a', pa.uint16()),
+        pa.field('b', pa.int64()),
+    ])
+    from_table = pa.table(columns, schema=schema)
+    from_arrays = pa.Table.from_arrays(columns, schema=schema)
+    assert from_table.equals(from_arrays)
+
+
+def test_table_pickle():
+    """Check that a table survives a pickle round trip."""
+    ints = pa.chunked_array([[1, 2], [3, 4]], type=pa.uint32())
+    strs = pa.chunked_array([["some", "strings", None, ""]], type=pa.string())
+    fields = [pa.field('ints', pa.uint32()), pa.field('strs', pa.string())]
+    schema = pa.schema(fields, metadata={b'foo': b'bar'})
+    original = pa.Table.from_arrays([ints, strs], schema=schema)
+
+    # Serialize and load again in one go.
+    restored = pickle.loads(pickle.dumps(original))
+
+    restored.validate()
+    assert restored.equals(original)
+
+
+def test_table_get_field():
+    """Check field lookup on a table by name and by position."""
+    arrays = [pa.array(range(5)), pa.array([-10, -5, 0, 5, 10]),
+              pa.array(range(5, 10))]
+    table = pa.Table.from_arrays(arrays, names=('a', 'b', 'c'))
+    schema_field = table.schema.field('a')
+
+    assert table.field('a').equals(schema_field)
+    assert table.field(0).equals(schema_field)
+
+    # Unknown name.
+    with pytest.raises(KeyError):
+        table.field('d')
+    # Unsupported key type.
+    with pytest.raises(TypeError):
+        table.field(None)
+    # Position past the last column.
+    with pytest.raises(IndexError):
+        table.field(4)
+
+
+def test_table_select_column():
+    """Check column lookup on a table by name and by position."""
+    arrays = [pa.array(range(5)), pa.array([-10, -5, 0, 5, 10]),
+              pa.array(range(5, 10))]
+    table = pa.Table.from_arrays(arrays, names=('a', 'b', 'c'))
+
+    # Name and position address the same column.
+    assert table.column('a').equals(table.column(0))
+
+    # Unknown name.
+    missing = 'Field "d" does not exist in table schema'
+    with pytest.raises(KeyError, match=missing):
+        table.column('d')
+    # Unsupported key type.
+    with pytest.raises(TypeError):
+        table.column(None)
+    # Position past the last column.
+    with pytest.raises(IndexError):
+        table.column(4)
+
+
+def test_table_column_with_duplicates():
+    """ARROW-8209: looking up a duplicated column name is an error."""
+    values = [[1, 2, 3], [4, 5, 6], [7, 8, 9]]
+    arrays = [pa.array(column) for column in values]
+    # The name 'a' is used for the first and the last column.
+    table = pa.table(arrays, names=['a', 'b', 'a'])
+    ambiguous = 'Field "a" exists 2 times in table schema'
+    with pytest.raises(KeyError, match=ambiguous):
+        table.column('a')
+
+
+def test_table_add_column():
+    """Check add_column() at both ends and append_column()."""
+    arrays = [pa.array(range(5)), pa.array([-10, -5, 0, 5, 10]),
+              pa.array(range(5, 10))]
+    table = pa.Table.from_arrays(arrays, names=('a', 'b', 'c'))
+    extra = arrays[1]
+    extra_field = pa.field('d', extra.type)
+
+    # Adding at index 3 is the same as appending.
+    added_last = table.add_column(3, extra_field, extra)
+    appended = table.append_column(extra_field, extra)
+    wanted = pa.Table.from_arrays(arrays + [extra],
+                                  names=('a', 'b', 'c', 'd'))
+    assert added_last.equals(wanted)
+    assert appended.equals(wanted)
+
+    # Adding at index 0 puts the new column first.
+    added_first = table.add_column(0, extra_field, extra)
+    wanted = pa.Table.from_arrays([extra] + arrays,
+                                  names=('d', 'a', 'b', 'c'))
+    assert added_first.equals(wanted)
+
+
+def test_table_set_column():
+    """Check that set_column() replaces a column and its field."""
+    first = pa.array(range(5))
+    second = pa.array([-10, -5, 0, 5, 10])
+    third = pa.array(range(5, 10))
+    table = pa.Table.from_arrays([first, second, third],
+                                 names=('a', 'b', 'c'))
+
+    # Overwrite column 0 with the data of column 1 under the name 'd'.
+    replacement = pa.field('d', second.type)
+    updated = table.set_column(0, replacement, second)
+
+    # Only the first column and its name differ from the input.
+    wanted = pa.Table.from_arrays([second, second, third],
+                                  names=('d', 'b', 'c'))
+    assert updated.equals(wanted)
+
+
+def test_table_drop():
+    """Check that drop() removes the columns named by the given labels."""
+    arrays = [
+        pa.array(range(5)),
+        pa.array([-10, -5, 0, 5, 10]),
+        pa.array(range(5, 10)),
+    ]
+    table = pa.Table.from_arrays(arrays, names=('a', 'b', 'c'))
+    remaining = table.drop(['a', 'b'])
+    wanted = pa.Table.from_arrays(arrays[2:], names=('c',))
+    assert wanted.equals(remaining)
+
+    # A label that names no column raises KeyError.
+    with pytest.raises(KeyError, match="Column 'd' not found"):
+        table.drop(['d'])
+
+
+def test_table_remove_column():
+    """Check that remove_column() drops the column at the given index."""
+    first = pa.array(range(5))
+    kept = [pa.array([-10, -5, 0, 5, 10]), pa.array(range(5, 10))]
+    table = pa.Table.from_arrays([first] + kept, names=('a', 'b', 'c'))
+
+    trimmed = table.remove_column(0)
+    trimmed.validate()
+
+    # Columns 'b' and 'c' are all that is left.
+    wanted = pa.Table.from_arrays(kept, names=('b', 'c'))
+    assert trimmed.equals(wanted)
+
+
+def test_table_remove_column_empty():
+    """ARROW-1865: removing the only column keeps the row count."""
+    table = pa.Table.from_arrays([pa.array(range(5))], names=['a'])
+
+    # The table without columns still reports the original length.
+    no_columns = table.remove_column(0)
+    no_columns.validate()
+    assert len(no_columns) == len(table)
+
+    # Putting the column back restores the original table.
+    only_column = table[0]
+    restored = no_columns.add_column(0, table.field(0), only_column)
+    restored.validate()
+    assert restored.equals(table)
+
+
+def test_empty_table_with_names():
+    """ARROW-13784: names given without any arrays are rejected."""
+    # ``match`` is a regular expression, hence the bracketed parentheses.
+    pattern = (
+        'Length of names [(]2[)] does not match length of arrays [(]0[)]')
+    no_arrays = []
+    with pytest.raises(ValueError, match=pattern):
+        pa.Table.from_arrays(no_arrays, names=["a", "b"])
+
+
+def test_empty_table():
+    """Check that pa.table([]) builds a table without any column."""
+    empty = pa.table([])
+    assert empty.column_names == []
+    assert empty.equals(pa.Table.from_arrays([], []))
+
+
+def test_table_rename_columns():
+    """Check that rename_columns() changes the names but not the data."""
+    arrays = [pa.array(range(5)), pa.array([-10, -5, 0, 5, 10]),
+              pa.array(range(5, 10))]
+    old_names = ['a', 'b', 'c']
+    new_names = ['eh', 'bee', 'sea']
+
+    table = pa.Table.from_arrays(arrays, names=old_names)
+    assert table.column_names == old_names
+
+    renamed = table.rename_columns(new_names)
+    renamed.validate()
+    assert renamed.column_names == new_names
+    # Equal to a table built directly under the new names.
+    assert renamed.equals(pa.Table.from_arrays(arrays, names=new_names))
+
+
+def test_table_flatten():
+    """Check that flatten() unnests struct columns by one level."""
+    inner = pa.struct([pa.field('x', pa.int16()),
+                       pa.field('y', pa.float32())])
+    outer = pa.struct([pa.field('nest', inner)])
+    flags = pa.array([False, True], type=pa.bool_())
+    columns = [pa.array([(1, 2.5), (3, 4.5)], type=inner),
+               pa.array([((11, 12.5),), ((13, 14.5),)], type=outer), flags]
+    flat = pa.Table.from_arrays(columns, names=['a', 'b', 'c']).flatten()
+    flat.validate()
+    # Child columns are named "<parent>.<child>"; 'b.nest' stays a struct.
+    wanted = pa.Table.from_arrays([
+        pa.array([1, 3], type=pa.int16()),
+        pa.array([2.5, 4.5], type=pa.float32()),
+        pa.array([(11, 12.5), (13, 14.5)], type=inner),
+        flags,
+    ], names=['a.x', 'a.y', 'b.nest', 'c'])
+    assert flat.equals(wanted)
+
+
+def test_table_combine_chunks():
+    """Check that ``combine_chunks`` leaves one chunk per column."""
+    names = ['f1', 'f2']
+    batches = [
+        pa.record_batch([pa.array([1]), pa.array(["a"])], names=names),
+        pa.record_batch([pa.array([2]), pa.array(["b"])], names=names),
+    ]
+    chunked = pa.Table.from_batches(batches)
+
+    combined = chunked.combine_chunks()
+    combined.validate()
+    # Merging the chunks must not change the table's contents.
+    assert combined.equals(chunked)
+    assert all(column.num_chunks == 1 for column in combined.columns)
+
+
+def test_table_unify_dictionaries():
+    """Check ``Table.unify_dictionaries`` on a table made of two batches.
+
+    Columns 'a' and 'b' are dictionary-encoded separately in each batch,
+    while column 'c' is a plain boolean column.
+    """
+    batch1 = pa.record_batch([
+        pa.array(["foo", "bar", None, "foo"]).dictionary_encode(),
+        pa.array([123, 456, 456, 789]).dictionary_encode(),
+        pa.array([True, False, None, None])], names=['a', 'b', 'c'])
+    batch2 = pa.record_batch([
+        pa.array(["quux", "foo", None, "quux"]).dictionary_encode(),
+        pa.array([456, 789, 789, None]).dictionary_encode(),
+        pa.array([False, None, None, True])], names=['a', 'b', 'c'])
+
+    table = pa.Table.from_batches([batch1, batch2])
+    table = table.replace_schema_metadata({b"key1": b"value1"})
+    # Before unification each chunk carries the dictionary of its own batch.
+    assert table.column(0).chunk(0).dictionary.equals(
+        pa.array(["foo", "bar"]))
+    assert table.column(0).chunk(1).dictionary.equals(
+        pa.array(["quux", "foo"]))
+    assert table.column(1).chunk(0).dictionary.equals(
+        pa.array([123, 456, 789]))
+    assert table.column(1).chunk(1).dictionary.equals(
+        pa.array([456, 789]))
+
+    table = table.unify_dictionaries(pa.default_memory_pool())
+    # After unification both chunks of a column share one dictionary.
+    expected_dict_0 = pa.array(["foo", "bar", "quux"])
+    expected_dict_1 = pa.array([123, 456, 789])
+    assert table.column(0).chunk(0).dictionary.equals(expected_dict_0)
+    assert table.column(0).chunk(1).dictionary.equals(expected_dict_0)
+    assert table.column(1).chunk(0).dictionary.equals(expected_dict_1)
+    assert table.column(1).chunk(1).dictionary.equals(expected_dict_1)
+
+    # The decoded values are the two batches' values in order ...
+    assert table.to_pydict() == {
+        'a': ["foo", "bar", None, "foo", "quux", "foo", None, "quux"],
+        'b': [123, 456, 456, 789, 456, 789, 789, None],
+        'c': [True, False, None, None, False, None, None, True],
+    }
+    # ... and the schema metadata set earlier is still in place.
+    assert table.schema.metadata == {b"key1": b"value1"}
+
+
+def test_concat_tables():
+    """Check that ``concat_tables`` appends the rows of the second table."""
+    names = ('a', 'b')
+    head_values = [
+        list(range(5)),
+        [-10., -5., 0., 5., 10.]
+    ]
+    tail_values = [
+        list(range(5, 10)),
+        [1., 2., 3., 4., 5.]
+    ]
+
+    def build(columns):
+        # One Arrow array per Python list, under the shared column names.
+        return pa.Table.from_arrays([pa.array(col) for col in columns],
+                                    names=names)
+
+    head = build(head_values)
+    tail = build(tail_values)
+
+    joined = pa.concat_tables([head, tail])
+    joined.validate()
+    assert len(joined) == 10
+
+    # Column-wise list concatenation gives the expected contents.
+    expected = build([first + second
+                      for first, second in zip(head_values, tail_values)])
+
+    assert joined.equals(expected)
+
+
+def test_concat_tables_none_table():
+    """Check that a ``None`` entry raises ``AttributeError`` (ARROW-11997)."""
+    not_tables = [None]
+    with pytest.raises(AttributeError):
+        pa.concat_tables(not_tables)
+
+
+@pytest.mark.pandas
+def test_concat_tables_with_different_schema_metadata():
+    """Check concatenation of tables whose schemas differ only in metadata.
+
+    Both tables come from pandas with the same explicit schema; their
+    schemas compare equal unless metadata is taken into account.
+    """
+    import pandas as pd
+
+    letters = list('abcdefgh')
+    string_schema = pa.schema([
+        pa.field('a', pa.string()),
+        pa.field('b', pa.string()),
+    ])
+
+    frame_filled = pd.DataFrame({'a': letters, 'b': letters})
+    frame_nan = pd.DataFrame({'a': [np.nan] * 8, 'b': letters})
+
+    filled = pa.Table.from_pandas(
+        frame_filled, schema=string_schema, preserve_index=False)
+    with_nan = pa.Table.from_pandas(
+        frame_nan, schema=string_schema, preserve_index=False)
+    assert filled.schema.equals(with_nan.schema)
+    assert not filled.schema.equals(with_nan.schema, check_metadata=True)
+
+    combined = pa.concat_tables([filled, with_nan])
+    # The result matches the first table's schema, metadata included.
+    assert filled.schema.equals(combined.schema, check_metadata=True)
+    assert with_nan.schema.equals(combined.schema)
+
+
+def test_concat_tables_with_promotion():
+    """Check that ``promote=True`` null-fills fields a table lacks."""
+    ints = pa.Table.from_arrays(
+        [pa.array([1, 2], type=pa.int64())], ["int64_field"])
+    floats = pa.Table.from_arrays(
+        [pa.array([1.0, 2.0], type=pa.float32())], ["float_field"])
+
+    promoted = pa.concat_tables([ints, floats], promote=True)
+
+    # Each input contributes nulls for the field it does not have.
+    expected = pa.Table.from_arrays([
+        pa.array([1, 2, None, None], type=pa.int64()),
+        pa.array([None, None, 1.0, 2.0], type=pa.float32()),
+    ], ["int64_field", "float_field"])
+    assert promoted.equals(expected)
+
+
+def test_concat_tables_with_promotion_error():
+    """Check that promotion fails when field 'f' has two different types."""
+    clashing = [
+        pa.Table.from_arrays([pa.array([1, 2], type=dtype)], ["f"])
+        for dtype in (pa.int64(), pa.float32())
+    ]
+
+    with pytest.raises(pa.ArrowInvalid):
+        pa.concat_tables(clashing, promote=True)
+
+
+def test_table_negative_indexing():
+    """Check that negative column indices count back from the last column."""
+    columns = [
+        pa.array(range(5)),
+        pa.array([-10, -5, 0, 5, 10]),
+        pa.array([1.0, 2.0, 3.0, 4.0, 5.0]),
+        pa.array(['ab', 'bc', 'cd', 'de', 'ef']),
+    ]
+    table = pa.Table.from_arrays(columns, names=tuple('abcd'))
+    width = len(columns)
+
+    # -1 is the last column, -width the first one.
+    for back in range(1, width + 1):
+        assert table[-back].equals(table[width - back])
+
+    # One step past either end is out of range.
+    for position in (-width - 1, width):
+        with pytest.raises(IndexError):
+            table[position]
+
+
+def test_table_cast_to_incompatible_schema():
+    """Check that ``cast`` rejects schemas whose field names do not match."""
+    table = pa.Table.from_arrays(
+        [pa.array(range(5)), pa.array([-10, -5, 0, 5, 10])],
+        names=tuple('ab'))
+
+    # The first field is spelled 'A' instead of 'a'.
+    renamed_field = pa.schema([
+        pa.field('A', pa.int32()),
+        pa.field('b', pa.int16()),
+    ])
+    # Field 'b' is absent.
+    missing_field = pa.schema([
+        pa.field('a', pa.int32()),
+    ])
+
+    message = ("Target schema's field names are not matching the table's "
+               "field names:.*")
+    for bad_schema in (renamed_field, missing_field):
+        with pytest.raises(ValueError, match=message):
+            table.cast(bad_schema)
+
+
+def test_table_safe_casting():
+    """Check a default ``cast`` of every column to a new target schema.
+
+    The float column only holds whole numbers, so casting it to int64
+    is expected to succeed without ``safe=False``.
+    """
+    names = tuple('abcd')
+    words = ['ab', 'bc', 'cd', 'de', 'ef']
+
+    source = pa.Table.from_arrays([
+        pa.array(range(5), type=pa.int64()),
+        pa.array([-10, -5, 0, 5, 10], type=pa.int32()),
+        pa.array([1.0, 2.0, 3.0, 4.0, 5.0], type=pa.float64()),
+        pa.array(words, type=pa.string())
+    ], names=names)
+
+    expected = pa.Table.from_arrays([
+        pa.array(range(5), type=pa.int32()),
+        pa.array([-10, -5, 0, 5, 10], type=pa.int16()),
+        pa.array([1, 2, 3, 4, 5], type=pa.int64()),
+        pa.array(words, type=pa.string())
+    ], names=names)
+
+    target_types = (pa.int32(), pa.int16(), pa.int64(), pa.string())
+    target_schema = pa.schema(
+        [pa.field(name, dtype) for name, dtype in zip(names, target_types)])
+
+    assert source.cast(target_schema).equals(expected)
+
+
+def test_table_unsafe_casting():
+    """Check that a truncating cast only goes through with ``safe=False``.
+
+    Column 'c' holds fractional floats, so the default cast to int64 is
+    expected to raise with a message mentioning truncation.
+    """
+    names = tuple('abcd')
+    words = ['ab', 'bc', 'cd', 'de', 'ef']
+
+    source = pa.Table.from_arrays([
+        pa.array(range(5), type=pa.int64()),
+        pa.array([-10, -5, 0, 5, 10], type=pa.int32()),
+        pa.array([1.1, 2.2, 3.3, 4.4, 5.5], type=pa.float64()),
+        pa.array(words, type=pa.string())
+    ], names=names)
+
+    truncated = pa.Table.from_arrays([
+        pa.array(range(5), type=pa.int32()),
+        pa.array([-10, -5, 0, 5, 10], type=pa.int16()),
+        pa.array([1, 2, 3, 4, 5], type=pa.int64()),
+        pa.array(words, type=pa.string())
+    ], names=names)
+
+    target_types = (pa.int32(), pa.int16(), pa.int64(), pa.string())
+    target_schema = pa.schema(
+        [pa.field(name, dtype) for name, dtype in zip(names, target_types)])
+
+    with pytest.raises(pa.ArrowInvalid, match='truncated'):
+        source.cast(target_schema)
+
+    assert source.cast(target_schema, safe=False).equals(truncated)
+
+
+def test_invalid_table_construct():
+    """Check that columns of unequal length cannot form a table."""
+    values = np.array([0, 1], dtype=np.uint8)
+    full = pa.array(values, type=pa.uint8())
+    # One element shorter than `full`.
+    shorter = pa.array(values[1:], type=pa.uint8())
+
+    with pytest.raises(pa.lib.ArrowInvalid):
+        pa.Table.from_arrays([full, shorter], names=["a1", "a2"])
+
+
+@pytest.mark.parametrize('data, klass', [
+    ((['', 'foo', 'bar'], [4.5, 5, None]), list),
+    ((['', 'foo', 'bar'], [4.5, 5, None]), pa.array),
+    (([[''], ['foo', 'bar']], [[4.5], [5., None]]), pa.chunked_array),
+])
+def test_from_arrays_schema(data, klass):
+    """Check ``Table.from_arrays`` when an explicit schema is passed.
+
+    ``klass`` turns the two raw value sequences into plain lists, arrays
+    or chunked arrays before they are handed over as columns.
+    """
+    data = [klass(data[0]), klass(data[1])]
+    schema = pa.schema([('strs', pa.utf8()), ('floats', pa.float32())])
+
+    table = pa.Table.from_arrays(data, schema=schema)
+    assert table.num_columns == 2
+    assert table.num_rows == 3
+    assert table.schema == schema
+
+    # length of data and schema not matching
+    schema = pa.schema([('strs', pa.utf8())])
+    with pytest.raises(ValueError):
+        pa.Table.from_arrays(data, schema=schema)
+
+    # with different but compatible schema
+    # NOTE(review): this is the same schema as the one used at the top of
+    # the test, so the assertions below repeat the first case.
+    schema = pa.schema([('strs', pa.utf8()), ('floats', pa.float32())])
+    table = pa.Table.from_arrays(data, schema=schema)
+    assert pa.types.is_float32(table.column('floats').type)
+    assert table.num_columns == 2
+    assert table.num_rows == 3
+    assert table.schema == schema
+
+    # with different and incompatible schema
+    # NOTE(review): `data` is a list here, yet this calls from_pydict
+    # rather than from_arrays -- possibly a copy-paste slip; confirm which
+    # constructor this case is meant to exercise.
+    schema = pa.schema([('strs', pa.utf8()), ('floats', pa.timestamp('s'))])
+    with pytest.raises((NotImplementedError, TypeError)):
+        pa.Table.from_pydict(data, schema=schema)
+
+    # Cannot pass both schema and metadata / names
+    with pytest.raises(ValueError):
+        pa.Table.from_arrays(data, schema=schema, names=['strs', 'floats'])
+
+    with pytest.raises(ValueError):
+        pa.Table.from_arrays(data, schema=schema, metadata={b'foo': b'bar'})
+
+
+@pytest.mark.parametrize(
+    ('cls'),
+    [
+        (pa.Table),
+        (pa.RecordBatch)
+    ]
+)
+def test_table_from_pydict(cls):
+    """Check ``from_pydict`` on both ``pa.Table`` and ``pa.RecordBatch``.
+
+    Covers an empty mapping, schema inference from Python lists, the
+    ``metadata`` and ``schema`` arguments, and the error cases.
+    """
+    # An empty mapping gives an object with no columns and no rows.
+    table = cls.from_pydict({})
+    assert table.num_columns == 0
+    assert table.num_rows == 0
+    assert table.schema == pa.schema([])
+    assert table.to_pydict() == {}
+
+    schema = pa.schema([('strs', pa.utf8()), ('floats', pa.float64())])
+
+    # With lists as values
+    data = OrderedDict([('strs', ['', 'foo', 'bar']),
+                        ('floats', [4.5, 5, None])])
+    table = cls.from_pydict(data)
+    assert table.num_columns == 2
+    assert table.num_rows == 3
+    assert table.schema == schema
+    assert table.to_pydict() == data
+
+    # With metadata and inferred schema
+    metadata = {b'foo': b'bar'}
+    # From here on `schema` carries the metadata as well.
+    schema = schema.with_metadata(metadata)
+    table = cls.from_pydict(data, metadata=metadata)
+    assert table.schema == schema
+    assert table.schema.metadata == metadata
+    assert table.to_pydict() == data
+
+    # With explicit schema
+    table = cls.from_pydict(data, schema=schema)
+    assert table.schema == schema
+    assert table.schema.metadata == metadata
+    assert table.to_pydict() == data
+
+    # Cannot pass both schema and metadata
+    with pytest.raises(ValueError):
+        cls.from_pydict(data, schema=schema, metadata=metadata)
+
+    # Non-convertible values given schema
+    with pytest.raises(TypeError):
+        cls.from_pydict({'c0': [0, 1, 2]},
+                        schema=pa.schema([("c0", pa.string())]))
+
+    # Missing schema fields from the passed mapping
+    # (the mapping has 'a' and 'b'; the schema asks for 'a', 'c' and 'd')
+    with pytest.raises(KeyError, match="doesn\'t contain.* c, d"):
+        cls.from_pydict(
+            {'a': [1, 2, 3], 'b': [3, 4, 5]},
+            schema=pa.schema([
+                ('a', pa.int64()),
+                ('c', pa.int32()),
+                ('d', pa.int16())
+            ])
+        )
+
+    # Passed wrong schema type
+    with pytest.raises(TypeError):
+        cls.from_pydict({'a': [1, 2, 3]}, schema={})
+
+
+@pytest.mark.parametrize('data, klass', [
+    ((['', 'foo', 'bar'], [4.5, 5, None]), pa.array),
+    (([[''], ['foo', 'bar']], [[4.5], [5., None]]), pa.chunked_array),
+])
+def test_table_from_pydict_arrow_arrays(data, klass):
+    """Check ``Table.from_pydict`` when the mapping values are Arrow data.
+
+    ``klass`` wraps the raw values as either arrays or chunked arrays.
+    """
+    strs, floats = data
+    columns = OrderedDict([('strs', klass(strs)), ('floats', klass(floats))])
+    inferred = pa.schema([('strs', pa.utf8()), ('floats', pa.float64())])
+
+    # First without a schema, then with the matching schema passed in.
+    for kwargs in ({}, {'schema': inferred}):
+        table = pa.Table.from_pydict(columns, **kwargs)
+        assert table.num_columns == 2
+        assert table.num_rows == 3
+        assert table.schema == inferred
+
+    # Compatible schema: the floats column comes out as float32.
+    narrowed = pa.schema([('strs', pa.utf8()), ('floats', pa.float32())])
+    table = pa.Table.from_pydict(columns, schema=narrowed)
+    assert pa.types.is_float32(table.column('floats').type)
+    assert table.num_columns == 2
+    assert table.num_rows == 3
+    assert table.schema == narrowed
+
+    # Incompatible schema: floats requested as timestamps.
+    clashing = pa.schema(
+        [('strs', pa.utf8()), ('floats', pa.timestamp('s'))])
+    with pytest.raises((NotImplementedError, TypeError)):
+        pa.Table.from_pydict(columns, schema=clashing)
+
+
+@pytest.mark.parametrize('data, klass', [
+    ((['', 'foo', 'bar'], [4.5, 5, None]), list),
+    ((['', 'foo', 'bar'], [4.5, 5, None]), pa.array),
+    (([[''], ['foo', 'bar']], [[4.5], [5., None]]), pa.chunked_array),
+])
+def test_table_from_pydict_schema(data, klass):
+    """Check that the schema decides which columns ``from_pydict`` uses."""
+    strs, floats = data
+    columns = OrderedDict([('strs', klass(strs)), ('floats', klass(floats))])
+
+    # A schema field without a matching key is an error.
+    extra_field = pa.schema([('strs', pa.utf8()), ('floats', pa.float64()),
+                             ('ints', pa.int64())])
+    with pytest.raises(KeyError, match='ints'):
+        pa.Table.from_pydict(columns, schema=extra_field)
+
+    # A key without a matching schema field is left out of the table.
+    strs_only = pa.schema([('strs', pa.utf8())])
+    table = pa.Table.from_pydict(columns, schema=strs_only)
+    assert table.num_columns == 1
+    assert table.schema == strs_only
+    assert table.column_names == ['strs']
+
+
+@pytest.mark.pandas
+def test_table_from_pandas_schema():
+    """Check that the schema decides which columns ``from_pandas`` uses."""
+    import pandas as pd
+
+    frame = pd.DataFrame(OrderedDict([('strs', ['', 'foo', 'bar']),
+                                      ('floats', [4.5, 5, None])]))
+
+    # Compatible schema: the floats column comes out as float32.
+    narrowed = pa.schema([('strs', pa.utf8()), ('floats', pa.float32())])
+    table = pa.Table.from_pandas(frame, schema=narrowed)
+    assert pa.types.is_float32(table.column('floats').type)
+    assert table.schema.remove_metadata() == narrowed
+
+    # Incompatible schema: floats requested as timestamps.
+    clashing = pa.schema(
+        [('strs', pa.utf8()), ('floats', pa.timestamp('s'))])
+    with pytest.raises((NotImplementedError, TypeError)):
+        pa.Table.from_pandas(frame, schema=clashing)
+
+    # A schema field without a matching DataFrame column is an error.
+    extra_field = pa.schema([('strs', pa.utf8()), ('floats', pa.float64()),
+                             ('ints', pa.int64())])
+    with pytest.raises(KeyError, match='ints'):
+        pa.Table.from_pandas(frame, schema=extra_field)
+
+    # A DataFrame column without a schema field is left out of the table.
+    strs_only = pa.schema([('strs', pa.utf8())])
+    table = pa.Table.from_pandas(frame, schema=strs_only)
+    assert table.num_columns == 1
+    assert table.schema.remove_metadata() == strs_only
+    assert table.column_names == ['strs']
+
+
+@pytest.mark.pandas
+def test_table_factory_function():
+    """Check that ``pa.table`` matches the explicit ``Table.from_*`` calls."""
+    import pandas as pd
+
+    # Keys deliberately in the opposite order to the schema fields.
+    plain = OrderedDict([('b', ['a', 'b', 'c']), ('a', [1, 2, 3])])
+    typed = {'b': pa.array(['a', 'b', 'c'], type='string'),
+             'a': pa.array([1, 2, 3], type='int32')}
+    schema = pa.schema([('a', pa.int32()), ('b', pa.string())])
+
+    # pandas input, without and with a schema.
+    frame = pd.DataFrame(plain)
+    assert pa.table(frame).equals(pa.Table.from_pandas(frame))
+    assert pa.table(frame, schema=schema).equals(
+        pa.Table.from_pandas(frame, schema=schema))
+
+    # Mapping of Arrow arrays.
+    assert pa.table(typed).equals(pa.Table.from_pydict(typed))
+
+    # Mapping of Python lists, with the schema supplying the types.
+    assert pa.table(plain, schema=schema).equals(
+        pa.Table.from_pydict(plain, schema=schema))
+
+
+def test_table_factory_function_args():
+    """Check how ``pa.table`` treats positional and keyword arguments."""
+    # from_pydict does not accept names.
+    with pytest.raises(ValueError):
+        pa.table({'a': [1, 2, 3]}, names=['a'])
+
+    # Backwards compatibility: schema as the positional argument after
+    # the data.
+    int32_schema = pa.schema([('a', pa.int32())])
+    from_mapping = pa.table(
+        {'a': pa.array([1, 2, 3], type=pa.int64())}, int32_schema)
+    assert from_mapping.column('a').type == pa.int32()
+
+    # from_arrays: that positional argument may be names or a schema.
+    arrays = [pa.array([1, 2, 3], type='int64')]
+    expected_names = ['a']
+    from_names = pa.table(arrays, expected_names)
+    assert from_names.column_names == expected_names
+
+    int64_schema = pa.schema([('a', pa.int64())])
+    from_schema = pa.table(arrays, int64_schema)
+    assert from_schema.column_names == expected_names
+
+
+@pytest.mark.pandas
+def test_table_factory_function_args_pandas():
+    """Check which ``pa.table`` arguments are allowed for a DataFrame."""
+    import pandas as pd
+
+    def make_frame():
+        # A fresh single-column DataFrame for every call.
+        return pd.DataFrame({'a': [1, 2, 3]})
+
+    # from_pandas accepts neither names nor metadata.
+    for rejected in ({'names': ['a']}, {'metadata': {b'foo': b'bar'}}):
+        with pytest.raises(ValueError):
+            pa.table(make_frame(), **rejected)
+
+    # Backwards compatibility: schema as the positional argument after
+    # the data.
+    int32_schema = pa.schema([('a', pa.int32())])
+    table = pa.table(make_frame(), int32_schema)
+    assert table.column('a').type == pa.int32()
+
+
+def test_factory_functions_invalid_input():
+    """Check the ``TypeError`` the factory functions raise for a string."""
+    bad_input = "invalid input"
+
+    with pytest.raises(TypeError, match="Expected pandas DataFrame, python"):
+        pa.table(bad_input)
+
+    with pytest.raises(TypeError, match="Expected pandas DataFrame"):
+        pa.record_batch(bad_input)
+
+
+def test_table_repr_to_string():
+    """Check the exact text of ``str(table)`` and ``Table.to_string``.
+
+    The table has two short integer columns plus field-level and
+    schema-level metadata.
+    """
+    # Schema passed explicitly
+    schema = pa.schema([pa.field('c0', pa.int16(),
+                                 metadata={'key': 'value'}),
+                        pa.field('c1', pa.int32())],
+                       metadata={b'foo': b'bar'})
+
+    tab = pa.table([pa.array([1, 2, 3, 4], type='int16'),
+                    pa.array([10, 20, 30, 40], type='int32')], schema=schema)
+    # str(): the column types, a '----' line, then the column values.
+    assert str(tab) == """pyarrow.Table
+c0: int16
+c1: int32
+----
+c0: [[1,2,3,4]]
+c1: [[10,20,30,40]]"""
+
+    # show_metadata=True: the expected text lists field and schema
+    # metadata and contains no column values.
+    assert tab.to_string(show_metadata=True) == """\
+pyarrow.Table
+c0: int16
+  -- field metadata --
+  key: 'value'
+c1: int32
+-- schema metadata --
+foo: 'bar'"""
+
+    # preview_cols=5 with only two columns: both columns are shown.
+    assert tab.to_string(preview_cols=5) == """\
+pyarrow.Table
+c0: int16
+c1: int32
+----
+c0: [[1,2,3,4]]
+c1: [[10,20,30,40]]"""
+
+    # preview_cols=1: only the first column is shown, followed by '...'.
+    assert tab.to_string(preview_cols=1) == """\
+pyarrow.Table
+c0: int16
+c1: int32
+----
+c0: [[1,2,3,4]]
+..."""
+
+
+def test_table_repr_to_string_ellipsis():
+    """Check that ``str(table)`` abbreviates long columns with ``...``.
+
+    Each column holds 40 values; the expected text shows the first ten
+    and the last ten around an ellipsis.
+    """
+    # Schema passed explicitly
+    schema = pa.schema([pa.field('c0', pa.int16(),
+                                 metadata={'key': 'value'}),
+                        pa.field('c1', pa.int32())],
+                       metadata={b'foo': b'bar'})
+
+    # The four-value pattern repeated ten times gives 40 values per column.
+    tab = pa.table([pa.array([1, 2, 3, 4]*10, type='int16'),
+                    pa.array([10, 20, 30, 40]*10, type='int32')],
+                   schema=schema)
+    assert str(tab) == """pyarrow.Table
+c0: int16
+c1: int32
+----
+c0: [[1,2,3,4,1,2,3,4,1,2,...,3,4,1,2,3,4,1,2,3,4]]
+c1: [[10,20,30,40,10,20,30,40,10,20,...,30,40,10,20,30,40,10,20,30,40]]"""
+
+
+def test_table_function_unicode_schema():
+    """Check ``pa.table`` with non-ASCII column names and a schema."""
+    name_a = "äääh"
+    name_b = "öööf"
+
+    # Keys deliberately in the opposite order to the schema fields.
+    columns = OrderedDict([(name_b, ['a', 'b', 'c']), (name_a, [1, 2, 3])])
+    schema = pa.schema([(name_a, pa.int32()), (name_b, pa.string())])
+
+    table = pa.table(columns, schema=schema)
+
+    # Columns come out in schema order, not in mapping order.
+    first_chunk = table[0].chunk(0)
+    assert first_chunk.equals(pa.array([1, 2, 3], type='int32'))
+    second_chunk = table[1].chunk(0)
+    assert second_chunk.equals(pa.array(['a', 'b', 'c'], type='string'))
+
+
+def test_table_take_vanilla_functionality():
+    """Check that taking consecutive rows matches the equivalent slice."""
+    numbers = pa.array([1, 2, 3, None, 5])
+    letters = pa.array(['a', 'b', 'c', 'd', 'e'])
+    table = pa.table([numbers, letters], ['f1', 'f2'])
+
+    taken = table.take(pa.array([2, 3]))
+    assert taken.equals(table.slice(2, 2))
+
+
+def test_table_take_null_index():
+    """Check that a null index produces a null in every column."""
+    numbers = pa.array([1, 2, 3, None, 5])
+    letters = pa.array(['a', 'b', 'c', 'd', 'e'])
+    table = pa.table([numbers, letters], ['f1', 'f2'])
+
+    # Row 0 followed by an all-null row.
+    expected = pa.table(
+        [pa.array([1, None]), pa.array(['a', None])],
+        ['f1', 'f2'])
+
+    taken = table.take(pa.array([0, None]))
+    assert taken.equals(expected)
+
+
+def test_table_take_non_consecutive():
+    """Check ``take`` with row indices that are not adjacent."""
+    numbers = pa.array([1, 2, 3, None, 5])
+    letters = pa.array(['a', 'b', 'c', 'd', 'e'])
+    table = pa.table([numbers, letters], ['f1', 'f2'])
+
+    # Rows 1 and 3 of the source; row 3 has a null in 'f1'.
+    expected = pa.table(
+        [pa.array([2, None]), pa.array(['b', 'd'])],
+        ['f1', 'f2'])
+
+    taken = table.take(pa.array([1, 3]))
+    assert taken.equals(expected)
+
+
+def test_table_select():
+    """Check ``Table.select`` with names, positions, duplicates and errors."""
+    ints = pa.array([1, 2, 3, None, 5])
+    strs = pa.array(['a', 'b', 'c', 'd', 'e'])
+    lists = pa.array([[1, 2], [3, 4], [5, 6], None, [9, 10]])
+    table = pa.table([ints, strs, lists], ['f1', 'f2', 'f3'])
+
+    # Selecting by name.
+    by_name = table.select(['f1'])
+    assert by_name.equals(pa.table([ints], ['f1']))
+
+    by_name = table.select(['f3', 'f2'])
+    assert by_name.equals(pa.table([lists, strs], ['f3', 'f2']))
+
+    # Selecting by integer position.
+    by_position = table.select([0])
+    assert by_position.equals(pa.table([ints], ['f1']))
+
+    by_position = table.select([2, 1])
+    assert by_position.equals(pa.table([lists, strs], ['f3', 'f2']))
+
+    # Schema metadata is kept on the selection.
+    with_metadata = table.replace_schema_metadata({"a": "test"})
+    selection = with_metadata.select(["f1", "f2"])
+    assert b"a" in selection.schema.metadata
+
+    # Unknown names and out-of-range positions raise.
+    with pytest.raises(KeyError, match='Field "f5" does not exist'):
+        table.select(['f5'])
+
+    with pytest.raises(IndexError, match="index out of bounds"):
+        table.select([5])
+
+    # Selecting a name twice gives the column twice, under the same name.
+    twice = table.select(['f2', 'f2'])
+    assert twice.equals(pa.table([strs, strs], ['f2', 'f2']))
+
+    # A name carried by two columns cannot be selected by name ...
+    ambiguous = pa.table([ints, strs, lists], ['f1', 'f2', 'f1'])
+    with pytest.raises(KeyError, match='Field "f1" exists 2 times'):
+        ambiguous.select(['f1'])
+
+    # ... while the unique name in the same table still can.
+    unique = ambiguous.select(['f2'])
+    assert unique.equals(pa.table([strs], ['f2']))