summaryrefslogtreecommitdiffstats
path: root/src/arrow/python/pyarrow/builder.pxi
blob: a34ea5412e14a8e5e7433a8b640f31fd41be1107 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.


cdef class StringBuilder(_Weakrefable):
    """
    Builder class for UTF8 strings.

    This class exposes facilities for incrementally adding string values and
    building the null bitmap for a pyarrow.Array (type='string').

    Values are accumulated in an underlying C++ ``CStringBuilder`` owned by
    this object; call :meth:`finish` to obtain the resulting array.
    """
    cdef:
        # Owning pointer to the C++ builder that stores the appended values.
        unique_ptr[CStringBuilder] builder

    def __cinit__(self, MemoryPool memory_pool=None):
        """
        Create the underlying C++ string builder.

        Parameters
        ----------
        memory_pool : MemoryPool, default None
            Pool handed to the C++ builder after being converted by
            ``maybe_unbox_memory_pool``.
        """
        # NOTE(review): presumably a default pool is substituted when
        # memory_pool is None -- confirm in maybe_unbox_memory_pool.
        cdef CMemoryPool* pool = maybe_unbox_memory_pool(memory_pool)
        self.builder.reset(new CStringBuilder(pool))

    def append(self, value):
        """
        Append a single value to the builder.

        The value can either be a string/bytes object or a null value
        (None or any floating-point NaN, such as np.nan or float('nan')).

        Parameters
        ----------
        value : string/bytes or np.nan/None
            The value to append to the string array builder.

        Raises
        ------
        TypeError
            If the value is neither a string/bytes object nor a null value.
        """
        # NOTE(review): the results of AppendNull()/Append() are discarded
        # here; if they return a status object, failures would go
        # unnoticed -- confirm against the C++ declarations.
        if value is None or value is np.nan:
            # Cheap identity check for the two most common null markers.
            self.builder.get().AppendNull()
        elif isinstance(value, (bytes, str)):
            self.builder.get().Append(tobytes(value))
        elif isinstance(value, (float, np.floating)) and value != value:
            # NaN is the only float that compares unequal to itself. An
            # identity test against np.nan misses NaN objects created
            # elsewhere (float('nan'), values read out of NumPy arrays, ...),
            # so those are recognised here and stored as nulls too.
            self.builder.get().AppendNull()
        else:
            raise TypeError('StringBuilder only accepts string objects')

    def append_values(self, values):
        """
        Append all the values from an iterable.

        Each element is passed to :meth:`append`, so the same value rules
        and the same TypeError apply.

        Parameters
        ----------
        values : iterable of string/bytes or np.nan/None values
            The values to append to the string array builder.
        """
        for value in values:
            self.append(value)

    def finish(self):
        """
        Return result of builder as an Array object; also resets the builder.

        Returns
        -------
        array : pyarrow.Array
        """
        cdef shared_ptr[CArray] out
        # The GIL is released while the C++ builder produces the array.
        # NOTE(review): the result of Finish() is discarded; confirm whether
        # it reports errors that should be surfaced to the caller.
        with nogil:
            self.builder.get().Finish(&out)
        return pyarrow_wrap_array(out)

    @property
    def null_count(self):
        """
        The null count reported by the underlying builder.
        """
        return self.builder.get().null_count()

    def __len__(self):
        """
        Return the length reported by the underlying builder.
        """
        return self.builder.get().length()