src/arrow/python/pyarrow/memory.pxi


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249

# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

# cython: profile=False
# distutils: language = c++
# cython: embedsignature = True


cdef class MemoryPool(_Weakrefable):
    """
    Base class for memory allocation.

    Besides tracking its number of allocated bytes, a memory pool also
    takes care of the required 64-byte alignment for Arrow data.
    """

    def __init__(self):
        # Instances are built by the module-level factory functions through
        # __new__ followed by init(); direct construction is rejected.
        raise TypeError("Do not call {}'s constructor directly, "
                        "use pyarrow.*_memory_pool instead."
                        .format(self.__class__.__name__))

    cdef void init(self, CMemoryPool* pool):
        # Store the raw C++ pool pointer that every method forwards to.
        self.pool = pool

    def release_unused(self):
        """
        Attempt to return to the OS any memory being held onto by the pool.

        This function should not be called except potentially for
        benchmarking or debugging as it could be expensive and detrimental to
        performance.

        This is best effort and may not have any effect on some memory pools
        or in some situations (e.g. fragmentation).
        """
        # Act on the pool wrapped by *this* instance, consistently with
        # bytes_allocated() / max_memory(), rather than on whatever the
        # process-wide default pool currently is.  The pointer is copied to
        # a C local so that no attribute access on `self` happens while the
        # GIL is released.
        cdef CMemoryPool* pool = self.pool
        with nogil:
            pool.ReleaseUnused()

    def bytes_allocated(self):
        """
        Return the number of bytes that are currently allocated from this
        memory pool.
        """
        return self.pool.bytes_allocated()

    def max_memory(self):
        """
        Return the peak memory allocation in this memory pool.
        This can be an approximate number in multi-threaded applications.

        None is returned if the pool implementation doesn't know how to
        compute this number.
        """
        ret = self.pool.max_memory()
        # A negative value from the C++ pool is mapped to None ("unknown").
        return ret if ret >= 0 else None

    @property
    def backend_name(self):
        """
        The name of the backend used by this MemoryPool (e.g. "jemalloc").
        """
        return frombytes(self.pool.backend_name())


cdef CMemoryPool* maybe_unbox_memory_pool(MemoryPool memory_pool):
    # Turn an optional Python-level MemoryPool into a C++ pool pointer:
    # a real wrapper yields its stored pointer, None falls back to the
    # pool returned by c_get_memory_pool().
    if memory_pool is not None:
        return memory_pool.pool
    return c_get_memory_pool()


cdef class LoggingMemoryPool(MemoryPool):
    """
    MemoryPool wrapper that holds a C++ CLoggingMemoryPool.

    Instances cannot be constructed directly; use
    pyarrow.logging_memory_pool instead.
    """
    # C++ logging pool held by this wrapper through a unique_ptr.
    cdef unique_ptr[CLoggingMemoryPool] logging_pool

    def __init__(self):
        cls_name = self.__class__.__name__
        message = ("Do not call {}'s constructor directly, "
                   "use pyarrow.logging_memory_pool instead.")
        raise TypeError(message.format(cls_name))


cdef class ProxyMemoryPool(MemoryPool):
    """
    Memory pool implementation that redirects to another memory pool
    while tracking, for its own direct calls, the number of bytes and
    the maximum memory allocated.

    Instances cannot be constructed directly; use
    pyarrow.proxy_memory_pool instead.
    """
    # C++ proxy pool held by this wrapper through a unique_ptr.
    cdef unique_ptr[CProxyMemoryPool] proxy_pool

    def __init__(self):
        cls_name = self.__class__.__name__
        message = ("Do not call {}'s constructor directly, "
                   "use pyarrow.proxy_memory_pool instead.")
        raise TypeError(message.format(cls_name))


def default_memory_pool():
    """
    Return the process-global memory pool.

    Returns
    -------
    MemoryPool
        A wrapper around the pool returned by c_get_memory_pool().
    """
    # __new__ is used because MemoryPool.__init__ always raises.
    cdef MemoryPool wrapper = MemoryPool.__new__(MemoryPool)
    wrapper.init(c_get_memory_pool())
    return wrapper


def proxy_memory_pool(MemoryPool parent not None):
    """
    Create and return a MemoryPool instance that redirects to the
    *parent*, but with separate allocation statistics.

    Parameters
    ----------
    parent : MemoryPool
        The real memory pool that should be used for allocations.
        Must not be None.

    Returns
    -------
    ProxyMemoryPool
        A new pool wrapping a C++ proxy pool built on top of *parent*.

    Raises
    ------
    TypeError
        If *parent* is None or not a MemoryPool.
    """
    # `not None` in the signature makes Cython reject None up front;
    # without it, `parent.pool` below would be read from the None object.
    # __new__ is used because ProxyMemoryPool.__init__ always raises.
    cdef ProxyMemoryPool out = ProxyMemoryPool.__new__(ProxyMemoryPool)
    # NOTE(review): only the raw C++ pointer of `parent` is handed over;
    # no Python reference to `parent` is stored here -- confirm that
    # callers keep `parent` alive for as long as the proxy is used.
    out.proxy_pool.reset(new CProxyMemoryPool(parent.pool))
    out.init(out.proxy_pool.get())
    return out


def logging_memory_pool(MemoryPool parent not None):
    """
    Create and return a MemoryPool instance that redirects to the
    *parent*, but also dumps allocation logs on stderr.

    Parameters
    ----------
    parent : MemoryPool
        The real memory pool that should be used for allocations.
        Must not be None.

    Returns
    -------
    LoggingMemoryPool
        A new pool wrapping a C++ logging pool built on top of *parent*.

    Raises
    ------
    TypeError
        If *parent* is None or not a MemoryPool.
    """
    # `not None` in the signature makes Cython reject None up front;
    # without it, `parent.pool` below would be read from the None object.
    # __new__ receives only the type, matching proxy_memory_pool: the class
    # defines no constructor that would consume an extra argument, and the
    # single-argument form is the one Cython can turn into a direct
    # allocation.
    cdef LoggingMemoryPool out = LoggingMemoryPool.__new__(LoggingMemoryPool)
    # NOTE(review): only the raw C++ pointer of `parent` is handed over;
    # no Python reference to `parent` is stored here -- confirm that
    # callers keep `parent` alive for as long as the logging pool is used.
    out.logging_pool.reset(new CLoggingMemoryPool(parent.pool))
    out.init(out.logging_pool.get())
    return out


def system_memory_pool():
    """
    Return a memory pool based on the C malloc heap.

    Returns
    -------
    MemoryPool
        A wrapper around the pool returned by c_system_memory_pool().
    """
    # __new__ is used because MemoryPool.__init__ always raises.
    cdef MemoryPool wrapper = MemoryPool.__new__(MemoryPool)
    wrapper.init(c_system_memory_pool())
    return wrapper


def jemalloc_memory_pool():
    """
    Return a memory pool based on the jemalloc heap.

    NotImplementedError is raised if jemalloc support is not enabled.

    Returns
    -------
    MemoryPool
        A wrapper around the jemalloc-backed pool.
    """
    cdef:
        # __new__ is used because MemoryPool.__init__ always raises.
        MemoryPool wrapper = MemoryPool.__new__(MemoryPool)
        # Filled in by c_jemalloc_memory_pool() through the out-parameter.
        CMemoryPool* raw_pool
    # check_status turns a failed status into a Python exception before
    # raw_pool is ever used.
    check_status(c_jemalloc_memory_pool(&raw_pool))
    wrapper.init(raw_pool)
    return wrapper


def mimalloc_memory_pool():
    """
    Return a memory pool based on the mimalloc heap.

    NotImplementedError is raised if mimalloc support is not enabled.

    Returns
    -------
    MemoryPool
        A wrapper around the mimalloc-backed pool.
    """
    cdef:
        # __new__ is used because MemoryPool.__init__ always raises.
        MemoryPool wrapper = MemoryPool.__new__(MemoryPool)
        # Filled in by c_mimalloc_memory_pool() through the out-parameter.
        CMemoryPool* raw_pool
    # check_status turns a failed status into a Python exception before
    # raw_pool is ever used.
    check_status(c_mimalloc_memory_pool(&raw_pool))
    wrapper.init(raw_pool)
    return wrapper


def set_memory_pool(MemoryPool pool not None):
    """
    Set the default memory pool.

    Parameters
    ----------
    pool : MemoryPool
        The memory pool that should be used by default.
        Must not be None.

    Raises
    ------
    TypeError
        If *pool* is None or not a MemoryPool.
    """
    # `not None` in the signature makes Cython reject None up front;
    # without it, `pool.pool` would be read from the None object and an
    # invalid pointer would be installed as the process-wide default.
    # NOTE(review): only the raw C++ pointer is passed on; no Python
    # reference to `pool` is stored here -- confirm that callers keep
    # `pool` alive while it is the default.
    c_set_default_memory_pool(pool.pool)


# Module-level pool wrappers, created once when this module is initialised.
# _default_memory_pool wraps whatever pool default_memory_pool() returns at
# that moment; _logging_memory_pool is a logging pool layered on top of it.
# log_memory_allocations() switches the default pool between these two.
cdef MemoryPool _default_memory_pool = default_memory_pool()
cdef LoggingMemoryPool _logging_memory_pool = logging_memory_pool(
    _default_memory_pool)


def log_memory_allocations(enable=True):
    """
    Enable or disable memory allocator logging for debugging purposes

    Parameters
    ----------
    enable : bool, default True
        Pass False to disable logging
    """
    # Disabling restores the pool wrapper captured at module initialisation;
    # enabling installs the logging pool built on top of it.
    if not enable:
        set_memory_pool(_default_memory_pool)
        return
    set_memory_pool(_logging_memory_pool)


def total_allocated_bytes():
    """
    Return the currently allocated bytes from the default memory pool.
    Other memory pools may not be accounted for.
    """
    # Query the current default pool directly; no Python wrapper is needed.
    return c_get_memory_pool().bytes_allocated()


def jemalloc_set_decay_ms(decay_ms):
    """
    Set arenas.dirty_decay_ms and arenas.muzzy_decay_ms to the indicated
    number of milliseconds.

    A value of 0 (the default) results in dirty / muzzy memory pages being
    released right away to the OS, while a higher value will result in a
    time-based decay. See the jemalloc docs for more information.

    It's best to set this at the start of your application.

    Parameters
    ----------
    decay_ms : int
        Number of milliseconds to set for jemalloc decay conf parameters.
        Note that this change will only affect future memory arenas.
    """
    # check_status converts a failed status from the C++ call into a
    # Python exception.
    check_status(c_jemalloc_set_decay_ms(decay_ms))