summaryrefslogtreecommitdiffstats
path: root/src/arrow/python/pyarrow/memory.pxi
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-21 11:54:28 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-21 11:54:28 +0000
commite6918187568dbd01842d8d1d2c808ce16a894239 (patch)
tree64f88b554b444a49f656b6c656111a145cbbaa28 /src/arrow/python/pyarrow/memory.pxi
parentInitial commit. (diff)
downloadceph-e6918187568dbd01842d8d1d2c808ce16a894239.tar.xz
ceph-e6918187568dbd01842d8d1d2c808ce16a894239.zip
Adding upstream version 18.2.2.upstream/18.2.2
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/arrow/python/pyarrow/memory.pxi')
-rw-r--r--src/arrow/python/pyarrow/memory.pxi249
1 files changed, 249 insertions, 0 deletions
diff --git a/src/arrow/python/pyarrow/memory.pxi b/src/arrow/python/pyarrow/memory.pxi
new file mode 100644
index 000000000..8ccb35058
--- /dev/null
+++ b/src/arrow/python/pyarrow/memory.pxi
@@ -0,0 +1,249 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# cython: profile=False
+# distutils: language = c++
+# cython: embedsignature = True
+
+
+cdef class MemoryPool(_Weakrefable):
+ """
+ Base class for memory allocation.
+
+ Besides tracking its number of allocated bytes, a memory pool also
+ takes care of the required 64-byte alignment for Arrow data.
+ """
+
+ def __init__(self):
+ raise TypeError("Do not call {}'s constructor directly, "
+ "use pyarrow.*_memory_pool instead."
+ .format(self.__class__.__name__))
+
+ cdef void init(self, CMemoryPool* pool):
+ self.pool = pool
+
+ def release_unused(self):
+ """
+ Attempt to return to the OS any memory being held onto by the pool.
+
+ This function should not be called except potentially for
+ benchmarking or debugging as it could be expensive and detrimental to
+ performance.
+
+ This is best effort and may not have any effect on some memory pools
+ or in some situations (e.g. fragmentation).
+ """
+ cdef CMemoryPool* pool = c_get_memory_pool()
+ with nogil:
+ pool.ReleaseUnused()
+
+ def bytes_allocated(self):
+ """
+ Return the number of bytes that are currently allocated from this
+ memory pool.
+ """
+ return self.pool.bytes_allocated()
+
+ def max_memory(self):
+ """
+ Return the peak memory allocation in this memory pool.
+ This can be an approximate number in multi-threaded applications.
+
+ None is returned if the pool implementation doesn't know how to
+ compute this number.
+ """
+ ret = self.pool.max_memory()
+ return ret if ret >= 0 else None
+
+ @property
+ def backend_name(self):
+ """
+ The name of the backend used by this MemoryPool (e.g. "jemalloc").
+ """
+ return frombytes(self.pool.backend_name())
+
+
+cdef CMemoryPool* maybe_unbox_memory_pool(MemoryPool memory_pool):
+ if memory_pool is None:
+ return c_get_memory_pool()
+ else:
+ return memory_pool.pool
+
+
+cdef class LoggingMemoryPool(MemoryPool):
+ cdef:
+ unique_ptr[CLoggingMemoryPool] logging_pool
+
+ def __init__(self):
+ raise TypeError("Do not call {}'s constructor directly, "
+ "use pyarrow.logging_memory_pool instead."
+ .format(self.__class__.__name__))
+
+
+cdef class ProxyMemoryPool(MemoryPool):
+ """
+ Memory pool implementation that tracks the number of bytes and
+ maximum memory allocated through its direct calls, while redirecting
+ to another memory pool.
+ """
+ cdef:
+ unique_ptr[CProxyMemoryPool] proxy_pool
+
+ def __init__(self):
+ raise TypeError("Do not call {}'s constructor directly, "
+ "use pyarrow.proxy_memory_pool instead."
+ .format(self.__class__.__name__))
+
+
+def default_memory_pool():
+ """
+ Return the process-global memory pool.
+ """
+ cdef:
+ MemoryPool pool = MemoryPool.__new__(MemoryPool)
+ pool.init(c_get_memory_pool())
+ return pool
+
+
+def proxy_memory_pool(MemoryPool parent):
+ """
+ Create and return a MemoryPool instance that redirects to the
+ *parent*, but with separate allocation statistics.
+
+ Parameters
+ ----------
+ parent : MemoryPool
+ The real memory pool that should be used for allocations.
+ """
+ cdef ProxyMemoryPool out = ProxyMemoryPool.__new__(ProxyMemoryPool)
+ out.proxy_pool.reset(new CProxyMemoryPool(parent.pool))
+ out.init(out.proxy_pool.get())
+ return out
+
+
+def logging_memory_pool(MemoryPool parent):
+ """
+ Create and return a MemoryPool instance that redirects to the
+ *parent*, but also dumps allocation logs on stderr.
+
+ Parameters
+ ----------
+ parent : MemoryPool
+ The real memory pool that should be used for allocations.
+ """
+ cdef LoggingMemoryPool out = LoggingMemoryPool.__new__(
+ LoggingMemoryPool, parent)
+ out.logging_pool.reset(new CLoggingMemoryPool(parent.pool))
+ out.init(out.logging_pool.get())
+ return out
+
+
+def system_memory_pool():
+ """
+ Return a memory pool based on the C malloc heap.
+ """
+ cdef:
+ MemoryPool pool = MemoryPool.__new__(MemoryPool)
+ pool.init(c_system_memory_pool())
+ return pool
+
+
+def jemalloc_memory_pool():
+ """
+ Return a memory pool based on the jemalloc heap.
+
+ NotImplementedError is raised if jemalloc support is not enabled.
+ """
+ cdef:
+ CMemoryPool* c_pool
+ MemoryPool pool = MemoryPool.__new__(MemoryPool)
+ check_status(c_jemalloc_memory_pool(&c_pool))
+ pool.init(c_pool)
+ return pool
+
+
+def mimalloc_memory_pool():
+ """
+ Return a memory pool based on the mimalloc heap.
+
+ NotImplementedError is raised if mimalloc support is not enabled.
+ """
+ cdef:
+ CMemoryPool* c_pool
+ MemoryPool pool = MemoryPool.__new__(MemoryPool)
+ check_status(c_mimalloc_memory_pool(&c_pool))
+ pool.init(c_pool)
+ return pool
+
+
+def set_memory_pool(MemoryPool pool):
+ """
+ Set the default memory pool.
+
+ Parameters
+ ----------
+ pool : MemoryPool
+ The memory pool that should be used by default.
+ """
+ c_set_default_memory_pool(pool.pool)
+
+
+cdef MemoryPool _default_memory_pool = default_memory_pool()
+cdef LoggingMemoryPool _logging_memory_pool = logging_memory_pool(
+ _default_memory_pool)
+
+
+def log_memory_allocations(enable=True):
+ """
+ Enable or disable memory allocator logging for debugging purposes
+
+ Parameters
+ ----------
+ enable : bool, default True
+ Pass False to disable logging
+ """
+ if enable:
+ set_memory_pool(_logging_memory_pool)
+ else:
+ set_memory_pool(_default_memory_pool)
+
+
+def total_allocated_bytes():
+ """
+ Return the currently allocated bytes from the default memory pool.
+ Other memory pools may not be accounted for.
+ """
+ cdef CMemoryPool* pool = c_get_memory_pool()
+ return pool.bytes_allocated()
+
+
+def jemalloc_set_decay_ms(decay_ms):
+ """
+ Set arenas.dirty_decay_ms and arenas.muzzy_decay_ms to indicated number of
+ milliseconds. A value of 0 (the default) results in dirty / muzzy memory
+ pages being released right away to the OS, while a higher value will result
+ in a time-based decay. See the jemalloc docs for more information
+
+ It's best to set this at the start of your application.
+
+ Parameters
+ ----------
+ decay_ms : int
+ Number of milliseconds to set for jemalloc decay conf parameters. Note
+ that this change will only affect future memory arenas
+ """
+ check_status(c_jemalloc_set_decay_ms(decay_ms))