From 62e4c68907d8d33709c2c1f92a161dff00b3d5f2 Mon Sep 17 00:00:00 2001
From: Daniel Baumann
Date: Mon, 15 Apr 2024 22:01:36 +0200
Subject: Adding upstream version 0.11.2.

Signed-off-by: Daniel Baumann
---
 src/third-party/ArenaAlloc/arenaalloc.h     | 186 ++++++++++++++++++
 src/third-party/ArenaAlloc/arenaallocimpl.h | 290 ++++++++++++++++++++++++++++
 src/third-party/ArenaAlloc/recyclealloc.h   | 184 ++++++++++++++++++
 3 files changed, 660 insertions(+)
 create mode 100644 src/third-party/ArenaAlloc/arenaalloc.h
 create mode 100644 src/third-party/ArenaAlloc/arenaallocimpl.h
 create mode 100644 src/third-party/ArenaAlloc/recyclealloc.h
(limited to 'src/third-party/ArenaAlloc')

diff --git a/src/third-party/ArenaAlloc/arenaalloc.h b/src/third-party/ArenaAlloc/arenaalloc.h
new file mode 100644
index 0000000..dfd648d
--- /dev/null
+++ b/src/third-party/ArenaAlloc/arenaalloc.h
@@ -0,0 +1,186 @@
+// -*- c++ -*-
+/******************************************************************************
+ * arenaalloc.h
+ *
+ * Arena allocator based on the example logic provided by Nicolai Josuttis
+ * and available at http://www.josuttis.com/libbook/examples.html.
+ * This enhanced work is provided under the terms of the MIT license.
+ *
+ *****************************************************************************/
+
+#ifndef _ARENA_ALLOC_H
+#define _ARENA_ALLOC_H
+
+#include <limits>
+#include <cstddef>
+
+#if __cplusplus >= 201103L
+#include <utility>
+#include <type_traits>
+#endif
+
+// Define macro ARENA_ALLOC_DEBUG to enable some tracing of the allocator
+#include "arenaallocimpl.h"
+
+namespace ArenaAlloc
+{
+
+  struct _newAllocatorImpl
+  {
+    // these two functions should be supported by a specialized
+    // allocator for shared memory or another source of specialized
+    // memory such as device mapped memory.
+    void* allocate( size_t numBytes ) { return new char[ numBytes ]; }
+    void deallocate( void* ptr ) { delete[]( (char*)ptr ); }
+  };
+
+  template <typename T,
+            typename AllocatorImpl = _newAllocatorImpl,
+            typename MemblockImpl = _memblockimpl<AllocatorImpl> >
+  class Alloc {
+
+  private:
+    MemblockImpl* m_impl;
+
+  public:
+    // type definitions
+    typedef T              value_type;
+    typedef T*             pointer;
+    typedef const T*       const_pointer;
+    typedef T&             reference;
+    typedef const T&       const_reference;
+    typedef std::size_t    size_type;
+    typedef std::ptrdiff_t difference_type;
+
+#if __cplusplus >= 201103L
+    // when containers are swapped, (i.e. vector.swap)
+    // swap the allocators also.  This was not specified in c++98
+    // thus users of this code not using c++11 must
+    // exercise caution when using the swap algorithm or
+    // specialized swap member function.  Specifically,
+    // don't swap containers not sharing the same
+    // allocator internal implementation in c++98.  This is ok
+    // in c++11.
+    typedef std::true_type propagate_on_container_swap;
+
+    // container moves should move the allocator also.
+    typedef std::true_type propagate_on_container_move_assignment;
+#endif
+
+    // rebind allocator to type U
+    template <typename U>
+    struct rebind {
+      typedef Alloc<U, AllocatorImpl, MemblockImpl> other;
+    };
+
+    // return address of values
+    pointer address (reference value) const {
+      return &value;
+    }
+    const_pointer address (const_reference value) const {
+      return &value;
+    }
+
+    Alloc( std::size_t defaultSize = 32768, AllocatorImpl allocImpl = AllocatorImpl() ) throw():
+      m_impl( MemblockImpl::create( defaultSize, allocImpl ) )
+    {
+    }
+
+    Alloc(const Alloc& src) throw():
+      m_impl( src.m_impl )
+    {
+      m_impl->incrementRefCount();
+    }
+
+    template <typename U>
+    Alloc (const Alloc<U, AllocatorImpl, MemblockImpl>& src) throw():
+      m_impl( 0 )
+    {
+      MemblockImpl::assign( src, m_impl );
+      m_impl->incrementRefCount();
+    }
+
+    ~Alloc() throw()
+    {
+      m_impl->decrementRefCount();
+    }
+
+    // return maximum number of elements that can be allocated
+    size_type max_size () const throw()
+    {
+      return std::numeric_limits<size_t>::max() / sizeof(T);
+    }
+
+    // allocate but don't initialize num elements of type T
+    pointer allocate (size_type num, const void* = 0)
+    {
+      return reinterpret_cast<pointer>( m_impl->allocate(num*sizeof(T)) );
+    }
+
+    // initialize elements of allocated storage p with value value
+#if __cplusplus >= 201103L
+
+    // use c++11 style forwarding to construct the object
+    template< typename P, typename... Args>
+    void construct( P* obj, Args&&... args )
+    {
+      ::new((void*) obj ) P( std::forward<Args>( args )... );
+    }
+
+    template< typename P >
+    void destroy( P* obj ) { obj->~P(); }
+
+#else
+    void construct (pointer p, const T& value)
+    {
+      new((void*)p)T(value);
+    }
+    void destroy (pointer p) { p->~T(); }
+#endif
+
+    // deallocate storage p of deleted elements
+    void deallocate (pointer p, size_type num)
+    {
+      m_impl->deallocate( p );
+    }
+
+    bool equals( const MemblockImpl * impl ) const
+    {
+      return impl == m_impl;
+    }
+
+    bool operator == ( const Alloc& t2 ) const
+    {
+      return m_impl == t2.m_impl;
+    }
+
+    friend MemblockImpl;
+
+    template< typename Other >
+    bool operator == ( const Alloc< Other, AllocatorImpl, MemblockImpl >& t2 )
+    {
+      return t2.equals( m_impl );
+    }
+
+    template< typename Other >
+    bool operator != ( const Alloc< Other, AllocatorImpl, MemblockImpl >& t2 )
+    {
+      return !t2.equals( m_impl );
+    }
+
+    // These are extension functions not required for an stl allocator
+    size_t getNumAllocations() { return m_impl->getNumAllocations(); }
+    size_t getNumDeallocations() { return m_impl->getNumDeallocations(); }
+    size_t getNumBytesAllocated() { return m_impl->getNumBytesAllocated(); }
+  };
+
+  template< typename AllocatorImpl >
+  template< typename T >
+  void _memblockimpl<AllocatorImpl>::assign( const Alloc<T, AllocatorImpl, _memblockimpl<AllocatorImpl> >& src,
+                                             _memblockimpl<AllocatorImpl> *& dest )
+  {
+    dest = const_cast<_memblockimpl<AllocatorImpl>* >(src.m_impl);
+  }
+
+}
+
+#endif
diff --git a/src/third-party/ArenaAlloc/arenaallocimpl.h b/src/third-party/ArenaAlloc/arenaallocimpl.h
new file mode 100644
index 0000000..879e0d2
--- /dev/null
+++ b/src/third-party/ArenaAlloc/arenaallocimpl.h
@@ -0,0 +1,290 @@
+// -*- c++ -*-
+/******************************************************************************
+ ** arenaallocimpl.h
+ **
+ ** Internal implementation types of the arena allocator
+ ** MIT license
+ *****************************************************************************/
+
+#ifndef _ARENA_ALLOC_IMPL_H
+#define _ARENA_ALLOC_IMPL_H
+
+#ifdef ARENA_ALLOC_DEBUG
+#include <stdio.h>
+#endif
+
+#include <stdint.h>
+
+namespace ArenaAlloc
+{
+
+  template< typename T, typename A, typename M >
+  class Alloc;
+
+  // internal structure for tracking memory blocks
+  template < typename AllocImpl >
+  struct _memblock
+  {
+    // allocations are rounded up to a multiple of the size of this
+    // struct to maintain proper alignment for any pointer and double
+    // values stored in the allocation.
+    // A future goal is to support even stricter alignment, for example
+    // to support cache alignment, special device dependent mappings,
+    // or GPU ops.
+    union _roundsize {
+      double d;
+      void* p;
+    };
+
+    _memblock* m_next{nullptr};  // blocks kept link listed for cleanup at end
+    std::size_t m_bufferSize;    // size of the buffer
+    std::size_t m_index;         // index of next allocatable byte in the block
+    char* m_buffer;              // pointer to large block to allocate from
+
+    _memblock(std::size_t bufferSize, AllocImpl& allocImpl)
+      : m_bufferSize(roundSize(bufferSize)), m_index(0),
+        m_buffer(reinterpret_cast<char*>(allocImpl.allocate(
+            bufferSize))) // this works b/c of order of decl
+    {
+    }
+
+    std::size_t roundSize( std::size_t numBytes )
+    {
+      // this is subject to overflow.  calling logic should not permit
+      // an attempt to allocate a really massive size.
+      // i.e. an attempt to allocate 10s of terabytes should be an error
+      return ( ( numBytes + sizeof( _roundsize ) - 1 ) /
+               sizeof( _roundsize ) ) * sizeof( _roundsize );
+    }
+
+    char * allocate( std::size_t numBytes )
+    {
+      std::size_t roundedSize = roundSize( numBytes );
+      if( roundedSize + m_index > m_bufferSize )
+        return 0;
+
+      char * ptrToReturn = &m_buffer[ m_index ];
+      m_index += roundedSize;
+      return ptrToReturn;
+    }
+
+    void dispose( AllocImpl& impl )
+    {
+      impl.deallocate( m_buffer );
+    }
+
+    ~_memblock()
+    {
+    }
+  };
+
+  template< typename AllocatorImpl, typename Derived >
+  struct _memblockimplbase
+  {
+    AllocatorImpl m_alloc;
+    std::size_t m_refCount;    // when refs -> 0 delete this
+    std::size_t m_defaultSize;
+
+    std::size_t m_numAllocate;       // number of times allocate called
+    std::size_t m_numDeallocate;     // number of times deallocate called
+    std::size_t m_numBytesAllocated; // A good estimate of amount of space used
+
+    _memblock<AllocatorImpl> * m_head;
+    _memblock<AllocatorImpl> * m_current;
+
+    // round up to next power of 2 if not already
+    // a power of 2
+    std::size_t roundpow2( std::size_t value )
+    {
+      // note this works because subtracting 1 is equivalent to
+      // inverting the lowest set bit and complementing any
+      // bits lower than that.  only a power of 2
+      // will yield 0 in the following check
+      if( 0 == ( value & ( value - 1 ) ) )
+        return value; // already a power of 2
+
+      // fold value over itself.  This will set all bits after the highest set bit to 1
+      // who said bit twiddling wasn't practical?
+      value |= value >> 1;
+      value |= value >> 2;
+      value |= value >> 4;
+      value |= value >> 8;
+      value |= value >> 16;
+#if SIZE_MAX > UINT32_MAX
+      value |= value >> 32;
+#endif
+
+      return value + 1;
+    }
+
+    _memblockimplbase( std::size_t defaultSize, AllocatorImpl& allocator ):
+      m_alloc( allocator ),
+      m_refCount( 1 ),
+      m_defaultSize( defaultSize ),
+      m_numAllocate( 0 ),
+      m_numDeallocate( 0 ),
+      m_numBytesAllocated( 0 ),
+      m_head( 0 ),
+      m_current( 0 )
+    {
+      if( m_defaultSize < 256 )
+      {
+        m_defaultSize = 256; // anything less is academic. a more practical size is 4k or more
+      }
+      else if ( m_defaultSize > 1024UL*1024*1024*16 )
+      {
+        // when this becomes a problem, this package has succeeded beyond my wildest expectations
+        m_defaultSize = 1024UL*1024*1024*16;
+      }
+
+      // for convenience block size should be a power of 2
+      // round up to next power of 2
+      m_defaultSize = roundpow2( m_defaultSize );
+      allocateNewBlock( m_defaultSize );
+    }
+
+    char * allocate( std::size_t numBytes )
+    {
+      char * ptrToReturn = m_current->allocate( numBytes );
+      if( !ptrToReturn )
+      {
+        allocateNewBlock( numBytes > m_defaultSize / 2 ? roundpow2( numBytes*2 ) :
+                          m_defaultSize );
+
+        ptrToReturn = m_current->allocate( numBytes );
+      }
+
+#ifdef ARENA_ALLOC_DEBUG
+      fprintf( stdout, "_memblockimpl=%p allocated %ld bytes at address=%p\n", this, numBytes, ptrToReturn );
+#endif
+
+      ++ m_numAllocate;
+      m_numBytesAllocated += numBytes; // does not account for the small overhead in tracking the allocation
+
+      return ptrToReturn;
+    }
+
+    void allocateNewBlock( std::size_t blockSize )
+    {
+      _memblock<AllocatorImpl> * newBlock = new ( m_alloc.allocate( sizeof( _memblock<AllocatorImpl> ) ) )
+        _memblock<AllocatorImpl>( blockSize, m_alloc );
+
+#ifdef ARENA_ALLOC_DEBUG
+      fprintf( stdout, "_memblockimplbase=%p allocating a new block of size=%ld\n", this, blockSize );
+#endif
+
+      if( m_head == 0 )
+      {
+        m_head = m_current = newBlock;
+      }
+      else
+      {
+        m_current->m_next = newBlock;
+        m_current = newBlock;
+      }
+    }
+
+    void deallocate( void * ptr )
+    {
+      ++ m_numDeallocate;
+    }
+
+    size_t getNumAllocations() { return m_numAllocate; }
+    size_t getNumDeallocations() { return m_numDeallocate; }
+    size_t getNumBytesAllocated() { return m_numBytesAllocated; }
+
+    void clear()
+    {
+      _memblock<AllocatorImpl> * block = m_head;
+      while( block )
+      {
+        _memblock<AllocatorImpl> * curr = block;
+        block = block->m_next;
+        curr->dispose( m_alloc );
+        curr->~_memblock();
+        m_alloc.deallocate( curr );
+      }
+    }
+
+    // The ref counting model does not permit the sharing of
+    // this object across multiple threads unless an external locking mechanism is applied
+    // to ensure the atomicity of the reference count.
+    void incrementRefCount()
+    {
+      ++m_refCount;
+#ifdef ARENA_ALLOC_DEBUG
+      fprintf( stdout, "ref count on _memblockimplbase=%p incremented to %ld\n", this, m_refCount );
+#endif
+    }
+
+    void decrementRefCount()
+    {
+      --m_refCount;
+#ifdef ARENA_ALLOC_DEBUG
+      fprintf( stdout, "ref count on _memblockimplbase=%p decremented to %ld\n", this, m_refCount );
+#endif
+
+      if( m_refCount == 0 )
+      {
+        Derived::destroy( static_cast<Derived*>(this) );
+      }
+    }
+  };
+
+
+  // Each allocator points to an instance of _memblockimpl which
+  // contains the list of _memblock objects and other tracking info
+  // including a refcount.
+  // This object is instantiated in space obtained from the allocator
+  // implementation.  The allocator implementation is the component
+  // on which allocate/deallocate are called to obtain storage from.
+  template< typename AllocatorImpl >
+  struct _memblockimpl : public _memblockimplbase<AllocatorImpl, _memblockimpl<AllocatorImpl> >
+  {
+  private:
+
+    typedef struct _memblockimplbase< AllocatorImpl, _memblockimpl<AllocatorImpl> > base_t;
+    friend struct _memblockimplbase< AllocatorImpl, _memblockimpl<AllocatorImpl> >;
+
+    // to get around some sticky access issues between Alloc<T> and Alloc<U> when sharing
+    // the implementation.
+    template<typename T, typename A, typename M>
+    friend class Alloc;
+
+    template< typename T >
+    static void assign( const Alloc<T, AllocatorImpl, _memblockimpl<AllocatorImpl> >& src,
+                        _memblockimpl<AllocatorImpl> *& dest );
+
+    static _memblockimpl * create( size_t defaultSize, AllocatorImpl& alloc )
+    {
+      return new ( alloc.allocate( sizeof( _memblockimpl ) ) ) _memblockimpl( defaultSize,
+                                                                              alloc );
+    }
+
+    static void destroy( _memblockimpl * objToDestroy )
+    {
+      AllocatorImpl allocImpl = objToDestroy->m_alloc;
+      objToDestroy-> ~_memblockimpl();
+      allocImpl.deallocate( objToDestroy );
+    }
+
+    _memblockimpl( std::size_t defaultSize, AllocatorImpl& allocImpl ):
+      _memblockimplbase<AllocatorImpl, _memblockimpl<AllocatorImpl> >( defaultSize, allocImpl )
+    {
+#ifdef ARENA_ALLOC_DEBUG
+      fprintf( stdout, "_memblockimpl=%p constructed with default size=%ld\n", this,
+               base_t::m_defaultSize );
+#endif
+    }
+
+    ~_memblockimpl( )
+    {
+#ifdef ARENA_ALLOC_DEBUG
+      fprintf( stdout, "~memblockimpl() called on _memblockimpl=%p\n", this );
+#endif
+      base_t::clear();
+    }
+  };
+}
+
+#endif
diff --git a/src/third-party/ArenaAlloc/recyclealloc.h b/src/third-party/ArenaAlloc/recyclealloc.h
new file mode 100644
index 0000000..129e43b
--- /dev/null
+++ b/src/third-party/ArenaAlloc/recyclealloc.h
@@ -0,0 +1,184 @@
+// -*- c++ -*-
+/******************************************************************************
+ ** recyclealloc.h
+ **
+ ** Arena allocator with some modest recycling of freed resources.
+ ** MIT license
+ **
+ *****************************************************************************/
+#ifndef _RECYCLE_ALLOC_H
+#define _RECYCLE_ALLOC_H
+
+#include "arenaalloc.h"
+#include <string.h>
+#include <stdint.h>
+
+namespace ArenaAlloc
+{
+
+  // todo:
+  // attempt refactor of boilerplate in _memblockimpl and _recycleallocimpl
+  template< typename AllocatorImpl, uint16_t StepSize = 16, uint16_t NumBuckets = 256 >
+  struct _recycleallocimpl : public _memblockimplbase<AllocatorImpl, _recycleallocimpl<AllocatorImpl, StepSize, NumBuckets> >
+  {
+  private:
+
+    static_assert( ( StepSize >= 16 && NumBuckets >= 16 ), "Min step size=16, Min num buckets=16" );
+    static_assert( !( StepSize & ( StepSize - 1 ) ), "Step size must be a power of 2" );
+
+    struct _freeEntry
+    {
+      // note: order of declaration matters
+      std::size_t m_size;
+      _freeEntry * m_next;
+    };
+
+    _freeEntry * m_buckets[ NumBuckets ]; // m_buckets[ NumBuckets - 1 ] is the oversize bucket
+
+    typedef struct _memblockimplbase< AllocatorImpl, _recycleallocimpl > base_t;
+    friend struct _memblockimplbase< AllocatorImpl, _recycleallocimpl >;
+
+    // to get around some sticky access issues between Alloc<T> and Alloc<U> when sharing
+    // the implementation.
+    template<typename T, typename A, typename M>
+    friend class Alloc;
+
+    template< typename T >
+    static void assign( const Alloc<T, AllocatorImpl, _recycleallocimpl<AllocatorImpl, StepSize, NumBuckets> >& src,
+                        _recycleallocimpl *& dest )
+    {
+      dest = const_cast< _recycleallocimpl* >( src.m_impl );
+    }
+
+    static _recycleallocimpl * create( std::size_t defaultSize, AllocatorImpl& alloc )
+    {
+      return new (
+          alloc.allocate( sizeof( _recycleallocimpl ) ) ) _recycleallocimpl( defaultSize,
+                                                                             alloc );
+    }
+
+    static void destroy( _recycleallocimpl * objToDestroy )
+    {
+      AllocatorImpl allocImpl = objToDestroy->m_alloc;
+      objToDestroy-> ~_recycleallocimpl();
+      allocImpl.deallocate( objToDestroy );
+    }
+
+    _recycleallocimpl( std::size_t defaultSize, AllocatorImpl& allocImpl ):
+      _memblockimplbase<AllocatorImpl, _recycleallocimpl<AllocatorImpl, StepSize, NumBuckets> >( defaultSize, allocImpl )
+    {
+      memset( m_buckets, 0, sizeof( m_buckets ) );
+
+#ifdef ARENA_ALLOC_DEBUG
+      fprintf( stdout, "_recycleallocimpl=%p constructed with default size=%ld\n", this,
+               base_t::m_defaultSize );
+#endif
+    }
+
+    ~_recycleallocimpl( )
+    {
+#ifdef ARENA_ALLOC_DEBUG
+      fprintf( stdout, "~_recycleallocimpl() called on _recycleallocimpl=%p\n", this );
+#endif
+      base_t::clear();
+    }
+
+    char * allocate( std::size_t numBytes )
+    {
+
+      numBytes = ( (numBytes + sizeof( std::size_t ) + StepSize - 1) / StepSize ) * StepSize;
+
+      char * returnValue = allocateInternal( numBytes );
+      if( !returnValue )
+      {
+        char * allocValue = base_t::allocate( numBytes );
+
+        if( !allocValue )
+          return 0; // allocation failure
+
+        *((std::size_t*)allocValue ) = numBytes; // that includes the header
+        return allocValue + sizeof( std::size_t );
+      }
+
+      return returnValue;
+    }
+
+    void deallocate( void * ptr )
+    {
+      deallocateInternal( reinterpret_cast<char*>(ptr) );
+      base_t::deallocate( ptr ); // this is called b/c it is known this just updates stats
+    }
+
+    char * allocateInternal( std::size_t numBytes )
+    {
+      // numBytes must already be rounded to a multiple of stepsize and have an
+      // extra sizeof( std::size_t ) bytes tacked on for the header
+      // pointer returned points sizeof( std::size_t ) bytes into the allocation
+      // bucket 0 is always null in this scheme.
+
+      uint16_t bucketNumber = numBytes / StepSize;
+
+      if( bucketNumber > NumBuckets - 1 )
+        bucketNumber = NumBuckets - 1; // oversize alloc
+
+      // search max 3 consecutive buckets for an item large enough.
+      // in the oversize bucket and only in the oversize bucket,
+      // search up to 3 items into the linked list for an entry
+      // large enough for the specified size
+      for( uint16_t bkt = bucketNumber, i = 0; i < 3 && bkt < NumBuckets; ++i, ++bkt )
+      {
+        if( m_buckets[ bkt ] )
+          return allocateFrom( numBytes, m_buckets[ bkt ] );
+      }
+
+      return 0;
+    }
+
+    char * allocateFrom( std::size_t numBytes, _freeEntry *& head )
+    {
+      _freeEntry * current = head;
+      _freeEntry * prev = 0;
+
+      int count = 0;
+
+      while( current && count < 3 )
+      {
+        if( current->m_size >= numBytes )
+        {
+          if( prev == 0 )
+            head = current->m_next;
+          else
+            prev->m_next = current->m_next;
+
+          return reinterpret_cast<char*>( &current->m_next );
+        }
+
+        ++count;
+        prev = current;
+        current = current->m_next;
+      }
+
+      return 0;
+    }
+
+    void deallocateInternal( char * ptr )
+    {
+      _freeEntry * v = reinterpret_cast< _freeEntry* >( ptr - sizeof( std::size_t ) );
+      uint16_t bucketNumber = v->m_size / StepSize;
+
+      if( bucketNumber > NumBuckets - 1 )
+        bucketNumber = NumBuckets - 1;
+
+      _freeEntry * next = m_buckets[ bucketNumber ];
+      v->m_next = next;
+      m_buckets[ bucketNumber ] = v;
+    }
+
+  };
+
+  template< typename T, typename Allocator = _newAllocatorImpl >
+  using RecycleAlloc = Alloc< T, Allocator, _recycleallocimpl<Allocator> >;
+
+}
+
+#endif
-- 
cgit v1.2.3
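
For orientation, a short usage sketch follows. It is not part of the upstream 0.11.2 sources; the containers, block sizes, and variable names are illustrative assumptions based only on the headers added above, and a C++11 compiler is assumed (the RecycleAlloc alias template requires it).

    // Illustrative only -- not upstream code.  Assumes the three headers above
    // are on the include path and the toolchain is in C++11 mode.
    #include "arenaalloc.h"
    #include "recyclealloc.h"

    #include <iostream>
    #include <map>
    #include <string>
    #include <vector>

    int main()
    {
      // Every allocation made through intAlloc (or a copy of it) comes from one
      // arena whose blocks default to 32768 bytes, per the constructor default.
      ArenaAlloc::Alloc<int> intAlloc;
      std::vector<int, ArenaAlloc::Alloc<int> > v( intAlloc );
      for( int i = 0; i < 1000; ++i )
        v.push_back( i );

      // Node-based containers rebind the allocator internally; the rebound
      // copies still share the same ref-counted implementation object.
      typedef std::pair<const std::string, int> entry_t;
      ArenaAlloc::Alloc<entry_t> mapAlloc;
      std::map<std::string, int, std::less<std::string>,
               ArenaAlloc::Alloc<entry_t> > m( mapAlloc );
      m[ "arena" ] = 1;

      // RecycleAlloc layers size-bucketed reuse of freed chunks on the arena.
      ArenaAlloc::RecycleAlloc<double> recAlloc;
      std::vector<double, ArenaAlloc::RecycleAlloc<double> > w( recAlloc );
      w.assign( 100, 3.14 );

      // Extension functions exposed by the allocator (see arenaalloc.h).
      std::cout << "allocations: " << intAlloc.getNumAllocations()
                << ", bytes allocated: " << intAlloc.getNumBytesAllocated() << "\n";
      return 0;
    }

Because copies of an Alloc share one ref-counted implementation, every container constructed from the same allocator draws from the same arena, and the arena's blocks are released only when the last copy is destroyed.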
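The _newAllocatorImpl comment in arenaalloc.h notes that a backing implementation only needs to supply allocate(size_t) and deallocate(void*), so other memory sources (shared memory, device-mapped memory) can be substituted. The following hypothetical malloc-backed backend is again an illustrative sketch, not upstream code:

    // Hypothetical AllocatorImpl: the arena only requires these two member
    // functions, so any memory source can stand in for operator new[].
    #include "arenaalloc.h"
    #include <cstdlib>
    #include <vector>

    struct MallocBackend
    {
      void* allocate( size_t numBytes ) { return std::malloc( numBytes ); }
      void  deallocate( void* ptr )     { std::free( ptr ); }
    };

    int main()
    {
      // 64 KiB arena blocks obtained from malloc instead of operator new[].
      ArenaAlloc::Alloc<int, MallocBackend> intAlloc( 65536, MallocBackend() );
      std::vector<int, ArenaAlloc::Alloc<int, MallocBackend> > v( intAlloc );
      v.push_back( 42 );
      return 0;
    }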