// -*- c++ -*-
/******************************************************************************
 **  arenaallocimpl.h
 **
 **  Internal implementation types of the arena allocator
 **  MIT license
 *****************************************************************************/

#ifndef _ARENA_ALLOC_IMPL_H
#define _ARENA_ALLOC_IMPL_H

#ifdef ARENA_ALLOC_DEBUG
#include <stdio.h>
#endif

#include <cstddef>
#include <new>

namespace ArenaAlloc
{

  template< typename T, typename A, typename M >
  class Alloc;

  // internal structure for tracking memory blocks
  template< typename AllocImpl >
  struct _memblock
  {
    // Allocations are rounded up to a multiple of the size of this
    // union to maintain proper alignment for any pointer and double
    // values stored in the allocation.
    // A future goal is to support even stricter alignment, for example
    // to support cache alignment, special device dependent mappings,
    // or GPU ops.
    union _roundsize
    {
      double d;
      void* p;
    };

    _memblock* m_next{nullptr};  // blocks kept in a linked list for cleanup at end
    std::size_t m_bufferSize;    // size of the buffer
    std::size_t m_index;         // index of the next allocatable byte in the block
    char* m_buffer;              // pointer to the large block to allocate from

    _memblock( std::size_t bufferSize, AllocImpl& allocImpl ):
      m_bufferSize( roundSize( bufferSize ) ),
      m_index( 0 ),
      // this works b/c of the order of declaration: m_bufferSize is
      // initialized before m_buffer reads it
      m_buffer( reinterpret_cast< char* >( allocImpl.allocate( m_bufferSize ) ) )
    {
    }

    std::size_t roundSize( std::size_t numBytes )
    {
      // this is subject to overflow.  calling logic should not permit
      // an attempt to allocate a really massive size,
      // i.e. an attempt to allocate 10s of terabytes should be an error.
      return ( ( numBytes + sizeof( _roundsize ) - 1 ) /
               sizeof( _roundsize ) ) * sizeof( _roundsize );
    }

    char * allocate( std::size_t numBytes )
    {
      std::size_t roundedSize = roundSize( numBytes );
      if( roundedSize + m_index > m_bufferSize )
        return 0;  // block exhausted; the caller must allocate a new block

      char * ptrToReturn = &m_buffer[ m_index ];
      m_index += roundedSize;
      return ptrToReturn;
    }

    void dispose( AllocImpl& impl )
    {
      impl.deallocate( m_buffer );
    }

    ~_memblock()
    {
    }
  };
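  // Worked example of the rounding arithmetic above (illustrative only):
  // assuming sizeof( _roundsize ) == 8, a request for 13 bytes becomes
  //   ( ( 13 + 8 - 1 ) / 8 ) * 8  =  ( 20 / 8 ) * 8  =  2 * 8  =  16,
  // so every pointer handed out of m_buffer stays 8-byte aligned.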
  // Shared state and logic for the block list.  Derived supplies the
  // static destroy() entry point (CRTP).
  template< typename AllocatorImpl, typename Derived >
  struct _memblockimplbase
  {
    AllocatorImpl m_alloc;
    std::size_t m_refCount;           // when refs -> 0, delete this
    std::size_t m_defaultSize;

    std::size_t m_numAllocate;        // number of times allocate called
    std::size_t m_numDeallocate;      // number of times deallocate called
    std::size_t m_numBytesAllocated;  // a good estimate of the amount of space used

    _memblock< AllocatorImpl > * m_head;
    _memblock< AllocatorImpl > * m_current;

    // round up to the next power of 2 if not already a power of 2
    std::size_t roundpow2( std::size_t value )
    {
      // note this works because subtracting 1 is equivalent to
      // inverting the lowest set bit and complementing any
      // bits lower than that.  only a power of 2
      // will yield 0 in the following check
      if( 0 == ( value & ( value - 1 ) ) )
        return value;  // already a power of 2

      // fold value over itself.  this sets all bits after the highest
      // set bit to 1.  who said bit twiddling wasn't practical?
      value |= value >> 1;
      value |= value >> 2;
      value |= value >> 4;
      value |= value >> 8;
      value |= value >> 16;
      value |= value >> 32;  // assumes std::size_t is 64 bits wide

      // e.g. 300 (0b100101100) folds to 511 (0b111111111); +1 gives 512
      return value + 1;
    }

    _memblockimplbase( std::size_t defaultSize, AllocatorImpl& allocator ):
      m_alloc( allocator ),
      m_refCount( 1 ),
      m_defaultSize( defaultSize ),
      m_numAllocate( 0 ),
      m_numDeallocate( 0 ),
      m_numBytesAllocated( 0 ),
      m_head( 0 ),
      m_current( 0 )
    {
      if( m_defaultSize < 256 )
      {
        // anything less is academic.  a more practical size is 4k or more
        m_defaultSize = 256;
      }
      else if( m_defaultSize > 1024UL*1024*1024*16 )
      {
        // when this becomes a problem, this package has succeeded
        // beyond my wildest expectations
        m_defaultSize = 1024UL*1024*1024*16;
      }

      // for convenience, the block size should be a power of 2.
      // round up to the next power of 2
      m_defaultSize = roundpow2( m_defaultSize );
      allocateNewBlock( m_defaultSize );
    }

    char * allocate( std::size_t numBytes )
    {
      char * ptrToReturn = m_current->allocate( numBytes );
      if( !ptrToReturn )
      {
        allocateNewBlock( numBytes > m_defaultSize / 2 ? roundpow2( numBytes*2 ) :
                          m_defaultSize );
        ptrToReturn = m_current->allocate( numBytes );
      }

#ifdef ARENA_ALLOC_DEBUG
      fprintf( stdout, "_memblockimpl=%p allocated %zu bytes at address=%p\n",
               this, numBytes, ptrToReturn );
#endif

      ++ m_numAllocate;
      // does not account for the small overhead in tracking the allocation
      m_numBytesAllocated += numBytes;
      return ptrToReturn;
    }

    void allocateNewBlock( std::size_t blockSize )
    {
      _memblock< AllocatorImpl > * newBlock =
        new ( m_alloc.allocate( sizeof( _memblock< AllocatorImpl > ) ) )
          _memblock< AllocatorImpl >( blockSize, m_alloc );

#ifdef ARENA_ALLOC_DEBUG
      fprintf( stdout, "_memblockimplbase=%p allocating a new block of size=%zu\n",
               this, blockSize );
#endif

      if( m_head == 0 )
      {
        m_head = m_current = newBlock;
      }
      else
      {
        m_current->m_next = newBlock;
        m_current = newBlock;
      }
    }

    void deallocate( void * ptr )
    {
      // individual deallocations are a no-op; all memory is reclaimed
      // at once when the arena is destroyed
      ++ m_numDeallocate;
    }

    std::size_t getNumAllocations() { return m_numAllocate; }
    std::size_t getNumDeallocations() { return m_numDeallocate; }
    std::size_t getNumBytesAllocated() { return m_numBytesAllocated; }

    void clear()
    {
      _memblock< AllocatorImpl > * block = m_head;
      while( block )
      {
        _memblock< AllocatorImpl > * curr = block;
        block = block->m_next;
        curr->dispose( m_alloc );
        curr->~_memblock();
        m_alloc.deallocate( curr );
      }
    }

    // The ref counting model does not permit the sharing of
    // this object across multiple threads unless an external locking
    // mechanism is applied to ensure the atomicity of the reference count.
    void incrementRefCount()
    {
      ++m_refCount;
#ifdef ARENA_ALLOC_DEBUG
      fprintf( stdout, "ref count on _memblockimplbase=%p incremented to %zu\n",
               this, m_refCount );
#endif
    }

    void decrementRefCount()
    {
      --m_refCount;
#ifdef ARENA_ALLOC_DEBUG
      fprintf( stdout, "ref count on _memblockimplbase=%p decremented to %zu\n",
               this, m_refCount );
#endif

      if( m_refCount == 0 )
      {
        Derived::destroy( static_cast< Derived* >( this ) );
      }
    }
  };
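  // Illustrative sketch of the ref counting model described above
  // (hypothetical call sites, not part of this header): every allocator
  // copy sharing one impl object is expected to pair the calls, e.g.
  //
  //   impl->incrementRefCount();  // when another Alloc copy takes a reference
  //   ...
  //   impl->decrementRefCount();  // when a copy goes away; the final call
  //                               // invokes Derived::destroy( impl )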
  // Each allocator points to an instance of _memblockimpl which
  // contains the list of _memblock objects and other tracking info
  // including a refcount.
  // This object is instantiated in space obtained from the allocator
  // implementation.  The allocator implementation is the component
  // on which allocate/deallocate are called to obtain storage from.
  template< typename AllocatorImpl >
  struct _memblockimpl :
    public _memblockimplbase< AllocatorImpl, _memblockimpl< AllocatorImpl > >
  {
  private:

    typedef _memblockimplbase< AllocatorImpl, _memblockimpl< AllocatorImpl > > base_t;
    friend struct _memblockimplbase< AllocatorImpl, _memblockimpl< AllocatorImpl > >;

    // to get around some sticky access issues between Alloc< T1 > and
    // Alloc< T2 > when sharing the implementation.
    template< typename T, typename A, typename M >
    friend class Alloc;

    template< typename T >
    static void assign( const Alloc< T, AllocatorImpl, _memblockimpl< AllocatorImpl > >& src,
                        _memblockimpl *& dest );

    static _memblockimpl * create( std::size_t defaultSize, AllocatorImpl& alloc )
    {
      return new ( alloc.allocate( sizeof( _memblockimpl ) ) )
        _memblockimpl( defaultSize, alloc );
    }

    static void destroy( _memblockimpl * objToDestroy )
    {
      // copy the impl out first: it must outlive the object it destroys
      AllocatorImpl allocImpl = objToDestroy->m_alloc;
      objToDestroy->~_memblockimpl();
      allocImpl.deallocate( objToDestroy );
    }

    _memblockimpl( std::size_t defaultSize, AllocatorImpl& allocImpl ):
      _memblockimplbase< AllocatorImpl, _memblockimpl< AllocatorImpl > >( defaultSize, allocImpl )
    {
#ifdef ARENA_ALLOC_DEBUG
      fprintf( stdout, "_memblockimpl=%p constructed with default size=%zu\n",
               this, base_t::m_defaultSize );
#endif
    }

    ~_memblockimpl()
    {
#ifdef ARENA_ALLOC_DEBUG
      fprintf( stdout, "~_memblockimpl() called on _memblockimpl=%p\n", this );
#endif
      base_t::clear();
    }
  };
}

#endif
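// Illustrative sketch of the AllocatorImpl concept these internals rely on
// (hypothetical type, not part of this header): any type exposing this pair
// of calls can back the arena, with the Alloc front end declared above
// creating the shared _memblockimpl via create() and releasing it via
// decrementRefCount().
//
//   #include <cstdlib>
//
//   struct MallocImpl
//   {
//     void * allocate( std::size_t numBytes ) { return std::malloc( numBytes ); }
//     void deallocate( void * ptr )           { std::free( ptr ); }
//   };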