Diffstat
 src/third-party/ArenaAlloc/arenaallocimpl.h | 286 ++++++++++++++++++++++++++++
 1 file changed, 286 insertions(+), 0 deletions(-)
diff --git a/src/third-party/ArenaAlloc/arenaallocimpl.h b/src/third-party/ArenaAlloc/arenaallocimpl.h
new file mode 100644
index 0000000..12484f0
--- /dev/null
+++ b/src/third-party/ArenaAlloc/arenaallocimpl.h
@@ -0,0 +1,286 @@
+// -*- c++ -*-
+/******************************************************************************
+ **  arenaallocimpl.h
+ **
+ **  Internal implementation types of the arena allocator
+ **  MIT license
+ *****************************************************************************/
+
+#ifndef _ARENA_ALLOC_IMPL_H
+#define _ARENA_ALLOC_IMPL_H
+
+#include <cstddef> // std::size_t
+#include <new>     // placement new
+
+#ifdef ARENA_ALLOC_DEBUG
+#include <stdio.h>
+#endif
+
+namespace ArenaAlloc
+{
+
+  template< typename T, typename A, typename M >
+  class Alloc;
+
+  // internal structure for tracking memory blocks
+  template< typename AllocImpl >
+  struct _memblock
+  {
+    // Allocations are rounded up to a multiple of the size of this
+    // union to maintain proper alignment for any pointer and double
+    // values stored in the allocation.
+    // A future goal is to support even stricter alignment, for example
+    // to support cache alignment, special device-dependent mappings,
+    // or GPU ops.
+    union _roundsize {
+      double d;
+      void* p;
+    };
+
+    _memblock* m_next{nullptr}; // blocks kept in a linked list for cleanup at the end
+    std::size_t m_bufferSize;   // size of the buffer
+    std::size_t m_index;        // index of the next allocatable byte in the block
+    char* m_buffer;             // pointer to the large block to allocate from
+
+    _memblock( std::size_t bufferSize, AllocImpl& allocImpl ):
+      m_bufferSize( roundSize( bufferSize ) ), m_index( 0 ),
+      m_buffer( reinterpret_cast<char*>( allocImpl.allocate( m_bufferSize ) ) )
+      // reading m_bufferSize here is safe because it is declared, and
+      // therefore initialized, before m_buffer
+    {
+    }
+
+    std::size_t roundSize( std::size_t numBytes )
+    {
+      // this is subject to overflow. calling logic should not permit
+      // an attempt to allocate a really massive size,
+      // i.e. an attempt to allocate 10s of terabytes should be an error
+      return ( ( numBytes + sizeof( _roundsize ) - 1 ) /
+               sizeof( _roundsize ) ) * sizeof( _roundsize );
+    }
+
+    char * allocate( std::size_t numBytes )
+    {
+      std::size_t roundedSize = roundSize( numBytes );
+      if( roundedSize + m_index > m_bufferSize )
+        return 0; // this block is exhausted; the caller must start a new one
+
+      char * ptrToReturn = &m_buffer[ m_index ];
+      m_index += roundedSize;
+      return ptrToReturn;
+    }
+
+    void dispose( AllocImpl& impl )
+    {
+      impl.deallocate( m_buffer );
+    }
+
+    ~_memblock()
+    {
+    }
+  };
+
+  template< typename AllocatorImpl, typename Derived >
+  struct _memblockimplbase
+  {
+    AllocatorImpl m_alloc;
+    std::size_t m_refCount;          // when refs -> 0, delete this
+    std::size_t m_defaultSize;
+
+    std::size_t m_numAllocate;       // number of times allocate called
+    std::size_t m_numDeallocate;     // number of times deallocate called
+    std::size_t m_numBytesAllocated; // a good estimate of the amount of space used
+
+    _memblock<AllocatorImpl> * m_head;
+    _memblock<AllocatorImpl> * m_current;
+
+    // round up to the next power of 2 if not already a power of 2
+    std::size_t roundpow2( std::size_t value )
+    {
+      // note this works because subtracting 1 is equivalent to
+      // inverting the lowest set bit and complementing any
+      // bits lower than that. only a power of 2
+      // will yield 0 in the following check
+      if( 0 == ( value & ( value - 1 ) ) )
+        return value; // already a power of 2
+
+      // fold value over itself. this sets every bit below the highest
+      // set bit to 1. who said bit twiddling wasn't practical?
+      value |= value >> 1;
+      value |= value >> 2;
+      value |= value >> 4;
+      value |= value >> 8;
+      value |= value >> 16;
+      value |= value >> 16 >> 16; // i.e. >> 32 where std::size_t is 64 bits,
+                                  // without undefined behavior where it is 32 bits
+
+      return value + 1;
+    }
+
+    _memblockimplbase( std::size_t defaultSize, AllocatorImpl& allocator ):
+      m_alloc( allocator ),
+      m_refCount( 1 ),
+      m_defaultSize( defaultSize ),
+      m_numAllocate( 0 ),
+      m_numDeallocate( 0 ),
+      m_numBytesAllocated( 0 ),
+      m_head( 0 ),
+      m_current( 0 )
+    {
+      if( m_defaultSize < 256 )
+      {
+        m_defaultSize = 256; // anything less is academic. a more practical size is 4k or more
+      }
+      else if( m_defaultSize > 1024UL*1024*1024*16 )
+      {
+        // when this becomes a problem, this package has succeeded beyond my wildest expectations
+        m_defaultSize = 1024UL*1024*1024*16;
+      }
+
+      // for convenience the block size should be a power of 2;
+      // round up to the next power of 2
+      m_defaultSize = roundpow2( m_defaultSize );
+      allocateNewBlock( m_defaultSize );
+    }
+
+    char * allocate( std::size_t numBytes )
+    {
+      char * ptrToReturn = m_current->allocate( numBytes );
+      if( !ptrToReturn )
+      {
+        allocateNewBlock( numBytes > m_defaultSize / 2 ? roundpow2( numBytes*2 ) :
+                          m_defaultSize );
+
+        ptrToReturn = m_current->allocate( numBytes );
+      }
+
+#ifdef ARENA_ALLOC_DEBUG
+      fprintf( stdout, "_memblockimpl=%p allocated %zu bytes at address=%p\n", this, numBytes, ptrToReturn );
+#endif
+
+      ++ m_numAllocate;
+      m_numBytesAllocated += numBytes; // does not account for the small overhead of tracking the allocation
+
+      return ptrToReturn;
+    }
+
+    void allocateNewBlock( std::size_t blockSize )
+    {
+      _memblock<AllocatorImpl> * newBlock = new ( m_alloc.allocate( sizeof( _memblock<AllocatorImpl> ) ) )
+        _memblock<AllocatorImpl>( blockSize, m_alloc );
+
+#ifdef ARENA_ALLOC_DEBUG
+      fprintf( stdout, "_memblockimplbase=%p allocating a new block of size=%zu\n", this, blockSize );
+#endif
+
+      if( m_head == 0 )
+      {
+        m_head = m_current = newBlock;
+      }
+      else
+      {
+        m_current->m_next = newBlock;
+        m_current = newBlock;
+      }
+    }
+
+    void deallocate( void * ptr )
+    {
+      // memory is not reclaimed per object; it is released in bulk by
+      // clear() when the last reference goes away. only count the call.
+      ++ m_numDeallocate;
+    }
+
+    size_t getNumAllocations() { return m_numAllocate; }
+    size_t getNumDeallocations() { return m_numDeallocate; }
+    size_t getNumBytesAllocated() { return m_numBytesAllocated; }
+
+    void clear()
+    {
+      _memblock<AllocatorImpl> * block = m_head;
+      while( block )
+      {
+        _memblock<AllocatorImpl> * curr = block;
+        block = block->m_next;
+        curr->dispose( m_alloc );
+        curr->~_memblock<AllocatorImpl>();
+        m_alloc.deallocate( curr );
+      }
+    }
+
+    // The ref counting model does not permit sharing this object across
+    // multiple threads unless an external locking mechanism is applied
+    // to ensure the atomicity of the reference count.
+    void incrementRefCount()
+    {
+      ++m_refCount;
+#ifdef ARENA_ALLOC_DEBUG
+      fprintf( stdout, "ref count on _memblockimplbase=%p incremented to %zu\n", this, m_refCount );
+#endif
+    }
+
+    void decrementRefCount()
+    {
+      --m_refCount;
+#ifdef ARENA_ALLOC_DEBUG
+      fprintf( stdout, "ref count on _memblockimplbase=%p decremented to %zu\n", this, m_refCount );
+#endif
+
+      if( m_refCount == 0 )
+      {
+        Derived::destroy( static_cast<Derived*>( this ) );
+      }
+    }
+  };
+
+  // Each allocator points to an instance of _memblockimpl, which
+  // contains the list of _memblock objects and other tracking info,
+  // including a refcount.
+  // This object is instantiated in space obtained from the allocator
+  // implementation, the component on which allocate/deallocate are
+  // called to obtain storage.
+  template< typename AllocatorImpl >
+  struct _memblockimpl : public _memblockimplbase<AllocatorImpl, _memblockimpl<AllocatorImpl> >
+  {
+  private:
+
+    typedef _memblockimplbase< AllocatorImpl, _memblockimpl<AllocatorImpl> > base_t;
+    friend struct _memblockimplbase< AllocatorImpl, _memblockimpl<AllocatorImpl> >;
+
+    // to get around some sticky access issues between Alloc<T1> and Alloc<T2> when sharing
+    // the implementation.
+    template< typename U, typename A, typename M >
+    friend class Alloc;
+
+    template< typename T >
+    static void assign( const Alloc<T, AllocatorImpl, _memblockimpl<AllocatorImpl> >& src,
+                        _memblockimpl *& dest );
+
+    static _memblockimpl<AllocatorImpl> * create( size_t defaultSize, AllocatorImpl& alloc )
+    {
+      return new ( alloc.allocate( sizeof( _memblockimpl ) ) ) _memblockimpl<AllocatorImpl>( defaultSize,
+                                                                                             alloc );
+    }
+
+    static void destroy( _memblockimpl<AllocatorImpl> * objToDestroy )
+    {
+      // copy the allocator implementation first: it lives inside the
+      // object about to be destroyed
+      AllocatorImpl allocImpl = objToDestroy->m_alloc;
+      objToDestroy->~_memblockimpl<AllocatorImpl>();
+      allocImpl.deallocate( objToDestroy );
+    }
+
+    _memblockimpl( std::size_t defaultSize, AllocatorImpl& allocImpl ):
+      _memblockimplbase<AllocatorImpl, _memblockimpl<AllocatorImpl> >( defaultSize, allocImpl )
+    {
+#ifdef ARENA_ALLOC_DEBUG
+      fprintf( stdout, "_memblockimpl=%p constructed with default size=%zu\n", this,
+               base_t::m_defaultSize );
+#endif
+    }
+
+    ~_memblockimpl()
+    {
+#ifdef ARENA_ALLOC_DEBUG
+      fprintf( stdout, "~_memblockimpl() called on _memblockimpl=%p\n", this );
+#endif
+      base_t::clear();
+    }
+  };
+}
+
+#endif // _ARENA_ALLOC_IMPL_H
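For context, a minimal usage sketch of how these internals are typically driven. It assumes the companion arenaalloc.h header (not part of this diff) defines ArenaAlloc::Alloc<T> as a std::allocator-compatible adapter over _memblockimpl, with defaults for its second and third template parameters and a constructor taking a default block size; the header name and that signature are assumptions inferred from the forward declaration above, not confirmed by this diff.

// A minimal sketch, not part of the diff above. Assumed: arenaalloc.h
// provides ArenaAlloc::Alloc<T> with a std::allocator-compatible
// interface and a default-block-size constructor.
#include <map>
#include <string>
#include "arenaalloc.h"

int main()
{
  typedef std::pair<const int, std::string> value_t;

  // one arena backs every node the map allocates; 65536 is the requested
  // default block size, which roundpow2 leaves unchanged
  ArenaAlloc::Alloc<value_t> arena( 65536 );

  std::map<int, std::string, std::less<int>, ArenaAlloc::Alloc<value_t> >
    m( std::less<int>(), arena );

  for( int i = 0; i < 1000; ++i )
    m[ i ] = "some value";

  // per-node deallocate calls only bump m_numDeallocate; the blocks
  // themselves are released in bulk by clear() when the last copy of
  // the allocator drops its reference
  return 0;
}

The point of the design is visible here: thousands of small node allocations are served by bumping m_index inside a handful of large _memblock buffers, and teardown is a short walk of the block list rather than one free per node.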