// -*- c++ -*-
/******************************************************************************
 **  arenaallocimpl.h
 **  
 **  Internal implementation types of the arena allocator
 **  MIT license
 *****************************************************************************/

#ifndef _ARENA_ALLOC_IMPL_H
#define _ARENA_ALLOC_IMPL_H

#include <cstddef> // std::size_t
#include <new>     // placement new

#ifdef ARENA_ALLOC_DEBUG
#include <stdio.h>
#endif

namespace ArenaAlloc
{

  template< typename T, typename A, typename M >
  class Alloc;
  
  // internal structure for tracking memory blocks
  template < typename AllocImpl >
  struct _memblock
  {
    // allocations are rounded up to a multiple of the size of this
    // union to maintain proper alignment for any pointer and double
    // values stored in the allocation.
    // A future goal is to support even stricter alignment, for example
    // cache alignment, special device-dependent mappings, or GPU ops.
    union _roundsize {
      double d;
      void* p;
    };

    _memblock* m_next{nullptr}; // blocks kept in a linked list for cleanup at the end
    std::size_t m_bufferSize;   // size of the buffer
    std::size_t m_index;        // index of the next allocatable byte in the block
    char* m_buffer;             // pointer to the large block to allocate from

    _memblock( std::size_t bufferSize, AllocImpl& allocImpl ):
      m_bufferSize( roundSize( bufferSize ) ),
      m_index( 0 ),
      // m_bufferSize is declared before m_buffer, so it is already
      // initialized here; allocating the rounded size keeps the bounds
      // check in allocate() consistent with the actual buffer size
      m_buffer( reinterpret_cast<char*>( allocImpl.allocate( m_bufferSize ) ) )
    {
    }

    std::size_t roundSize( std::size_t numBytes )
    {
      // this is subject to overflow.  calling logic should not permit
      // an attempt to allocate a really massive size,
      // i.e. an attempt to allocate tens of terabytes should be an error
      return ( ( numBytes + sizeof( _roundsize ) - 1 ) /
               sizeof( _roundsize ) ) * sizeof( _roundsize );
    }
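
    // e.g. on a typical LP64 target where sizeof( _roundsize ) == 8:
    //   roundSize( 1 ) == 8, roundSize( 8 ) == 8, roundSize( 13 ) == 16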

    char * allocate( std::size_t numBytes )
    {
      std::size_t roundedSize = roundSize( numBytes );
      if( roundedSize + m_index > m_bufferSize )
        return nullptr; // block exhausted; the caller allocates a new block

      char * ptrToReturn = &m_buffer[ m_index ];
      m_index += roundedSize;
      return ptrToReturn;
    }
  
    // the owner must call dispose() before destruction; the destructor
    // cannot free m_buffer itself because it holds no allocator reference
    void dispose( AllocImpl& impl )
    {
      impl.deallocate( m_buffer );
    }

    ~_memblock()
    {
    }
  };
  
  template< typename AllocatorImpl, typename Derived >
  struct _memblockimplbase
  {
    AllocatorImpl m_alloc;
    std::size_t m_refCount; // when refs -> 0 delete this
    std::size_t m_defaultSize;
        
    std::size_t m_numAllocate; // number of times allocate called
    std::size_t m_numDeallocate; // number of time deallocate called
    std::size_t m_numBytesAllocated; // A good estimate of amount of space used
    
    _memblock<AllocatorImpl> * m_head;
    _memblock<AllocatorImpl> * m_current;

    // round up to the next power of 2 if not already
    // a power of 2
    std::size_t roundpow2( std::size_t value )
    {
      // note this works because subtracting 1 is equivalent to
      // inverting the lowest set bit and complementing any
      // bits lower than that.  only a power of 2
      // will yield 0 in the following check
      if( 0 == ( value & ( value - 1 ) ) )
        return value; // already a power of 2
      
      // fold value over itself.  this sets all bits below the highest
      // set bit of value to 1.  who said bit twiddling wasn't practical?
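      // e.g. 0b01010000 (80) folds to 0b01111111 (127); adding 1 at the
      // end yields 0b10000000 (128), the next power of 2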
      value |= value >> 1;
      value |= value >> 2;
      value |= value >> 4;
      value |= value >> 8;
      value |= value >> 16;
      // shift by half the width of std::size_t: 32 on typical 64-bit
      // targets, and a harmless repeat of the previous shift on 32-bit
      // targets, avoiding an undefined shift >= the type width
      value |= value >> ( sizeof( std::size_t ) * 4 );

      return value + 1;            
    }

    _memblockimplbase( std::size_t defaultSize, AllocatorImpl& allocator ):
      m_alloc( allocator ),
      m_refCount( 1 ),
      m_defaultSize( defaultSize ),
      m_numAllocate( 0 ),
      m_numDeallocate( 0 ),
      m_numBytesAllocated( 0 ),
      m_head( nullptr ),
      m_current( nullptr )
    {
      if( m_defaultSize < 256 )
      {
        m_defaultSize = 256; // anything less is academic. a more practical size is 4k or more
      }
      else if( m_defaultSize > 1024UL*1024*1024*16 )
      {
        // when this becomes a problem, this package has succeeded beyond my wildest expectations
        m_defaultSize = 1024UL*1024*1024*16;
      }
      
      // for convenience block size should be a power of 2
      // round up to next power of 2
      m_defaultSize = roundpow2( m_defaultSize );
      allocateNewBlock( m_defaultSize );      
    }
        
    char * allocate( std::size_t numBytes )
    {
      char * ptrToReturn = m_current->allocate( numBytes );
      if( !ptrToReturn )
      {
        // large requests get a dedicated block sized to the next power of 2
        // above twice the request; everything else gets a default-size block
        allocateNewBlock( numBytes > m_defaultSize / 2 ? roundpow2( numBytes*2 ) :
                          m_defaultSize );

        ptrToReturn = m_current->allocate( numBytes );
      }

#ifdef ARENA_ALLOC_DEBUG
      fprintf( stdout, "_memblockimpl=%p allocated %zu bytes at address=%p\n",
               (void*)this, numBytes, (void*)ptrToReturn );
#endif

      ++ m_numAllocate;
      m_numBytesAllocated += numBytes; // does not account for the small overhead in tracking the allocation

      return ptrToReturn;
    }
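
    // Growth policy example: with m_defaultSize == 4096, a 3000-byte request
    // that misses in allocate() above reaches allocateNewBlock() below with
    // blockSize == roundpow2( 6000 ) == 8192.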
    
    void allocateNewBlock( std::size_t blockSize )
    {
      // placement-construct the block header inside storage obtained from
      // the underlying allocator implementation
      _memblock<AllocatorImpl> * newBlock =
        new ( m_alloc.allocate( sizeof( _memblock<AllocatorImpl> ) ) )
          _memblock<AllocatorImpl>( blockSize, m_alloc );

#ifdef ARENA_ALLOC_DEBUG
      fprintf( stdout, "_memblockimplbase=%p allocating a new block of size=%zu\n",
               (void*)this, blockSize );
#endif

      if( m_head == nullptr )
      {
        m_head = m_current = newBlock;
      }
      else
      {
        m_current->m_next = newBlock;
        m_current = newBlock;
      }
    }
    
    void deallocate( void * ptr )
    {
      // individual deallocation is a no-op in an arena; storage is
      // reclaimed in bulk by clear()
      (void) ptr;
      ++ m_numDeallocate;
    }
    
    std::size_t getNumAllocations() { return m_numAllocate; }
    std::size_t getNumDeallocations() { return m_numDeallocate; }
    std::size_t getNumBytesAllocated() { return m_numBytesAllocated; }
  
    void clear()
    {
      _memblock<AllocatorImpl> * block = m_head;
      while( block )
      {
        _memblock<AllocatorImpl> * curr = block;
        block = block->m_next;
        curr->dispose( m_alloc );    // free the block's buffer
        curr->~_memblock<AllocatorImpl>();
        m_alloc.deallocate( curr );  // free the block header itself
      }
    }

    // The ref counting model does not permit the sharing of 
    // this object across multiple threads unless an external locking mechanism is applied 
    // to ensure the atomicity of the reference count.  
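    // (A drop-in alternative, not used here, would be to declare m_refCount
    // as std::atomic<std::size_t>: that makes the count itself thread safe,
    // though callers would still need to synchronize the allocation paths.)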
    void incrementRefCount()
    {
      ++m_refCount;
#ifdef ARENA_ALLOC_DEBUG
      fprintf( stdout, "ref count on _memblockimplbase=%p incremented to %zu\n",
               (void*)this, m_refCount );
#endif
    }

    void decrementRefCount()
    {
      --m_refCount;
#ifdef ARENA_ALLOC_DEBUG
      fprintf( stdout, "ref count on _memblockimplbase=%p decremented to %zu\n",
               (void*)this, m_refCount );
#endif

      if( m_refCount == 0 )
      {
        Derived::destroy( static_cast<Derived*>(this) );
      }
    }
  };


  // Each allocator points to an instance of _memblockimpl which
  // contains the list of _memblock objects and other tracking info
  // including a refcount.
  // This object is instantiated in space obtained from the allocator
  // implementation. The allocator implementation is the component
  // on which allocate/deallocate are called to obtain storage from.
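  //
  // Lifecycle sketch (AllocatorImpl stands for any minimal allocator with
  // allocate()/deallocate(), as assumed throughout this header):
  //   impl = _memblockimpl<AllocatorImpl>::create( 4096, alloc ); // refcount 1
  //   impl->incrementRefCount();  // a second Alloc shares the implementation
  //   impl->decrementRefCount();  // back to 1
  //   impl->decrementRefCount();  // reaches 0 -> destroy() reclaims everything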
  template< typename AllocatorImpl >
  struct _memblockimpl : public _memblockimplbase<AllocatorImpl, _memblockimpl<AllocatorImpl> >
  {     
  private:

    typedef struct _memblockimplbase< AllocatorImpl, _memblockimpl<AllocatorImpl> > base_t;
    friend struct _memblockimplbase< AllocatorImpl, _memblockimpl<AllocatorImpl> >;
    
    // to get around some sticky access issues between Alloc<T1> and Alloc<T2> when sharing
    // the implementation.
    template <typename U, typename A, typename M >
    friend class Alloc;
    
    template< typename T >
    static void assign( const Alloc<T, AllocatorImpl, _memblockimpl<AllocatorImpl> >& src,
                        _memblockimpl *& dest );
        
    static _memblockimpl<AllocatorImpl> * create( std::size_t defaultSize, AllocatorImpl& alloc )
    {
      return new ( alloc.allocate( sizeof( _memblockimpl ) ) )
        _memblockimpl<AllocatorImpl>( defaultSize, alloc );
    }
   
    static void destroy( _memblockimpl<AllocatorImpl> * objToDestroy )
    {
      // copy the allocator out first: the member lives inside the object
      // about to be destroyed
      AllocatorImpl allocImpl = objToDestroy->m_alloc;
      objToDestroy->~_memblockimpl<AllocatorImpl>();
      allocImpl.deallocate( objToDestroy );
    }
    
    _memblockimpl( std::size_t defaultSize, AllocatorImpl& allocImpl ):
      _memblockimplbase<AllocatorImpl, _memblockimpl<AllocatorImpl> >( defaultSize, allocImpl )
    {
#ifdef ARENA_ALLOC_DEBUG
      fprintf( stdout, "_memblockimpl=%p constructed with default size=%zu\n",
               (void*)this, base_t::m_defaultSize );
#endif
    }
  
    ~_memblockimpl()
    {
#ifdef ARENA_ALLOC_DEBUG
      fprintf( stdout, "~_memblockimpl() called on _memblockimpl=%p\n", (void*)this );
#endif
      base_t::clear();
    }
  };
}

#endif