summaryrefslogtreecommitdiffstats
path: root/third_party/xsimd/include/xsimd/memory/xsimd_aligned_allocator.hpp
blob: 3918d68a704c0b62da47efe2f390e9131abd9ad7 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
/***************************************************************************
 * Copyright (c) Johan Mabille, Sylvain Corlay, Wolf Vollprecht and         *
 * Martin Renou                                                             *
 * Copyright (c) QuantStack                                                 *
 * Copyright (c) Serge Guelton                                              *
 *                                                                          *
 * Distributed under the terms of the BSD 3-Clause License.                 *
 *                                                                          *
 * The full license is in the file LICENSE, distributed with this software. *
 ****************************************************************************/

#ifndef XSIMD_ALIGNED_ALLOCATOR_HPP
#define XSIMD_ALIGNED_ALLOCATOR_HPP

#include <algorithm>
#include <cstddef>
#include <utility>
#ifdef _WIN32
#include <malloc.h>
#else
#include <cstdlib>
#endif

#include <cassert>
#include <memory>

#include "../config/xsimd_arch.hpp"

namespace xsimd
{

    /**
     * @class aligned_allocator
     * @brief Allocator for aligned memory
     *
     * The aligned_allocator class template is an allocator that
     * performs memory allocation aligned by the specified value.
     * The class declares no data member, so instances carry no state.
     *
     * @tparam T type of objects to allocate.
     * @tparam Align alignment in bytes.
     */
    template <class T, size_t Align>
    class aligned_allocator
    {
    public:
        // Member types describing the allocated objects and the size/distance types.
        using value_type = T;
        using pointer = T*;
        using const_pointer = const T*;
        using reference = T&;
        using const_reference = const T&;
        using size_type = size_t;
        using difference_type = ptrdiff_t;

        // Alignment, in bytes, requested for every allocation (mirrors Align).
        static constexpr size_t alignment = Align;

        // Maps this allocator to another value type while keeping the same alignment.
        template <class U>
        struct rebind
        {
            using other = aligned_allocator<U, Align>;
        };

        inline aligned_allocator() noexcept;
        inline aligned_allocator(const aligned_allocator& rhs) noexcept;

        // Conversion from an allocator of another value type with the same alignment.
        template <class U>
        inline aligned_allocator(const aligned_allocator<U, Align>& rhs) noexcept;

        inline ~aligned_allocator();

        // Address of the referenced object.
        inline pointer address(reference) noexcept;
        inline const_pointer address(const_reference) const noexcept;

        // Storage management for n objects of type T; the hint is accepted for
        // interface compatibility only and is ignored by the implementation.
        inline pointer allocate(size_type n, const void* hint = nullptr);
        inline void deallocate(pointer p, size_type n);

        // Largest element count that can be requested; size_max() is the
        // deprecated spelling of max_size().
        inline size_type max_size() const noexcept;
        inline size_type size_max() const noexcept;

        // In-place construction of a U at p from the forwarded arguments.
        template <class U, class... Args>
        inline void construct(U* p, Args&&... args);

        // In-place destruction of the U located at p.
        template <class U>
        inline void destroy(U* p);
    };

    // Equality comparison between allocators of possibly different value types
    // and alignments; defined further down in this header.
    template <class T1, size_t Align1, class T2, size_t Align2>
    inline bool operator==(const aligned_allocator<T1, Align1>& lhs,
                           const aligned_allocator<T2, Align2>& rhs) noexcept;

    // Inequality comparison, counterpart of operator== above.
    template <class T1, size_t Align1, class T2, size_t Align2>
    inline bool operator!=(const aligned_allocator<T1, Align1>& lhs,
                           const aligned_allocator<T2, Align2>& rhs) noexcept;

    // Raw aligned allocation / release pair; a block obtained from
    // aligned_malloc is meant to be released with aligned_free.
    inline void* aligned_malloc(size_t size, size_t alignment);
    inline void aligned_free(void* ptr);

    // Number of leading elements of the array at p (size elements long) to skip
    // before reaching a block boundary, for blocks of block_size elements.
    template <class T>
    inline size_t get_alignment_offset(const T* p, size_t size, size_t block_size);

    /************************************
     * aligned_allocator implementation *
     ************************************/

    /**
     * Default constructor. The allocator holds no data, so there is
     * nothing to initialize.
     */
    template <class T, size_t A>
    inline aligned_allocator<T, A>::aligned_allocator() noexcept = default;

    /**
     * Copy constructor. The allocator holds no data, so there is
     * nothing to copy from the source.
     */
    template <class T, size_t A>
    inline aligned_allocator<T, A>::aligned_allocator(const aligned_allocator&) noexcept = default;

    /**
     * Extended copy constructor: builds an allocator for \c T from an allocator
     * for another value type \c U sharing the same alignment \c A.
     * The argument is ignored and the body is empty, so nothing is copied.
     */
    template <class T, size_t A>
    template <class U>
    inline aligned_allocator<T, A>::aligned_allocator(const aligned_allocator<U, A>&) noexcept
    {
    }

    /**
     * Destructor. The allocator owns no resource, so there is
     * nothing to release.
     */
    template <class T, size_t A>
    inline aligned_allocator<T, A>::~aligned_allocator() = default;

    /**
     * Returns the actual address of \c r even in presence of overloaded \c operator&.
     * @param r the object to acquire address of.
     * @return the actual address of \c r.
     */
    template <class T, size_t A>
    inline auto
    aligned_allocator<T, A>::address(reference r) noexcept -> pointer
    {
        // A plain `&r` would dispatch to a user-provided operator& on T;
        // std::addressof always yields the real object address.
        return std::addressof(r);
    }

    /**
     * Returns the actual address of \c r even in presence of overloaded \c operator&.
     * @param r the constant object to acquire address of.
     * @return the actual address of \c r.
     */
    template <class T, size_t A>
    inline auto
    aligned_allocator<T, A>::address(const_reference r) const noexcept -> const_pointer
    {
        // A plain `&r` would dispatch to a user-provided operator& on T;
        // std::addressof always yields the real object address.
        return std::addressof(r);
    }

    /**
     * Allocates <tt>n * sizeof(T)</tt> bytes of uninitialized memory, aligned by \c A.
     * The alignment may require some extra memory allocation.
     *
     * A request with \c n greater than max_size() is rejected before any
     * allocation is attempted: <tt>n * sizeof(T)</tt> would otherwise wrap
     * around and silently produce a block that is too small.
     *
     * @param n the number of objects to allocate storage for.
     * @param hint unused parameter provided for standard compliance.
     * @return a pointer to the first byte of a memory block suitably aligned and sufficient to
     * hold an array of \c n objects of type \c T. When exceptions are disabled, a null
     * pointer is returned if the request cannot be satisfied.
     * @throws std::bad_alloc when exceptions are enabled and either \c n exceeds
     * max_size() or the underlying allocation returns a null pointer.
     */
    template <class T, size_t A>
    inline auto
    aligned_allocator<T, A>::allocate(size_type n, const void*) -> pointer
    {
        // Guard against overflow of the byte count computed below.
        if (n > max_size())
        {
#if defined(_CPPUNWIND) || defined(__cpp_exceptions)
            throw std::bad_alloc();
#else
            return nullptr;
#endif
        }

        // aligned_malloc hands back a void*, for which static_cast is the proper conversion.
        pointer res = static_cast<pointer>(aligned_malloc(sizeof(T) * n, A));
#if defined(_CPPUNWIND) || defined(__cpp_exceptions)
        if (res == nullptr)
            throw std::bad_alloc();
#endif
        return res;
    }

    /**
     * Releases the storage designated by \c p through aligned_free(). \c p must come from
     * an earlier call to allocate(), and \c n must match the element count given to that
     * call; otherwise, the behavior is undefined.
     * @param p pointer obtained from allocate().
     * @param n number of objects earlier passed to allocate() (not used by the implementation).
     */
    template <class T, size_t A>
    inline void aligned_allocator<T, A>::deallocate(pointer p, size_type /*n*/)
    {
        void* const block = p;
        aligned_free(block);
    }

    /**
     * Gives the largest value of \c n for which the call allocate(n, 0)
     * could theoretically succeed, i.e. the largest value representable by
     * \c size_type divided by the size of one element.
     * @return the maximum supported allocated size, in elements.
     */
    template <class T, size_t A>
    inline auto
    aligned_allocator<T, A>::max_size() const noexcept -> size_type
    {
        // Converting -1 to an unsigned type yields its largest value.
        const size_type largest_byte_count = static_cast<size_type>(-1);
        return largest_byte_count / sizeof(T);
    }

    /**
     * Deprecated spelling of max_size(); use max_size() instead.
     * @return the same value as max_size().
     */
    template <class T, size_t A>
    inline auto
    aligned_allocator<T, A>::size_max() const noexcept -> size_type
    {
        return max_size();
    }

    /**
     * Builds an object of type \c U with placement-new in the allocated,
     * uninitialized memory designated by \c p.
     * @param p pointer to allocated uninitialized memory.
     * @param args the constructor arguments, perfectly forwarded to the constructor of \c U.
     */
    template <class T, size_t A>
    template <class U, class... Args>
    inline void aligned_allocator<T, A>::construct(U* p, Args&&... args)
    {
        // Strip any cv-qualification so that the address can be handed to placement-new.
        void* const storage = const_cast<void*>(static_cast<const volatile void*>(p));
        new (storage) U(std::forward<Args>(args)...);
    }

    /**
     * Runs the destructor of the object located at \c p. The storage itself
     * is left untouched.
     * @param p pointer to the object that is going to be destroyed.
     */
    template <class T, size_t A>
    template <class U>
    inline void aligned_allocator<T, A>::destroy(U* p)
    {
        (*p).~U();
    }

    /**
     * @defgroup allocator_comparison Comparison operators
     */

    /**
     * @ingroup allocator_comparison
     * Compares two aligned memory allocators for equality. Since allocators
     * are stateless, the result is \c true iff <tt>A1 == A2</tt>; the
     * arguments themselves are not inspected.
     * @return true if the allocators have the same alignment.
     */
    template <class T1, size_t A1, class T2, size_t A2>
    inline bool operator==(const aligned_allocator<T1, A1>& /*lhs*/,
                           const aligned_allocator<T2, A2>& /*rhs*/) noexcept
    {
        // The static member `alignment` of each operand is its alignment template argument.
        return A1 == A2;
    }

    /**
     * @ingroup allocator_comparison
     * Compares two aligned memory allocators for inequality, as the negation
     * of the equality comparison. Since allocators are stateless, the result
     * is \c true iff <tt>A1 != A2</tt>.
     * @param lhs aligned_allocator to compare.
     * @param rhs aligned_allocator to compare.
     * @return true if the allocators have different alignments.
     */
    template <class T1, size_t A1, class T2, size_t A2>
    inline bool operator!=(const aligned_allocator<T1, A1>& lhs,
                           const aligned_allocator<T2, A2>& rhs) noexcept
    {
        const bool same = (lhs == rhs);
        return !same;
    }

    /****************************************
     * aligned malloc / free implementation *
     ****************************************/

    namespace detail
    {
        /**
         * Platform dispatch for aligned allocation: \c _aligned_malloc when
         * \c _WIN32 is defined, \c posix_memalign otherwise.
         * In builds with assertions enabled, \c alignment is checked to be a
         * power of two no smaller than <tt>sizeof(void*)</tt>.
         * @param size number of bytes to allocate.
         * @param alignment requested alignment, in bytes.
         * @return the allocated block; on the \c posix_memalign path a null
         * pointer is returned when the call reports an error.
         */
        inline void* xaligned_malloc(size_t size, size_t alignment)
        {
            assert(((alignment & (alignment - 1)) == 0) && "alignment must be a power of two");
            assert((alignment >= sizeof(void*)) && "alignment must be at least the size of a pointer");
#ifndef _WIN32
            void* block = nullptr;
            // posix_memalign signals failure through a non-zero return code.
            const int status = posix_memalign(&block, alignment, size);
            return (status == 0) ? block : nullptr;
#else
            return _aligned_malloc(size, alignment);
#endif
        }

        /**
         * Releases a block obtained from xaligned_malloc with the matching
         * platform routine: \c _aligned_free when \c _WIN32 is defined,
         * \c free otherwise.
         * @param ptr block to release.
         */
        inline void xaligned_free(void* ptr)
        {
#ifndef _WIN32
            free(ptr);
#else
            _aligned_free(ptr);
#endif
        }
    }

    /**
     * Allocates \c size bytes aligned on \c alignment bytes by forwarding
     * both arguments unchanged to detail::xaligned_malloc.
     * @param size number of bytes to allocate.
     * @param alignment requested alignment, in bytes; detail::xaligned_malloc asserts
     * that it is a power of two and at least <tt>sizeof(void*)</tt>.
     * @return the pointer returned by detail::xaligned_malloc (null when its
     * \c posix_memalign path reports an error).
     */
    inline void* aligned_malloc(size_t size, size_t alignment)
    {
        return detail::xaligned_malloc(size, alignment);
    }

    /**
     * Releases a block by forwarding \c ptr to detail::xaligned_free, which
     * selects the platform release routine.
     * @param ptr block to release; presumably one obtained from aligned_malloc.
     */
    inline void aligned_free(void* ptr)
    {
        detail::xaligned_free(ptr);
    }

    /**
     * Computes how many leading elements of the array starting at \c p have to
     * be skipped to reach the first element sitting on a block boundary, a block
     * being \c block_size consecutive elements of type \c T.
     *
     * The computation relies on bit masks (<tt>sizeof(T) - 1</tt> and
     * <tt>block_size - 1</tt>), which act as a modulo only when \c sizeof(T)
     * and \c block_size are powers of two.
     *
     * @param p pointer to the first element of the array.
     * @param size number of elements in the array; upper bound of the result.
     * @param block_size number of elements per block.
     * @return 0 when \c block_size is 1; \c size when \c p is not aligned on
     * \c sizeof(T) (no element can then be on a block boundary); otherwise the
     * distance, in elements, to the next block boundary, clamped to \c size.
     */
    template <class T>
    inline size_t get_alignment_offset(const T* p, size_t size, size_t block_size)
    {
        // A block made of a single scalar puts every element on a boundary.
        if (block_size == 1)
        {
            return 0;
        }

        const size_t address = reinterpret_cast<size_t>(p);

        // If the array does not even start on an element boundary, none of
        // its elements can be well aligned.
        const bool off_element_boundary = (address & (sizeof(T) - 1)) != 0;
        if (off_element_boundary)
        {
            return size;
        }

        const size_t mask = block_size - 1;
        // Position of the first element inside its block, counted in elements.
        const size_t index_in_block = (address / sizeof(T)) & mask;
        // The trailing mask maps "already on a boundary" (block_size) to 0.
        const size_t until_boundary = (block_size - index_in_block) & mask;
        return std::min<size_t>(until_boundary, size);
    }

    /**
     * Allocator type selected for the architecture \c A:
     * <tt>aligned_allocator<T, A::alignment()></tt> when
     * <tt>A::requires_alignment()</tt> is true, <tt>std::allocator<T></tt> otherwise.
     * @tparam T type of objects to allocate.
     * @tparam A architecture type queried for its alignment requirements;
     * defaults to \c default_arch.
     */
    template <class T, class A = default_arch>
    using default_allocator = typename std::conditional<A::requires_alignment(),
                                                        aligned_allocator<T, A::alignment()>,
                                                        std::allocator<T>>::type;
}

#endif