Diffstat (limited to 'third_party/highway/hwy/aligned_allocator.cc')
-rw-r--r-- | third_party/highway/hwy/aligned_allocator.cc | 152
1 files changed, 152 insertions, 0 deletions
diff --git a/third_party/highway/hwy/aligned_allocator.cc b/third_party/highway/hwy/aligned_allocator.cc
new file mode 100644
index 0000000000..7b9947970e
--- /dev/null
+++ b/third_party/highway/hwy/aligned_allocator.cc
@@ -0,0 +1,152 @@
+// Copyright 2019 Google LLC
+// SPDX-License-Identifier: Apache-2.0
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "hwy/aligned_allocator.h"
+
+#include <stdarg.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>  // malloc
+
+#include <atomic>
+#include <limits>
+
+#include "hwy/base.h"
+
+namespace hwy {
+namespace {
+
+#if HWY_ARCH_RVV && defined(__riscv_vector)
+// Not actually an upper bound on the size, but this value prevents crossing a
+// 4K boundary (relevant on Andes).
+constexpr size_t kAlignment = HWY_MAX(HWY_ALIGNMENT, 4096);
+#else
+constexpr size_t kAlignment = HWY_ALIGNMENT;
+#endif
+
+#if HWY_ARCH_X86
+// On x86, aliasing can only occur at multiples of 2K, but that's too wasteful
+// if this is used for single-vector allocations. 256 is more reasonable.
+constexpr size_t kAlias = kAlignment * 4;
+#else
+constexpr size_t kAlias = kAlignment;
+#endif
+
+#pragma pack(push, 1)
+struct AllocationHeader {
+  void* allocated;
+  size_t payload_size;
+};
+#pragma pack(pop)
+
+// Returns a 'random' (cyclical) offset for AllocateAlignedBytes.
+size_t NextAlignedOffset() {
+  static std::atomic<uint32_t> next{0};
+  constexpr uint32_t kGroups = kAlias / kAlignment;
+  const uint32_t group = next.fetch_add(1, std::memory_order_relaxed) % kGroups;
+  const size_t offset = kAlignment * group;
+  HWY_DASSERT((offset % kAlignment == 0) && offset <= kAlias);
+  return offset;
+}
+
+}  // namespace
+
+HWY_DLLEXPORT void* AllocateAlignedBytes(const size_t payload_size,
+                                         AllocPtr alloc_ptr, void* opaque_ptr) {
+  HWY_ASSERT(payload_size != 0);  // likely a bug in caller
+  if (payload_size >= std::numeric_limits<size_t>::max() / 2) {
+    HWY_DASSERT(false && "payload_size too large");
+    return nullptr;
+  }
+
+  size_t offset = NextAlignedOffset();
+
+  // What: | misalign | unused | AllocationHeader |payload
+  // Size: |<= kAlias | offset                    |payload_size
+  //       ^allocated.^aligned.^header............^payload
+  // The header must immediately precede payload, which must remain aligned.
+  // To avoid wasting space, the header resides at the end of `unused`,
+  // which therefore cannot be empty (offset == 0).
+  if (offset == 0) {
+    offset = kAlignment;  // = RoundUpTo(sizeof(AllocationHeader), kAlignment)
+    static_assert(sizeof(AllocationHeader) <= kAlignment, "Else: round up");
+  }
+
+  const size_t allocated_size = kAlias + offset + payload_size;
+  void* allocated;
+  if (alloc_ptr == nullptr) {
+    allocated = malloc(allocated_size);
+  } else {
+    allocated = (*alloc_ptr)(opaque_ptr, allocated_size);
+  }
+  if (allocated == nullptr) return nullptr;
+  // Always round up even if already aligned - we already asked for kAlias
+  // extra bytes and there's no way to give them back.
+  uintptr_t aligned = reinterpret_cast<uintptr_t>(allocated) + kAlias;
+  static_assert((kAlias & (kAlias - 1)) == 0, "kAlias must be a power of 2");
+  static_assert(kAlias >= kAlignment, "Cannot align to more than kAlias");
+  aligned &= ~(kAlias - 1);
+
+  const uintptr_t payload = aligned + offset;  // still aligned
+
+  // Stash `allocated` and payload_size inside header for FreeAlignedBytes().
+  // The allocated_size can be reconstructed from the payload_size.
+  AllocationHeader* header = reinterpret_cast<AllocationHeader*>(payload) - 1;
+  header->allocated = allocated;
+  header->payload_size = payload_size;
+
+  return HWY_ASSUME_ALIGNED(reinterpret_cast<void*>(payload), kAlignment);
+}
+
+HWY_DLLEXPORT void FreeAlignedBytes(const void* aligned_pointer,
+                                    FreePtr free_ptr, void* opaque_ptr) {
+  if (aligned_pointer == nullptr) return;
+
+  const uintptr_t payload = reinterpret_cast<uintptr_t>(aligned_pointer);
+  HWY_DASSERT(payload % kAlignment == 0);
+  const AllocationHeader* header =
+      reinterpret_cast<const AllocationHeader*>(payload) - 1;
+
+  if (free_ptr == nullptr) {
+    free(header->allocated);
+  } else {
+    (*free_ptr)(opaque_ptr, header->allocated);
+  }
+}
+
+// static
+HWY_DLLEXPORT void AlignedDeleter::DeleteAlignedArray(void* aligned_pointer,
+                                                      FreePtr free_ptr,
+                                                      void* opaque_ptr,
+                                                      ArrayDeleter deleter) {
+  if (aligned_pointer == nullptr) return;
+
+  const uintptr_t payload = reinterpret_cast<uintptr_t>(aligned_pointer);
+  HWY_DASSERT(payload % kAlignment == 0);
+  const AllocationHeader* header =
+      reinterpret_cast<const AllocationHeader*>(payload) - 1;
+
+  if (deleter) {
+    (*deleter)(aligned_pointer, header->payload_size);
+  }
+
+  if (free_ptr == nullptr) {
+    free(header->allocated);
+  } else {
+    (*free_ptr)(opaque_ptr, header->allocated);
+  }
+}
+
+}  // namespace hwy
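
Editor's note: a minimal usage sketch for the AllocateAlignedBytes/FreeAlignedBytes pair above, not part of the commit. The two calls must receive matching hooks and the same opaque pointer so that header->allocated is returned to the allocator that produced it; passing nullptr for both hooks falls back to malloc/free. CountingAlloc, CountingFree, and total_bytes are hypothetical names introduced here for illustration only.

#include <cstdio>
#include <cstdlib>

#include "hwy/aligned_allocator.h"

namespace {

// Hypothetical hooks matching the hwy::AllocPtr/hwy::FreePtr signatures.
// Highway forwards the opaque pointer verbatim, so it can carry caller state.
void* CountingAlloc(void* opaque, size_t bytes) {
  *static_cast<size_t*>(opaque) += bytes;  // record what Highway requested
  return std::malloc(bytes);
}

void CountingFree(void* opaque, void* memory) {
  (void)opaque;
  std::free(memory);
}

}  // namespace

int main() {
  size_t total_bytes = 0;
  // Request 1000 payload bytes; Highway allocates extra for alignment and
  // for the AllocationHeader stored just before the returned pointer, so
  // total_bytes will exceed 1000.
  void* payload = hwy::AllocateAlignedBytes(1000, &CountingAlloc, &total_bytes);
  if (payload == nullptr) return 1;
  std::printf("requested 1000 bytes, allocator saw %zu\n", total_bytes);
  // Must pass the matching free hook: nullptr here would call free() on
  // memory that was obtained through CountingAlloc.
  hwy::FreeAlignedBytes(payload, &CountingFree, &total_bytes);
  return 0;
}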
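
A second sketch, also an illustration rather than part of the commit, of why NextAlignedOffset() cycles the starting offset: without it, every allocation would begin at the same address modulo kAlias, so same-sized hot buffers would contend for the same cache sets (and trigger 4K-aliasing stalls on x86). Since payload = aligned + offset and aligned is a multiple of kAlias, the payload address modulo kAlias equals the cycled offset. The constants below assume x86 with HWY_ALIGNMENT == 64, i.e. kAlias == 256.

#include <cstdint>
#include <cstdio>

#include "hwy/aligned_allocator.h"

int main() {
  constexpr size_t kProbe = 256;  // assumed kAlias on x86 (4 * 64)
  void* ptrs[4];
  for (int i = 0; i < 4; ++i) {
    ptrs[i] = hwy::AllocateAlignedBytes(8192, nullptr, nullptr);
    // Consecutive allocations land at different offsets modulo kAlias.
    // Group 0 is bumped to kAlignment to leave room for the header, so the
    // offsets cycle through 64, 64, 128, 192, 64, ... (the starting point
    // depends on allocations made earlier in the process).
    std::printf("allocation %d: address %% %zu = %zu\n", i, kProbe,
                static_cast<size_t>(
                    reinterpret_cast<uintptr_t>(ptrs[i]) % kProbe));
  }
  for (int i = 0; i < 4; ++i) {
    hwy::FreeAlignedBytes(ptrs[i], nullptr, nullptr);
  }
  return 0;
}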