Diffstat (limited to 'third_party/highway/hwy/aligned_allocator.cc')
 third_party/highway/hwy/aligned_allocator.cc | 152 ++++++++++++++++++++++++++
 1 file changed, 152 insertions(+), 0 deletions(-)
diff --git a/third_party/highway/hwy/aligned_allocator.cc b/third_party/highway/hwy/aligned_allocator.cc
new file mode 100644
index 0000000000..7b9947970e
--- /dev/null
+++ b/third_party/highway/hwy/aligned_allocator.cc
@@ -0,0 +1,152 @@
+// Copyright 2019 Google LLC
+// SPDX-License-Identifier: Apache-2.0
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "hwy/aligned_allocator.h"
+
+#include <stdarg.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h> // malloc
+
+#include <atomic>
+#include <limits>
+
+#include "hwy/base.h"
+
+namespace hwy {
+namespace {
+
+#if HWY_ARCH_RVV && defined(__riscv_vector)
+// Not actually an upper bound on the vector size, but this alignment prevents
+// a vector from crossing a 4K boundary (relevant on Andes hardware).
+constexpr size_t kAlignment = HWY_MAX(HWY_ALIGNMENT, 4096);
+#else
+constexpr size_t kAlignment = HWY_ALIGNMENT;
+#endif
+
+#if HWY_ARCH_X86
+// On x86, aliasing can only occur at multiples of 2K, but that's too wasteful
+// if this is used for single-vector allocations. 256 is more reasonable.
+constexpr size_t kAlias = kAlignment * 4;
+#else
+constexpr size_t kAlias = kAlignment;
+#endif
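+
+// For a concrete sense of scale (illustrative; HWY_ALIGNMENT is
+// target-dependent): with HWY_ALIGNMENT == 64 on x86, kAlignment == 64 and
+// kAlias == 256.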
+
+#pragma pack(push, 1)
+struct AllocationHeader {
+ void* allocated;      // pointer returned by malloc/alloc_ptr; passed to free
+ size_t payload_size;  // bytes requested by the caller, excluding overhead
+};
+#pragma pack(pop)
+
+// Returns a 'random' (cyclically increasing) byte offset for
+// AllocateAlignedBytes, so that successive allocations do not all map to the
+// same cache sets.
+size_t NextAlignedOffset() {
+ static std::atomic<uint32_t> next{0};
+ constexpr uint32_t kGroups = kAlias / kAlignment;
+ const uint32_t group = next.fetch_add(1, std::memory_order_relaxed) % kGroups;
+ const size_t offset = kAlignment * group;
+ HWY_DASSERT((offset % kAlignment == 0) && offset <= kAlias);
+ return offset;
+}
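+
+// Illustration (not part of the original file; assumes the x86 values above,
+// i.e. kAlignment == 64 and kAlias == 256, hence kGroups == 4): successive
+// calls return offsets 0, 64, 128, 192, 0, 64, ... The atomic counter makes
+// this safe to call from multiple threads concurrently.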
+
+} // namespace
+
+HWY_DLLEXPORT void* AllocateAlignedBytes(const size_t payload_size,
+ AllocPtr alloc_ptr, void* opaque_ptr) {
+ HWY_ASSERT(payload_size != 0); // likely a bug in caller
+ if (payload_size >= std::numeric_limits<size_t>::max() / 2) {
+ HWY_DASSERT(false && "payload_size too large");
+ return nullptr;
+ }
+
+ size_t offset = NextAlignedOffset();
+
+ // What: | misalign | unused | AllocationHeader |payload
+ // Size: |<= kAlias | offset                    |payload_size
+ //       ^allocated.^aligned.^header............^payload
+ // The header must immediately precede payload, which must remain aligned.
+ // To avoid wasting space, the header resides at the end of `unused`, which
+ // therefore cannot be empty, i.e. offset must not be zero.
+ if (offset == 0) {
+ offset = kAlignment; // = RoundUpTo(sizeof(AllocationHeader), kAlignment)
+ static_assert(sizeof(AllocationHeader) <= kAlignment, "Else: round up");
+ }
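+
+ // Worked example (hypothetical numbers, assuming kAlignment == 64,
+ // kAlias == 256 and a 16-byte header on a 64-bit target): payload_size =
+ // 1000 with offset = 64 requests 256 + 64 + 1000 = 1320 bytes; `aligned`
+ // below is the first 256-byte boundary strictly above `allocated`, the
+ // header occupies [aligned + 48, aligned + 64), and the payload begins at
+ // aligned + 64.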
+
+ const size_t allocated_size = kAlias + offset + payload_size;
+ void* allocated;
+ if (alloc_ptr == nullptr) {
+ allocated = malloc(allocated_size);
+ } else {
+ allocated = (*alloc_ptr)(opaque_ptr, allocated_size);
+ }
+ if (allocated == nullptr) return nullptr;
+ // Always round up even if already aligned - we already asked for kAlias
+ // extra bytes and there's no way to give them back.
+ uintptr_t aligned = reinterpret_cast<uintptr_t>(allocated) + kAlias;
+ static_assert((kAlias & (kAlias - 1)) == 0, "kAlias must be a power of 2");
+ static_assert(kAlias >= kAlignment, "Cannot align to more than kAlias");
+ aligned &= ~(kAlias - 1);
+
+ const uintptr_t payload = aligned + offset; // still aligned
+
+ // Stash `allocated` and payload_size inside header for FreeAlignedBytes().
+ // The allocated_size can be reconstructed from the payload_size.
+ AllocationHeader* header = reinterpret_cast<AllocationHeader*>(payload) - 1;
+ header->allocated = allocated;
+ header->payload_size = payload_size;
+
+ return HWY_ASSUME_ALIGNED(reinterpret_cast<void*>(payload), kAlignment);
+}
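+
+// Usage sketch (illustrative, not part of this file): passing null function
+// pointers selects the malloc/free fallbacks above.
+//
+//   void* p = hwy::AllocateAlignedBytes(/*payload_size=*/1024,
+//                                       /*alloc_ptr=*/nullptr,
+//                                       /*opaque_ptr=*/nullptr);
+//   if (p != nullptr) {
+//     // ... use 1024 bytes aligned to at least kAlignment ...
+//     hwy::FreeAlignedBytes(p, /*free_ptr=*/nullptr, /*opaque_ptr=*/nullptr);
+//   }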
+
+HWY_DLLEXPORT void FreeAlignedBytes(const void* aligned_pointer,
+ FreePtr free_ptr, void* opaque_ptr) {
+ if (aligned_pointer == nullptr) return;
+
+ const uintptr_t payload = reinterpret_cast<uintptr_t>(aligned_pointer);
+ HWY_DASSERT(payload % kAlignment == 0);
+ const AllocationHeader* header =
+ reinterpret_cast<const AllocationHeader*>(payload) - 1;
+
+ if (free_ptr == nullptr) {
+ free(header->allocated);
+ } else {
+ (*free_ptr)(opaque_ptr, header->allocated);
+ }
+}
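+
+// Sketch of a caller-supplied allocator pair (hypothetical names; the
+// AllocPtr/FreePtr signatures are declared in aligned_allocator.h). The same
+// opaque pointer is forwarded to both callbacks:
+//
+//   void* ArenaAlloc(void* opaque, size_t bytes) {
+//     return static_cast<MyArena*>(opaque)->Allocate(bytes);
+//   }
+//   void ArenaFree(void* opaque, void* memory) {
+//     static_cast<MyArena*>(opaque)->Release(memory);
+//   }
+//
+//   void* p = hwy::AllocateAlignedBytes(512, &ArenaAlloc, &arena);
+//   hwy::FreeAlignedBytes(p, &ArenaFree, &arena);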
+
+// static
+HWY_DLLEXPORT void AlignedDeleter::DeleteAlignedArray(void* aligned_pointer,
+ FreePtr free_ptr,
+ void* opaque_ptr,
+ ArrayDeleter deleter) {
+ if (aligned_pointer == nullptr) return;
+
+ const uintptr_t payload = reinterpret_cast<uintptr_t>(aligned_pointer);
+ HWY_DASSERT(payload % kAlignment == 0);
+ const AllocationHeader* header =
+ reinterpret_cast<const AllocationHeader*>(payload) - 1;
+
+ if (deleter) {
+ (*deleter)(aligned_pointer, header->payload_size);
+ }
+
+ if (free_ptr == nullptr) {
+ free(header->allocated);
+ } else {
+ (*free_ptr)(opaque_ptr, header->allocated);
+ }
+}
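+
+// This deleter is normally reached through the templated helpers declared in
+// aligned_allocator.h rather than called directly, e.g. (sketch):
+//
+//   auto floats = hwy::AllocateAligned<float>(100);
+//   if (floats) floats[0] = 1.0f;  // freed when `floats` leaves scope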
+
+} // namespace hwy