// Copyright (c) the JPEG XL Project Authors. All rights reserved.
//
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

#include "lib/jxl/cache_aligned.h"

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

// Disabled: slower than malloc + alignment.
#define JXL_USE_MMAP 0

#if JXL_USE_MMAP
#include <sys/mman.h>
#endif

#include <algorithm>  // std::max
#include <atomic>
#include <hwy/base.h>  // kMaxVectorSize
#include <limits>

#include "lib/jxl/base/printf_macros.h"
#include "lib/jxl/base/status.h"

namespace jxl {
namespace {

#pragma pack(push, 1)
struct AllocationHeader {
  void* allocated;
  size_t allocated_size;
  uint8_t left_padding[hwy::kMaxVectorSize];
};
#pragma pack(pop)

std::atomic<uint64_t> num_allocations{0};
std::atomic<uint64_t> bytes_in_use{0};
std::atomic<uint64_t> max_bytes_in_use{0};

}  // namespace

// Avoids linker errors in pre-C++17 builds.
constexpr size_t CacheAligned::kPointerSize;
constexpr size_t CacheAligned::kCacheLineSize;
constexpr size_t CacheAligned::kAlignment;
constexpr size_t CacheAligned::kAlias;

void CacheAligned::PrintStats() {
  fprintf(
      stderr, "Allocations: %" PRIuS " (max bytes in use: %E)\n",
      static_cast<size_t>(num_allocations.load(std::memory_order_relaxed)),
      static_cast<double>(max_bytes_in_use.load(std::memory_order_relaxed)));
}

size_t CacheAligned::NextOffset() {
  static std::atomic<uint32_t> next{0};
  constexpr uint32_t kGroups = CacheAligned::kAlias / CacheAligned::kAlignment;
  const uint32_t group =
      next.fetch_add(1, std::memory_order_relaxed) % kGroups;
  return CacheAligned::kAlignment * group;
}

void* CacheAligned::Allocate(const size_t payload_size, size_t offset) {
  JXL_ASSERT(payload_size <= std::numeric_limits<size_t>::max() / 2);
  JXL_ASSERT((offset % kAlignment == 0) && offset <= kAlias);

  // What: | misalign | unused | AllocationHeader |payload
  // Size: |<= kAlias | offset |                  |payload_size
  //       ^allocated.^aligned.^header............^payload
  // The header must immediately precede payload, which must remain aligned.
  // To avoid wasting space, the header resides at the end of `unused`,
  // which therefore cannot be empty (offset == 0).
  if (offset == 0) {
    // SVE/RVV vectors can be large, so we cannot rely on them (including the
    // padding at the end of AllocationHeader) to fit in kAlignment.
    offset = hwy::RoundUpTo(sizeof(AllocationHeader), kAlignment);
  }

#if JXL_USE_MMAP
  const size_t allocated_size = offset + payload_size;
  const int flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_POPULATE;
  void* allocated =
      mmap(nullptr, allocated_size, PROT_READ | PROT_WRITE, flags, -1, 0);
  if (allocated == MAP_FAILED) return nullptr;
  const uintptr_t aligned = reinterpret_cast<uintptr_t>(allocated);
#else
  const size_t allocated_size = kAlias + offset + payload_size;
  void* allocated = malloc(allocated_size);
  if (allocated == nullptr) return nullptr;
  // Always round up even if already aligned - we already asked for kAlias
  // extra bytes and there's no way to give them back.
  uintptr_t aligned = reinterpret_cast<uintptr_t>(allocated) + kAlias;
  static_assert((kAlias & (kAlias - 1)) == 0, "kAlias must be a power of 2");
  static_assert(kAlias >= kAlignment, "Cannot align to more than kAlias");
  aligned &= ~(kAlias - 1);
#endif

#if JXL_FALSE  // No effect.
  uintptr_t page_aligned = reinterpret_cast<uintptr_t>(allocated);
  page_aligned &= ~(4096 - 1);
  if (madvise(reinterpret_cast<void*>(page_aligned), allocated_size,
              MADV_WILLNEED) != 0) {
    JXL_NOTIFY_ERROR("madvise failed");
  }
#elif 0  // INCREASES both first and subsequent decode times.
  if (mlock(allocated, allocated_size) != 0) {
    JXL_NOTIFY_ERROR("mlock failed");
  }
#endif

  // Update statistics (#allocations and max bytes in use)
  num_allocations.fetch_add(1, std::memory_order_relaxed);
  const uint64_t prev_bytes =
      bytes_in_use.fetch_add(allocated_size, std::memory_order_acq_rel);
  uint64_t expected_max = max_bytes_in_use.load(std::memory_order_acquire);
  for (;;) {
    const uint64_t desired =
        std::max(expected_max, prev_bytes + allocated_size);
    if (max_bytes_in_use.compare_exchange_strong(expected_max, desired,
                                                 std::memory_order_acq_rel)) {
      break;
    }
  }

  const uintptr_t payload = aligned + offset;  // still aligned

  // Stash `allocated` and payload_size inside header for use by Free().
  AllocationHeader* header =
      reinterpret_cast<AllocationHeader*>(payload) - 1;  // NOLINT
  header->allocated = allocated;
  header->allocated_size = allocated_size;

  return JXL_ASSUME_ALIGNED(reinterpret_cast<void*>(payload), 64);  // NOLINT
}
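// Illustrative usage sketch (comment only, not compiled). It shows how the
// two functions defined in this file pair up; the size and the use of
// NextOffset() below are arbitrary examples, and callers normally go through
// the wrappers declared in cache_aligned.h rather than calling these directly:
//
//   void* p = CacheAligned::Allocate(1 << 20, CacheAligned::NextOffset());
//   if (p != nullptr) {
//     // p is aligned to kAlignment; passing NextOffset() staggers successive
//     // allocations across kAlias so they do not all map to the same cache
//     // sets.
//   }
//   CacheAligned::Free(p);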

void CacheAligned::Free(const void* aligned_pointer) {
  if (aligned_pointer == nullptr) {
    return;
  }
  const uintptr_t payload = reinterpret_cast<uintptr_t>(aligned_pointer);
  JXL_ASSERT(payload % kAlignment == 0);
  const AllocationHeader* header =
      reinterpret_cast<const AllocationHeader*>(payload) - 1;  // NOLINT

  // Subtract (2's complement negation).
  bytes_in_use.fetch_add(~header->allocated_size + 1,
                         std::memory_order_acq_rel);

#if JXL_USE_MMAP
  munmap(header->allocated, header->allocated_size);
#else
  free(header->allocated);
#endif
}

}  // namespace jxl