// Copyright (c) the JPEG XL Project Authors. All rights reserved. // // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. #include "lib/jxl/simd_util.h" #include #include #undef HWY_TARGET_INCLUDE #define HWY_TARGET_INCLUDE "lib/jxl/simd_util.cc" #include #include #include "lib/jxl/base/common.h" #include "lib/jxl/base/status.h" #include "lib/jxl/cache_aligned.h" HWY_BEFORE_NAMESPACE(); namespace jxl { namespace HWY_NAMESPACE { size_t MaxVectorSize() { HWY_FULL(float) df; return Lanes(df) * sizeof(float); } // NOLINTNEXTLINE(google-readability-namespace-comments) } // namespace HWY_NAMESPACE } // namespace jxl HWY_AFTER_NAMESPACE(); #if HWY_ONCE namespace jxl { HWY_EXPORT(MaxVectorSize); size_t MaxVectorSize() { // Ideally HWY framework should provide us this value. // Less than ideal is to check all available targets and choose maximal. // As for now, we just ask current active target, assuming it won't change. return HWY_DYNAMIC_DISPATCH(MaxVectorSize)(); } size_t BytesPerRow(const size_t xsize, const size_t sizeof_t) { // Special case: we don't allow any ops -> don't need extra padding/ if (xsize == 0) { return 0; } const size_t vec_size = MaxVectorSize(); size_t valid_bytes = xsize * sizeof_t; // Allow unaligned accesses starting at the last valid value. // Skip for the scalar case because no extra lanes will be loaded. if (vec_size != 0) { valid_bytes += vec_size - sizeof_t; } // Round up to vector and cache line size. const size_t align = std::max(vec_size, CacheAligned::kAlignment); size_t bytes_per_row = RoundUpTo(valid_bytes, align); // During the lengthy window before writes are committed to memory, CPUs // guard against read after write hazards by checking the address, but // only the lower 11 bits. We avoid a false dependency between writes to // consecutive rows by ensuring their sizes are not multiples of 2 KiB. // Avoid2K prevents the same problem for the planes of an Image3. if (bytes_per_row % CacheAligned::kAlias == 0) { bytes_per_row += align; } JXL_ASSERT(bytes_per_row % align == 0); return bytes_per_row; } } // namespace jxl #endif