From 40a355a42d4a9444dc753c04c6608dade2f06a23 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Fri, 19 Apr 2024 03:13:27 +0200 Subject: Adding upstream version 125.0.1. Signed-off-by: Daniel Baumann --- third_party/jpeg-xl/lib/jxl/simd_util.cc | 39 ++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) (limited to 'third_party/jpeg-xl/lib/jxl/simd_util.cc') diff --git a/third_party/jpeg-xl/lib/jxl/simd_util.cc b/third_party/jpeg-xl/lib/jxl/simd_util.cc index a3971ff900..6515daaa2b 100644 --- a/third_party/jpeg-xl/lib/jxl/simd_util.cc +++ b/third_party/jpeg-xl/lib/jxl/simd_util.cc @@ -5,11 +5,18 @@ #include "lib/jxl/simd_util.h" +#include +#include + #undef HWY_TARGET_INCLUDE #define HWY_TARGET_INCLUDE "lib/jxl/simd_util.cc" #include #include +#include "lib/jxl/base/common.h" +#include "lib/jxl/base/status.h" +#include "lib/jxl/cache_aligned.h" + HWY_BEFORE_NAMESPACE(); namespace jxl { namespace HWY_NAMESPACE { @@ -36,5 +43,37 @@ size_t MaxVectorSize() { return HWY_DYNAMIC_DISPATCH(MaxVectorSize)(); } +size_t BytesPerRow(const size_t xsize, const size_t sizeof_t) { + // Special case: we don't allow any ops -> don't need extra padding. + if (xsize == 0) { + return 0; + } + + const size_t vec_size = MaxVectorSize(); + size_t valid_bytes = xsize * sizeof_t; + + // Allow unaligned accesses starting at the last valid value. + // Skip for the scalar case (vec_size == 0) because no extra lanes will be loaded. + if (vec_size != 0) { + valid_bytes += vec_size - sizeof_t; + } + + // Round up to the larger of the vector size and the cache line size. + const size_t align = std::max(vec_size, CacheAligned::kAlignment); + size_t bytes_per_row = RoundUpTo(valid_bytes, align); + + // During the lengthy window before writes are committed to memory, CPUs + // guard against read-after-write hazards by checking the address, but + // only the lower 11 bits. We avoid a false dependency between writes to + // consecutive rows by ensuring their sizes are not multiples of 2 KiB. 
+ // Avoid2K prevents the same problem for the planes of an Image3. + if (bytes_per_row % CacheAligned::kAlias == 0) { + bytes_per_row += align; + } + + JXL_ASSERT(bytes_per_row % align == 0); + return bytes_per_row; +} + } // namespace jxl #endif -- cgit v1.2.3