summaryrefslogtreecommitdiffstats
path: root/third_party/jpeg-xl/lib/jxl/simd_util.cc
diff options
context:
space:
mode:
Diffstat (limited to 'third_party/jpeg-xl/lib/jxl/simd_util.cc')
-rw-r--r--third_party/jpeg-xl/lib/jxl/simd_util.cc39
1 files changed, 39 insertions, 0 deletions
diff --git a/third_party/jpeg-xl/lib/jxl/simd_util.cc b/third_party/jpeg-xl/lib/jxl/simd_util.cc
index a3971ff900..6515daaa2b 100644
--- a/third_party/jpeg-xl/lib/jxl/simd_util.cc
+++ b/third_party/jpeg-xl/lib/jxl/simd_util.cc
@@ -5,11 +5,18 @@
#include "lib/jxl/simd_util.h"
+#include <algorithm>
+#include <cstddef>
+
#undef HWY_TARGET_INCLUDE
#define HWY_TARGET_INCLUDE "lib/jxl/simd_util.cc"
#include <hwy/foreach_target.h>
#include <hwy/highway.h>
+#include "lib/jxl/base/common.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/cache_aligned.h"
+
HWY_BEFORE_NAMESPACE();
namespace jxl {
namespace HWY_NAMESPACE {
@@ -36,5 +43,37 @@ size_t MaxVectorSize() {
return HWY_DYNAMIC_DISPATCH(MaxVectorSize)();
}
+size_t BytesPerRow(const size_t xsize, const size_t sizeof_t) {
+ // Special case: we don't allow any ops -> don't need extra padding/
+ if (xsize == 0) {
+ return 0;
+ }
+
+ const size_t vec_size = MaxVectorSize();
+ size_t valid_bytes = xsize * sizeof_t;
+
+ // Allow unaligned accesses starting at the last valid value.
+ // Skip for the scalar case because no extra lanes will be loaded.
+ if (vec_size != 0) {
+ valid_bytes += vec_size - sizeof_t;
+ }
+
+ // Round up to vector and cache line size.
+ const size_t align = std::max(vec_size, CacheAligned::kAlignment);
+ size_t bytes_per_row = RoundUpTo(valid_bytes, align);
+
+ // During the lengthy window before writes are committed to memory, CPUs
+ // guard against read after write hazards by checking the address, but
+ // only the lower 11 bits. We avoid a false dependency between writes to
+ // consecutive rows by ensuring their sizes are not multiples of 2 KiB.
+ // Avoid2K prevents the same problem for the planes of an Image3.
+ if (bytes_per_row % CacheAligned::kAlias == 0) {
+ bytes_per_row += align;
+ }
+
+ JXL_ASSERT(bytes_per_row % align == 0);
+ return bytes_per_row;
+}
+
} // namespace jxl
#endif