Diffstat (limited to 'third_party/highway/hwy/contrib/image')
-rw-r--r--  third_party/highway/hwy/contrib/image/image.cc      | 145
-rw-r--r--  third_party/highway/hwy/contrib/image/image.h       | 470
-rw-r--r--  third_party/highway/hwy/contrib/image/image_test.cc | 152
3 files changed, 767 insertions, 0 deletions
diff --git a/third_party/highway/hwy/contrib/image/image.cc b/third_party/highway/hwy/contrib/image/image.cc
new file mode 100644
index 0000000000..67b37d2711
--- /dev/null
+++ b/third_party/highway/hwy/contrib/image/image.cc
@@ -0,0 +1,145 @@
+// Copyright 2020 Google LLC
+// SPDX-License-Identifier: Apache-2.0
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "hwy/contrib/image/image.h"
+
+#include <algorithm> // std::swap
+#include <cstddef>
+
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "hwy/contrib/image/image.cc"
+#include "hwy/foreach_target.h" // IWYU pragma: keep
+#include "hwy/highway.h"
+
+HWY_BEFORE_NAMESPACE();
+namespace hwy {
+namespace HWY_NAMESPACE {
+size_t GetVectorSize() { return Lanes(ScalableTag<uint8_t>()); }
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+} // namespace HWY_NAMESPACE
+
+} // namespace hwy
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+namespace hwy {
+namespace {
+HWY_EXPORT(GetVectorSize); // Local function.
+} // namespace
+
+size_t ImageBase::VectorSize() {
+ // Do not cache result - must return the current value, which may be greater
+ // than the first call if it was subject to DisableTargets!
+ return HWY_DYNAMIC_DISPATCH(GetVectorSize)();
+}
+
+size_t ImageBase::BytesPerRow(const size_t xsize, const size_t sizeof_t) {
+ const size_t vec_size = VectorSize();
+ size_t valid_bytes = xsize * sizeof_t;
+
+ // Allow unaligned accesses starting at the last valid value - this may raise
+ // msan errors unless the user calls InitializePaddingForUnalignedAccesses.
+ // Skip for the scalar case because no extra lanes will be loaded.
+ if (vec_size != 1) {
+ HWY_DASSERT(vec_size >= sizeof_t);
+ valid_bytes += vec_size - sizeof_t;
+ }
+
+ // Round up to vector and cache line size.
+ const size_t align = HWY_MAX(vec_size, HWY_ALIGNMENT);
+ size_t bytes_per_row = RoundUpTo(valid_bytes, align);
+
+ // During the lengthy window before writes are committed to memory, CPUs
+ // guard against read after write hazards by checking the address, but
+ // only the lower 11 bits. We avoid a false dependency between writes to
+ // consecutive rows by ensuring their sizes are not multiples of 2 KiB.
+ // Each plane of an Image3 is a separate Image and receives the same padding.
+ if (bytes_per_row % HWY_ALIGNMENT == 0) {
+ bytes_per_row += align;
+ }
+
+ HWY_DASSERT(bytes_per_row % align == 0);
+ return bytes_per_row;
+}
+
+ImageBase::ImageBase(const size_t xsize, const size_t ysize,
+ const size_t sizeof_t)
+ : xsize_(static_cast<uint32_t>(xsize)),
+ ysize_(static_cast<uint32_t>(ysize)),
+ bytes_(nullptr, AlignedFreer(&AlignedFreer::DoNothing, nullptr)) {
+ HWY_ASSERT(sizeof_t == 1 || sizeof_t == 2 || sizeof_t == 4 || sizeof_t == 8);
+
+ bytes_per_row_ = 0;
+ // Dimensions can be zero, e.g. for lazily-allocated images. Only allocate
+ // if nonzero, because "zero" bytes still have padding/bookkeeping overhead.
+ if (xsize != 0 && ysize != 0) {
+ bytes_per_row_ = BytesPerRow(xsize, sizeof_t);
+ bytes_ = AllocateAligned<uint8_t>(bytes_per_row_ * ysize);
+ HWY_ASSERT(bytes_.get() != nullptr);
+ InitializePadding(sizeof_t, Padding::kRoundUp);
+ }
+}
+
+ImageBase::ImageBase(const size_t xsize, const size_t ysize,
+ const size_t bytes_per_row, void* const aligned)
+ : xsize_(static_cast<uint32_t>(xsize)),
+ ysize_(static_cast<uint32_t>(ysize)),
+ bytes_per_row_(bytes_per_row),
+ bytes_(static_cast<uint8_t*>(aligned),
+ AlignedFreer(&AlignedFreer::DoNothing, nullptr)) {
+ const size_t vec_size = VectorSize();
+ HWY_ASSERT(bytes_per_row % vec_size == 0);
+ HWY_ASSERT(reinterpret_cast<uintptr_t>(aligned) % vec_size == 0);
+}
+
+void ImageBase::InitializePadding(const size_t sizeof_t, Padding padding) {
+#if HWY_IS_MSAN || HWY_IDE
+ if (xsize_ == 0 || ysize_ == 0) return;
+
+ const size_t vec_size = VectorSize(); // Bytes, independent of sizeof_t!
+ if (vec_size == 1) return; // Scalar mode: no padding needed
+
+ const size_t valid_size = xsize_ * sizeof_t;
+ const size_t initialize_size = padding == Padding::kRoundUp
+ ? RoundUpTo(valid_size, vec_size)
+ : valid_size + vec_size - sizeof_t;
+ if (valid_size == initialize_size) return;
+
+ for (size_t y = 0; y < ysize_; ++y) {
+ uint8_t* HWY_RESTRICT row = static_cast<uint8_t*>(VoidRow(y));
+#if defined(__clang__) && (__clang_major__ <= 6)
+ // There's a bug in msan in clang-6 when handling AVX2 operations. This
+ // workaround allows tests to pass on msan, although it is slower and
+ // prevents msan warnings from uninitialized images.
+ memset(row, 0, initialize_size);
+#else
+ memset(row + valid_size, 0, initialize_size - valid_size);
+#endif // clang6
+ }
+#else
+ (void)sizeof_t;
+ (void)padding;
+#endif // HWY_IS_MSAN
+}
+
+void ImageBase::Swap(ImageBase& other) {
+ std::swap(xsize_, other.xsize_);
+ std::swap(ysize_, other.ysize_);
+ std::swap(bytes_per_row_, other.bytes_per_row_);
+ std::swap(bytes_, other.bytes_);
+}
+
+} // namespace hwy
+#endif // HWY_ONCE
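The stride logic in BytesPerRow is easiest to see from the caller's side. A minimal usage sketch (illustrative only, not part of the files in this change; it assumes only the public ImageBase/ImageF API from image.h): the reported stride covers the valid bytes, is a multiple of both VectorSize() and the cache line size, and includes the extra alignment block added against row aliasing.

#include <cstdio>

#include "hwy/contrib/image/image.h"

int main() {
  const size_t xsize = 100, ysize = 4;
  hwy::ImageF img(xsize, ysize);  // owning constructor: padded, aligned rows
  // The static helper reports the same stride the constructor used.
  const size_t stride = hwy::ImageBase::BytesPerRow(xsize, sizeof(float));
  std::printf("stride=%zu valid=%zu vector=%zu\n", stride,
              xsize * sizeof(float), hwy::ImageBase::VectorSize());
  // Consecutive rows are exactly bytes_per_row() bytes apart.
  const auto* r0 = reinterpret_cast<const uint8_t*>(img.ConstRow(0));
  const auto* r1 = reinterpret_cast<const uint8_t*>(img.ConstRow(1));
  std::printf("row delta=%td, bytes_per_row=%zu\n", r1 - r0,
              img.bytes_per_row());
  return 0;
}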
diff --git a/third_party/highway/hwy/contrib/image/image.h b/third_party/highway/hwy/contrib/image/image.h
new file mode 100644
index 0000000000..c99863b06c
--- /dev/null
+++ b/third_party/highway/hwy/contrib/image/image.h
@@ -0,0 +1,470 @@
+// Copyright 2020 Google LLC
+// SPDX-License-Identifier: Apache-2.0
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef HIGHWAY_HWY_CONTRIB_IMAGE_IMAGE_H_
+#define HIGHWAY_HWY_CONTRIB_IMAGE_IMAGE_H_
+
+// SIMD/multicore-friendly planar image representation with row accessors.
+
+#include <stddef.h>
+#include <stdint.h>
+#include <string.h>
+
+#include <utility> // std::move
+
+#include "hwy/aligned_allocator.h"
+#include "hwy/base.h"
+#include "hwy/highway_export.h"
+
+namespace hwy {
+
+// Type-independent parts of Image<> - reduces code duplication and facilitates
+// moving member function implementations to cc file.
+struct HWY_CONTRIB_DLLEXPORT ImageBase {
+ // Returns required alignment in bytes for externally allocated memory.
+ static size_t VectorSize();
+
+ // Returns distance [bytes] between the start of two consecutive rows, a
+ // multiple of VectorSize(); see the implementation for the extra padding
+ // added to reduce aliasing between consecutive rows.
+ static size_t BytesPerRow(const size_t xsize, const size_t sizeof_t);
+
+ // No allocation (for output params or unused images)
+ ImageBase()
+ : xsize_(0),
+ ysize_(0),
+ bytes_per_row_(0),
+ bytes_(nullptr, AlignedFreer(&AlignedFreer::DoNothing, nullptr)) {}
+
+ // Allocates memory (this is the common case)
+ ImageBase(size_t xsize, size_t ysize, size_t sizeof_t);
+
+ // References but does not take ownership of external memory. Useful for
+ // interoperability with other libraries. `aligned` must be aligned to a
+ // multiple of VectorSize() and `bytes_per_row` must also be a multiple of
+ // VectorSize() or preferably equal to BytesPerRow().
+ ImageBase(size_t xsize, size_t ysize, size_t bytes_per_row, void* aligned);
+
+ // Copy construction/assignment is forbidden to avoid inadvertent copies,
+ // which can be very expensive. Use CopyImageTo() instead.
+ ImageBase(const ImageBase& other) = delete;
+ ImageBase& operator=(const ImageBase& other) = delete;
+
+ // Move constructor (required for returning Image from function)
+ ImageBase(ImageBase&& other) noexcept = default;
+
+ // Move assignment (required for std::vector)
+ ImageBase& operator=(ImageBase&& other) noexcept = default;
+
+ void Swap(ImageBase& other);
+
+ // Useful for pre-allocating an image with some padding for alignment purposes
+ // and later reporting the actual valid dimensions. Caller is responsible
+ // for ensuring xsize/ysize are <= the original dimensions.
+ void ShrinkTo(const size_t xsize, const size_t ysize) {
+ xsize_ = static_cast<uint32_t>(xsize);
+ ysize_ = static_cast<uint32_t>(ysize);
+ // NOTE: we can't recompute bytes_per_row for more compact storage and
+ // better locality because that would invalidate the image contents.
+ }
+
+ // How many pixels.
+ HWY_INLINE size_t xsize() const { return xsize_; }
+ HWY_INLINE size_t ysize() const { return ysize_; }
+
+ // NOTE: do not use this for copying rows - the valid xsize may be much less.
+ HWY_INLINE size_t bytes_per_row() const { return bytes_per_row_; }
+
+ // Raw access to byte contents, for interfacing with other libraries.
+ // Unsigned char instead of char to avoid surprises (sign extension).
+ HWY_INLINE uint8_t* bytes() {
+ void* p = bytes_.get();
+ return static_cast<uint8_t * HWY_RESTRICT>(HWY_ASSUME_ALIGNED(p, 64));
+ }
+ HWY_INLINE const uint8_t* bytes() const {
+ const void* p = bytes_.get();
+ return static_cast<const uint8_t * HWY_RESTRICT>(HWY_ASSUME_ALIGNED(p, 64));
+ }
+
+ protected:
+ // Returns pointer to the start of a row.
+ HWY_INLINE void* VoidRow(const size_t y) const {
+#if HWY_IS_ASAN || HWY_IS_MSAN || HWY_IS_TSAN
+ if (y >= ysize_) {
+ HWY_ABORT("Row(%d) >= %u\n", static_cast<int>(y), ysize_);
+ }
+#endif
+
+ void* row = bytes_.get() + y * bytes_per_row_;
+ return HWY_ASSUME_ALIGNED(row, 64);
+ }
+
+ enum class Padding {
+ // Allow Load(d, row + x) for x = 0; x < xsize(); x += Lanes(d). Default.
+ kRoundUp,
+ // Allow LoadU(d, row + x) for x <= xsize() - 1. This requires an extra
+ // vector to be initialized. If done by default, this would suppress
+ // legitimate msan warnings. We therefore require users to explicitly call
+ // InitializePadding before using unaligned loads (e.g. convolution).
+ kUnaligned
+ };
+
+ // Initializes the minimum bytes required to suppress msan warnings from
+ // legitimate (according to Padding mode) vector loads/stores on the right
+ // border, where some lanes are uninitialized and assumed to be unused.
+ void InitializePadding(size_t sizeof_t, Padding padding);
+
+ // (Members are non-const to enable assignment during move-assignment.)
+ uint32_t xsize_; // In valid pixels, not including any padding.
+ uint32_t ysize_;
+ size_t bytes_per_row_; // Includes padding.
+ AlignedFreeUniquePtr<uint8_t[]> bytes_;
+};
+
+// Single channel, aligned rows separated by padding. T must be POD.
+//
+// 'Single channel' (one 2D array per channel) simplifies vectorization
+// (repeating the same operation on multiple adjacent components) without the
+// complexity of a hybrid layout (8 R, 8 G, 8 B, ...). In particular, clients
+// can easily iterate over all components in a row and Image requires no
+// knowledge of the pixel format beyond the component type "T".
+//
+// 'Aligned' means each row is aligned to the L1 cache line size. This prevents
+// false sharing between two threads operating on adjacent rows.
+//
+// 'Padding' is still relevant because vectors could potentially be larger than
+// a cache line. By rounding up row sizes to the vector size, we allow
+// reading/writing ALIGNED vectors whose first lane is a valid sample. This
+// avoids needing a separate loop to handle remaining unaligned lanes.
+//
+// This image layout could also be achieved with a vector and a row accessor
+// function, but a class wrapper with support for "deleter" allows wrapping
+// existing memory allocated by clients without copying the pixels. It also
+// provides convenient accessors for xsize/ysize, which shortens function
+// argument lists. Supports move-construction so it can be stored in containers.
+template <typename ComponentType>
+class Image : public ImageBase {
+ public:
+ using T = ComponentType;
+
+ Image() = default;
+ Image(const size_t xsize, const size_t ysize)
+ : ImageBase(xsize, ysize, sizeof(T)) {}
+ Image(const size_t xsize, const size_t ysize, size_t bytes_per_row,
+ void* aligned)
+ : ImageBase(xsize, ysize, bytes_per_row, aligned) {}
+
+ void InitializePaddingForUnalignedAccesses() {
+ InitializePadding(sizeof(T), Padding::kUnaligned);
+ }
+
+ HWY_INLINE const T* ConstRow(const size_t y) const {
+ return static_cast<const T*>(VoidRow(y));
+ }
+ HWY_INLINE const T* ConstRow(const size_t y) {
+ return static_cast<const T*>(VoidRow(y));
+ }
+
+ // Returns pointer to non-const. This allows passing const Image* parameters
+ // when the callee is only supposed to fill the pixels, as opposed to
+ // allocating or resizing the image.
+ HWY_INLINE T* MutableRow(const size_t y) const {
+ return static_cast<T*>(VoidRow(y));
+ }
+ HWY_INLINE T* MutableRow(const size_t y) {
+ return static_cast<T*>(VoidRow(y));
+ }
+
+ // Returns number of pixels (some of which are padding) per row. Useful for
+ // computing other rows via pointer arithmetic. WARNING: this must
+ // NOT be used to determine xsize.
+ HWY_INLINE intptr_t PixelsPerRow() const {
+ return static_cast<intptr_t>(bytes_per_row_ / sizeof(T));
+ }
+};
+
+using ImageF = Image<float>;
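The aligned-access contract described above (rows padded via Padding::kRoundUp and aligned to the vector size) translates into the following loop shape. A minimal sketch, assuming static dispatch via hwy/highway.h; FillRows is a hypothetical helper, not part of the header:

#include "hwy/contrib/image/image.h"
#include "hwy/highway.h"

namespace hn = hwy::HWY_NAMESPACE;

HWY_ATTR void FillRows(hwy::ImageF& img, float value) {
  const hn::ScalableTag<float> d;
  for (size_t y = 0; y < img.ysize(); ++y) {
    float* HWY_RESTRICT row = img.MutableRow(y);
    // Aligned stores are safe for every x that is a multiple of Lanes(d) and
    // below xsize(), even when the final vector extends into the padding.
    for (size_t x = 0; x < img.xsize(); x += hn::Lanes(d)) {
      hn::Store(hn::Set(d, value), d, row + x);
    }
  }
}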
+
+// A bundle of 3 same-sized images. To fill an existing Image3 using
+// single-channel producers, we also need access to each const Image*. Const
+// prevents breaking the same-size invariant, while still allowing pixels to be
+// changed via MutableRow.
+template <typename ComponentType>
+class Image3 {
+ public:
+ using T = ComponentType;
+ using ImageT = Image<T>;
+ static constexpr size_t kNumPlanes = 3;
+
+ Image3() : planes_{ImageT(), ImageT(), ImageT()} {}
+
+ Image3(const size_t xsize, const size_t ysize)
+ : planes_{ImageT(xsize, ysize), ImageT(xsize, ysize),
+ ImageT(xsize, ysize)} {}
+
+ Image3(Image3&& other) noexcept {
+ for (size_t i = 0; i < kNumPlanes; i++) {
+ planes_[i] = std::move(other.planes_[i]);
+ }
+ }
+
+ Image3(ImageT&& plane0, ImageT&& plane1, ImageT&& plane2) {
+ if (!SameSize(plane0, plane1) || !SameSize(plane0, plane2)) {
+ HWY_ABORT(
+ "Not same size: %d x %d, %d x %d, %d x %d\n",
+ static_cast<int>(plane0.xsize()), static_cast<int>(plane0.ysize()),
+ static_cast<int>(plane1.xsize()), static_cast<int>(plane1.ysize()),
+ static_cast<int>(plane2.xsize()), static_cast<int>(plane2.ysize()));
+ }
+ planes_[0] = std::move(plane0);
+ planes_[1] = std::move(plane1);
+ planes_[2] = std::move(plane2);
+ }
+
+ // Copy construction/assignment is forbidden to avoid inadvertent copies,
+ // which can be very expensive. Use CopyImageTo instead.
+ Image3(const Image3& other) = delete;
+ Image3& operator=(const Image3& other) = delete;
+
+ Image3& operator=(Image3&& other) noexcept {
+ for (size_t i = 0; i < kNumPlanes; i++) {
+ planes_[i] = std::move(other.planes_[i]);
+ }
+ return *this;
+ }
+
+ HWY_INLINE const T* ConstPlaneRow(const size_t c, const size_t y) const {
+ return static_cast<const T*>(VoidPlaneRow(c, y));
+ }
+ HWY_INLINE const T* ConstPlaneRow(const size_t c, const size_t y) {
+ return static_cast<const T*>(VoidPlaneRow(c, y));
+ }
+
+ HWY_INLINE T* MutablePlaneRow(const size_t c, const size_t y) const {
+ return static_cast<T*>(VoidPlaneRow(c, y));
+ }
+ HWY_INLINE T* MutablePlaneRow(const size_t c, const size_t y) {
+ return static_cast<T*>(VoidPlaneRow(c, y));
+ }
+
+ HWY_INLINE const ImageT& Plane(size_t idx) const { return planes_[idx]; }
+
+ void Swap(Image3& other) {
+ for (size_t c = 0; c < 3; ++c) {
+ other.planes_[c].Swap(planes_[c]);
+ }
+ }
+
+ void ShrinkTo(const size_t xsize, const size_t ysize) {
+ for (ImageT& plane : planes_) {
+ plane.ShrinkTo(xsize, ysize);
+ }
+ }
+
+ // Sizes of all three images are guaranteed to be equal.
+ HWY_INLINE size_t xsize() const { return planes_[0].xsize(); }
+ HWY_INLINE size_t ysize() const { return planes_[0].ysize(); }
+ // Returns offset [bytes] from one row to the next row of the same plane.
+ // WARNING: this must NOT be used to determine xsize, nor for copying rows -
+ // the valid xsize may be much less.
+ HWY_INLINE size_t bytes_per_row() const { return planes_[0].bytes_per_row(); }
+ // Returns number of pixels (some of which are padding) per row. Useful for
+ // computing other rows via pointer arithmetic. WARNING: this must NOT be used
+ // to determine xsize.
+ HWY_INLINE intptr_t PixelsPerRow() const { return planes_[0].PixelsPerRow(); }
+
+ private:
+ // Returns pointer to the start of a row.
+ HWY_INLINE void* VoidPlaneRow(const size_t c, const size_t y) const {
+#if HWY_IS_ASAN || HWY_IS_MSAN || HWY_IS_TSAN
+ if (c >= kNumPlanes || y >= ysize()) {
+ HWY_ABORT("PlaneRow(%d, %d) >= %d\n", static_cast<int>(c),
+ static_cast<int>(y), static_cast<int>(ysize()));
+ }
+#endif
+ // Use the first plane's stride because the compiler might not realize they
+ // are all equal. Thus we only need a single multiplication for all planes.
+ const size_t row_offset = y * planes_[0].bytes_per_row();
+ const void* row = planes_[c].bytes() + row_offset;
+ return const_cast<void*>(HWY_ASSUME_ALIGNED(row, HWY_ALIGNMENT));
+ }
+
+ private:
+ ImageT planes_[kNumPlanes];
+};
+
+using Image3F = Image3<float>;
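The const-access design noted above Image3 (const prevents resizing, yet pixels remain writable through MutableRow/MutablePlaneRow) is illustrated by this sketch; FillPlane and FillRGB are hypothetical helpers, not part of the header:

#include "hwy/contrib/image/image.h"

// Takes const Image*: the producer cannot reallocate or swap the plane (which
// would break Image3's same-size invariant) but may still write pixels,
// because MutableRow is a const member function returning T*.
void FillPlane(const hwy::ImageF* plane, float value) {
  for (size_t y = 0; y < plane->ysize(); ++y) {
    float* HWY_RESTRICT row = plane->MutableRow(y);
    for (size_t x = 0; x < plane->xsize(); ++x) row[x] = value;
  }
}

// Fills each channel of an Image3F without granting resize access.
void FillRGB(const hwy::Image3F& rgb) {
  for (size_t c = 0; c < 3; ++c) {
    FillPlane(&rgb.Plane(c), static_cast<float>(c));
  }
}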
+
+// Rectangular region in image(s). Factoring this out of Image instead of
+// shifting the pointer by x0/y0 allows this to apply to multiple images with
+// different resolutions. Can compare size via SameSize(rect1, rect2).
+class Rect {
+ public:
+ // Most windows are xsize_max * ysize_max, except those on the borders where
+ // begin + size_max > end.
+ constexpr Rect(size_t xbegin, size_t ybegin, size_t xsize_max,
+ size_t ysize_max, size_t xend, size_t yend)
+ : x0_(xbegin),
+ y0_(ybegin),
+ xsize_(ClampedSize(xbegin, xsize_max, xend)),
+ ysize_(ClampedSize(ybegin, ysize_max, yend)) {}
+
+ // Construct with origin and known size (typically from another Rect).
+ constexpr Rect(size_t xbegin, size_t ybegin, size_t xsize, size_t ysize)
+ : x0_(xbegin), y0_(ybegin), xsize_(xsize), ysize_(ysize) {}
+
+ // Construct a rect that covers a whole image.
+ template <typename Image>
+ explicit Rect(const Image& image)
+ : Rect(0, 0, image.xsize(), image.ysize()) {}
+
+ Rect() : Rect(0, 0, 0, 0) {}
+
+ Rect(const Rect&) = default;
+ Rect& operator=(const Rect&) = default;
+
+ Rect Subrect(size_t xbegin, size_t ybegin, size_t xsize_max,
+ size_t ysize_max) {
+ return Rect(x0_ + xbegin, y0_ + ybegin, xsize_max, ysize_max, x0_ + xsize_,
+ y0_ + ysize_);
+ }
+
+ template <typename T>
+ const T* ConstRow(const Image<T>* image, size_t y) const {
+ return image->ConstRow(y + y0_) + x0_;
+ }
+
+ template <typename T>
+ T* MutableRow(const Image<T>* image, size_t y) const {
+ return image->MutableRow(y + y0_) + x0_;
+ }
+
+ template <typename T>
+ const T* ConstPlaneRow(const Image3<T>& image, size_t c, size_t y) const {
+ return image.ConstPlaneRow(c, y + y0_) + x0_;
+ }
+
+ template <typename T>
+ T* MutablePlaneRow(Image3<T>* image, const size_t c, size_t y) const {
+ return image->MutablePlaneRow(c, y + y0_) + x0_;
+ }
+
+ // Returns true if this Rect fully resides in the given image. ImageT could be
+ // Image<T> or Image3<T>; however if ImageT is Rect, results are nonsensical.
+ template <class ImageT>
+ bool IsInside(const ImageT& image) const {
+ return (x0_ + xsize_ <= image.xsize()) && (y0_ + ysize_ <= image.ysize());
+ }
+
+ size_t x0() const { return x0_; }
+ size_t y0() const { return y0_; }
+ size_t xsize() const { return xsize_; }
+ size_t ysize() const { return ysize_; }
+
+ private:
+ // Returns size_max, or whatever is left in [begin, end).
+ static constexpr size_t ClampedSize(size_t begin, size_t size_max,
+ size_t end) {
+ return (begin + size_max <= end) ? size_max
+ : (end > begin ? end - begin : 0);
+ }
+
+ size_t x0_;
+ size_t y0_;
+
+ size_t xsize_;
+ size_t ysize_;
+};
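A short sketch of how Rect composes with the row accessors (RectExample is a hypothetical function, not part of the header); the six-argument constructor clamps windows that would extend past the image:

#include "hwy/contrib/image/image.h"

void RectExample(const hwy::ImageF& img) {
  // 8x8 tile whose origin is (60, 0); if img is only 64 wide, the tile's
  // xsize() is clamped to 4 by ClampedSize.
  const hwy::Rect tile(/*xbegin=*/60, /*ybegin=*/0, /*xsize_max=*/8,
                       /*ysize_max=*/8, img.xsize(), img.ysize());
  for (size_t y = 0; y < tile.ysize(); ++y) {
    // The returned pointer is already offset by the tile origin (x0, y0).
    const float* row = tile.ConstRow(&img, y);
    (void)row;
  }
}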
+
+// Works for any image-like input type(s).
+template <class Image1, class Image2>
+HWY_MAYBE_UNUSED bool SameSize(const Image1& image1, const Image2& image2) {
+ return image1.xsize() == image2.xsize() && image1.ysize() == image2.ysize();
+}
+
+// Mirrors out of bounds coordinates and returns valid coordinates unchanged.
+// We assume the radius (distance outside the image) is small compared to the
+// image size, otherwise this might not terminate.
+// The mirror is outside the last column (border pixel is also replicated).
+static HWY_INLINE HWY_MAYBE_UNUSED size_t Mirror(int64_t x,
+ const int64_t xsize) {
+ HWY_DASSERT(xsize != 0);
+
+ // TODO(janwas): replace with branchless version
+ while (x < 0 || x >= xsize) {
+ if (x < 0) {
+ x = -x - 1;
+ } else {
+ x = 2 * xsize - 1 - x;
+ }
+ }
+ return static_cast<size_t>(x);
+}
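A few worked values, assuming xsize = 5 (valid columns 0..4), confirm that the edge pixel is replicated once before the reflection continues inward:

#include <cassert>

#include "hwy/contrib/image/image.h"

void MirrorExamples() {
  assert(hwy::Mirror(-2, 5) == 1);  // two before the image -> column 1
  assert(hwy::Mirror(-1, 5) == 0);  // one before the image -> column 0
  assert(hwy::Mirror(3, 5) == 3);   // valid coordinates are unchanged
  assert(hwy::Mirror(5, 5) == 4);   // one past the end -> last column
  assert(hwy::Mirror(6, 5) == 3);   // two past the end -> column 3
}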
+
+// Wrap modes for ensuring X/Y coordinates are in the valid range [0, size):
+
+// Mirrors (repeating the edge pixel once). Useful for convolutions.
+struct WrapMirror {
+ HWY_INLINE size_t operator()(const int64_t coord, const size_t size) const {
+ return Mirror(coord, static_cast<int64_t>(size));
+ }
+};
+
+// Returns the same coordinate, for when we know "coord" is already valid (e.g.
+// interior of an image).
+struct WrapUnchanged {
+ HWY_INLINE size_t operator()(const int64_t coord, size_t /*size*/) const {
+ return static_cast<size_t>(coord);
+ }
+};
+
+// Similar to Wrap* but for row pointers (reduces Row() multiplications).
+
+class WrapRowMirror {
+ public:
+ template <class View>
+ WrapRowMirror(const View& image, size_t ysize)
+ : first_row_(image.ConstRow(0)), last_row_(image.ConstRow(ysize - 1)) {}
+
+ const float* operator()(const float* const HWY_RESTRICT row,
+ const int64_t stride) const {
+ if (row < first_row_) {
+ const int64_t num_before = first_row_ - row;
+ // Mirrored; one row before => row 0, two before = row 1, ...
+ return first_row_ + num_before - stride;
+ }
+ if (row > last_row_) {
+ const int64_t num_after = row - last_row_;
+ // Mirrored; one row after => last row, two after = last - 1, ...
+ return last_row_ - num_after + stride;
+ }
+ return row;
+ }
+
+ private:
+ const float* const HWY_RESTRICT first_row_;
+ const float* const HWY_RESTRICT last_row_;
+};
+
+struct WrapRowUnchanged {
+ HWY_INLINE const float* operator()(const float* const HWY_RESTRICT row,
+ int64_t /*stride*/) const {
+ return row;
+ }
+};
+
+} // namespace hwy
+
+#endif // HIGHWAY_HWY_CONTRIB_IMAGE_IMAGE_H_
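WrapRowMirror is intended for row pointers computed by plain pointer arithmetic, as in a vertical filter. A minimal sketch of that pattern (SmoothColumns is a hypothetical filter; it assumes out has the same dimensions as in):

#include "hwy/contrib/image/image.h"

// 1-2-1 vertical smoothing; WrapRowMirror redirects the out-of-range top and
// bottom row pointers to their mirrored in-range rows.
void SmoothColumns(const hwy::ImageF& in, hwy::ImageF* out) {
  const int64_t stride = in.PixelsPerRow();
  const hwy::WrapRowMirror wrap_row(in, in.ysize());
  for (size_t y = 0; y < in.ysize(); ++y) {
    const float* row_c = in.ConstRow(y);
    const float* row_t = wrap_row(row_c - stride, stride);  // y - 1, mirrored
    const float* row_b = wrap_row(row_c + stride, stride);  // y + 1, mirrored
    float* HWY_RESTRICT row_out = out->MutableRow(y);
    for (size_t x = 0; x < in.xsize(); ++x) {
      row_out[x] = 0.25f * row_t[x] + 0.5f * row_c[x] + 0.25f * row_b[x];
    }
  }
}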
diff --git a/third_party/highway/hwy/contrib/image/image_test.cc b/third_party/highway/hwy/contrib/image/image_test.cc
new file mode 100644
index 0000000000..6886577a46
--- /dev/null
+++ b/third_party/highway/hwy/contrib/image/image_test.cc
@@ -0,0 +1,152 @@
+// Copyright (c) the JPEG XL Project
+// SPDX-License-Identifier: Apache-2.0
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "hwy/contrib/image/image.h"
+
+#include <stddef.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include <random>
+#include <utility>
+
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "hwy/contrib/image/image_test.cc"
+#include "hwy/foreach_target.h" // IWYU pragma: keep
+
+// After foreach_target:
+#include "hwy/highway.h"
+#include "hwy/tests/test_util-inl.h"
+
+HWY_BEFORE_NAMESPACE();
+namespace hwy {
+namespace HWY_NAMESPACE {
+
+// Ensure we can always write full aligned vectors.
+struct TestAlignedT {
+ template <typename T>
+ void operator()(T /*unused*/) const {
+ std::mt19937 rng(129);
+ std::uniform_int_distribution<int> dist(0, 16);
+ const ScalableTag<T> d;
+
+ for (size_t ysize = 1; ysize < 4; ++ysize) {
+ for (size_t xsize = 1; xsize < 64; ++xsize) {
+ Image<T> img(xsize, ysize);
+
+ for (size_t y = 0; y < ysize; ++y) {
+ T* HWY_RESTRICT row = img.MutableRow(y);
+ for (size_t x = 0; x < xsize; x += Lanes(d)) {
+ const auto values = Iota(d, static_cast<T>(dist(rng)));
+ Store(values, d, row + x);
+ }
+ }
+
+ // Sanity check to prevent optimizing out the writes
+ const auto x = std::uniform_int_distribution<size_t>(0, xsize - 1)(rng);
+ const auto y = std::uniform_int_distribution<size_t>(0, ysize - 1)(rng);
+ HWY_ASSERT(img.ConstRow(y)[x] < 16 + Lanes(d));
+ }
+ }
+ }
+};
+
+void TestAligned() { ForUnsignedTypes(TestAlignedT()); }
+
+// Ensure we can write an unaligned vector starting at the last valid value.
+struct TestUnalignedT {
+ template <typename T>
+ void operator()(T /*unused*/) const {
+ std::mt19937 rng(129);
+ std::uniform_int_distribution<int> dist(0, 3);
+ const ScalableTag<T> d;
+
+ for (size_t ysize = 1; ysize < 4; ++ysize) {
+ for (size_t xsize = 1; xsize < 128; ++xsize) {
+ Image<T> img(xsize, ysize);
+ img.InitializePaddingForUnalignedAccesses();
+
+// This test reads padding, which only works if it was initialized,
+// which only happens in MSAN builds.
+#if HWY_IS_MSAN || HWY_IDE
+ // Initialize only the valid samples
+ for (size_t y = 0; y < ysize; ++y) {
+ T* HWY_RESTRICT row = img.MutableRow(y);
+ for (size_t x = 0; x < xsize; ++x) {
+ row[x] = static_cast<T>(1u << dist(rng));
+ }
+ }
+
+ // Read padding bits
+ auto accum = Zero(d);
+ for (size_t y = 0; y < ysize; ++y) {
+ T* HWY_RESTRICT row = img.MutableRow(y);
+ for (size_t x = 0; x < xsize; ++x) {
+ accum = Or(accum, LoadU(d, row + x));
+ }
+ }
+
+ // Ensure padding was zero
+ const size_t N = Lanes(d);
+ auto lanes = AllocateAligned<T>(N);
+ Store(accum, d, lanes.get());
+ for (size_t i = 0; i < N; ++i) {
+ HWY_ASSERT(lanes[i] < 16);
+ }
+#else // Check that writing padding does not overwrite valid samples
+ // Initialize only the valid samples
+ for (size_t y = 0; y < ysize; ++y) {
+ T* HWY_RESTRICT row = img.MutableRow(y);
+ for (size_t x = 0; x < xsize; ++x) {
+ row[x] = static_cast<T>(x);
+ }
+ }
+
+ // Zero padding and rightmost sample
+ for (size_t y = 0; y < ysize; ++y) {
+ T* HWY_RESTRICT row = img.MutableRow(y);
+ StoreU(Zero(d), d, row + xsize - 1);
+ }
+
+ // Ensure no samples except the rightmost were overwritten
+ for (size_t y = 0; y < ysize; ++y) {
+ T* HWY_RESTRICT row = img.MutableRow(y);
+ for (size_t x = 0; x < xsize - 1; ++x) {
+ HWY_ASSERT_EQ(static_cast<T>(x), row[x]);
+ }
+ }
+#endif
+ }
+ }
+ }
+};
+
+void TestUnaligned() { ForUnsignedTypes(TestUnalignedT()); }
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+} // namespace HWY_NAMESPACE
+} // namespace hwy
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+
+namespace hwy {
+HWY_BEFORE_TEST(ImageTest);
+HWY_EXPORT_AND_TEST_P(ImageTest, TestAligned);
+HWY_EXPORT_AND_TEST_P(ImageTest, TestUnaligned);
+} // namespace hwy
+
+#endif
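The client-side counterpart of TestUnaligned, as a minimal sketch (UnalignedAccessExample is hypothetical; static dispatch via hwy/highway.h is assumed): Padding::kUnaligned exists so that LoadU/StoreU anchored at any valid sample, including the last one, stay within initialized memory.

#include "hwy/contrib/image/image.h"
#include "hwy/highway.h"

namespace hn = hwy::HWY_NAMESPACE;

HWY_ATTR void UnalignedAccessExample() {
  const hn::ScalableTag<float> d;
  hwy::ImageF img(37, 2);  // xsize need not be a multiple of Lanes(d)
  // Required before unaligned accesses near the right border (e.g. for
  // convolutions); in msan builds it zero-initializes the padding lanes.
  img.InitializePaddingForUnalignedAccesses();
  float* HWY_RESTRICT row = img.MutableRow(0);
  for (size_t x = 0; x < img.xsize(); ++x) row[x] = static_cast<float>(x);
  // Legal for any x <= xsize() - 1: the vector may extend into padding, whose
  // lanes carry no meaningful data - use only the valid lanes of the result.
  const auto v = hn::LoadU(d, row + img.xsize() - 1);
  const float last = hn::GetLane(v);  // lane 0 holds the last valid sample
  (void)last;
}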