From 6bf0a5cb5034a7e684dcc3500e841785237ce2dd Mon Sep 17 00:00:00 2001
From: Daniel Baumann
Date: Sun, 7 Apr 2024 19:32:43 +0200
Subject: Adding upstream version 1:115.7.0.

Signed-off-by: Daniel Baumann
---
 third_party/highway/hwy/contrib/image/image.cc | 145 +++++++
 third_party/highway/hwy/contrib/image/image.h  | 470 +++++++++++++++++++++
 .../highway/hwy/contrib/image/image_test.cc    | 152 +++++++
 3 files changed, 767 insertions(+)
 create mode 100644 third_party/highway/hwy/contrib/image/image.cc
 create mode 100644 third_party/highway/hwy/contrib/image/image.h
 create mode 100644 third_party/highway/hwy/contrib/image/image_test.cc

diff --git a/third_party/highway/hwy/contrib/image/image.cc b/third_party/highway/hwy/contrib/image/image.cc
new file mode 100644
index 0000000000..67b37d2711
--- /dev/null
+++ b/third_party/highway/hwy/contrib/image/image.cc
@@ -0,0 +1,145 @@
+// Copyright 2020 Google LLC
+// SPDX-License-Identifier: Apache-2.0
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "hwy/contrib/image/image.h"
+
+#include <utility>  // std::swap
+#include <cstring>  // memset
+
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "hwy/contrib/image/image.cc"
+#include "hwy/foreach_target.h"  // IWYU pragma: keep
+#include "hwy/highway.h"
+
+HWY_BEFORE_NAMESPACE();
+namespace hwy {
+namespace HWY_NAMESPACE {
+size_t GetVectorSize() { return Lanes(ScalableTag<uint8_t>()); }
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+}  // namespace HWY_NAMESPACE
+
+}  // namespace hwy
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+namespace hwy {
+namespace {
+HWY_EXPORT(GetVectorSize);  // Local function.
+}  // namespace
+
+size_t ImageBase::VectorSize() {
+  // Do not cache the result - we must return the current value, which may
+  // differ from that of the first call if it was subject to DisableTargets!
+  return HWY_DYNAMIC_DISPATCH(GetVectorSize)();
+}
+
+size_t ImageBase::BytesPerRow(const size_t xsize, const size_t sizeof_t) {
+  const size_t vec_size = VectorSize();
+  size_t valid_bytes = xsize * sizeof_t;
+
+  // Allow unaligned accesses starting at the last valid value - this may raise
+  // msan errors unless the user calls InitializePaddingForUnalignedAccesses.
+  // Skip for the scalar case because no extra lanes will be loaded.
+  if (vec_size != 1) {
+    HWY_DASSERT(vec_size >= sizeof_t);
+    valid_bytes += vec_size - sizeof_t;
+  }
+
+  // Round up to vector and cache line size.
+  const size_t align = HWY_MAX(vec_size, HWY_ALIGNMENT);
+  size_t bytes_per_row = RoundUpTo(valid_bytes, align);
+
+  // During the lengthy window before writes are committed to memory, CPUs
+  // guard against read-after-write hazards by checking the address, but
+  // only the lower 11 bits. We avoid a false dependency between writes to
+  // consecutive rows by ensuring their sizes are not multiples of 2 KiB.
+  // Avoid2K prevents the same problem for the planes of an Image3.
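+  // Worked example (illustrative): xsize = 1024 floats with vec_size = 64
+  // gives valid_bytes = 4096 + 60 = 4156, rounded up to 4160. 4160 is a
+  // multiple of HWY_ALIGNMENT, so the check below adds another `align`,
+  // yielding 4224 - not a multiple of 2 KiB.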
+  if (bytes_per_row % HWY_ALIGNMENT == 0) {
+    bytes_per_row += align;
+  }
+
+  HWY_DASSERT(bytes_per_row % align == 0);
+  return bytes_per_row;
+}
+
+ImageBase::ImageBase(const size_t xsize, const size_t ysize,
+                     const size_t sizeof_t)
+    : xsize_(static_cast<uint32_t>(xsize)),
+      ysize_(static_cast<uint32_t>(ysize)),
+      bytes_(nullptr, AlignedFreer(&AlignedFreer::DoNothing, nullptr)) {
+  HWY_ASSERT(sizeof_t == 1 || sizeof_t == 2 || sizeof_t == 4 || sizeof_t == 8);
+
+  bytes_per_row_ = 0;
+  // Dimensions can be zero, e.g. for lazily-allocated images. Only allocate
+  // if nonzero, because "zero" bytes still have padding/bookkeeping overhead.
+  if (xsize != 0 && ysize != 0) {
+    bytes_per_row_ = BytesPerRow(xsize, sizeof_t);
+    bytes_ = AllocateAligned<uint8_t>(bytes_per_row_ * ysize);
+    HWY_ASSERT(bytes_.get() != nullptr);
+    InitializePadding(sizeof_t, Padding::kRoundUp);
+  }
+}
+
+ImageBase::ImageBase(const size_t xsize, const size_t ysize,
+                     const size_t bytes_per_row, void* const aligned)
+    : xsize_(static_cast<uint32_t>(xsize)),
+      ysize_(static_cast<uint32_t>(ysize)),
+      bytes_per_row_(bytes_per_row),
+      bytes_(static_cast<uint8_t*>(aligned),
+             AlignedFreer(&AlignedFreer::DoNothing, nullptr)) {
+  const size_t vec_size = VectorSize();
+  HWY_ASSERT(bytes_per_row % vec_size == 0);
+  HWY_ASSERT(reinterpret_cast<uintptr_t>(aligned) % vec_size == 0);
+}
+
+void ImageBase::InitializePadding(const size_t sizeof_t, Padding padding) {
+#if HWY_IS_MSAN || HWY_IDE
+  if (xsize_ == 0 || ysize_ == 0) return;
+
+  const size_t vec_size = VectorSize();  // Bytes, independent of sizeof_t!
+  if (vec_size == 1) return;  // Scalar mode: no padding needed
+
+  const size_t valid_size = xsize_ * sizeof_t;
+  const size_t initialize_size = padding == Padding::kRoundUp
+                                     ? RoundUpTo(valid_size, vec_size)
+                                     : valid_size + vec_size - sizeof_t;
+  if (valid_size == initialize_size) return;
+
+  for (size_t y = 0; y < ysize_; ++y) {
+    uint8_t* HWY_RESTRICT row = static_cast<uint8_t*>(VoidRow(y));
+#if defined(__clang__) && (__clang_major__ <= 6)
+    // There's a bug in msan in clang-6 when handling AVX2 operations. This
+    // workaround allows tests to pass on msan, although it is slower and
+    // prevents msan warnings from uninitialized images.
+    memset(row, 0, initialize_size);
+#else
+    memset(row + valid_size, 0, initialize_size - valid_size);
+#endif  // clang6
+  }
+#else
+  (void)sizeof_t;
+  (void)padding;
+#endif  // HWY_IS_MSAN
+}
+
+void ImageBase::Swap(ImageBase& other) {
+  std::swap(xsize_, other.xsize_);
+  std::swap(ysize_, other.ysize_);
+  std::swap(bytes_per_row_, other.bytes_per_row_);
+  std::swap(bytes_, other.bytes_);
+}
+
+}  // namespace hwy
+#endif  // HWY_ONCE
diff --git a/third_party/highway/hwy/contrib/image/image.h b/third_party/highway/hwy/contrib/image/image.h
new file mode 100644
index 0000000000..c99863b06c
--- /dev/null
+++ b/third_party/highway/hwy/contrib/image/image.h
@@ -0,0 +1,470 @@
+// Copyright 2020 Google LLC
+// SPDX-License-Identifier: Apache-2.0
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
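+
+// Example (illustrative) of wrapping caller-owned memory with the non-owning
+// constructor implemented in image.cc; the buffer pointer and stride must
+// satisfy the VectorSize()/BytesPerRow() contract:
+//
+//   const size_t xsize = 320, ysize = 240;
+//   const size_t stride = hwy::ImageBase::BytesPerRow(xsize, sizeof(float));
+//   auto buf = hwy::AllocateAligned<uint8_t>(stride * ysize);
+//   hwy::Image<float> view(xsize, ysize, stride, buf.get());
+//   // `view` references `buf`; the caller retains ownership and must keep
+//   // `buf` alive while `view` is in use.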
+
+#ifndef HIGHWAY_HWY_CONTRIB_IMAGE_IMAGE_H_
+#define HIGHWAY_HWY_CONTRIB_IMAGE_IMAGE_H_
+
+// SIMD/multicore-friendly planar image representation with row accessors.
+
+#include <stddef.h>
+#include <stdint.h>
+#include <string.h>
+
+#include <utility>  // std::move
+
+#include "hwy/aligned_allocator.h"
+#include "hwy/base.h"
+#include "hwy/highway_export.h"
+
+namespace hwy {
+
+// Type-independent parts of Image<> - reduces code duplication and facilitates
+// moving member function implementations to the .cc file.
+struct HWY_CONTRIB_DLLEXPORT ImageBase {
+  // Returns required alignment in bytes for externally allocated memory.
+  static size_t VectorSize();
+
+  // Returns the distance [bytes] between the start of two consecutive rows, a
+  // multiple of VectorSize but NOT of kAlias (see implementation).
+  static size_t BytesPerRow(const size_t xsize, const size_t sizeof_t);
+
+  // No allocation (for output params or unused images).
+  ImageBase()
+      : xsize_(0),
+        ysize_(0),
+        bytes_per_row_(0),
+        bytes_(nullptr, AlignedFreer(&AlignedFreer::DoNothing, nullptr)) {}
+
+  // Allocates memory (this is the common case).
+  ImageBase(size_t xsize, size_t ysize, size_t sizeof_t);
+
+  // References but does not take ownership of external memory. Useful for
+  // interoperability with other libraries. `aligned` must be aligned to a
+  // multiple of VectorSize() and `bytes_per_row` must also be a multiple of
+  // VectorSize() or preferably equal to BytesPerRow().
+  ImageBase(size_t xsize, size_t ysize, size_t bytes_per_row, void* aligned);
+
+  // Copy construction/assignment is forbidden to avoid inadvertent copies,
+  // which can be very expensive. Use CopyImageTo() instead.
+  ImageBase(const ImageBase& other) = delete;
+  ImageBase& operator=(const ImageBase& other) = delete;
+
+  // Move constructor (required for returning Image from functions).
+  ImageBase(ImageBase&& other) noexcept = default;
+
+  // Move assignment (required for std::vector).
+  ImageBase& operator=(ImageBase&& other) noexcept = default;
+
+  void Swap(ImageBase& other);
+
+  // Useful for pre-allocating an image with some padding for alignment
+  // purposes and later reporting the actual valid dimensions. The caller is
+  // responsible for ensuring xsize/ysize are <= the original dimensions.
+  void ShrinkTo(const size_t xsize, const size_t ysize) {
+    xsize_ = static_cast<uint32_t>(xsize);
+    ysize_ = static_cast<uint32_t>(ysize);
+    // NOTE: we cannot recompute bytes_per_row for more compact storage and
+    // better locality because that would invalidate the image contents.
+  }
+
+  // How many pixels.
+  HWY_INLINE size_t xsize() const { return xsize_; }
+  HWY_INLINE size_t ysize() const { return ysize_; }
+
+  // NOTE: do not use this for copying rows - the valid xsize may be much less.
+  HWY_INLINE size_t bytes_per_row() const { return bytes_per_row_; }
+
+  // Raw access to byte contents, for interfacing with other libraries.
+  // Unsigned char instead of char to avoid surprises (sign extension).
+  HWY_INLINE uint8_t* bytes() {
+    void* p = bytes_.get();
+    return static_cast<uint8_t*>(HWY_ASSUME_ALIGNED(p, 64));
+  }
+  HWY_INLINE const uint8_t* bytes() const {
+    const void* p = bytes_.get();
+    return static_cast<const uint8_t*>(HWY_ASSUME_ALIGNED(p, 64));
+  }
+
+ protected:
+  // Returns pointer to the start of a row.
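+  // (Rows are sized via BytesPerRow, so with the default Padding::kRoundUp,
+  // aligned loads of the form Load(d, row + x) for x = 0, Lanes(d), ... are
+  // valid across the entire padded row; see Padding below.)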
+  HWY_INLINE void* VoidRow(const size_t y) const {
+#if HWY_IS_ASAN || HWY_IS_MSAN || HWY_IS_TSAN
+    if (y >= ysize_) {
+      HWY_ABORT("Row(%d) >= %u\n", static_cast<int>(y), ysize_);
+    }
+#endif
+
+    void* row = bytes_.get() + y * bytes_per_row_;
+    return HWY_ASSUME_ALIGNED(row, 64);
+  }
+
+  enum class Padding {
+    // Allow Load(d, row + x) for x = 0; x < xsize(); x += Lanes(d). Default.
+    kRoundUp,
+    // Allow LoadU(d, row + x) for x <= xsize() - 1. This requires an extra
+    // vector to be initialized. If done by default, this would suppress
+    // legitimate msan warnings. We therefore require users to explicitly call
+    // InitializePadding before using unaligned loads (e.g. convolution).
+    kUnaligned
+  };
+
+  // Initializes the minimum bytes required to suppress msan warnings from
+  // legitimate (according to Padding mode) vector loads/stores on the right
+  // border, where some lanes are uninitialized and assumed to be unused.
+  void InitializePadding(size_t sizeof_t, Padding padding);
+
+  // (Members are non-const to enable assignment during move-assignment.)
+  uint32_t xsize_;  // In valid pixels, not including any padding.
+  uint32_t ysize_;
+  size_t bytes_per_row_;  // Includes padding.
+  AlignedFreeUniquePtr<uint8_t[]> bytes_;
+};
+
+// Single channel, aligned rows separated by padding. T must be POD.
+//
+// 'Single channel' (one 2D array per channel) simplifies vectorization
+// (repeating the same operation on multiple adjacent components) without the
+// complexity of a hybrid layout (8 R, 8 G, 8 B, ...). In particular, clients
+// can easily iterate over all components in a row and Image requires no
+// knowledge of the pixel format beyond the component type "T".
+//
+// 'Aligned' means each row is aligned to the L1 cache line size. This prevents
+// false sharing between two threads operating on adjacent rows.
+//
+// 'Padding' is still relevant because vectors could potentially be larger than
+// a cache line. By rounding up row sizes to the vector size, we allow
+// reading/writing ALIGNED vectors whose first lane is a valid sample. This
+// avoids needing a separate loop to handle remaining unaligned lanes.
+//
+// This image layout could also be achieved with a vector and a row accessor
+// function, but a class wrapper with support for a "deleter" allows wrapping
+// existing memory allocated by clients without copying the pixels. It also
+// provides convenient accessors for xsize/ysize, which shortens function
+// argument lists. Supports move-construction so it can be stored in
+// containers.
+template <typename ComponentType>
+class Image : public ImageBase {
+ public:
+  using T = ComponentType;
+
+  Image() = default;
+  Image(const size_t xsize, const size_t ysize)
+      : ImageBase(xsize, ysize, sizeof(T)) {}
+  Image(const size_t xsize, const size_t ysize, size_t bytes_per_row,
+        void* aligned)
+      : ImageBase(xsize, ysize, bytes_per_row, aligned) {}
+
+  void InitializePaddingForUnalignedAccesses() {
+    InitializePadding(sizeof(T), Padding::kUnaligned);
+  }
+
+  HWY_INLINE const T* ConstRow(const size_t y) const {
+    return static_cast<const T*>(VoidRow(y));
+  }
+  HWY_INLINE const T* ConstRow(const size_t y) {
+    return static_cast<const T*>(VoidRow(y));
+  }
+
+  // Returns pointer to non-const. This allows passing const Image* parameters
+  // when the callee is only supposed to fill the pixels, as opposed to
+  // allocating or resizing the image.
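+  // Illustrative callee signature: a function taking `const Image<float>*
+  // out` may write pixels via out->MutableRow(y), but cannot reallocate or
+  // resize *out.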
+  HWY_INLINE T* MutableRow(const size_t y) const {
+    return static_cast<T*>(VoidRow(y));
+  }
+  HWY_INLINE T* MutableRow(const size_t y) {
+    return static_cast<T*>(VoidRow(y));
+  }
+
+  // Returns number of pixels (some of which are padding) per row. Useful for
+  // computing other rows via pointer arithmetic. WARNING: this must
+  // NOT be used to determine xsize.
+  HWY_INLINE intptr_t PixelsPerRow() const {
+    return static_cast<intptr_t>(bytes_per_row_ / sizeof(T));
+  }
+};
+
+using ImageF = Image<float>;
+
+// A bundle of 3 same-sized images. To fill an existing Image3 using
+// single-channel producers, we also need access to each const Image*. Const
+// prevents breaking the same-size invariant, while still allowing pixels to be
+// changed via MutableRow.
+template <typename ComponentType>
+class Image3 {
+ public:
+  using T = ComponentType;
+  using ImageT = Image<T>;
+  static constexpr size_t kNumPlanes = 3;
+
+  Image3() : planes_{ImageT(), ImageT(), ImageT()} {}
+
+  Image3(const size_t xsize, const size_t ysize)
+      : planes_{ImageT(xsize, ysize), ImageT(xsize, ysize),
+                ImageT(xsize, ysize)} {}
+
+  Image3(Image3&& other) noexcept {
+    for (size_t i = 0; i < kNumPlanes; i++) {
+      planes_[i] = std::move(other.planes_[i]);
+    }
+  }
+
+  Image3(ImageT&& plane0, ImageT&& plane1, ImageT&& plane2) {
+    if (!SameSize(plane0, plane1) || !SameSize(plane0, plane2)) {
+      HWY_ABORT(
+          "Not same size: %d x %d, %d x %d, %d x %d\n",
+          static_cast<int>(plane0.xsize()), static_cast<int>(plane0.ysize()),
+          static_cast<int>(plane1.xsize()), static_cast<int>(plane1.ysize()),
+          static_cast<int>(plane2.xsize()), static_cast<int>(plane2.ysize()));
+    }
+    planes_[0] = std::move(plane0);
+    planes_[1] = std::move(plane1);
+    planes_[2] = std::move(plane2);
+  }
+
+  // Copy construction/assignment is forbidden to avoid inadvertent copies,
+  // which can be very expensive. Use CopyImageTo instead.
+  Image3(const Image3& other) = delete;
+  Image3& operator=(const Image3& other) = delete;
+
+  Image3& operator=(Image3&& other) noexcept {
+    for (size_t i = 0; i < kNumPlanes; i++) {
+      planes_[i] = std::move(other.planes_[i]);
+    }
+    return *this;
+  }
+
+  HWY_INLINE const T* ConstPlaneRow(const size_t c, const size_t y) const {
+    return static_cast<const T*>(VoidPlaneRow(c, y));
+  }
+  HWY_INLINE const T* ConstPlaneRow(const size_t c, const size_t y) {
+    return static_cast<const T*>(VoidPlaneRow(c, y));
+  }
+
+  HWY_INLINE T* MutablePlaneRow(const size_t c, const size_t y) const {
+    return static_cast<T*>(VoidPlaneRow(c, y));
+  }
+  HWY_INLINE T* MutablePlaneRow(const size_t c, const size_t y) {
+    return static_cast<T*>(VoidPlaneRow(c, y));
+  }
+
+  HWY_INLINE const ImageT& Plane(size_t idx) const { return planes_[idx]; }
+
+  void Swap(Image3& other) {
+    for (size_t c = 0; c < 3; ++c) {
+      other.planes_[c].Swap(planes_[c]);
+    }
+  }
+
+  void ShrinkTo(const size_t xsize, const size_t ysize) {
+    for (ImageT& plane : planes_) {
+      plane.ShrinkTo(xsize, ysize);
+    }
+  }
+
+  // Sizes of all three images are guaranteed to be equal.
+  HWY_INLINE size_t xsize() const { return planes_[0].xsize(); }
+  HWY_INLINE size_t ysize() const { return planes_[0].ysize(); }
+  // Returns offset [bytes] from one row to the next row of the same plane.
+  // WARNING: this must NOT be used to determine xsize, nor for copying rows -
+  // the valid xsize may be much less.
+  HWY_INLINE size_t bytes_per_row() const { return planes_[0].bytes_per_row(); }
+  // Returns number of pixels (some of which are padding) per row. Useful for
+  // computing other rows via pointer arithmetic. WARNING: this must NOT be
+  // used to determine xsize.
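+  // (For example, ConstPlaneRow(c, y) + PixelsPerRow() points at row y + 1
+  // of the same plane.)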
+  HWY_INLINE intptr_t PixelsPerRow() const { return planes_[0].PixelsPerRow(); }
+
+ private:
+  // Returns pointer to the start of a row.
+  HWY_INLINE void* VoidPlaneRow(const size_t c, const size_t y) const {
+#if HWY_IS_ASAN || HWY_IS_MSAN || HWY_IS_TSAN
+    if (c >= kNumPlanes || y >= ysize()) {
+      HWY_ABORT("PlaneRow(%d, %d) >= %d\n", static_cast<int>(c),
+                static_cast<int>(y), static_cast<int>(ysize()));
+    }
+#endif
+    // Use the first plane's stride because the compiler might not realize they
+    // are all equal. Thus we only need a single multiplication for all planes.
+    const size_t row_offset = y * planes_[0].bytes_per_row();
+    const void* row = planes_[c].bytes() + row_offset;
+    return const_cast<void*>(HWY_ASSUME_ALIGNED(row, HWY_ALIGNMENT));
+  }
+
+ private:
+  ImageT planes_[kNumPlanes];
+};
+
+using Image3F = Image3<float>;
+
+// Rectangular region in image(s). Factoring this out of Image instead of
+// shifting the pointer by x0/y0 allows this to apply to multiple images with
+// different resolutions. Can compare sizes via SameSize(rect1, rect2).
+class Rect {
+ public:
+  // Most windows are xsize_max * ysize_max, except those on the borders where
+  // begin + size_max > end.
+  constexpr Rect(size_t xbegin, size_t ybegin, size_t xsize_max,
+                 size_t ysize_max, size_t xend, size_t yend)
+      : x0_(xbegin),
+        y0_(ybegin),
+        xsize_(ClampedSize(xbegin, xsize_max, xend)),
+        ysize_(ClampedSize(ybegin, ysize_max, yend)) {}
+
+  // Construct with origin and known size (typically from another Rect).
+  constexpr Rect(size_t xbegin, size_t ybegin, size_t xsize, size_t ysize)
+      : x0_(xbegin), y0_(ybegin), xsize_(xsize), ysize_(ysize) {}
+
+  // Construct a rect that covers a whole image.
+  template <typename T>
+  explicit Rect(const Image<T>& image)
+      : Rect(0, 0, image.xsize(), image.ysize()) {}
+
+  Rect() : Rect(0, 0, 0, 0) {}
+
+  Rect(const Rect&) = default;
+  Rect& operator=(const Rect&) = default;
+
+  Rect Subrect(size_t xbegin, size_t ybegin, size_t xsize_max,
+               size_t ysize_max) {
+    return Rect(x0_ + xbegin, y0_ + ybegin, xsize_max, ysize_max, x0_ + xsize_,
+                y0_ + ysize_);
+  }
+
+  template <typename T>
+  const T* ConstRow(const Image<T>* image, size_t y) const {
+    return image->ConstRow(y + y0_) + x0_;
+  }
+
+  template <typename T>
+  T* MutableRow(const Image<T>* image, size_t y) const {
+    return image->MutableRow(y + y0_) + x0_;
+  }
+
+  template <typename T>
+  const T* ConstPlaneRow(const Image3<T>& image, size_t c, size_t y) const {
+    return image.ConstPlaneRow(c, y + y0_) + x0_;
+  }
+
+  template <typename T>
+  T* MutablePlaneRow(Image3<T>* image, const size_t c, size_t y) const {
+    return image->MutablePlaneRow(c, y + y0_) + x0_;
+  }
+
+  // Returns true if this Rect fully resides in the given image. ImageT could
+  // be Image<T> or Image3<T>; however, if ImageT is Rect, results are
+  // nonsensical.
+  template <class ImageT>
+  bool IsInside(const ImageT& image) const {
+    return (x0_ + xsize_ <= image.xsize()) && (y0_ + ysize_ <= image.ysize());
+  }
+
+  size_t x0() const { return x0_; }
+  size_t y0() const { return y0_; }
+  size_t xsize() const { return xsize_; }
+  size_t ysize() const { return ysize_; }
+
+ private:
+  // Returns size_max, or whatever is left in [begin, end).
+  static constexpr size_t ClampedSize(size_t begin, size_t size_max,
+                                      size_t end) {
+    return (begin + size_max <= end) ? size_max
+                                     : (end > begin ? end - begin : 0);
+  }
+
+  size_t x0_;
+  size_t y0_;
+
+  size_t xsize_;
+  size_t ysize_;
+};
+
+// Works for any image-like input type(s).
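+// E.g. SameSize(image1, image2), SameSize(rect, image), or
+// SameSize(image3.Plane(0), image3.Plane(1)) - anything with xsize()/ysize().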
+template <class Image1, class Image2>
+HWY_MAYBE_UNUSED bool SameSize(const Image1& image1, const Image2& image2) {
+  return image1.xsize() == image2.xsize() && image1.ysize() == image2.ysize();
+}
+
+// Mirrors out-of-bounds coordinates and returns valid coordinates unchanged.
+// We assume the radius (distance outside the image) is small compared to the
+// image size, otherwise this might not terminate.
+// The mirror is outside the last column (the border pixel is also replicated),
+// e.g. for xsize = 5: x = -2 => 1 and x = 6 => 3.
+static HWY_INLINE HWY_MAYBE_UNUSED size_t Mirror(int64_t x,
+                                                 const int64_t xsize) {
+  HWY_DASSERT(xsize != 0);
+
+  // TODO(janwas): replace with branchless version
+  while (x < 0 || x >= xsize) {
+    if (x < 0) {
+      x = -x - 1;
+    } else {
+      x = 2 * xsize - 1 - x;
+    }
+  }
+  return static_cast<size_t>(x);
+}
+
+// Wrap modes for ensuring X/Y coordinates are in the valid range [0, size):
+
+// Mirrors (repeating the edge pixel once). Useful for convolutions.
+struct WrapMirror {
+  HWY_INLINE size_t operator()(const int64_t coord, const size_t size) const {
+    return Mirror(coord, static_cast<int64_t>(size));
+  }
+};
+
+// Returns the same coordinate, for when we know "coord" is already valid
+// (e.g. in the interior of an image).
+struct WrapUnchanged {
+  HWY_INLINE size_t operator()(const int64_t coord, size_t /*size*/) const {
+    return static_cast<size_t>(coord);
+  }
+};
+
+// Similar to Wrap* but for row pointers (reduces Row() multiplications).
+
+class WrapRowMirror {
+ public:
+  template <class View>
+  WrapRowMirror(const View& image, size_t ysize)
+      : first_row_(image.ConstRow(0)), last_row_(image.ConstRow(ysize - 1)) {}
+
+  const float* operator()(const float* const HWY_RESTRICT row,
+                          const int64_t stride) const {
+    if (row < first_row_) {
+      const int64_t num_before = first_row_ - row;
+      // Mirrored; one row before => row 0, two before => row 1, ...
+      return first_row_ + num_before - stride;
+    }
+    if (row > last_row_) {
+      const int64_t num_after = row - last_row_;
+      // Mirrored; one row after => last row, two after => last - 1, ...
+      return last_row_ - num_after + stride;
+    }
+    return row;
+  }
+
+ private:
+  const float* const HWY_RESTRICT first_row_;
+  const float* const HWY_RESTRICT last_row_;
+};
+
+struct WrapRowUnchanged {
+  HWY_INLINE const float* operator()(const float* const HWY_RESTRICT row,
+                                     int64_t /*stride*/) const {
+    return row;
+  }
+};
+
+}  // namespace hwy
+
+#endif  // HIGHWAY_HWY_CONTRIB_IMAGE_IMAGE_H_
diff --git a/third_party/highway/hwy/contrib/image/image_test.cc b/third_party/highway/hwy/contrib/image/image_test.cc
new file mode 100644
index 0000000000..6886577a46
--- /dev/null
+++ b/third_party/highway/hwy/contrib/image/image_test.cc
@@ -0,0 +1,152 @@
+// Copyright (c) the JPEG XL Project
+// SPDX-License-Identifier: Apache-2.0
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
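+
+// Note (illustrative): foreach_target.h below re-includes this file once per
+// enabled target, with HWY_NAMESPACE expanding to a target-specific namespace
+// such as N_AVX2; HWY_EXPORT_AND_TEST_P at the end then registers one test
+// instantiation per target.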
+ +#include "hwy/contrib/image/image.h" + +#include +#include +#include +#include + +#include +#include + +#undef HWY_TARGET_INCLUDE +#define HWY_TARGET_INCLUDE "hwy/contrib/image/image_test.cc" +#include "hwy/foreach_target.h" // IWYU pragma: keep + +// After foreach_target: +#include "hwy/highway.h" +#include "hwy/tests/test_util-inl.h" + +HWY_BEFORE_NAMESPACE(); +namespace hwy { +namespace HWY_NAMESPACE { + +// Ensure we can always write full aligned vectors. +struct TestAlignedT { + template + void operator()(T /*unused*/) const { + std::mt19937 rng(129); + std::uniform_int_distribution dist(0, 16); + const ScalableTag d; + + for (size_t ysize = 1; ysize < 4; ++ysize) { + for (size_t xsize = 1; xsize < 64; ++xsize) { + Image img(xsize, ysize); + + for (size_t y = 0; y < ysize; ++y) { + T* HWY_RESTRICT row = img.MutableRow(y); + for (size_t x = 0; x < xsize; x += Lanes(d)) { + const auto values = Iota(d, static_cast(dist(rng))); + Store(values, d, row + x); + } + } + + // Sanity check to prevent optimizing out the writes + const auto x = std::uniform_int_distribution(0, xsize - 1)(rng); + const auto y = std::uniform_int_distribution(0, ysize - 1)(rng); + HWY_ASSERT(img.ConstRow(y)[x] < 16 + Lanes(d)); + } + } + } +}; + +void TestAligned() { ForUnsignedTypes(TestAlignedT()); } + +// Ensure we can write an unaligned vector starting at the last valid value. +struct TestUnalignedT { + template + void operator()(T /*unused*/) const { + std::mt19937 rng(129); + std::uniform_int_distribution dist(0, 3); + const ScalableTag d; + + for (size_t ysize = 1; ysize < 4; ++ysize) { + for (size_t xsize = 1; xsize < 128; ++xsize) { + Image img(xsize, ysize); + img.InitializePaddingForUnalignedAccesses(); + +// This test reads padding, which only works if it was initialized, +// which only happens in MSAN builds. 
+#if HWY_IS_MSAN || HWY_IDE
+        // Initialize only the valid samples.
+        for (size_t y = 0; y < ysize; ++y) {
+          T* HWY_RESTRICT row = img.MutableRow(y);
+          for (size_t x = 0; x < xsize; ++x) {
+            row[x] = static_cast<T>(1u << dist(rng));
+          }
+        }
+
+        // Read padding bits.
+        auto accum = Zero(d);
+        for (size_t y = 0; y < ysize; ++y) {
+          T* HWY_RESTRICT row = img.MutableRow(y);
+          for (size_t x = 0; x < xsize; ++x) {
+            accum = Or(accum, LoadU(d, row + x));
+          }
+        }
+
+        // Ensure the padding was zero.
+        const size_t N = Lanes(d);
+        auto lanes = AllocateAligned<T>(N);
+        Store(accum, d, lanes.get());
+        for (size_t i = 0; i < N; ++i) {
+          HWY_ASSERT(lanes[i] < 16);
+        }
+#else  // Check that writing padding does not overwrite valid samples.
+        // Initialize only the valid samples.
+        for (size_t y = 0; y < ysize; ++y) {
+          T* HWY_RESTRICT row = img.MutableRow(y);
+          for (size_t x = 0; x < xsize; ++x) {
+            row[x] = static_cast<T>(x);
+          }
+        }
+
+        // Zero the padding and the rightmost sample.
+        for (size_t y = 0; y < ysize; ++y) {
+          T* HWY_RESTRICT row = img.MutableRow(y);
+          StoreU(Zero(d), d, row + xsize - 1);
+        }
+
+        // Ensure no samples except the rightmost were overwritten.
+        for (size_t y = 0; y < ysize; ++y) {
+          T* HWY_RESTRICT row = img.MutableRow(y);
+          for (size_t x = 0; x < xsize - 1; ++x) {
+            HWY_ASSERT_EQ(static_cast<T>(x), row[x]);
+          }
+        }
+#endif
+      }
+    }
+  }
+};
+
+void TestUnaligned() { ForUnsignedTypes(TestUnalignedT()); }
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+}  // namespace HWY_NAMESPACE
+}  // namespace hwy
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+
+namespace hwy {
+HWY_BEFORE_TEST(ImageTest);
+HWY_EXPORT_AND_TEST_P(ImageTest, TestAligned);
+HWY_EXPORT_AND_TEST_P(ImageTest, TestUnaligned);
+}  // namespace hwy
+
+#endif
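
Usage sketch (illustrative, not part of the patch): a minimal static-dispatch
program against the API added above. It assumes the single-target entry point
of hwy/highway.h; error handling is omitted.

    #include <cstdio>

    #include "hwy/contrib/image/image.h"
    #include "hwy/highway.h"

    namespace hn = hwy::HWY_NAMESPACE;  // namespace of the static target

    int main() {
      hwy::ImageF img(640, 480);  // rows aligned and padded via BytesPerRow()
      const hn::ScalableTag<float> d;
      for (size_t y = 0; y < img.ysize(); ++y) {
        float* HWY_RESTRICT row = img.MutableRow(y);
        // Padding::kRoundUp (the default) permits full aligned vectors,
        // so no scalar remainder loop is needed.
        for (size_t x = 0; x < img.xsize(); x += hn::Lanes(d)) {
          hn::Store(hn::Set(d, static_cast<float>(y)), d, row + x);
        }
      }
      const hwy::Rect top_left(0, 0, 320, 240);
      std::printf("%zu x %zu, inside: %d\n", img.xsize(), img.ysize(),
                  static_cast<int>(top_left.IsInside(img)));
      return 0;
    }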