summary | refs | log | tree | commit | diff | stats
path: root/third_party/highway/hwy/contrib/image/image.h
diff options
context:
space:
mode:
Diffstat (limited to 'third_party/highway/hwy/contrib/image/image.h')
-rw-r--r-- third_party/highway/hwy/contrib/image/image.h 470
1 files changed, 470 insertions, 0 deletions
diff --git a/third_party/highway/hwy/contrib/image/image.h b/third_party/highway/hwy/contrib/image/image.h
new file mode 100644
index 0000000000..c99863b06c
--- /dev/null
+++ b/third_party/highway/hwy/contrib/image/image.h
@@ -0,0 +1,470 @@
+// Copyright 2020 Google LLC
+// SPDX-License-Identifier: Apache-2.0
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef HIGHWAY_HWY_CONTRIB_IMAGE_IMAGE_H_
+#define HIGHWAY_HWY_CONTRIB_IMAGE_IMAGE_H_
+
+// SIMD/multicore-friendly planar image representation with row accessors.
+
+#include <stddef.h>
+#include <stdint.h>
+#include <string.h>
+
+#include <utility> // std::move
+
+#include "hwy/aligned_allocator.h"
+#include "hwy/base.h"
+#include "hwy/highway_export.h"
+
+namespace hwy {
+
+// Everything in Image<> that does not depend on the component type lives
+// here. This reduces code duplication across instantiations and makes it
+// possible to move member function implementations into a cc file.
+struct HWY_CONTRIB_DLLEXPORT ImageBase {
+  // Alignment [bytes] required of externally allocated memory.
+  static size_t VectorSize();
+
+  // Distance [bytes] between the starts of two consecutive rows. It is a
+  // multiple of VectorSize but NOT kAlias (see implementation).
+  static size_t BytesPerRow(const size_t xsize, const size_t sizeof_t);
+
+  // Empty image without any allocation (for output params or unused images).
+  // The deleter is a no-op, so nothing is freed on destruction.
+  ImageBase()
+      : xsize_(0),
+        ysize_(0),
+        bytes_per_row_(0),
+        bytes_(nullptr, AlignedFreer(&AlignedFreer::DoNothing, nullptr)) {}
+
+  // Allocating constructor - this is the common case.
+  ImageBase(size_t xsize, size_t ysize, size_t sizeof_t);
+
+  // Wraps external memory WITHOUT taking ownership of it, which is useful for
+  // interoperability with other libraries. `aligned` must be aligned to a
+  // multiple of VectorSize(). `bytes_per_row` must also be a multiple of
+  // VectorSize(), and should preferably equal BytesPerRow().
+  ImageBase(size_t xsize, size_t ysize, size_t bytes_per_row, void* aligned);
+
+  // Copying is disabled because inadvertent copies can be very expensive.
+  // Use CopyImageTo() instead.
+  ImageBase(const ImageBase& other) = delete;
+  ImageBase& operator=(const ImageBase& other) = delete;
+
+  // Moving is allowed: construction is required for returning an Image from a
+  // function, assignment is required for std::vector.
+  ImageBase(ImageBase&& other) noexcept = default;
+  ImageBase& operator=(ImageBase&& other) noexcept = default;
+
+  void Swap(ImageBase& other);
+
+  // Reduces the reported dimensions. Intended for images that were
+  // pre-allocated with some padding for alignment purposes and later need to
+  // report their actual valid size. The caller is responsible for ensuring
+  // xsize/ysize are <= the original dimensions; this is not checked here.
+  void ShrinkTo(const size_t xsize, const size_t ysize) {
+    // bytes_per_row_ is deliberately left untouched: recomputing it for more
+    // compact storage and better locality would invalidate the image contents.
+    ysize_ = static_cast<uint32_t>(ysize);
+    xsize_ = static_cast<uint32_t>(xsize);
+  }
+
+  // Dimensions in pixels.
+  HWY_INLINE size_t xsize() const { return xsize_; }
+  HWY_INLINE size_t ysize() const { return ysize_; }
+
+  // Row stride in bytes. NOTE: do not use this for copying rows - the valid
+  // xsize may be much less.
+  HWY_INLINE size_t bytes_per_row() const { return bytes_per_row_; }
+
+  // Raw byte-level access, for interfacing with other libraries. The element
+  // type is unsigned rather than char to avoid surprises (sign extension).
+  HWY_INLINE uint8_t* bytes() {
+    void* raw = bytes_.get();
+    return static_cast<uint8_t * HWY_RESTRICT>(HWY_ASSUME_ALIGNED(raw, 64));
+  }
+  HWY_INLINE const uint8_t* bytes() const {
+    const void* raw = bytes_.get();
+    return static_cast<const uint8_t * HWY_RESTRICT>(
+        HWY_ASSUME_ALIGNED(raw, 64));
+  }
+
+ protected:
+  // Returns a pointer to the first byte of row `y`. The bounds check is only
+  // compiled in for sanitizer builds.
+  HWY_INLINE void* VoidRow(const size_t y) const {
+#if HWY_IS_ASAN || HWY_IS_MSAN || HWY_IS_TSAN
+    if (!(y < ysize_)) {
+      HWY_ABORT("Row(%d) >= %u\n", static_cast<int>(y), ysize_);
+    }
+#endif
+
+    const size_t offset = y * bytes_per_row_;
+    void* const row_start = bytes_.get() + offset;
+    return HWY_ASSUME_ALIGNED(row_start, 64);
+  }
+
+  enum class Padding {
+    // Default. Allows Load(d, row + x) for x = 0; x < xsize(); x += Lanes(d).
+    kRoundUp,
+    // Allows LoadU(d, row + x) for x <= xsize() - 1. This needs one extra
+    // vector to be initialized. Doing so by default would suppress legitimate
+    // msan warnings, hence users must explicitly call InitializePadding
+    // before using unaligned loads (e.g. convolution).
+    kUnaligned
+  };
+
+  // Initializes the minimum number of bytes required to suppress msan
+  // warnings from vector loads/stores on the right border that are legitimate
+  // according to the Padding mode, i.e. where some lanes are uninitialized
+  // and assumed to be unused.
+  void InitializePadding(size_t sizeof_t, Padding padding);
+
+  // (Members are non-const to enable assignment during move-assignment.)
+  uint32_t xsize_;  // In valid pixels, not including any padding.
+  uint32_t ysize_;
+  size_t bytes_per_row_;  // Includes padding.
+  AlignedFreeUniquePtr<uint8_t[]> bytes_;
+};
+
+// One channel of an image: aligned rows separated by padding. T must be POD.
+//
+// 'Single channel' means one 2D array per channel. This simplifies
+// vectorization (repeating the same operation on multiple adjacent
+// components) without the complexity of a hybrid layout (8 R, 8 G, 8 B, ...).
+// In particular, clients can easily iterate over all components in a row, and
+// Image needs no knowledge of the pixel format beyond the component type "T".
+//
+// 'Aligned' means each row is aligned to the L1 cache line size, which
+// prevents false sharing between two threads operating on adjacent rows.
+//
+// 'Padding' is still relevant because vectors could potentially be larger
+// than a cache line. Rounding row sizes up to the vector size allows
+// reading/writing ALIGNED vectors whose first lane is a valid sample, which
+// avoids a separate loop for the remaining unaligned lanes.
+//
+// The same layout could be achieved with a vector plus a row accessor
+// function. However, a class wrapper with support for a "deleter" can wrap
+// existing memory allocated by clients without copying the pixels. It also
+// offers convenient xsize/ysize accessors, which shortens function argument
+// lists, and supports move-construction so it can be stored in containers.
+template <typename ComponentType>
+class Image : public ImageBase {
+ public:
+  using T = ComponentType;
+
+  Image() = default;
+
+  // Forwards to the allocating ImageBase constructor with sizeof(T).
+  Image(const size_t xsize, const size_t ysize)
+      : ImageBase(xsize, ysize, sizeof(T)) {}
+
+  // Forwards to the ImageBase constructor that references external memory.
+  Image(const size_t xsize, const size_t ysize, size_t bytes_per_row,
+        void* aligned)
+      : ImageBase(xsize, ysize, bytes_per_row, aligned) {}
+
+  // Must be called explicitly before using unaligned loads on this image.
+  void InitializePaddingForUnalignedAccesses() {
+    InitializePadding(sizeof(T), Padding::kUnaligned);
+  }
+
+  // Read-only pointer to the start of row `y`.
+  HWY_INLINE const T* ConstRow(const size_t y) const {
+    const void* row = VoidRow(y);
+    return static_cast<const T*>(row);
+  }
+  HWY_INLINE const T* ConstRow(const size_t y) {
+    const void* row = VoidRow(y);
+    return static_cast<const T*>(row);
+  }
+
+  // Writable pointer to the start of row `y`. The const overload returns a
+  // pointer to non-const on purpose: it allows passing const Image*
+  // parameters when the callee is only supposed to fill the pixels, as
+  // opposed to allocating or resizing the image.
+  HWY_INLINE T* MutableRow(const size_t y) const {
+    void* row = VoidRow(y);
+    return static_cast<T*>(row);
+  }
+  HWY_INLINE T* MutableRow(const size_t y) {
+    void* row = VoidRow(y);
+    return static_cast<T*>(row);
+  }
+
+  // Number of pixels per row, some of which are padding. Useful for computing
+  // other rows via pointer arithmetic. WARNING: this must NOT be used to
+  // determine xsize.
+  HWY_INLINE intptr_t PixelsPerRow() const {
+    const size_t pixels = bytes_per_row_ / sizeof(T);
+    return static_cast<intptr_t>(pixels);
+  }
+};
+
+using ImageF = Image<float>;
+
+// A bundle of 3 same-sized images. To fill an existing Image3 using
+// single-channel producers, we also need access to each const Image*. Const
+// prevents breaking the same-size invariant, while still allowing pixels to be
+// changed via MutableRow.
+template <typename ComponentType>
+class Image3 {
+ public:
+  using T = ComponentType;
+  using ImageT = Image<T>;
+  static constexpr size_t kNumPlanes = 3;
+
+  // Three empty planes.
+  Image3() : planes_{ImageT(), ImageT(), ImageT()} {}
+
+  // Three planes of xsize x ysize pixels each.
+  Image3(const size_t xsize, const size_t ysize)
+      : planes_{ImageT(xsize, ysize), ImageT(xsize, ysize),
+                ImageT(xsize, ysize)} {}
+
+  Image3(Image3&& other) noexcept {
+    for (size_t i = 0; i < kNumPlanes; i++) {
+      planes_[i] = std::move(other.planes_[i]);
+    }
+  }
+
+  // Takes ownership of three existing planes. Aborts unless all three have
+  // identical dimensions.
+  Image3(ImageT&& plane0, ImageT&& plane1, ImageT&& plane2) {
+    if (!SameSize(plane0, plane1) || !SameSize(plane0, plane2)) {
+      HWY_ABORT(
+          "Not same size: %d x %d, %d x %d, %d x %d\n",
+          static_cast<int>(plane0.xsize()), static_cast<int>(plane0.ysize()),
+          static_cast<int>(plane1.xsize()), static_cast<int>(plane1.ysize()),
+          static_cast<int>(plane2.xsize()), static_cast<int>(plane2.ysize()));
+    }
+    planes_[0] = std::move(plane0);
+    planes_[1] = std::move(plane1);
+    planes_[2] = std::move(plane2);
+  }
+
+  // Copy construction/assignment is forbidden to avoid inadvertent copies,
+  // which can be very expensive. Use CopyImageTo instead.
+  Image3(const Image3& other) = delete;
+  Image3& operator=(const Image3& other) = delete;
+
+  Image3& operator=(Image3&& other) noexcept {
+    for (size_t i = 0; i < kNumPlanes; i++) {
+      planes_[i] = std::move(other.planes_[i]);
+    }
+    return *this;
+  }
+
+  // Read-only pointer to the start of row `y` in plane `c`.
+  HWY_INLINE const T* ConstPlaneRow(const size_t c, const size_t y) const {
+    return static_cast<const T*>(VoidPlaneRow(c, y));
+  }
+  HWY_INLINE const T* ConstPlaneRow(const size_t c, const size_t y) {
+    return static_cast<const T*>(VoidPlaneRow(c, y));
+  }
+
+  // Writable pointer to the start of row `y` in plane `c`. Also available on
+  // a const Image3 so that callees can fill pixels without being able to
+  // break the same-size invariant.
+  HWY_INLINE T* MutablePlaneRow(const size_t c, const size_t y) const {
+    return static_cast<T*>(VoidPlaneRow(c, y));
+  }
+  HWY_INLINE T* MutablePlaneRow(const size_t c, const size_t y) {
+    return static_cast<T*>(VoidPlaneRow(c, y));
+  }
+
+  HWY_INLINE const ImageT& Plane(size_t idx) const { return planes_[idx]; }
+
+  void Swap(Image3& other) {
+    for (size_t c = 0; c < kNumPlanes; ++c) {
+      other.planes_[c].Swap(planes_[c]);
+    }
+  }
+
+  void ShrinkTo(const size_t xsize, const size_t ysize) {
+    for (ImageT& plane : planes_) {
+      plane.ShrinkTo(xsize, ysize);
+    }
+  }
+
+  // Sizes of all three images are guaranteed to be equal.
+  HWY_INLINE size_t xsize() const { return planes_[0].xsize(); }
+  HWY_INLINE size_t ysize() const { return planes_[0].ysize(); }
+  // Returns offset [bytes] from one row to the next row of the same plane.
+  // WARNING: this must NOT be used to determine xsize, nor for copying rows -
+  // the valid xsize may be much less.
+  HWY_INLINE size_t bytes_per_row() const { return planes_[0].bytes_per_row(); }
+  // Returns number of pixels (some of which are padding) per row. Useful for
+  // computing other rows via pointer arithmetic. WARNING: this must NOT be used
+  // to determine xsize.
+  HWY_INLINE intptr_t PixelsPerRow() const { return planes_[0].PixelsPerRow(); }
+
+ private:
+  // Returns pointer to the start of row `y` in plane `c`. The bounds check is
+  // only compiled in for sanitizer builds.
+  HWY_INLINE void* VoidPlaneRow(const size_t c, const size_t y) const {
+#if HWY_IS_ASAN || HWY_IS_MSAN || HWY_IS_TSAN
+    if (c >= kNumPlanes || y >= ysize()) {
+      HWY_ABORT("PlaneRow(%d, %d) >= %d\n", static_cast<int>(c),
+                static_cast<int>(y), static_cast<int>(ysize()));
+    }
+#endif
+    // Use the first plane's stride because the compiler might not realize they
+    // are all equal. Thus we only need a single multiplication for all planes.
+    const size_t row_offset = y * planes_[0].bytes_per_row();
+    // bytes() on a const plane yields a pointer to const, but this function
+    // must return void* so that MutablePlaneRow works on a const Image3.
+    // Returning a pointer to const here would not convert to void* and would
+    // fail to compile as soon as any *PlaneRow accessor is instantiated.
+    const uint8_t* plane_bytes = planes_[c].bytes();
+    void* row = const_cast<uint8_t*>(plane_bytes) + row_offset;
+    return HWY_ASSUME_ALIGNED(row, HWY_ALIGNMENT);
+  }
+
+  ImageT planes_[kNumPlanes];
+};
+
+using Image3F = Image3<float>;
+
+// Rectangular region in image(s). Factoring this out of Image instead of
+// shifting the pointer by x0/y0 allows this to apply to multiple images with
+// different resolutions. Can compare size via SameSize(rect1, rect2).
+class Rect {
+ public:
+  // Most windows are xsize_max * ysize_max, except those on the borders where
+  // begin + size_max > end. Those are clamped to what is left in
+  // [begin, end), which is zero if begin >= end.
+  constexpr Rect(size_t xbegin, size_t ybegin, size_t xsize_max,
+                 size_t ysize_max, size_t xend, size_t yend)
+      : x0_(xbegin),
+        y0_(ybegin),
+        xsize_(ClampedSize(xbegin, xsize_max, xend)),
+        ysize_(ClampedSize(ybegin, ysize_max, yend)) {}
+
+  // Construct with origin and known size (typically from another Rect).
+  constexpr Rect(size_t xbegin, size_t ybegin, size_t xsize, size_t ysize)
+      : x0_(xbegin), y0_(ybegin), xsize_(xsize), ysize_(ysize) {}
+
+  // Construct a rect that covers a whole image.
+  template <typename ImageT>
+  explicit Rect(const ImageT& image)
+      : Rect(0, 0, image.xsize(), image.ysize()) {}
+
+  Rect() : Rect(0, 0, 0, 0) {}
+
+  Rect(const Rect&) = default;
+  Rect& operator=(const Rect&) = default;
+
+  // Returns a window of at most xsize_max x ysize_max whose origin is
+  // (xbegin, ybegin) relative to this rect, clamped to this rect's right and
+  // bottom edges. Does not modify this rect, hence const.
+  Rect Subrect(size_t xbegin, size_t ybegin, size_t xsize_max,
+               size_t ysize_max) const {
+    return Rect(x0_ + xbegin, y0_ + ybegin, xsize_max, ysize_max, x0_ + xsize_,
+                y0_ + ysize_);
+  }
+
+  // The row accessors below take `y` relative to this rect and return a
+  // pointer to the rect's first pixel (column x0) in that row.
+  template <typename T>
+  const T* ConstRow(const Image<T>* image, size_t y) const {
+    return image->ConstRow(y + y0_) + x0_;
+  }
+
+  template <typename T>
+  T* MutableRow(const Image<T>* image, size_t y) const {
+    return image->MutableRow(y + y0_) + x0_;
+  }
+
+  template <typename T>
+  const T* ConstPlaneRow(const Image3<T>& image, size_t c, size_t y) const {
+    return image.ConstPlaneRow(c, y + y0_) + x0_;
+  }
+
+  template <typename T>
+  T* MutablePlaneRow(Image3<T>* image, const size_t c, size_t y) const {
+    return image->MutablePlaneRow(c, y + y0_) + x0_;
+  }
+
+  // Returns true if this Rect fully resides in the given image. ImageT could be
+  // Image<T> or Image3<T>; however if ImageT is Rect, results are nonsensical.
+  template <class ImageT>
+  bool IsInside(const ImageT& image) const {
+    return (x0_ + xsize_ <= image.xsize()) && (y0_ + ysize_ <= image.ysize());
+  }
+
+  size_t x0() const { return x0_; }
+  size_t y0() const { return y0_; }
+  size_t xsize() const { return xsize_; }
+  size_t ysize() const { return ysize_; }
+
+ private:
+  // Returns size_max, or whatever is left in [begin, end). Written without
+  // computing begin + size_max, which would wrap around for very large
+  // size_max and then wrongly report the unclamped size.
+  static constexpr size_t ClampedSize(size_t begin, size_t size_max,
+                                      size_t end) {
+    return (begin >= end)
+               ? 0
+               : ((size_max <= end - begin) ? size_max : end - begin);
+  }
+
+  size_t x0_;
+  size_t y0_;
+
+  size_t xsize_;
+  size_t ysize_;
+};
+
+// Returns whether both arguments report identical xsize() and ysize(). Works
+// for any image-like input type(s).
+template <class Image1, class Image2>
+HWY_MAYBE_UNUSED bool SameSize(const Image1& image1, const Image2& image2) {
+  if (image1.xsize() != image2.xsize()) return false;
+  return image1.ysize() == image2.ysize();
+}
+
+// Mirrors out of bounds coordinates and returns valid coordinates unchanged.
+// The mirror is outside the last column (border pixel is also replicated),
+// hence the mapping repeats with a period of 2 * xsize. It is evaluated in
+// closed form, so the cost does not depend on how far outside the image `x`
+// lies. `xsize` must be positive.
+static HWY_INLINE HWY_MAYBE_UNUSED size_t Mirror(int64_t x,
+                                                 const int64_t xsize) {
+  HWY_DASSERT(xsize > 0);
+
+  // Fast path: valid coordinates do not pay for the division below.
+  if (x >= 0 && x < xsize) return static_cast<size_t>(x);
+
+  const int64_t period = 2 * xsize;
+  // The remainder of a negative dividend is <= 0 in C++, so shift it into
+  // [0, period).
+  int64_t pos = x % period;
+  if (pos < 0) pos += period;
+  // The first half of a period is the image itself, the second half is its
+  // reflection.
+  return static_cast<size_t>(pos < xsize ? pos : period - 1 - pos);
+}
+
+// Wrap modes for ensuring X/Y coordinates are in the valid range [0, size):
+
+// Wrap mode that mirrors, repeating the edge pixel once. Useful for
+// convolutions. Forwards to Mirror().
+struct WrapMirror {
+  HWY_INLINE size_t operator()(const int64_t coord, const size_t size) const {
+    const int64_t signed_size = static_cast<int64_t>(size);
+    return Mirror(coord, signed_size);
+  }
+};
+
+// Wrap mode for when "coord" is already known to be valid (e.g. interior of
+// an image): returns the same coordinate and ignores the size.
+struct WrapUnchanged {
+  HWY_INLINE size_t operator()(const int64_t coord, size_t /*size*/) const {
+    const size_t unchanged = static_cast<size_t>(coord);
+    return unchanged;
+  }
+};
+
+// Similar to Wrap* but for row pointers (reduces Row() multiplications).
+
+// Mirrors row pointers that lie before the first or after the last row.
+class WrapRowMirror {
+ public:
+  // Remembers the pointers to row 0 and row ysize - 1 of `image`.
+  template <class View>
+  WrapRowMirror(const View& image, size_t ysize)
+      : first_row_(image.ConstRow(0)), last_row_(image.ConstRow(ysize - 1)) {}
+
+  // Returns `row` if it lies within [first row, last row], otherwise its
+  // mirror image. `stride` is the distance between consecutive rows, in the
+  // same units as the pointer differences below.
+  const float* operator()(const float* const HWY_RESTRICT row,
+                          const int64_t stride) const {
+    const float* wrapped = row;
+    if (row < first_row_) {
+      // Mirrored; one row before => row 0, two before = row 1, ...
+      const int64_t dist_before = first_row_ - row;
+      wrapped = first_row_ + dist_before - stride;
+    } else if (row > last_row_) {
+      // Mirrored; one row after => last row, two after = last - 1, ...
+      const int64_t dist_after = row - last_row_;
+      wrapped = last_row_ - dist_after + stride;
+    }
+    return wrapped;
+  }
+
+ private:
+  const float* const HWY_RESTRICT first_row_;
+  const float* const HWY_RESTRICT last_row_;
+};
+
+// Row-pointer counterpart of WrapUnchanged: hands back `row` as is and
+// ignores the stride.
+struct WrapRowUnchanged {
+  HWY_INLINE const float* operator()(const float* const HWY_RESTRICT row,
+                                     int64_t /*stride*/) const {
+    const float* const same_row = row;
+    return same_row;
+  }
+};
+
+} // namespace hwy
+
+#endif // HIGHWAY_HWY_CONTRIB_IMAGE_IMAGE_H_