Diffstat (limited to 'third_party/highway/hwy/tests/arithmetic_test.cc')
-rw-r--r-- | third_party/highway/hwy/tests/arithmetic_test.cc | 543
1 file changed, 543 insertions, 0 deletions
diff --git a/third_party/highway/hwy/tests/arithmetic_test.cc b/third_party/highway/hwy/tests/arithmetic_test.cc
new file mode 100644
index 0000000000..5ce93db608
--- /dev/null
+++ b/third_party/highway/hwy/tests/arithmetic_test.cc
@@ -0,0 +1,543 @@
+// Copyright 2019 Google LLC
+// SPDX-License-Identifier: Apache-2.0
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "tests/arithmetic_test.cc"
+#include "hwy/foreach_target.h"  // IWYU pragma: keep
+#include "hwy/highway.h"
+#include "hwy/tests/test_util-inl.h"
+
+HWY_BEFORE_NAMESPACE();
+namespace hwy {
+namespace HWY_NAMESPACE {
+
+struct TestPlusMinus {
+  template <class T, class D>
+  HWY_NOINLINE void operator()(T /*unused*/, D d) {
+    const auto v2 = Iota(d, T{2});
+    const auto v3 = Iota(d, T{3});
+    const auto v4 = Iota(d, T{4});
+
+    const size_t N = Lanes(d);
+    auto lanes = AllocateAligned<T>(N);
+    HWY_ASSERT(lanes);
+    for (size_t i = 0; i < N; ++i) {
+      lanes[i] = static_cast<T>((2 + i) + (3 + i));
+    }
+    HWY_ASSERT_VEC_EQ(d, lanes.get(), Add(v2, v3));
+    HWY_ASSERT_VEC_EQ(d, Set(d, T{2}), Sub(v4, v2));
+
+    for (size_t i = 0; i < N; ++i) {
+      lanes[i] = static_cast<T>((2 + i) + (4 + i));
+    }
+    auto sum = v2;
+    sum = Add(sum, v4);  // sum == 6, 8, ..
+    HWY_ASSERT_VEC_EQ(d, Load(d, lanes.get()), sum);
+
+    sum = Sub(sum, v4);
+    HWY_ASSERT_VEC_EQ(d, v2, sum);
+  }
+};
+
+struct TestPlusMinusOverflow {
+  template <class T, class D>
+  HWY_NOINLINE void operator()(T /*unused*/, D d) {
+    const auto v1 = Iota(d, T(1));
+    const auto vMax = Iota(d, LimitsMax<T>());
+    const auto vMin = Iota(d, LimitsMin<T>());
+
+    // Check that no UB is triggered.
+    // The "assert" is formal - it keeps the compiler from dropping the
+    // calculations.
+    HWY_ASSERT_VEC_EQ(d, Add(v1, vMax), Add(vMax, v1));
+    HWY_ASSERT_VEC_EQ(d, Add(vMax, vMax), Add(vMax, vMax));
+    HWY_ASSERT_VEC_EQ(d, Sub(vMin, v1), Sub(vMin, v1));
+    HWY_ASSERT_VEC_EQ(d, Sub(vMin, vMax), Sub(vMin, vMax));
+  }
+};
+
+HWY_NOINLINE void TestAllPlusMinus() {
+  ForAllTypes(ForPartialVectors<TestPlusMinus>());
+  ForIntegerTypes(ForPartialVectors<TestPlusMinusOverflow>());
+}
+
+struct TestUnsignedSaturatingArithmetic {
+  template <typename T, class D>
+  HWY_NOINLINE void operator()(T /*unused*/, D d) {
+    const auto v0 = Zero(d);
+    const auto vi = Iota(d, T{1});
+    const auto vm = Set(d, LimitsMax<T>());
+
+    HWY_ASSERT_VEC_EQ(d, Add(v0, v0), SaturatedAdd(v0, v0));
+    HWY_ASSERT_VEC_EQ(d, Add(v0, vi), SaturatedAdd(v0, vi));
+    HWY_ASSERT_VEC_EQ(d, Add(v0, vm), SaturatedAdd(v0, vm));
+    HWY_ASSERT_VEC_EQ(d, vm, SaturatedAdd(vi, vm));
+    HWY_ASSERT_VEC_EQ(d, vm, SaturatedAdd(vm, vm));
+
+    HWY_ASSERT_VEC_EQ(d, v0, SaturatedSub(v0, v0));
+    HWY_ASSERT_VEC_EQ(d, v0, SaturatedSub(v0, vi));
+    HWY_ASSERT_VEC_EQ(d, v0, SaturatedSub(vi, vi));
+    HWY_ASSERT_VEC_EQ(d, v0, SaturatedSub(vi, vm));
+    HWY_ASSERT_VEC_EQ(d, Sub(vm, vi), SaturatedSub(vm, vi));
+  }
+};
+
+struct TestSignedSaturatingArithmetic {
+  template <typename T, class D>
+  HWY_NOINLINE void operator()(T /*unused*/, D d) {
+    const auto v0 = Zero(d);
+    const auto vpm = Set(d, LimitsMax<T>());
+    // Ensure all lanes are positive, even if Iota wraps around.
+    const auto vi = Or(And(Iota(d, 0), vpm), Set(d, T{1}));
+    const auto vn = Sub(v0, vi);
+    const auto vnm = Set(d, LimitsMin<T>());
+    HWY_ASSERT_MASK_EQ(d, MaskTrue(d), Gt(vi, v0));
+    HWY_ASSERT_MASK_EQ(d, MaskTrue(d), Lt(vn, v0));
+
+    HWY_ASSERT_VEC_EQ(d, v0, SaturatedAdd(v0, v0));
+    HWY_ASSERT_VEC_EQ(d, vi, SaturatedAdd(v0, vi));
+    HWY_ASSERT_VEC_EQ(d, vpm, SaturatedAdd(v0, vpm));
+    HWY_ASSERT_VEC_EQ(d, vpm, SaturatedAdd(vi, vpm));
+    HWY_ASSERT_VEC_EQ(d, vpm, SaturatedAdd(vpm, vpm));
+
+    HWY_ASSERT_VEC_EQ(d, v0, SaturatedSub(v0, v0));
+    HWY_ASSERT_VEC_EQ(d, Sub(v0, vi), SaturatedSub(v0, vi));
+    HWY_ASSERT_VEC_EQ(d, vn, SaturatedSub(vn, v0));
+    HWY_ASSERT_VEC_EQ(d, vnm, SaturatedSub(vnm, vi));
+    HWY_ASSERT_VEC_EQ(d, vnm, SaturatedSub(vnm, vpm));
+  }
+};
+
+struct TestSaturatingArithmeticOverflow {
+  template <class T, class D>
+  HWY_NOINLINE void operator()(T /*unused*/, D d) {
+    const auto v1 = Iota(d, T(1));
+    const auto vMax = Iota(d, LimitsMax<T>());
+    const auto vMin = Iota(d, LimitsMin<T>());
+
+    // Check that no UB is triggered.
+    // The "assert" is formal - it keeps the compiler from dropping the
+    // calculations.
+    HWY_ASSERT_VEC_EQ(d, SaturatedAdd(v1, vMax), SaturatedAdd(vMax, v1));
+    HWY_ASSERT_VEC_EQ(d, SaturatedAdd(vMax, vMax), SaturatedAdd(vMax, vMax));
+    HWY_ASSERT_VEC_EQ(d, SaturatedAdd(vMin, vMax), SaturatedAdd(vMin, vMax));
+    HWY_ASSERT_VEC_EQ(d, SaturatedAdd(vMin, vMin), SaturatedAdd(vMin, vMin));
+    HWY_ASSERT_VEC_EQ(d, SaturatedSub(vMin, v1), SaturatedSub(vMin, v1));
+    HWY_ASSERT_VEC_EQ(d, SaturatedSub(vMin, vMax), SaturatedSub(vMin, vMax));
+    HWY_ASSERT_VEC_EQ(d, SaturatedSub(vMax, vMin), SaturatedSub(vMax, vMin));
+    HWY_ASSERT_VEC_EQ(d, SaturatedSub(vMin, vMin), SaturatedSub(vMin, vMin));
+  }
+};
+
+HWY_NOINLINE void TestAllSaturatingArithmetic() {
+  ForUnsignedTypes(ForPartialVectors<TestUnsignedSaturatingArithmetic>());
+  ForSignedTypes(ForPartialVectors<TestSignedSaturatingArithmetic>());
+  ForIntegerTypes(ForPartialVectors<TestSaturatingArithmeticOverflow>());
+}
+
+struct TestAverage {
+  template <typename T, class D>
+  HWY_NOINLINE void operator()(T /*unused*/, D d) {
+    const auto v0 = Zero(d);
+    const auto v1 = Set(d, T{1});
+    const auto v2 = Set(d, T{2});
+
+    HWY_ASSERT_VEC_EQ(d, v0, AverageRound(v0, v0));
+    HWY_ASSERT_VEC_EQ(d, v1, AverageRound(v0, v1));
+    HWY_ASSERT_VEC_EQ(d, v1, AverageRound(v1, v1));
+    HWY_ASSERT_VEC_EQ(d, v2, AverageRound(v1, v2));
+    HWY_ASSERT_VEC_EQ(d, v2, AverageRound(v2, v2));
+  }
+};
+
+HWY_NOINLINE void TestAllAverage() {
+  const ForPartialVectors<TestAverage> test;
+  test(uint8_t());
+  test(uint16_t());
+}
+
+struct TestAbs {
+  template <typename T, class D>
+  HWY_NOINLINE void operator()(T /*unused*/, D d) {
+    const auto v0 = Zero(d);
+    const auto vp1 = Set(d, T{1});
+    const auto vn1 = Set(d, T{-1});
+    const auto vpm = Set(d, LimitsMax<T>());
+    const auto vnm = Set(d, LimitsMin<T>());
+
+    HWY_ASSERT_VEC_EQ(d, v0, Abs(v0));
+    HWY_ASSERT_VEC_EQ(d, vp1, Abs(vp1));
+    HWY_ASSERT_VEC_EQ(d, vp1, Abs(vn1));
+    HWY_ASSERT_VEC_EQ(d, vpm, Abs(vpm));
+    HWY_ASSERT_VEC_EQ(d, vnm, Abs(vnm));
+  }
+};
+
+struct TestFloatAbs {
+  template <typename T, class D>
+  HWY_NOINLINE void operator()(T /*unused*/, D d) {
+    const auto v0 = Zero(d);
+    const auto vp1 = Set(d, T{1});
+    const auto vn1 = Set(d, T{-1});
+    const auto vp2 = Set(d, static_cast<T>(0.01));
+    const auto vn2 = Set(d, static_cast<T>(-0.01));
+
+    HWY_ASSERT_VEC_EQ(d, v0, Abs(v0));
+    HWY_ASSERT_VEC_EQ(d, vp1, Abs(vp1));
+    HWY_ASSERT_VEC_EQ(d, vp1, Abs(vn1));
+    HWY_ASSERT_VEC_EQ(d, vp2, Abs(vp2));
+    HWY_ASSERT_VEC_EQ(d, vp2, Abs(vn2));
+  }
+};
+
+HWY_NOINLINE void TestAllAbs() {
+  ForSignedTypes(ForPartialVectors<TestAbs>());
+  ForFloatTypes(ForPartialVectors<TestFloatAbs>());
+}
+
+struct TestNeg {
+  template <typename T, class D>
+  HWY_NOINLINE void operator()(T /*unused*/, D d) {
+    const auto v0 = Zero(d);
+    const auto vn = Set(d, T{-3});
+    const auto vp = Set(d, T{3});
+    HWY_ASSERT_VEC_EQ(d, v0, Neg(v0));
+    HWY_ASSERT_VEC_EQ(d, vp, Neg(vn));
+    HWY_ASSERT_VEC_EQ(d, vn, Neg(vp));
+  }
+};
+
+struct TestNegOverflow {
+  template <typename T, class D>
+  HWY_NOINLINE void operator()(T /*unused*/, D d) {
+    const auto vn = Set(d, LimitsMin<T>());
+    const auto vp = Set(d, LimitsMax<T>());
+    HWY_ASSERT_VEC_EQ(d, Neg(vn), Neg(vn));
+    HWY_ASSERT_VEC_EQ(d, Neg(vp), Neg(vp));
+  }
+};
+
+HWY_NOINLINE void TestAllNeg() {
+  ForSignedTypes(ForPartialVectors<TestNeg>());
+  ForFloatTypes(ForPartialVectors<TestNeg>());
+  ForSignedTypes(ForPartialVectors<TestNegOverflow>());
+}
+
+struct TestUnsignedMinMax {
+  template <typename T, class D>
+  HWY_NOINLINE void operator()(T /*unused*/, D d) {
+    const auto v0 = Zero(d);
+    // Leave headroom such that v1 < v2 even after wraparound.
+    const auto mod = And(Iota(d, 0), Set(d, LimitsMax<T>() >> 1));
+    const auto v1 = Add(mod, Set(d, T{1}));
+    const auto v2 = Add(mod, Set(d, T{2}));
+    HWY_ASSERT_VEC_EQ(d, v1, Min(v1, v2));
+    HWY_ASSERT_VEC_EQ(d, v2, Max(v1, v2));
+    HWY_ASSERT_VEC_EQ(d, v0, Min(v1, v0));
+    HWY_ASSERT_VEC_EQ(d, v1, Max(v1, v0));
+
+    const auto vmin = Set(d, LimitsMin<T>());
+    const auto vmax = Set(d, LimitsMax<T>());
+
+    HWY_ASSERT_VEC_EQ(d, vmin, Min(vmin, vmax));
+    HWY_ASSERT_VEC_EQ(d, vmin, Min(vmax, vmin));
+
+    HWY_ASSERT_VEC_EQ(d, vmax, Max(vmin, vmax));
+    HWY_ASSERT_VEC_EQ(d, vmax, Max(vmax, vmin));
+  }
+};
+
+struct TestSignedMinMax {
+  template <typename T, class D>
+  HWY_NOINLINE void operator()(T /*unused*/, D d) {
+    // Leave headroom such that v1 < v2 even after wraparound.
+    const auto mod =
+        And(Iota(d, 0), Set(d, static_cast<T>(LimitsMax<T>() >> 1)));
+    const auto v1 = Add(mod, Set(d, T{1}));
+    const auto v2 = Add(mod, Set(d, T{2}));
+    const auto v_neg = Sub(Zero(d), v1);
+    HWY_ASSERT_VEC_EQ(d, v1, Min(v1, v2));
+    HWY_ASSERT_VEC_EQ(d, v2, Max(v1, v2));
+    HWY_ASSERT_VEC_EQ(d, v_neg, Min(v1, v_neg));
+    HWY_ASSERT_VEC_EQ(d, v1, Max(v1, v_neg));
+
+    const auto v0 = Zero(d);
+    const auto vmin = Set(d, LimitsMin<T>());
+    const auto vmax = Set(d, LimitsMax<T>());
+    HWY_ASSERT_VEC_EQ(d, vmin, Min(v0, vmin));
+    HWY_ASSERT_VEC_EQ(d, vmin, Min(vmin, v0));
+    HWY_ASSERT_VEC_EQ(d, v0, Max(v0, vmin));
+    HWY_ASSERT_VEC_EQ(d, v0, Max(vmin, v0));
+
+    HWY_ASSERT_VEC_EQ(d, vmin, Min(vmin, vmax));
+    HWY_ASSERT_VEC_EQ(d, vmin, Min(vmax, vmin));
+
+    HWY_ASSERT_VEC_EQ(d, vmax, Max(vmin, vmax));
+    HWY_ASSERT_VEC_EQ(d, vmax, Max(vmax, vmin));
+  }
+};
+
+struct TestFloatMinMax {
+  template <typename T, class D>
+  HWY_NOINLINE void operator()(T /*unused*/, D d) {
+    const auto v1 = Iota(d, 1);
+    const auto v2 = Iota(d, 2);
+    const auto v_neg = Iota(d, -T(Lanes(d)));
+    HWY_ASSERT_VEC_EQ(d, v1, Min(v1, v2));
+    HWY_ASSERT_VEC_EQ(d, v2, Max(v1, v2));
+    HWY_ASSERT_VEC_EQ(d, v_neg, Min(v1, v_neg));
+    HWY_ASSERT_VEC_EQ(d, v1, Max(v1, v_neg));
+
+    const auto v0 = Zero(d);
+    const auto vmin = Set(d, static_cast<T>(-1E30));
+    const auto vmax = Set(d, static_cast<T>(1E30));
+    HWY_ASSERT_VEC_EQ(d, vmin, Min(v0, vmin));
+    HWY_ASSERT_VEC_EQ(d, vmin, Min(vmin, v0));
+    HWY_ASSERT_VEC_EQ(d, v0, Max(v0, vmin));
+    HWY_ASSERT_VEC_EQ(d, v0, Max(vmin, v0));
+
+    HWY_ASSERT_VEC_EQ(d, vmin, Min(vmin, vmax));
+    HWY_ASSERT_VEC_EQ(d, vmin, Min(vmax, vmin));
+
+    HWY_ASSERT_VEC_EQ(d, vmax, Max(vmin, vmax));
+    HWY_ASSERT_VEC_EQ(d, vmax, Max(vmax, vmin));
+  }
+};
+
+HWY_NOINLINE void TestAllMinMax() {
+  ForUnsignedTypes(ForPartialVectors<TestUnsignedMinMax>());
+  ForSignedTypes(ForPartialVectors<TestSignedMinMax>());
+  ForFloatTypes(ForPartialVectors<TestFloatMinMax>());
+}
+
+template <class D>
+static HWY_NOINLINE Vec<D> Make128(D d, uint64_t hi, uint64_t lo) {
+  alignas(16) uint64_t in[2];
+  in[0] = lo;
+  in[1] = hi;
+  return LoadDup128(d, in);
+}
+
+struct TestMinMax128 {
+  template <typename T, class D>
+  HWY_NOINLINE void operator()(T /*unused*/, D d) {
+    using V = Vec<D>;
+    const size_t N = Lanes(d);
+    auto a_lanes = AllocateAligned<T>(N);
+    auto b_lanes = AllocateAligned<T>(N);
+    auto min_lanes = AllocateAligned<T>(N);
+    auto max_lanes = AllocateAligned<T>(N);
+    RandomState rng;
+
+    const V v00 = Zero(d);
+    const V v01 = Make128(d, 0, 1);
+    const V v10 = Make128(d, 1, 0);
+    const V v11 = Add(v01, v10);
+
+    // Same arg
+    HWY_ASSERT_VEC_EQ(d, v00, Min128(d, v00, v00));
+    HWY_ASSERT_VEC_EQ(d, v01, Min128(d, v01, v01));
+    HWY_ASSERT_VEC_EQ(d, v10, Min128(d, v10, v10));
+    HWY_ASSERT_VEC_EQ(d, v11, Min128(d, v11, v11));
+    HWY_ASSERT_VEC_EQ(d, v00, Max128(d, v00, v00));
+    HWY_ASSERT_VEC_EQ(d, v01, Max128(d, v01, v01));
+    HWY_ASSERT_VEC_EQ(d, v10, Max128(d, v10, v10));
+    HWY_ASSERT_VEC_EQ(d, v11, Max128(d, v11, v11));
+
+    // First arg less
+    HWY_ASSERT_VEC_EQ(d, v00, Min128(d, v00, v01));
+    HWY_ASSERT_VEC_EQ(d, v01, Min128(d, v01, v10));
+    HWY_ASSERT_VEC_EQ(d, v10, Min128(d, v10, v11));
+    HWY_ASSERT_VEC_EQ(d, v01, Max128(d, v00, v01));
+    HWY_ASSERT_VEC_EQ(d, v10, Max128(d, v01, v10));
+    HWY_ASSERT_VEC_EQ(d, v11, Max128(d, v10, v11));
+
+    // Second arg less
+    HWY_ASSERT_VEC_EQ(d, v00, Min128(d, v01, v00));
+    HWY_ASSERT_VEC_EQ(d, v01, Min128(d, v10, v01));
+    HWY_ASSERT_VEC_EQ(d, v10, Min128(d, v11, v10));
+    HWY_ASSERT_VEC_EQ(d, v01, Max128(d, v01, v00));
+    HWY_ASSERT_VEC_EQ(d, v10, Max128(d, v10, v01));
+    HWY_ASSERT_VEC_EQ(d, v11, Max128(d, v11, v10));
+
+    // Also check 128-bit blocks are independent
+    for (size_t rep = 0; rep < AdjustedReps(1000); ++rep) {
+      for (size_t i = 0; i < N; ++i) {
+        a_lanes[i] = Random64(&rng);
+        b_lanes[i] = Random64(&rng);
+      }
+      const V a = Load(d, a_lanes.get());
+      const V b = Load(d, b_lanes.get());
+      for (size_t i = 0; i < N; i += 2) {
+        const bool lt = a_lanes[i + 1] == b_lanes[i + 1]
+                            ? (a_lanes[i] < b_lanes[i])
+                            : (a_lanes[i + 1] < b_lanes[i + 1]);
+        min_lanes[i + 0] = lt ? a_lanes[i + 0] : b_lanes[i + 0];
+        min_lanes[i + 1] = lt ? a_lanes[i + 1] : b_lanes[i + 1];
+        max_lanes[i + 0] = lt ? b_lanes[i + 0] : a_lanes[i + 0];
+        max_lanes[i + 1] = lt ? b_lanes[i + 1] : a_lanes[i + 1];
+      }
+      HWY_ASSERT_VEC_EQ(d, min_lanes.get(), Min128(d, a, b));
+      HWY_ASSERT_VEC_EQ(d, max_lanes.get(), Max128(d, a, b));
+    }
+  }
+};
+
+HWY_NOINLINE void TestAllMinMax128() {
+  ForGEVectors<128, TestMinMax128>()(uint64_t());
+}
+
+struct TestMinMax128Upper {
+  template <typename T, class D>
+  HWY_NOINLINE void operator()(T /*unused*/, D d) {
+    using V = Vec<D>;
+    const size_t N = Lanes(d);
+    auto a_lanes = AllocateAligned<T>(N);
+    auto b_lanes = AllocateAligned<T>(N);
+    auto min_lanes = AllocateAligned<T>(N);
+    auto max_lanes = AllocateAligned<T>(N);
+    RandomState rng;
+
+    const V v00 = Zero(d);
+    const V v01 = Make128(d, 0, 1);
+    const V v10 = Make128(d, 1, 0);
+    const V v11 = Add(v01, v10);
+
+    // Same arg
+    HWY_ASSERT_VEC_EQ(d, v00, Min128Upper(d, v00, v00));
+    HWY_ASSERT_VEC_EQ(d, v01, Min128Upper(d, v01, v01));
+    HWY_ASSERT_VEC_EQ(d, v10, Min128Upper(d, v10, v10));
+    HWY_ASSERT_VEC_EQ(d, v11, Min128Upper(d, v11, v11));
+    HWY_ASSERT_VEC_EQ(d, v00, Max128Upper(d, v00, v00));
+    HWY_ASSERT_VEC_EQ(d, v01, Max128Upper(d, v01, v01));
+    HWY_ASSERT_VEC_EQ(d, v10, Max128Upper(d, v10, v10));
+    HWY_ASSERT_VEC_EQ(d, v11, Max128Upper(d, v11, v11));
+
+    // Equivalent but not equal (chooses second arg)
+    HWY_ASSERT_VEC_EQ(d, v01, Min128Upper(d, v00, v01));
+    HWY_ASSERT_VEC_EQ(d, v11, Min128Upper(d, v10, v11));
+    HWY_ASSERT_VEC_EQ(d, v00, Min128Upper(d, v01, v00));
+    HWY_ASSERT_VEC_EQ(d, v10, Min128Upper(d, v11, v10));
+    HWY_ASSERT_VEC_EQ(d, v00, Max128Upper(d, v01, v00));
+    HWY_ASSERT_VEC_EQ(d, v10, Max128Upper(d, v11, v10));
+    HWY_ASSERT_VEC_EQ(d, v01, Max128Upper(d, v00, v01));
+    HWY_ASSERT_VEC_EQ(d, v11, Max128Upper(d, v10, v11));
+
+    // First arg less
+    HWY_ASSERT_VEC_EQ(d, v01, Min128Upper(d, v01, v10));
+    HWY_ASSERT_VEC_EQ(d, v10, Max128Upper(d, v01, v10));
+
+    // Second arg less
+    HWY_ASSERT_VEC_EQ(d, v01, Min128Upper(d, v10, v01));
+    HWY_ASSERT_VEC_EQ(d, v10, Max128Upper(d, v10, v01));
+
+    // Also check 128-bit blocks are independent
+    for (size_t rep = 0; rep < AdjustedReps(1000); ++rep) {
+      for (size_t i = 0; i < N; ++i) {
+        a_lanes[i] = Random64(&rng);
+        b_lanes[i] = Random64(&rng);
+      }
+      const V a = Load(d, a_lanes.get());
+      const V b = Load(d, b_lanes.get());
+      for (size_t i = 0; i < N; i += 2) {
+        const bool lt = a_lanes[i + 1] < b_lanes[i + 1];
+        min_lanes[i + 0] = lt ? a_lanes[i + 0] : b_lanes[i + 0];
+        min_lanes[i + 1] = lt ? a_lanes[i + 1] : b_lanes[i + 1];
+        max_lanes[i + 0] = lt ? b_lanes[i + 0] : a_lanes[i + 0];
+        max_lanes[i + 1] = lt ? b_lanes[i + 1] : a_lanes[i + 1];
+      }
+      HWY_ASSERT_VEC_EQ(d, min_lanes.get(), Min128Upper(d, a, b));
+      HWY_ASSERT_VEC_EQ(d, max_lanes.get(), Max128Upper(d, a, b));
+    }
+  }
+};
+
+HWY_NOINLINE void TestAllMinMax128Upper() {
+  ForGEVectors<128, TestMinMax128Upper>()(uint64_t());
+}
+
+struct TestIntegerAbsDiff {
+  template <typename T,
+            HWY_IF_T_SIZE_ONE_OF(T, (1 << 1) | (1 << 2) | (1 << 4))>
+  static inline T ScalarAbsDiff(T a, T b) {
+    using TW = MakeSigned<MakeWide<T>>;
+    const TW diff = static_cast<TW>(a) - static_cast<TW>(b);
+    return static_cast<T>((diff >= 0) ? diff : -diff);
+  }
+  template <typename T, HWY_IF_T_SIZE(T, 8)>
+  static inline T ScalarAbsDiff(T a, T b) {
+    if (a >= b) {
+      return a - b;
+    } else {
+      return b - a;
+    }
+  }
+
+  template <typename T, class D>
+  HWY_NOINLINE void operator()(T /*unused*/, D d) {
+    const size_t N = Lanes(d);
+    auto in_lanes_a = AllocateAligned<T>(N);
+    auto in_lanes_b = AllocateAligned<T>(N);
+    auto out_lanes = AllocateAligned<T>(N);
+    constexpr size_t shift_amt_mask = sizeof(T) * 8 - 1;
+    for (size_t i = 0; i < N; ++i) {
+      // Mask the shift amount, as i can be greater than or equal to the
+      // number of bits in T if T is int8_t, uint8_t, int16_t, or uint16_t.
+      const auto shift_amt = i & shift_amt_mask;
+      in_lanes_a[i] =
+          static_cast<T>((static_cast<uint64_t>(i) ^ 1u) << shift_amt);
+      in_lanes_b[i] = static_cast<T>(static_cast<uint64_t>(i) << shift_amt);
+      out_lanes[i] = ScalarAbsDiff(in_lanes_a[i], in_lanes_b[i]);
+    }
+    const auto a = Load(d, in_lanes_a.get());
+    const auto b = Load(d, in_lanes_b.get());
+    const auto expected = Load(d, out_lanes.get());
+    HWY_ASSERT_VEC_EQ(d, expected, AbsDiff(a, b));
+    HWY_ASSERT_VEC_EQ(d, expected, AbsDiff(b, a));
+  }
+};
+
+HWY_NOINLINE void TestAllIntegerAbsDiff() {
+  ForPartialVectors<TestIntegerAbsDiff>()(int8_t());
+  ForPartialVectors<TestIntegerAbsDiff>()(uint8_t());
+  ForPartialVectors<TestIntegerAbsDiff>()(int16_t());
+  ForPartialVectors<TestIntegerAbsDiff>()(uint16_t());
+  ForPartialVectors<TestIntegerAbsDiff>()(int32_t());
+  ForPartialVectors<TestIntegerAbsDiff>()(uint32_t());
+#if HWY_HAVE_INTEGER64
+  ForPartialVectors<TestIntegerAbsDiff>()(int64_t());
+  ForPartialVectors<TestIntegerAbsDiff>()(uint64_t());
+#endif
+}
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+}  // namespace HWY_NAMESPACE
+}  // namespace hwy
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+
+namespace hwy {
+HWY_BEFORE_TEST(HwyArithmeticTest);
+HWY_EXPORT_AND_TEST_P(HwyArithmeticTest, TestAllPlusMinus);
+HWY_EXPORT_AND_TEST_P(HwyArithmeticTest, TestAllSaturatingArithmetic);
+HWY_EXPORT_AND_TEST_P(HwyArithmeticTest, TestAllAverage);
+HWY_EXPORT_AND_TEST_P(HwyArithmeticTest, TestAllAbs);
+HWY_EXPORT_AND_TEST_P(HwyArithmeticTest, TestAllNeg);
+HWY_EXPORT_AND_TEST_P(HwyArithmeticTest, TestAllMinMax);
+HWY_EXPORT_AND_TEST_P(HwyArithmeticTest, TestAllMinMax128);
+HWY_EXPORT_AND_TEST_P(HwyArithmeticTest, TestAllMinMax128Upper);
+HWY_EXPORT_AND_TEST_P(HwyArithmeticTest, TestAllIntegerAbsDiff);
+}  // namespace hwy
+
+#endif
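Note: the scalar reference loops in TestMinMax128 and TestMinMax128Upper encode the semantics under test. Min128/Max128 treat each 128-bit block as one unsigned integer, comparing the upper u64 lane (index i + 1) first and breaking ties with the lower lane; Min128Upper/Max128Upper compare only the upper lane and keep the second argument on ties. A minimal scalar sketch of those semantics; the Block128 struct and Scalar* helper names are illustrative and not part of the Highway API:

#include <stdint.h>

// One 128-bit block: lo is lane i, hi is lane i + 1 (illustrative only).
struct Block128 {
  uint64_t lo;
  uint64_t hi;
};

// Lexicographic less-than: compare the upper lane first, break ties on the
// lower lane. Mirrors the `lt` computation in TestMinMax128's reference loop.
static bool ScalarLt128(Block128 a, Block128 b) {
  return (a.hi == b.hi) ? (a.lo < b.lo) : (a.hi < b.hi);
}

static Block128 ScalarMin128(Block128 a, Block128 b) {
  return ScalarLt128(a, b) ? a : b;
}
static Block128 ScalarMax128(Block128 a, Block128 b) {
  return ScalarLt128(a, b) ? b : a;
}

// Upper-lane-only variants, as in TestMinMax128Upper: when the upper lanes
// are equal, the second argument wins, which is what the "Equivalent but not
// equal (chooses second arg)" assertions verify.
static Block128 ScalarMin128Upper(Block128 a, Block128 b) {
  return (a.hi < b.hi) ? a : b;
}
static Block128 ScalarMax128Upper(Block128 a, Block128 b) {
  return (a.hi > b.hi) ? a : b;
}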