diff options
Diffstat (limited to 'third_party/highway/hwy/highway_test.cc')
-rw-r--r-- | third_party/highway/hwy/highway_test.cc | 483 |
1 files changed, 483 insertions, 0 deletions
diff --git a/third_party/highway/hwy/highway_test.cc b/third_party/highway/hwy/highway_test.cc new file mode 100644 index 0000000000..d2caec067b --- /dev/null +++ b/third_party/highway/hwy/highway_test.cc @@ -0,0 +1,483 @@ +// Copyright 2019 Google LLC +// SPDX-License-Identifier: Apache-2.0 +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <stddef.h> +#include <stdint.h> + +#include <algorithm> // std::fill +#include <bitset> + +#include "hwy/base.h" + +#undef HWY_TARGET_INCLUDE +#define HWY_TARGET_INCLUDE "highway_test.cc" +#include "hwy/foreach_target.h" // IWYU pragma: keep +#include "hwy/highway.h" +#include "hwy/nanobenchmark.h" // Unpredictable1 +#include "hwy/tests/test_util-inl.h" + +HWY_BEFORE_NAMESPACE(); +namespace hwy { +namespace HWY_NAMESPACE { + +template <size_t kLimit, typename T> +HWY_NOINLINE void TestCappedLimit(T /* tag */) { + CappedTag<T, kLimit> d; + // Ensure two ops compile + HWY_ASSERT_VEC_EQ(d, Zero(d), Set(d, T{0})); + + // Ensure we do not write more than kLimit lanes + const size_t N = Lanes(d); + if (kLimit < N) { + auto lanes = AllocateAligned<T>(N); + std::fill(lanes.get(), lanes.get() + N, T{0}); + Store(Set(d, T{1}), d, lanes.get()); + for (size_t i = kLimit; i < N; ++i) { + HWY_ASSERT_EQ(lanes[i], T{0}); + } + } +} + +// Adapter for ForAllTypes - we are constructing our own Simd<> and thus do not +// use ForPartialVectors etc. +struct TestCapped { + template <typename T> + void operator()(T t) const { + TestCappedLimit<1>(t); + TestCappedLimit<3>(t); + TestCappedLimit<5>(t); + TestCappedLimit<1ull << 15>(t); + } +}; + +HWY_NOINLINE void TestAllCapped() { ForAllTypes(TestCapped()); } + +// For testing that ForPartialVectors reaches every possible size: +using NumLanesSet = std::bitset<HWY_MAX_BYTES + 1>; + +// Monostate pattern because ForPartialVectors takes a template argument, not a +// functor by reference. +static NumLanesSet* NumLanesForSize(size_t sizeof_t) { + HWY_ASSERT(sizeof_t <= sizeof(uint64_t)); + static NumLanesSet num_lanes[sizeof(uint64_t) + 1]; + return num_lanes + sizeof_t; +} +static size_t* MaxLanesForSize(size_t sizeof_t) { + HWY_ASSERT(sizeof_t <= sizeof(uint64_t)); + static size_t num_lanes[sizeof(uint64_t) + 1] = {0}; + return num_lanes + sizeof_t; +} + +struct TestMaxLanes { + template <class T, class D> + HWY_NOINLINE void operator()(T /*unused*/, D d) { + const size_t N = Lanes(d); + const size_t kMax = MaxLanes(d); // for RVV, includes LMUL + HWY_ASSERT(N <= kMax); + HWY_ASSERT(kMax <= (HWY_MAX_BYTES / sizeof(T))); + + NumLanesForSize(sizeof(T))->set(N); + *MaxLanesForSize(sizeof(T)) = HWY_MAX(*MaxLanesForSize(sizeof(T)), N); + } +}; + +HWY_NOINLINE void TestAllMaxLanes() { + ForAllTypes(ForPartialVectors<TestMaxLanes>()); + + // Ensure ForPartialVectors visited all powers of two [1, N]. + for (size_t sizeof_t : {sizeof(uint8_t), sizeof(uint16_t), sizeof(uint32_t), + sizeof(uint64_t)}) { + const size_t N = *MaxLanesForSize(sizeof_t); + for (size_t i = 1; i <= N; i += i) { + if (!NumLanesForSize(sizeof_t)->test(i)) { + fprintf(stderr, "T=%d: did not visit for N=%d, max=%d\n", + static_cast<int>(sizeof_t), static_cast<int>(i), + static_cast<int>(N)); + HWY_ASSERT(false); + } + } + } +} + +struct TestSet { + template <class T, class D> + HWY_NOINLINE void operator()(T /*unused*/, D d) { + // Zero + const auto v0 = Zero(d); + const size_t N = Lanes(d); + auto expected = AllocateAligned<T>(N); + std::fill(expected.get(), expected.get() + N, T(0)); + HWY_ASSERT_VEC_EQ(d, expected.get(), v0); + + // Set + const auto v2 = Set(d, T(2)); + for (size_t i = 0; i < N; ++i) { + expected[i] = 2; + } + HWY_ASSERT_VEC_EQ(d, expected.get(), v2); + + // Iota + const auto vi = Iota(d, T(5)); + for (size_t i = 0; i < N; ++i) { + expected[i] = T(5 + i); + } + HWY_ASSERT_VEC_EQ(d, expected.get(), vi); + + // Undefined + const auto vu = Undefined(d); + Store(vu, d, expected.get()); + } +}; + +HWY_NOINLINE void TestAllSet() { ForAllTypes(ForPartialVectors<TestSet>()); } + +// Ensures wraparound (mod 2^bits) +struct TestOverflow { + template <class T, class D> + HWY_NOINLINE void operator()(T /*unused*/, D d) { + const auto v1 = Set(d, T(1)); + const auto vmax = Set(d, LimitsMax<T>()); + const auto vmin = Set(d, LimitsMin<T>()); + // Unsigned underflow / negative -> positive + HWY_ASSERT_VEC_EQ(d, vmax, Sub(vmin, v1)); + // Unsigned overflow / positive -> negative + HWY_ASSERT_VEC_EQ(d, vmin, Add(vmax, v1)); + } +}; + +HWY_NOINLINE void TestAllOverflow() { + ForIntegerTypes(ForPartialVectors<TestOverflow>()); +} + +struct TestClamp { + template <class T, class D> + HWY_NOINLINE void operator()(T /*unused*/, D d) { + const auto v0 = Zero(d); + const auto v1 = Set(d, 1); + const auto v2 = Set(d, 2); + + HWY_ASSERT_VEC_EQ(d, v1, Clamp(v2, v0, v1)); + HWY_ASSERT_VEC_EQ(d, v1, Clamp(v0, v1, v2)); + } +}; + +HWY_NOINLINE void TestAllClamp() { + ForAllTypes(ForPartialVectors<TestClamp>()); +} + +struct TestSignBitInteger { + template <class T, class D> + HWY_NOINLINE void operator()(T /*unused*/, D d) { + const auto v0 = Zero(d); + const auto all = VecFromMask(d, Eq(v0, v0)); + const auto vs = SignBit(d); + const auto other = Sub(vs, Set(d, 1)); + + // Shifting left by one => overflow, equal zero + HWY_ASSERT_VEC_EQ(d, v0, Add(vs, vs)); + // Verify the lower bits are zero (only +/- and logical ops are available + // for all types) + HWY_ASSERT_VEC_EQ(d, all, Add(vs, other)); + } +}; + +struct TestSignBitFloat { + template <class T, class D> + HWY_NOINLINE void operator()(T /*unused*/, D d) { + const auto v0 = Zero(d); + const auto vs = SignBit(d); + const auto vp = Set(d, 2.25); + const auto vn = Set(d, -2.25); + HWY_ASSERT_VEC_EQ(d, Or(vp, vs), vn); + HWY_ASSERT_VEC_EQ(d, AndNot(vs, vn), vp); + HWY_ASSERT_VEC_EQ(d, v0, vs); + } +}; + +HWY_NOINLINE void TestAllSignBit() { + ForIntegerTypes(ForPartialVectors<TestSignBitInteger>()); + ForFloatTypes(ForPartialVectors<TestSignBitFloat>()); +} + +// inline to work around incorrect SVE codegen (only first 128 bits used). +template <class D, class V> +HWY_INLINE void AssertNaN(D d, VecArg<V> v, const char* file, int line) { + using T = TFromD<D>; + const size_t N = Lanes(d); + if (!AllTrue(d, IsNaN(v))) { + Print(d, "not all NaN", v, 0, N); + Print(d, "mask", VecFromMask(d, IsNaN(v)), 0, N); + const std::string type_name = TypeName(T(), N); + // RVV lacks PRIu64 and MSYS still has problems with %zu, so print bytes to + // avoid truncating doubles. + uint8_t bytes[HWY_MAX(sizeof(T), 8)] = {0}; + const T lane = GetLane(v); + CopyBytes<sizeof(T)>(&lane, bytes); + Abort(file, line, + "Expected %s NaN, got %E (bytes %02x %02x %02x %02x %02x %02x %02x " + "%02x)", + type_name.c_str(), lane, bytes[0], bytes[1], bytes[2], bytes[3], + bytes[4], bytes[5], bytes[6], bytes[7]); + } +} + +#define HWY_ASSERT_NAN(d, v) AssertNaN(d, v, __FILE__, __LINE__) + +struct TestNaN { + template <class T, class D> + HWY_NOINLINE void operator()(T /*unused*/, D d) { + const Vec<D> v1 = Set(d, static_cast<T>(Unpredictable1())); + const Vec<D> nan = IfThenElse(Eq(v1, Set(d, T(1))), NaN(d), v1); + HWY_ASSERT_NAN(d, nan); + + // Arithmetic + HWY_ASSERT_NAN(d, Add(nan, v1)); + HWY_ASSERT_NAN(d, Add(v1, nan)); + HWY_ASSERT_NAN(d, Sub(nan, v1)); + HWY_ASSERT_NAN(d, Sub(v1, nan)); + HWY_ASSERT_NAN(d, Mul(nan, v1)); + HWY_ASSERT_NAN(d, Mul(v1, nan)); + HWY_ASSERT_NAN(d, Div(nan, v1)); + HWY_ASSERT_NAN(d, Div(v1, nan)); + + // FMA + HWY_ASSERT_NAN(d, MulAdd(nan, v1, v1)); + HWY_ASSERT_NAN(d, MulAdd(v1, nan, v1)); + HWY_ASSERT_NAN(d, MulAdd(v1, v1, nan)); + HWY_ASSERT_NAN(d, MulSub(nan, v1, v1)); + HWY_ASSERT_NAN(d, MulSub(v1, nan, v1)); + HWY_ASSERT_NAN(d, MulSub(v1, v1, nan)); + HWY_ASSERT_NAN(d, NegMulAdd(nan, v1, v1)); + HWY_ASSERT_NAN(d, NegMulAdd(v1, nan, v1)); + HWY_ASSERT_NAN(d, NegMulAdd(v1, v1, nan)); + HWY_ASSERT_NAN(d, NegMulSub(nan, v1, v1)); + HWY_ASSERT_NAN(d, NegMulSub(v1, nan, v1)); + HWY_ASSERT_NAN(d, NegMulSub(v1, v1, nan)); + + // Rcp/Sqrt + HWY_ASSERT_NAN(d, Sqrt(nan)); + + // Sign manipulation + HWY_ASSERT_NAN(d, Abs(nan)); + HWY_ASSERT_NAN(d, Neg(nan)); + HWY_ASSERT_NAN(d, CopySign(nan, v1)); + HWY_ASSERT_NAN(d, CopySignToAbs(nan, v1)); + + // Rounding + HWY_ASSERT_NAN(d, Ceil(nan)); + HWY_ASSERT_NAN(d, Floor(nan)); + HWY_ASSERT_NAN(d, Round(nan)); + HWY_ASSERT_NAN(d, Trunc(nan)); + + // Logical (And/AndNot/Xor will clear NaN!) + HWY_ASSERT_NAN(d, Or(nan, v1)); + + // Comparison + HWY_ASSERT(AllFalse(d, Eq(nan, v1))); + HWY_ASSERT(AllFalse(d, Gt(nan, v1))); + HWY_ASSERT(AllFalse(d, Lt(nan, v1))); + HWY_ASSERT(AllFalse(d, Ge(nan, v1))); + HWY_ASSERT(AllFalse(d, Le(nan, v1))); + + // Reduction + HWY_ASSERT_NAN(d, SumOfLanes(d, nan)); +// TODO(janwas): re-enable after QEMU/Spike are fixed +#if HWY_TARGET != HWY_RVV + HWY_ASSERT_NAN(d, MinOfLanes(d, nan)); + HWY_ASSERT_NAN(d, MaxOfLanes(d, nan)); +#endif + + // Min/Max +#if (HWY_ARCH_X86 || HWY_ARCH_WASM) && (HWY_TARGET < HWY_EMU128) + // Native WASM or x86 SIMD return the second operand if any input is NaN. + HWY_ASSERT_VEC_EQ(d, v1, Min(nan, v1)); + HWY_ASSERT_VEC_EQ(d, v1, Max(nan, v1)); + HWY_ASSERT_NAN(d, Min(v1, nan)); + HWY_ASSERT_NAN(d, Max(v1, nan)); +#elif HWY_TARGET == HWY_NEON && HWY_ARCH_ARM_V7 + // ARMv7 NEON returns NaN if any input is NaN. + HWY_ASSERT_NAN(d, Min(v1, nan)); + HWY_ASSERT_NAN(d, Max(v1, nan)); + HWY_ASSERT_NAN(d, Min(nan, v1)); + HWY_ASSERT_NAN(d, Max(nan, v1)); +#else + // IEEE 754-2019 minimumNumber is defined as the other argument if exactly + // one is NaN, and qNaN if both are. + HWY_ASSERT_VEC_EQ(d, v1, Min(nan, v1)); + HWY_ASSERT_VEC_EQ(d, v1, Max(nan, v1)); + HWY_ASSERT_VEC_EQ(d, v1, Min(v1, nan)); + HWY_ASSERT_VEC_EQ(d, v1, Max(v1, nan)); +#endif + HWY_ASSERT_NAN(d, Min(nan, nan)); + HWY_ASSERT_NAN(d, Max(nan, nan)); + } +}; + +// For functions only available for float32 +struct TestF32NaN { + template <class T, class D> + HWY_NOINLINE void operator()(T /*unused*/, D d) { + const auto v1 = Set(d, T(Unpredictable1())); + const auto nan = IfThenElse(Eq(v1, Set(d, T(1))), NaN(d), v1); + HWY_ASSERT_NAN(d, ApproximateReciprocal(nan)); + HWY_ASSERT_NAN(d, ApproximateReciprocalSqrt(nan)); + HWY_ASSERT_NAN(d, AbsDiff(nan, v1)); + HWY_ASSERT_NAN(d, AbsDiff(v1, nan)); + } +}; + +HWY_NOINLINE void TestAllNaN() { + ForFloatTypes(ForPartialVectors<TestNaN>()); + ForPartialVectors<TestF32NaN>()(float()); +} + +struct TestIsNaN { + template <class T, class D> + HWY_NOINLINE void operator()(T /*unused*/, D d) { + const auto v1 = Set(d, T(Unpredictable1())); + const auto inf = IfThenElse(Eq(v1, Set(d, T(1))), Inf(d), v1); + const auto nan = IfThenElse(Eq(v1, Set(d, T(1))), NaN(d), v1); + const auto neg = Set(d, T{-1}); + HWY_ASSERT_NAN(d, nan); + HWY_ASSERT_MASK_EQ(d, MaskFalse(d), IsNaN(inf)); + HWY_ASSERT_MASK_EQ(d, MaskFalse(d), IsNaN(CopySign(inf, neg))); + HWY_ASSERT_MASK_EQ(d, MaskTrue(d), IsNaN(nan)); + HWY_ASSERT_MASK_EQ(d, MaskTrue(d), IsNaN(CopySign(nan, neg))); + HWY_ASSERT_MASK_EQ(d, MaskFalse(d), IsNaN(v1)); + HWY_ASSERT_MASK_EQ(d, MaskFalse(d), IsNaN(Zero(d))); + HWY_ASSERT_MASK_EQ(d, MaskFalse(d), IsNaN(Set(d, hwy::LowestValue<T>()))); + HWY_ASSERT_MASK_EQ(d, MaskFalse(d), IsNaN(Set(d, hwy::HighestValue<T>()))); + } +}; + +HWY_NOINLINE void TestAllIsNaN() { + ForFloatTypes(ForPartialVectors<TestIsNaN>()); +} + +struct TestIsInf { + template <class T, class D> + HWY_NOINLINE void operator()(T /*unused*/, D d) { + const auto v1 = Set(d, T(Unpredictable1())); + const auto inf = IfThenElse(Eq(v1, Set(d, T(1))), Inf(d), v1); + const auto nan = IfThenElse(Eq(v1, Set(d, T(1))), NaN(d), v1); + const auto neg = Set(d, T{-1}); + HWY_ASSERT_MASK_EQ(d, MaskTrue(d), IsInf(inf)); + HWY_ASSERT_MASK_EQ(d, MaskTrue(d), IsInf(CopySign(inf, neg))); + HWY_ASSERT_MASK_EQ(d, MaskFalse(d), IsInf(nan)); + HWY_ASSERT_MASK_EQ(d, MaskFalse(d), IsInf(CopySign(nan, neg))); + HWY_ASSERT_MASK_EQ(d, MaskFalse(d), IsInf(v1)); + HWY_ASSERT_MASK_EQ(d, MaskFalse(d), IsInf(Zero(d))); + HWY_ASSERT_MASK_EQ(d, MaskFalse(d), IsInf(Set(d, hwy::LowestValue<T>()))); + HWY_ASSERT_MASK_EQ(d, MaskFalse(d), IsInf(Set(d, hwy::HighestValue<T>()))); + } +}; + +HWY_NOINLINE void TestAllIsInf() { + ForFloatTypes(ForPartialVectors<TestIsInf>()); +} + +struct TestIsFinite { + template <class T, class D> + HWY_NOINLINE void operator()(T /*unused*/, D d) { + const auto v1 = Set(d, T(Unpredictable1())); + const auto inf = IfThenElse(Eq(v1, Set(d, T(1))), Inf(d), v1); + const auto nan = IfThenElse(Eq(v1, Set(d, T(1))), NaN(d), v1); + const auto neg = Set(d, T{-1}); + HWY_ASSERT_MASK_EQ(d, MaskFalse(d), IsFinite(inf)); + HWY_ASSERT_MASK_EQ(d, MaskFalse(d), IsFinite(CopySign(inf, neg))); + HWY_ASSERT_MASK_EQ(d, MaskFalse(d), IsFinite(nan)); + HWY_ASSERT_MASK_EQ(d, MaskFalse(d), IsFinite(CopySign(nan, neg))); + HWY_ASSERT_MASK_EQ(d, MaskTrue(d), IsFinite(v1)); + HWY_ASSERT_MASK_EQ(d, MaskTrue(d), IsFinite(Zero(d))); + HWY_ASSERT_MASK_EQ(d, MaskTrue(d), IsFinite(Set(d, hwy::LowestValue<T>()))); + HWY_ASSERT_MASK_EQ(d, MaskTrue(d), + IsFinite(Set(d, hwy::HighestValue<T>()))); + } +}; + +HWY_NOINLINE void TestAllIsFinite() { + ForFloatTypes(ForPartialVectors<TestIsFinite>()); +} + +struct TestCopyAndAssign { + template <class T, class D> + HWY_NOINLINE void operator()(T /*unused*/, D d) { + // copy V + const auto v3 = Iota(d, 3); + auto v3b(v3); + HWY_ASSERT_VEC_EQ(d, v3, v3b); + + // assign V + auto v3c = Undefined(d); + v3c = v3; + HWY_ASSERT_VEC_EQ(d, v3, v3c); + } +}; + +HWY_NOINLINE void TestAllCopyAndAssign() { + ForAllTypes(ForPartialVectors<TestCopyAndAssign>()); +} + +struct TestGetLane { + template <class T, class D> + HWY_NOINLINE void operator()(T /*unused*/, D d) { + HWY_ASSERT_EQ(T(0), GetLane(Zero(d))); + HWY_ASSERT_EQ(T(1), GetLane(Set(d, 1))); + } +}; + +HWY_NOINLINE void TestAllGetLane() { + ForAllTypes(ForPartialVectors<TestGetLane>()); +} + +struct TestDFromV { + template <class T, class D> + HWY_NOINLINE void operator()(T /*unused*/, D d) { + const auto v0 = Zero(d); + using D0 = DFromV<decltype(v0)>; // not necessarily same as D + const auto v0b = And(v0, Set(D0(), 1)); // but vectors can interoperate + HWY_ASSERT_VEC_EQ(d, v0, v0b); + } +}; + +HWY_NOINLINE void TestAllDFromV() { + ForAllTypes(ForPartialVectors<TestDFromV>()); +} + +// NOLINTNEXTLINE(google-readability-namespace-comments) +} // namespace HWY_NAMESPACE +} // namespace hwy +HWY_AFTER_NAMESPACE(); + +#if HWY_ONCE + +namespace hwy { +HWY_BEFORE_TEST(HighwayTest); +HWY_EXPORT_AND_TEST_P(HighwayTest, TestAllCapped); +HWY_EXPORT_AND_TEST_P(HighwayTest, TestAllMaxLanes); +HWY_EXPORT_AND_TEST_P(HighwayTest, TestAllSet); +HWY_EXPORT_AND_TEST_P(HighwayTest, TestAllOverflow); +HWY_EXPORT_AND_TEST_P(HighwayTest, TestAllClamp); +HWY_EXPORT_AND_TEST_P(HighwayTest, TestAllSignBit); +HWY_EXPORT_AND_TEST_P(HighwayTest, TestAllNaN); +HWY_EXPORT_AND_TEST_P(HighwayTest, TestAllIsNaN); +HWY_EXPORT_AND_TEST_P(HighwayTest, TestAllIsInf); +HWY_EXPORT_AND_TEST_P(HighwayTest, TestAllIsFinite); +HWY_EXPORT_AND_TEST_P(HighwayTest, TestAllCopyAndAssign); +HWY_EXPORT_AND_TEST_P(HighwayTest, TestAllGetLane); +HWY_EXPORT_AND_TEST_P(HighwayTest, TestAllDFromV); +} // namespace hwy + +#endif |