// Copyright 2019 Google LLC // SPDX-License-Identifier: Apache-2.0 // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. #include #include #include // std::fill #include #include "hwy/base.h" #undef HWY_TARGET_INCLUDE #define HWY_TARGET_INCLUDE "highway_test.cc" #include "hwy/foreach_target.h" // IWYU pragma: keep #include "hwy/highway.h" #include "hwy/nanobenchmark.h" // Unpredictable1 #include "hwy/tests/test_util-inl.h" HWY_BEFORE_NAMESPACE(); namespace hwy { namespace HWY_NAMESPACE { template HWY_NOINLINE void TestCappedLimit(T /* tag */) { CappedTag d; // Ensure two ops compile HWY_ASSERT_VEC_EQ(d, Zero(d), Set(d, T{0})); // Ensure we do not write more than kLimit lanes const size_t N = Lanes(d); if (kLimit < N) { auto lanes = AllocateAligned(N); std::fill(lanes.get(), lanes.get() + N, T{0}); Store(Set(d, T{1}), d, lanes.get()); for (size_t i = kLimit; i < N; ++i) { HWY_ASSERT_EQ(lanes[i], T{0}); } } } // Adapter for ForAllTypes - we are constructing our own Simd<> and thus do not // use ForPartialVectors etc. struct TestCapped { template void operator()(T t) const { TestCappedLimit<1>(t); TestCappedLimit<3>(t); TestCappedLimit<5>(t); TestCappedLimit<1ull << 15>(t); } }; HWY_NOINLINE void TestAllCapped() { ForAllTypes(TestCapped()); } // For testing that ForPartialVectors reaches every possible size: using NumLanesSet = std::bitset; // Monostate pattern because ForPartialVectors takes a template argument, not a // functor by reference. static NumLanesSet* NumLanesForSize(size_t sizeof_t) { HWY_ASSERT(sizeof_t <= sizeof(uint64_t)); static NumLanesSet num_lanes[sizeof(uint64_t) + 1]; return num_lanes + sizeof_t; } static size_t* MaxLanesForSize(size_t sizeof_t) { HWY_ASSERT(sizeof_t <= sizeof(uint64_t)); static size_t num_lanes[sizeof(uint64_t) + 1] = {0}; return num_lanes + sizeof_t; } struct TestMaxLanes { template HWY_NOINLINE void operator()(T /*unused*/, D d) { const size_t N = Lanes(d); const size_t kMax = MaxLanes(d); // for RVV, includes LMUL HWY_ASSERT(N <= kMax); HWY_ASSERT(kMax <= (HWY_MAX_BYTES / sizeof(T))); NumLanesForSize(sizeof(T))->set(N); *MaxLanesForSize(sizeof(T)) = HWY_MAX(*MaxLanesForSize(sizeof(T)), N); } }; HWY_NOINLINE void TestAllMaxLanes() { ForAllTypes(ForPartialVectors()); // Ensure ForPartialVectors visited all powers of two [1, N]. for (size_t sizeof_t : {sizeof(uint8_t), sizeof(uint16_t), sizeof(uint32_t), sizeof(uint64_t)}) { const size_t N = *MaxLanesForSize(sizeof_t); for (size_t i = 1; i <= N; i += i) { if (!NumLanesForSize(sizeof_t)->test(i)) { fprintf(stderr, "T=%d: did not visit for N=%d, max=%d\n", static_cast(sizeof_t), static_cast(i), static_cast(N)); HWY_ASSERT(false); } } } } struct TestSet { template HWY_NOINLINE void operator()(T /*unused*/, D d) { // Zero const auto v0 = Zero(d); const size_t N = Lanes(d); auto expected = AllocateAligned(N); std::fill(expected.get(), expected.get() + N, T(0)); HWY_ASSERT_VEC_EQ(d, expected.get(), v0); // Set const auto v2 = Set(d, T(2)); for (size_t i = 0; i < N; ++i) { expected[i] = 2; } HWY_ASSERT_VEC_EQ(d, expected.get(), v2); // Iota const auto vi = Iota(d, T(5)); for (size_t i = 0; i < N; ++i) { expected[i] = T(5 + i); } HWY_ASSERT_VEC_EQ(d, expected.get(), vi); // Undefined const auto vu = Undefined(d); Store(vu, d, expected.get()); } }; HWY_NOINLINE void TestAllSet() { ForAllTypes(ForPartialVectors()); } // Ensures wraparound (mod 2^bits) struct TestOverflow { template HWY_NOINLINE void operator()(T /*unused*/, D d) { const auto v1 = Set(d, T(1)); const auto vmax = Set(d, LimitsMax()); const auto vmin = Set(d, LimitsMin()); // Unsigned underflow / negative -> positive HWY_ASSERT_VEC_EQ(d, vmax, Sub(vmin, v1)); // Unsigned overflow / positive -> negative HWY_ASSERT_VEC_EQ(d, vmin, Add(vmax, v1)); } }; HWY_NOINLINE void TestAllOverflow() { ForIntegerTypes(ForPartialVectors()); } struct TestClamp { template HWY_NOINLINE void operator()(T /*unused*/, D d) { const auto v0 = Zero(d); const auto v1 = Set(d, 1); const auto v2 = Set(d, 2); HWY_ASSERT_VEC_EQ(d, v1, Clamp(v2, v0, v1)); HWY_ASSERT_VEC_EQ(d, v1, Clamp(v0, v1, v2)); } }; HWY_NOINLINE void TestAllClamp() { ForAllTypes(ForPartialVectors()); } struct TestSignBitInteger { template HWY_NOINLINE void operator()(T /*unused*/, D d) { const auto v0 = Zero(d); const auto all = VecFromMask(d, Eq(v0, v0)); const auto vs = SignBit(d); const auto other = Sub(vs, Set(d, 1)); // Shifting left by one => overflow, equal zero HWY_ASSERT_VEC_EQ(d, v0, Add(vs, vs)); // Verify the lower bits are zero (only +/- and logical ops are available // for all types) HWY_ASSERT_VEC_EQ(d, all, Add(vs, other)); } }; struct TestSignBitFloat { template HWY_NOINLINE void operator()(T /*unused*/, D d) { const auto v0 = Zero(d); const auto vs = SignBit(d); const auto vp = Set(d, 2.25); const auto vn = Set(d, -2.25); HWY_ASSERT_VEC_EQ(d, Or(vp, vs), vn); HWY_ASSERT_VEC_EQ(d, AndNot(vs, vn), vp); HWY_ASSERT_VEC_EQ(d, v0, vs); } }; HWY_NOINLINE void TestAllSignBit() { ForIntegerTypes(ForPartialVectors()); ForFloatTypes(ForPartialVectors()); } // inline to work around incorrect SVE codegen (only first 128 bits used). template HWY_INLINE void AssertNaN(D d, VecArg v, const char* file, int line) { using T = TFromD; const size_t N = Lanes(d); if (!AllTrue(d, IsNaN(v))) { Print(d, "not all NaN", v, 0, N); Print(d, "mask", VecFromMask(d, IsNaN(v)), 0, N); const std::string type_name = TypeName(T(), N); // RVV lacks PRIu64 and MSYS still has problems with %zu, so print bytes to // avoid truncating doubles. uint8_t bytes[HWY_MAX(sizeof(T), 8)] = {0}; const T lane = GetLane(v); CopyBytes(&lane, bytes); Abort(file, line, "Expected %s NaN, got %E (bytes %02x %02x %02x %02x %02x %02x %02x " "%02x)", type_name.c_str(), lane, bytes[0], bytes[1], bytes[2], bytes[3], bytes[4], bytes[5], bytes[6], bytes[7]); } } #define HWY_ASSERT_NAN(d, v) AssertNaN(d, v, __FILE__, __LINE__) struct TestNaN { template HWY_NOINLINE void operator()(T /*unused*/, D d) { const Vec v1 = Set(d, static_cast(Unpredictable1())); const Vec nan = IfThenElse(Eq(v1, Set(d, T(1))), NaN(d), v1); HWY_ASSERT_NAN(d, nan); // Arithmetic HWY_ASSERT_NAN(d, Add(nan, v1)); HWY_ASSERT_NAN(d, Add(v1, nan)); HWY_ASSERT_NAN(d, Sub(nan, v1)); HWY_ASSERT_NAN(d, Sub(v1, nan)); HWY_ASSERT_NAN(d, Mul(nan, v1)); HWY_ASSERT_NAN(d, Mul(v1, nan)); HWY_ASSERT_NAN(d, Div(nan, v1)); HWY_ASSERT_NAN(d, Div(v1, nan)); // FMA HWY_ASSERT_NAN(d, MulAdd(nan, v1, v1)); HWY_ASSERT_NAN(d, MulAdd(v1, nan, v1)); HWY_ASSERT_NAN(d, MulAdd(v1, v1, nan)); HWY_ASSERT_NAN(d, MulSub(nan, v1, v1)); HWY_ASSERT_NAN(d, MulSub(v1, nan, v1)); HWY_ASSERT_NAN(d, MulSub(v1, v1, nan)); HWY_ASSERT_NAN(d, NegMulAdd(nan, v1, v1)); HWY_ASSERT_NAN(d, NegMulAdd(v1, nan, v1)); HWY_ASSERT_NAN(d, NegMulAdd(v1, v1, nan)); HWY_ASSERT_NAN(d, NegMulSub(nan, v1, v1)); HWY_ASSERT_NAN(d, NegMulSub(v1, nan, v1)); HWY_ASSERT_NAN(d, NegMulSub(v1, v1, nan)); // Rcp/Sqrt HWY_ASSERT_NAN(d, Sqrt(nan)); // Sign manipulation HWY_ASSERT_NAN(d, Abs(nan)); HWY_ASSERT_NAN(d, Neg(nan)); HWY_ASSERT_NAN(d, CopySign(nan, v1)); HWY_ASSERT_NAN(d, CopySignToAbs(nan, v1)); // Rounding HWY_ASSERT_NAN(d, Ceil(nan)); HWY_ASSERT_NAN(d, Floor(nan)); HWY_ASSERT_NAN(d, Round(nan)); HWY_ASSERT_NAN(d, Trunc(nan)); // Logical (And/AndNot/Xor will clear NaN!) HWY_ASSERT_NAN(d, Or(nan, v1)); // Comparison HWY_ASSERT(AllFalse(d, Eq(nan, v1))); HWY_ASSERT(AllFalse(d, Gt(nan, v1))); HWY_ASSERT(AllFalse(d, Lt(nan, v1))); HWY_ASSERT(AllFalse(d, Ge(nan, v1))); HWY_ASSERT(AllFalse(d, Le(nan, v1))); // Reduction HWY_ASSERT_NAN(d, SumOfLanes(d, nan)); // TODO(janwas): re-enable after QEMU/Spike are fixed #if HWY_TARGET != HWY_RVV HWY_ASSERT_NAN(d, MinOfLanes(d, nan)); HWY_ASSERT_NAN(d, MaxOfLanes(d, nan)); #endif // Min/Max #if (HWY_ARCH_X86 || HWY_ARCH_WASM) && (HWY_TARGET < HWY_EMU128) // Native WASM or x86 SIMD return the second operand if any input is NaN. HWY_ASSERT_VEC_EQ(d, v1, Min(nan, v1)); HWY_ASSERT_VEC_EQ(d, v1, Max(nan, v1)); HWY_ASSERT_NAN(d, Min(v1, nan)); HWY_ASSERT_NAN(d, Max(v1, nan)); #elif HWY_TARGET == HWY_NEON && HWY_ARCH_ARM_V7 // ARMv7 NEON returns NaN if any input is NaN. HWY_ASSERT_NAN(d, Min(v1, nan)); HWY_ASSERT_NAN(d, Max(v1, nan)); HWY_ASSERT_NAN(d, Min(nan, v1)); HWY_ASSERT_NAN(d, Max(nan, v1)); #else // IEEE 754-2019 minimumNumber is defined as the other argument if exactly // one is NaN, and qNaN if both are. HWY_ASSERT_VEC_EQ(d, v1, Min(nan, v1)); HWY_ASSERT_VEC_EQ(d, v1, Max(nan, v1)); HWY_ASSERT_VEC_EQ(d, v1, Min(v1, nan)); HWY_ASSERT_VEC_EQ(d, v1, Max(v1, nan)); #endif HWY_ASSERT_NAN(d, Min(nan, nan)); HWY_ASSERT_NAN(d, Max(nan, nan)); } }; // For functions only available for float32 struct TestF32NaN { template HWY_NOINLINE void operator()(T /*unused*/, D d) { const auto v1 = Set(d, T(Unpredictable1())); const auto nan = IfThenElse(Eq(v1, Set(d, T(1))), NaN(d), v1); HWY_ASSERT_NAN(d, ApproximateReciprocal(nan)); HWY_ASSERT_NAN(d, ApproximateReciprocalSqrt(nan)); HWY_ASSERT_NAN(d, AbsDiff(nan, v1)); HWY_ASSERT_NAN(d, AbsDiff(v1, nan)); } }; HWY_NOINLINE void TestAllNaN() { ForFloatTypes(ForPartialVectors()); ForPartialVectors()(float()); } struct TestIsNaN { template HWY_NOINLINE void operator()(T /*unused*/, D d) { const auto v1 = Set(d, T(Unpredictable1())); const auto inf = IfThenElse(Eq(v1, Set(d, T(1))), Inf(d), v1); const auto nan = IfThenElse(Eq(v1, Set(d, T(1))), NaN(d), v1); const auto neg = Set(d, T{-1}); HWY_ASSERT_NAN(d, nan); HWY_ASSERT_MASK_EQ(d, MaskFalse(d), IsNaN(inf)); HWY_ASSERT_MASK_EQ(d, MaskFalse(d), IsNaN(CopySign(inf, neg))); HWY_ASSERT_MASK_EQ(d, MaskTrue(d), IsNaN(nan)); HWY_ASSERT_MASK_EQ(d, MaskTrue(d), IsNaN(CopySign(nan, neg))); HWY_ASSERT_MASK_EQ(d, MaskFalse(d), IsNaN(v1)); HWY_ASSERT_MASK_EQ(d, MaskFalse(d), IsNaN(Zero(d))); HWY_ASSERT_MASK_EQ(d, MaskFalse(d), IsNaN(Set(d, hwy::LowestValue()))); HWY_ASSERT_MASK_EQ(d, MaskFalse(d), IsNaN(Set(d, hwy::HighestValue()))); } }; HWY_NOINLINE void TestAllIsNaN() { ForFloatTypes(ForPartialVectors()); } struct TestIsInf { template HWY_NOINLINE void operator()(T /*unused*/, D d) { const auto v1 = Set(d, T(Unpredictable1())); const auto inf = IfThenElse(Eq(v1, Set(d, T(1))), Inf(d), v1); const auto nan = IfThenElse(Eq(v1, Set(d, T(1))), NaN(d), v1); const auto neg = Set(d, T{-1}); HWY_ASSERT_MASK_EQ(d, MaskTrue(d), IsInf(inf)); HWY_ASSERT_MASK_EQ(d, MaskTrue(d), IsInf(CopySign(inf, neg))); HWY_ASSERT_MASK_EQ(d, MaskFalse(d), IsInf(nan)); HWY_ASSERT_MASK_EQ(d, MaskFalse(d), IsInf(CopySign(nan, neg))); HWY_ASSERT_MASK_EQ(d, MaskFalse(d), IsInf(v1)); HWY_ASSERT_MASK_EQ(d, MaskFalse(d), IsInf(Zero(d))); HWY_ASSERT_MASK_EQ(d, MaskFalse(d), IsInf(Set(d, hwy::LowestValue()))); HWY_ASSERT_MASK_EQ(d, MaskFalse(d), IsInf(Set(d, hwy::HighestValue()))); } }; HWY_NOINLINE void TestAllIsInf() { ForFloatTypes(ForPartialVectors()); } struct TestIsFinite { template HWY_NOINLINE void operator()(T /*unused*/, D d) { const auto v1 = Set(d, T(Unpredictable1())); const auto inf = IfThenElse(Eq(v1, Set(d, T(1))), Inf(d), v1); const auto nan = IfThenElse(Eq(v1, Set(d, T(1))), NaN(d), v1); const auto neg = Set(d, T{-1}); HWY_ASSERT_MASK_EQ(d, MaskFalse(d), IsFinite(inf)); HWY_ASSERT_MASK_EQ(d, MaskFalse(d), IsFinite(CopySign(inf, neg))); HWY_ASSERT_MASK_EQ(d, MaskFalse(d), IsFinite(nan)); HWY_ASSERT_MASK_EQ(d, MaskFalse(d), IsFinite(CopySign(nan, neg))); HWY_ASSERT_MASK_EQ(d, MaskTrue(d), IsFinite(v1)); HWY_ASSERT_MASK_EQ(d, MaskTrue(d), IsFinite(Zero(d))); HWY_ASSERT_MASK_EQ(d, MaskTrue(d), IsFinite(Set(d, hwy::LowestValue()))); HWY_ASSERT_MASK_EQ(d, MaskTrue(d), IsFinite(Set(d, hwy::HighestValue()))); } }; HWY_NOINLINE void TestAllIsFinite() { ForFloatTypes(ForPartialVectors()); } struct TestCopyAndAssign { template HWY_NOINLINE void operator()(T /*unused*/, D d) { // copy V const auto v3 = Iota(d, 3); auto v3b(v3); HWY_ASSERT_VEC_EQ(d, v3, v3b); // assign V auto v3c = Undefined(d); v3c = v3; HWY_ASSERT_VEC_EQ(d, v3, v3c); } }; HWY_NOINLINE void TestAllCopyAndAssign() { ForAllTypes(ForPartialVectors()); } struct TestGetLane { template HWY_NOINLINE void operator()(T /*unused*/, D d) { HWY_ASSERT_EQ(T(0), GetLane(Zero(d))); HWY_ASSERT_EQ(T(1), GetLane(Set(d, 1))); } }; HWY_NOINLINE void TestAllGetLane() { ForAllTypes(ForPartialVectors()); } struct TestDFromV { template HWY_NOINLINE void operator()(T /*unused*/, D d) { const auto v0 = Zero(d); using D0 = DFromV; // not necessarily same as D const auto v0b = And(v0, Set(D0(), 1)); // but vectors can interoperate HWY_ASSERT_VEC_EQ(d, v0, v0b); } }; HWY_NOINLINE void TestAllDFromV() { ForAllTypes(ForPartialVectors()); } // NOLINTNEXTLINE(google-readability-namespace-comments) } // namespace HWY_NAMESPACE } // namespace hwy HWY_AFTER_NAMESPACE(); #if HWY_ONCE namespace hwy { HWY_BEFORE_TEST(HighwayTest); HWY_EXPORT_AND_TEST_P(HighwayTest, TestAllCapped); HWY_EXPORT_AND_TEST_P(HighwayTest, TestAllMaxLanes); HWY_EXPORT_AND_TEST_P(HighwayTest, TestAllSet); HWY_EXPORT_AND_TEST_P(HighwayTest, TestAllOverflow); HWY_EXPORT_AND_TEST_P(HighwayTest, TestAllClamp); HWY_EXPORT_AND_TEST_P(HighwayTest, TestAllSignBit); HWY_EXPORT_AND_TEST_P(HighwayTest, TestAllNaN); HWY_EXPORT_AND_TEST_P(HighwayTest, TestAllIsNaN); HWY_EXPORT_AND_TEST_P(HighwayTest, TestAllIsInf); HWY_EXPORT_AND_TEST_P(HighwayTest, TestAllIsFinite); HWY_EXPORT_AND_TEST_P(HighwayTest, TestAllCopyAndAssign); HWY_EXPORT_AND_TEST_P(HighwayTest, TestAllGetLane); HWY_EXPORT_AND_TEST_P(HighwayTest, TestAllDFromV); } // namespace hwy #endif