// Copyright 2019 Google LLC // SPDX-License-Identifier: Apache-2.0 // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. // Tests some ops specific to floating-point types (Div, Round etc.) #include #include #include // std::copy, std::fill #include #include // std::abs, std::isnan, std::isinf, std::ceil, std::floor #undef HWY_TARGET_INCLUDE #define HWY_TARGET_INCLUDE "tests/float_test.cc" #include "hwy/foreach_target.h" // IWYU pragma: keep #include "hwy/highway.h" #include "hwy/tests/test_util-inl.h" HWY_BEFORE_NAMESPACE(); namespace hwy { namespace HWY_NAMESPACE { struct TestDiv { template HWY_NOINLINE void operator()(T /*unused*/, D d) { const auto v = Iota(d, T(-2)); const auto v1 = Set(d, T(1)); // Unchanged after division by 1. HWY_ASSERT_VEC_EQ(d, v, Div(v, v1)); const size_t N = Lanes(d); auto expected = AllocateAligned(N); for (size_t i = 0; i < N; ++i) { expected[i] = (T(i) - 2) / T(2); } HWY_ASSERT_VEC_EQ(d, expected.get(), Div(v, Set(d, T(2)))); } }; HWY_NOINLINE void TestAllDiv() { ForFloatTypes(ForPartialVectors()); } struct TestApproximateReciprocal { template HWY_NOINLINE void operator()(T /*unused*/, D d) { const auto v = Iota(d, T(-2)); const auto nonzero = IfThenElse(Eq(v, Zero(d)), Set(d, T(1)), v); const size_t N = Lanes(d); auto input = AllocateAligned(N); Store(nonzero, d, input.get()); auto actual = AllocateAligned(N); Store(ApproximateReciprocal(nonzero), d, actual.get()); double max_l1 = 0.0; double worst_expected = 0.0; double worst_actual = 0.0; for (size_t i = 0; i < N; ++i) { const double expected = 1.0 / input[i]; const double l1 = std::abs(expected - actual[i]); if (l1 > max_l1) { max_l1 = l1; worst_expected = expected; worst_actual = actual[i]; } } const double abs_worst_expected = std::abs(worst_expected); if (abs_worst_expected > 1E-5) { const double max_rel = max_l1 / abs_worst_expected; fprintf(stderr, "max l1 %f rel %f (%f vs %f)\n", max_l1, max_rel, worst_expected, worst_actual); HWY_ASSERT(max_rel < 0.004); } } }; HWY_NOINLINE void TestAllApproximateReciprocal() { ForPartialVectors()(float()); } struct TestSquareRoot { template HWY_NOINLINE void operator()(T /*unused*/, D d) { const auto vi = Iota(d, 0); HWY_ASSERT_VEC_EQ(d, vi, Sqrt(Mul(vi, vi))); } }; HWY_NOINLINE void TestAllSquareRoot() { ForFloatTypes(ForPartialVectors()); } struct TestReciprocalSquareRoot { template HWY_NOINLINE void operator()(T /*unused*/, D d) { const auto v = Set(d, 123.0f); const size_t N = Lanes(d); auto lanes = AllocateAligned(N); Store(ApproximateReciprocalSqrt(v), d, lanes.get()); for (size_t i = 0; i < N; ++i) { float err = lanes[i] - 0.090166f; if (err < 0.0f) err = -err; if (err >= 4E-4f) { HWY_ABORT("Lane %d (%d): actual %f err %f\n", static_cast(i), static_cast(N), lanes[i], err); } } } }; HWY_NOINLINE void TestAllReciprocalSquareRoot() { ForPartialVectors()(float()); } template AlignedFreeUniquePtr RoundTestCases(T /*unused*/, D d, size_t& padded) { const T eps = std::numeric_limits::epsilon(); const T test_cases[] = { // +/- 1 T(1), T(-1), // +/- 0 T(0), T(-0), // near 0 T(0.4), T(-0.4), // +/- integer T(4), T(-32), // positive near limit MantissaEnd() - T(1.5), MantissaEnd() + T(1.5), // negative near limit -MantissaEnd() - T(1.5), -MantissaEnd() + T(1.5), // positive tiebreak T(1.5), T(2.5), // negative tiebreak T(-1.5), T(-2.5), // positive +/- delta T(2.0001), T(3.9999), // negative +/- delta T(-999.9999), T(-998.0001), // positive +/- epsilon T(1) + eps, T(1) - eps, // negative +/- epsilon T(-1) + eps, T(-1) - eps, // +/- huge (but still fits in float) T(1E34), T(-1E35), // +/- infinity std::numeric_limits::infinity(), -std::numeric_limits::infinity(), // qNaN GetLane(NaN(d)) }; const size_t kNumTestCases = sizeof(test_cases) / sizeof(test_cases[0]); const size_t N = Lanes(d); padded = RoundUpTo(kNumTestCases, N); // allow loading whole vectors auto in = AllocateAligned(padded); auto expected = AllocateAligned(padded); std::copy(test_cases, test_cases + kNumTestCases, in.get()); std::fill(in.get() + kNumTestCases, in.get() + padded, T(0)); return in; } struct TestRound { template HWY_NOINLINE void operator()(T t, D d) { size_t padded; auto in = RoundTestCases(t, d, padded); auto expected = AllocateAligned(padded); for (size_t i = 0; i < padded; ++i) { // Avoid [std::]round, which does not round to nearest *even*. // NOTE: std:: version from C++11 cmath is not defined in RVV GCC, see // https://lists.freebsd.org/pipermail/freebsd-current/2014-January/048130.html expected[i] = static_cast(nearbyint(in[i])); } for (size_t i = 0; i < padded; i += Lanes(d)) { HWY_ASSERT_VEC_EQ(d, &expected[i], Round(Load(d, &in[i]))); } } }; HWY_NOINLINE void TestAllRound() { ForFloatTypes(ForPartialVectors()); } struct TestNearestInt { template HWY_NOINLINE void operator()(TF tf, const DF df) { using TI = MakeSigned; const RebindToSigned di; size_t padded; auto in = RoundTestCases(tf, df, padded); auto expected = AllocateAligned(padded); constexpr double max = static_cast(LimitsMax()); for (size_t i = 0; i < padded; ++i) { if (std::isnan(in[i])) { // We replace NaN with 0 below (no_nan) expected[i] = 0; } else if (std::isinf(in[i]) || double{std::abs(in[i])} >= max) { // Avoid undefined result for lrintf expected[i] = std::signbit(in[i]) ? LimitsMin() : LimitsMax(); } else { expected[i] = static_cast(lrintf(in[i])); } } for (size_t i = 0; i < padded; i += Lanes(df)) { const auto v = Load(df, &in[i]); const auto no_nan = IfThenElse(Eq(v, v), v, Zero(df)); HWY_ASSERT_VEC_EQ(di, &expected[i], NearestInt(no_nan)); } } }; HWY_NOINLINE void TestAllNearestInt() { ForPartialVectors()(float()); } struct TestTrunc { template HWY_NOINLINE void operator()(T t, D d) { size_t padded; auto in = RoundTestCases(t, d, padded); auto expected = AllocateAligned(padded); for (size_t i = 0; i < padded; ++i) { // NOTE: std:: version from C++11 cmath is not defined in RVV GCC, see // https://lists.freebsd.org/pipermail/freebsd-current/2014-January/048130.html expected[i] = static_cast(trunc(in[i])); } for (size_t i = 0; i < padded; i += Lanes(d)) { HWY_ASSERT_VEC_EQ(d, &expected[i], Trunc(Load(d, &in[i]))); } } }; HWY_NOINLINE void TestAllTrunc() { ForFloatTypes(ForPartialVectors()); } struct TestCeil { template HWY_NOINLINE void operator()(T t, D d) { size_t padded; auto in = RoundTestCases(t, d, padded); auto expected = AllocateAligned(padded); for (size_t i = 0; i < padded; ++i) { expected[i] = std::ceil(in[i]); } for (size_t i = 0; i < padded; i += Lanes(d)) { HWY_ASSERT_VEC_EQ(d, &expected[i], Ceil(Load(d, &in[i]))); } } }; HWY_NOINLINE void TestAllCeil() { ForFloatTypes(ForPartialVectors()); } struct TestFloor { template HWY_NOINLINE void operator()(T t, D d) { size_t padded; auto in = RoundTestCases(t, d, padded); auto expected = AllocateAligned(padded); for (size_t i = 0; i < padded; ++i) { expected[i] = std::floor(in[i]); } for (size_t i = 0; i < padded; i += Lanes(d)) { HWY_ASSERT_VEC_EQ(d, &expected[i], Floor(Load(d, &in[i]))); } } }; HWY_NOINLINE void TestAllFloor() { ForFloatTypes(ForPartialVectors()); } struct TestAbsDiff { template HWY_NOINLINE void operator()(T /*unused*/, D d) { const size_t N = Lanes(d); auto in_lanes_a = AllocateAligned(N); auto in_lanes_b = AllocateAligned(N); auto out_lanes = AllocateAligned(N); for (size_t i = 0; i < N; ++i) { in_lanes_a[i] = static_cast((i ^ 1u) << i); in_lanes_b[i] = static_cast(i << i); out_lanes[i] = std::abs(in_lanes_a[i] - in_lanes_b[i]); } const auto a = Load(d, in_lanes_a.get()); const auto b = Load(d, in_lanes_b.get()); const auto expected = Load(d, out_lanes.get()); HWY_ASSERT_VEC_EQ(d, expected, AbsDiff(a, b)); HWY_ASSERT_VEC_EQ(d, expected, AbsDiff(b, a)); } }; HWY_NOINLINE void TestAllAbsDiff() { ForPartialVectors()(float()); } // NOLINTNEXTLINE(google-readability-namespace-comments) } // namespace HWY_NAMESPACE } // namespace hwy HWY_AFTER_NAMESPACE(); #if HWY_ONCE namespace hwy { HWY_BEFORE_TEST(HwyFloatTest); HWY_EXPORT_AND_TEST_P(HwyFloatTest, TestAllDiv); HWY_EXPORT_AND_TEST_P(HwyFloatTest, TestAllApproximateReciprocal); HWY_EXPORT_AND_TEST_P(HwyFloatTest, TestAllSquareRoot); HWY_EXPORT_AND_TEST_P(HwyFloatTest, TestAllReciprocalSquareRoot); HWY_EXPORT_AND_TEST_P(HwyFloatTest, TestAllRound); HWY_EXPORT_AND_TEST_P(HwyFloatTest, TestAllNearestInt); HWY_EXPORT_AND_TEST_P(HwyFloatTest, TestAllTrunc); HWY_EXPORT_AND_TEST_P(HwyFloatTest, TestAllCeil); HWY_EXPORT_AND_TEST_P(HwyFloatTest, TestAllFloor); HWY_EXPORT_AND_TEST_P(HwyFloatTest, TestAllAbsDiff); } // namespace hwy #endif