diff options
Diffstat (limited to 'third_party/highway/hwy/tests/reduction_test.cc')
-rw-r--r-- | third_party/highway/hwy/tests/reduction_test.cc | 261 |
1 files changed, 261 insertions, 0 deletions
diff --git a/third_party/highway/hwy/tests/reduction_test.cc b/third_party/highway/hwy/tests/reduction_test.cc new file mode 100644 index 0000000000..5cc051ef1c --- /dev/null +++ b/third_party/highway/hwy/tests/reduction_test.cc @@ -0,0 +1,261 @@ +// Copyright 2019 Google LLC +// SPDX-License-Identifier: Apache-2.0 +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <stddef.h> +#include <stdint.h> + +#undef HWY_TARGET_INCLUDE +#define HWY_TARGET_INCLUDE "tests/reduction_test.cc" +#include "hwy/foreach_target.h" // IWYU pragma: keep +#include "hwy/highway.h" +#include "hwy/tests/test_util-inl.h" + +HWY_BEFORE_NAMESPACE(); +namespace hwy { +namespace HWY_NAMESPACE { + +struct TestSumOfLanes { + template <typename T, size_t N, int P, + hwy::EnableIf<!IsSigned<T>() || ((N & 1) != 0)>* = nullptr> + HWY_NOINLINE void SignedEvenLengthVectorTests(Simd<T, N, P>) { + // do nothing + } + template <typename T, size_t N, int P, + hwy::EnableIf<IsSigned<T>() && ((N & 1) == 0)>* = nullptr> + HWY_NOINLINE void SignedEvenLengthVectorTests(Simd<T, N, P> d) { + const T pairs = static_cast<T>(Lanes(d) / 2); + + // Lanes are the repeated sequence -2, 1, [...]; each pair sums to -1, + // so the eventual total is just -(N/2). + Vec<decltype(d)> v = + InterleaveLower(Set(d, static_cast<T>(-2)), Set(d, T{1})); + HWY_ASSERT_VEC_EQ(d, Set(d, static_cast<T>(-pairs)), SumOfLanes(d, v)); + + // Similar test with a positive result. + v = InterleaveLower(Set(d, static_cast<T>(-2)), Set(d, T{4})); + HWY_ASSERT_VEC_EQ(d, Set(d, static_cast<T>(pairs * 2)), SumOfLanes(d, v)); + } + + template <typename T, class D> + HWY_NOINLINE void operator()(T /*unused*/, D d) { + const size_t N = Lanes(d); + auto in_lanes = AllocateAligned<T>(N); + + // Lane i = bit i, higher lanes 0 + double sum = 0.0; + // Avoid setting sign bit and cap at double precision + constexpr size_t kBits = HWY_MIN(sizeof(T) * 8 - 1, 51); + for (size_t i = 0; i < N; ++i) { + in_lanes[i] = i < kBits ? static_cast<T>(1ull << i) : 0; + sum += static_cast<double>(in_lanes[i]); + } + HWY_ASSERT_VEC_EQ(d, Set(d, T(sum)), + SumOfLanes(d, Load(d, in_lanes.get()))); + + // Lane i = i (iota) to include upper lanes + sum = 0.0; + for (size_t i = 0; i < N; ++i) { + sum += static_cast<double>(i); + } + HWY_ASSERT_VEC_EQ(d, Set(d, T(sum)), SumOfLanes(d, Iota(d, 0))); + + // Run more tests only for signed types with even vector lengths. Some of + // this code may not otherwise compile, so put it in a templated function. + SignedEvenLengthVectorTests(d); + } +}; + +HWY_NOINLINE void TestAllSumOfLanes() { + ForUIF3264(ForPartialVectors<TestSumOfLanes>()); + ForUI16(ForPartialVectors<TestSumOfLanes>()); + +#if HWY_TARGET == HWY_NEON || HWY_TARGET == HWY_SSE4 || HWY_TARGET == HWY_SSSE3 + ForUI8(ForGEVectors<64, TestSumOfLanes>()); +#endif +} + +struct TestMinOfLanes { + template <typename T, class D> + HWY_NOINLINE void operator()(T /*unused*/, D d) { + const size_t N = Lanes(d); + auto in_lanes = AllocateAligned<T>(N); + + // Lane i = bit i, higher lanes = 2 (not the minimum) + T min = HighestValue<T>(); + // Avoid setting sign bit and cap at double precision + constexpr size_t kBits = HWY_MIN(sizeof(T) * 8 - 1, 51); + for (size_t i = 0; i < N; ++i) { + in_lanes[i] = i < kBits ? static_cast<T>(1ull << i) : 2; + min = HWY_MIN(min, in_lanes[i]); + } + HWY_ASSERT_VEC_EQ(d, Set(d, min), MinOfLanes(d, Load(d, in_lanes.get()))); + + // Lane i = N - i to include upper lanes + min = HighestValue<T>(); + for (size_t i = 0; i < N; ++i) { + in_lanes[i] = static_cast<T>(N - i); // no 8-bit T so no wraparound + min = HWY_MIN(min, in_lanes[i]); + } + HWY_ASSERT_VEC_EQ(d, Set(d, min), MinOfLanes(d, Load(d, in_lanes.get()))); + + // Bug #910: also check negative values + min = HighestValue<T>(); + const T input_copy[] = {static_cast<T>(-1), + static_cast<T>(-2), + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14}; + size_t i = 0; + for (; i < HWY_MIN(N, sizeof(input_copy) / sizeof(T)); ++i) { + in_lanes[i] = input_copy[i]; + min = HWY_MIN(min, input_copy[i]); + } + // Pad with neutral element to full vector (so we can load) + for (; i < N; ++i) { + in_lanes[i] = min; + } + HWY_ASSERT_VEC_EQ(d, Set(d, min), MinOfLanes(d, Load(d, in_lanes.get()))); + } +}; + +struct TestMaxOfLanes { + template <typename T, class D> + HWY_NOINLINE void operator()(T /*unused*/, D d) { + const size_t N = Lanes(d); + auto in_lanes = AllocateAligned<T>(N); + + T max = LowestValue<T>(); + // Avoid setting sign bit and cap at double precision + constexpr size_t kBits = HWY_MIN(sizeof(T) * 8 - 1, 51); + for (size_t i = 0; i < N; ++i) { + in_lanes[i] = i < kBits ? static_cast<T>(1ull << i) : 0; + max = HWY_MAX(max, in_lanes[i]); + } + HWY_ASSERT_VEC_EQ(d, Set(d, max), MaxOfLanes(d, Load(d, in_lanes.get()))); + + // Lane i = i to include upper lanes + max = LowestValue<T>(); + for (size_t i = 0; i < N; ++i) { + in_lanes[i] = static_cast<T>(i); // no 8-bit T so no wraparound + max = HWY_MAX(max, in_lanes[i]); + } + HWY_ASSERT_VEC_EQ(d, Set(d, max), MaxOfLanes(d, Load(d, in_lanes.get()))); + + // Bug #910: also check negative values + max = LowestValue<T>(); + const T input_copy[] = {static_cast<T>(-1), + static_cast<T>(-2), + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14}; + size_t i = 0; + for (; i < HWY_MIN(N, sizeof(input_copy) / sizeof(T)); ++i) { + in_lanes[i] = input_copy[i]; + max = HWY_MAX(max, in_lanes[i]); + } + // Pad with neutral element to full vector (so we can load) + for (; i < N; ++i) { + in_lanes[i] = max; + } + HWY_ASSERT_VEC_EQ(d, Set(d, max), MaxOfLanes(d, Load(d, in_lanes.get()))); + } +}; + +HWY_NOINLINE void TestAllMinMaxOfLanes() { + const ForPartialVectors<TestMinOfLanes> test_min; + const ForPartialVectors<TestMaxOfLanes> test_max; + ForUIF3264(test_min); + ForUIF3264(test_max); + ForUI16(test_min); + ForUI16(test_max); + +#if HWY_TARGET == HWY_NEON || HWY_TARGET == HWY_SSE4 || HWY_TARGET == HWY_SSSE3 + ForUI8(ForGEVectors<64, TestMinOfLanes>()); + ForUI8(ForGEVectors<64, TestMaxOfLanes>()); +#endif +} + +struct TestSumsOf8 { + template <typename T, class D> + HWY_NOINLINE void operator()(T /*unused*/, D d) { + RandomState rng; + + const size_t N = Lanes(d); + if (N < 8) return; + const Repartition<uint64_t, D> du64; + + auto in_lanes = AllocateAligned<T>(N); + auto sum_lanes = AllocateAligned<uint64_t>(N / 8); + + for (size_t rep = 0; rep < 100; ++rep) { + for (size_t i = 0; i < N; ++i) { + in_lanes[i] = Random64(&rng) & 0xFF; + } + + for (size_t idx_sum = 0; idx_sum < N / 8; ++idx_sum) { + uint64_t sum = 0; + for (size_t i = 0; i < 8; ++i) { + sum += in_lanes[idx_sum * 8 + i]; + } + sum_lanes[idx_sum] = sum; + } + + const Vec<D> in = Load(d, in_lanes.get()); + HWY_ASSERT_VEC_EQ(du64, sum_lanes.get(), SumsOf8(in)); + } + } +}; + +HWY_NOINLINE void TestAllSumsOf8() { + ForGEVectors<64, TestSumsOf8>()(uint8_t()); +} + +// NOLINTNEXTLINE(google-readability-namespace-comments) +} // namespace HWY_NAMESPACE +} // namespace hwy +HWY_AFTER_NAMESPACE(); + +#if HWY_ONCE + +namespace hwy { +HWY_BEFORE_TEST(HwyReductionTest); +HWY_EXPORT_AND_TEST_P(HwyReductionTest, TestAllSumOfLanes); +HWY_EXPORT_AND_TEST_P(HwyReductionTest, TestAllMinMaxOfLanes); +HWY_EXPORT_AND_TEST_P(HwyReductionTest, TestAllSumsOf8); +} // namespace hwy + +#endif |