summary refs log tree commit diff stats
path: root/third_party/highway/hwy/tests/reduction_test.cc
diff options
context:
space:
mode:
Diffstat (limited to 'third_party/highway/hwy/tests/reduction_test.cc')
-rw-r--r-- third_party/highway/hwy/tests/reduction_test.cc 261
1 files changed, 261 insertions, 0 deletions
diff --git a/third_party/highway/hwy/tests/reduction_test.cc b/third_party/highway/hwy/tests/reduction_test.cc
new file mode 100644
index 0000000000..5cc051ef1c
--- /dev/null
+++ b/third_party/highway/hwy/tests/reduction_test.cc
@@ -0,0 +1,261 @@
+// Copyright 2019 Google LLC
+// SPDX-License-Identifier: Apache-2.0
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <stddef.h>
+#include <stdint.h>
+
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "tests/reduction_test.cc"
+#include "hwy/foreach_target.h" // IWYU pragma: keep
+#include "hwy/highway.h"
+#include "hwy/tests/test_util-inl.h"
+
+HWY_BEFORE_NAMESPACE();
+namespace hwy {
+namespace HWY_NAMESPACE {
+
+// Checks SumOfLanes against totals accumulated by scalar code.
+struct TestSumOfLanes {
+  // Overload for !IsSigned<T>() or an odd N: there is nothing more to check.
+  template <typename T, size_t N, int P,
+            hwy::EnableIf<!IsSigned<T>() || ((N & 1) != 0)>* = nullptr>
+  HWY_NOINLINE void SignedEvenLengthVectorTests(Simd<T, N, P>) {}
+  // Overload for IsSigned<T>() and an even N: sums lanes of mixed sign.
+  template <typename T, size_t N, int P,
+            hwy::EnableIf<IsSigned<T>() && ((N & 1) == 0)>* = nullptr>
+  HWY_NOINLINE void SignedEvenLengthVectorTests(Simd<T, N, P> d) {
+    const T num_pairs = static_cast<T>(Lanes(d) / 2);
+    const Vec<decltype(d)> neg_two = Set(d, static_cast<T>(-2));
+    // Repeating -2, 1: every pair adds -1, so the total is -(N/2).
+    const Vec<decltype(d)> v_neg = InterleaveLower(neg_two, Set(d, T{1}));
+    HWY_ASSERT_VEC_EQ(d, Set(d, static_cast<T>(-num_pairs)),
+                      SumOfLanes(d, v_neg));
+
+    // Repeating -2, 4: every pair adds +2, so the total is positive.
+    const Vec<decltype(d)> v_pos = InterleaveLower(neg_two, Set(d, T{4}));
+    HWY_ASSERT_VEC_EQ(d, Set(d, static_cast<T>(num_pairs * 2)),
+                      SumOfLanes(d, v_pos));
+  }
+
+  template <typename T, class D>
+  HWY_NOINLINE void operator()(T /*unused*/, D d) {
+    const size_t num_lanes = Lanes(d);
+    auto lanes = AllocateAligned<T>(num_lanes);
+
+    // The expected sum lives in a double: use at most 51 bits, no sign bit.
+    constexpr size_t kBits = HWY_MIN(sizeof(T) * 8 - 1, 51);
+    // Lane i holds only bit i; lanes at or beyond kBits hold zero.
+    double expected = 0.0;
+    for (size_t i = 0; i < num_lanes; ++i) {
+      lanes[i] = i < kBits ? static_cast<T>(1ull << i) : 0;
+      expected += static_cast<double>(lanes[i]);
+    }
+    HWY_ASSERT_VEC_EQ(d, Set(d, T(expected)),
+                      SumOfLanes(d, Load(d, lanes.get())));
+
+    // Lane i holds i (Iota starting at 0), so that upper lanes contribute.
+    expected = 0.0;
+    for (size_t i = 0; i < num_lanes; ++i) {
+      expected += static_cast<double>(i);
+    }
+    HWY_ASSERT_VEC_EQ(d, Set(d, T(expected)), SumOfLanes(d, Iota(d, 0)));
+
+    // Extra checks for signed T with an even N; otherwise the empty overload.
+    SignedEvenLengthVectorTests(d);
+  }
+};
+
+HWY_NOINLINE void TestAllSumOfLanes() {
+  const ForPartialVectors<TestSumOfLanes> test_sum;  // reused by both calls
+  ForUIF3264(test_sum);
+  ForUI16(test_sum);
+#if HWY_TARGET == HWY_NEON || HWY_TARGET == HWY_SSE4 || HWY_TARGET == HWY_SSSE3
+  ForUI8(ForGEVectors<64, TestSumOfLanes>());  // 8-bit: these targets only
+#endif
+}
+
+// Checks MinOfLanes against minima found by scalar code, using three input
+// patterns.
+struct TestMinOfLanes {
+  // T only selects the lane type; d is passed on to Lanes, Set and Load.
+  template <typename T, class D>
+  HWY_NOINLINE void operator()(T /*unused*/, D d) {
+    const size_t num_lanes = Lanes(d);
+    auto lanes = AllocateAligned<T>(num_lanes);
+
+    // Pattern 1: lane i holds only bit i (never the sign bit, at most 51
+    // bits); lanes at or beyond kBits hold 2, which is not the minimum.
+    constexpr size_t kBits = HWY_MIN(sizeof(T) * 8 - 1, 51);
+    T expected = HighestValue<T>();
+    for (size_t i = 0; i < num_lanes; ++i) {
+      lanes[i] = i < kBits ? static_cast<T>(1ull << i) : 2;
+      expected = HWY_MIN(expected, lanes[i]);
+    }
+    HWY_ASSERT_VEC_EQ(d, Set(d, expected),
+                      MinOfLanes(d, Load(d, lanes.get())));
+
+    // Pattern 2: lane i holds N - i, so that upper lanes affect the result.
+    // NOTE(review): assumes N is representable in T - confirm for 8-bit T.
+    expected = HighestValue<T>();
+    for (size_t i = 0; i < num_lanes; ++i) {
+      lanes[i] = static_cast<T>(num_lanes - i);
+      expected = HWY_MIN(expected, lanes[i]);
+    }
+    HWY_ASSERT_VEC_EQ(d, Set(d, expected),
+                      MinOfLanes(d, Load(d, lanes.get())));
+
+    // Pattern 3 (bug #910): values that begin with -1 and -2.
+    const T values[] = {static_cast<T>(-1), static_cast<T>(-2),
+                        1,  2,  3,  4,  5,
+                        6,  7,  8,  9,  10,
+                        11, 12, 13, 14};
+    constexpr size_t kNumValues = sizeof(values) / sizeof(values[0]);
+    const size_t num_used = HWY_MIN(num_lanes, kNumValues);
+
+    expected = HighestValue<T>();
+    for (size_t i = 0; i < num_used; ++i) {
+      lanes[i] = values[i];
+      expected = HWY_MIN(expected, values[i]);
+    }
+
+    // Lanes beyond the table get the minimum found so far, which leaves the
+    // result unchanged while making the whole vector loadable.
+    for (size_t i = num_used; i < num_lanes; ++i) {
+      lanes[i] = expected;
+    }
+
+    HWY_ASSERT_VEC_EQ(d, Set(d, expected),
+                      MinOfLanes(d, Load(d, lanes.get())));
+  }
+};
+
+// Checks MaxOfLanes against maxima found by scalar code, on three patterns.
+struct TestMaxOfLanes {
+  // T only selects the lane type; d is passed on to Lanes, Set and Load.
+  template <typename T, class D>
+  HWY_NOINLINE void operator()(T /*unused*/, D d) {
+    const size_t num_lanes = Lanes(d);
+    auto lanes = AllocateAligned<T>(num_lanes);
+
+    // Pattern 1: lane i holds only bit i (never the sign bit, at most 51
+    // bits); lanes at or beyond kBits hold 0.
+    constexpr size_t kBits = HWY_MIN(sizeof(T) * 8 - 1, 51);
+    T expected = LowestValue<T>();
+    for (size_t i = 0; i < num_lanes; ++i) {
+      lanes[i] = i < kBits ? static_cast<T>(1ull << i) : 0;
+      expected = HWY_MAX(expected, lanes[i]);
+    }
+    HWY_ASSERT_VEC_EQ(d, Set(d, expected),
+                      MaxOfLanes(d, Load(d, lanes.get())));
+
+    // Pattern 2: lane i holds i, so that upper lanes affect the result.
+    // NOTE(review): assumes N - 1 is representable in T - confirm for 8-bit T.
+    expected = LowestValue<T>();
+    for (size_t i = 0; i < num_lanes; ++i) {
+      lanes[i] = static_cast<T>(i);
+      expected = HWY_MAX(expected, lanes[i]);
+    }
+    HWY_ASSERT_VEC_EQ(d, Set(d, expected),
+                      MaxOfLanes(d, Load(d, lanes.get())));
+
+    // Pattern 3 (bug #910): values that begin with -1 and -2.
+    const T values[] = {static_cast<T>(-1), static_cast<T>(-2),
+                        1,  2,  3,  4,  5,
+                        6,  7,  8,  9,  10,
+                        11, 12, 13, 14};
+    constexpr size_t kNumValues = sizeof(values) / sizeof(values[0]);
+    const size_t num_used = HWY_MIN(num_lanes, kNumValues);
+
+    expected = LowestValue<T>();
+    for (size_t i = 0; i < num_used; ++i) {
+      lanes[i] = values[i];
+      expected = HWY_MAX(expected, lanes[i]);
+    }
+
+    // Lanes beyond the table get the maximum found so far, which leaves the
+    // result unchanged while making the whole vector loadable.
+    for (size_t i = num_used; i < num_lanes; ++i) {
+      lanes[i] = expected;
+    }
+
+    HWY_ASSERT_VEC_EQ(d, Set(d, expected),
+                      MaxOfLanes(d, Load(d, lanes.get())));
+  }
+};
+
+// Runs the MinOfLanes and MaxOfLanes tests, alternating between the two.
+HWY_NOINLINE void TestAllMinMaxOfLanes() {
+  ForUIF3264(ForPartialVectors<TestMinOfLanes>());
+  ForUIF3264(ForPartialVectors<TestMaxOfLanes>());
+  ForUI16(ForPartialVectors<TestMinOfLanes>());
+  ForUI16(ForPartialVectors<TestMaxOfLanes>());
+
+#if HWY_TARGET == HWY_NEON || HWY_TARGET == HWY_SSE4 || HWY_TARGET == HWY_SSSE3
+  // 8-bit lanes are only exercised on the targets named in the condition.
+  ForUI8(ForGEVectors<64, TestMinOfLanes>());
+  ForUI8(ForGEVectors<64, TestMaxOfLanes>());
+#endif
+}
+
+// Checks SumsOf8 on random inputs against per-group sums from scalar code.
+struct TestSumsOf8 {
+  template <typename T, class D>
+  HWY_NOINLINE void operator()(T /*unused*/, D d) {
+    RandomState rng;
+    const size_t num_lanes = Lanes(d);
+    if (num_lanes < 8) return;  // not even one group of eight lanes
+    const size_t num_groups = num_lanes / 8;
+    const Repartition<uint64_t, D> du64;
+    auto input = AllocateAligned<T>(num_lanes);
+    auto expected = AllocateAligned<uint64_t>(num_groups);
+
+    for (size_t rep = 0; rep < 100; ++rep) {
+      // Fresh random input for each repetition, limited to the low 8 bits.
+      for (size_t i = 0; i < num_lanes; ++i) {
+        input[i] = Random64(&rng) & 0xFF;
+      }
+      // Scalar reference: one 64-bit sum per group of eight consecutive lanes.
+      for (size_t g = 0; g < num_groups; ++g) {
+        const T* group = input.get() + g * 8;
+        uint64_t group_sum = 0;
+        for (size_t k = 0; k < 8; ++k) {
+          group_sum += group[k];
+        }
+        expected[g] = group_sum;
+      }
+      HWY_ASSERT_VEC_EQ(du64, expected.get(), SumsOf8(Load(d, input.get())));
+    }
+  }
+};
+
+HWY_NOINLINE void TestAllSumsOf8() {
+ ForGEVectors<64, TestSumsOf8>()(uint8_t()); // runs TestSumsOf8 for uint8_t only
+}
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+} // namespace HWY_NAMESPACE
+} // namespace hwy
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+// Test registration; the TestAll* names refer to the functions defined above.
+namespace hwy {
+HWY_BEFORE_TEST(HwyReductionTest);
+HWY_EXPORT_AND_TEST_P(HwyReductionTest, TestAllSumOfLanes);
+HWY_EXPORT_AND_TEST_P(HwyReductionTest, TestAllMinMaxOfLanes);
+HWY_EXPORT_AND_TEST_P(HwyReductionTest, TestAllSumsOf8);
+} // namespace hwy
+
+#endif // HWY_ONCE