// Copyright 2019 Google LLC // SPDX-License-Identifier: Apache-2.0 // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. #include #include #undef HWY_TARGET_INCLUDE #define HWY_TARGET_INCLUDE "tests/reduction_test.cc" #include "hwy/foreach_target.h" // IWYU pragma: keep #include "hwy/highway.h" #include "hwy/tests/test_util-inl.h" HWY_BEFORE_NAMESPACE(); namespace hwy { namespace HWY_NAMESPACE { struct TestSumOfLanes { template HWY_NOINLINE void operator()(T /*unused*/, D d) { const size_t N = Lanes(d); auto in_lanes = AllocateAligned(N); // Lane i = bit i, higher lanes 0 double sum = 0.0; // Avoid setting sign bit and cap at double precision constexpr size_t kBits = HWY_MIN(sizeof(T) * 8 - 1, 51); for (size_t i = 0; i < N; ++i) { in_lanes[i] = i < kBits ? static_cast(1ull << i) : 0; sum += static_cast(in_lanes[i]); } HWY_ASSERT_VEC_EQ(d, Set(d, T(sum)), SumOfLanes(d, Load(d, in_lanes.get()))); // Lane i = i (iota) to include upper lanes sum = 0.0; for (size_t i = 0; i < N; ++i) { sum += static_cast(i); } HWY_ASSERT_VEC_EQ(d, Set(d, T(sum)), SumOfLanes(d, Iota(d, 0))); } }; HWY_NOINLINE void TestAllSumOfLanes() { ForUIF3264(ForPartialVectors()); ForUI16(ForPartialVectors()); } struct TestMinOfLanes { template HWY_NOINLINE void operator()(T /*unused*/, D d) { const size_t N = Lanes(d); auto in_lanes = AllocateAligned(N); // Lane i = bit i, higher lanes = 2 (not the minimum) T min = HighestValue(); // Avoid setting sign bit and cap at double precision constexpr size_t kBits = HWY_MIN(sizeof(T) * 8 - 1, 51); for (size_t i = 0; i < N; ++i) { in_lanes[i] = i < kBits ? static_cast(1ull << i) : 2; min = HWY_MIN(min, in_lanes[i]); } HWY_ASSERT_VEC_EQ(d, Set(d, min), MinOfLanes(d, Load(d, in_lanes.get()))); // Lane i = N - i to include upper lanes min = HighestValue(); for (size_t i = 0; i < N; ++i) { in_lanes[i] = static_cast(N - i); // no 8-bit T so no wraparound min = HWY_MIN(min, in_lanes[i]); } HWY_ASSERT_VEC_EQ(d, Set(d, min), MinOfLanes(d, Load(d, in_lanes.get()))); // Bug #910: also check negative values min = HighestValue(); const T input_copy[] = {static_cast(-1), static_cast(-2), 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14}; size_t i = 0; for (; i < HWY_MIN(N, sizeof(input_copy) / sizeof(T)); ++i) { in_lanes[i] = input_copy[i]; min = HWY_MIN(min, input_copy[i]); } // Pad with neutral element to full vector (so we can load) for (; i < N; ++i) { in_lanes[i] = min; } HWY_ASSERT_VEC_EQ(d, Set(d, min), MinOfLanes(d, Load(d, in_lanes.get()))); } }; struct TestMaxOfLanes { template HWY_NOINLINE void operator()(T /*unused*/, D d) { const size_t N = Lanes(d); auto in_lanes = AllocateAligned(N); T max = LowestValue(); // Avoid setting sign bit and cap at double precision constexpr size_t kBits = HWY_MIN(sizeof(T) * 8 - 1, 51); for (size_t i = 0; i < N; ++i) { in_lanes[i] = i < kBits ? static_cast(1ull << i) : 0; max = HWY_MAX(max, in_lanes[i]); } HWY_ASSERT_VEC_EQ(d, Set(d, max), MaxOfLanes(d, Load(d, in_lanes.get()))); // Lane i = i to include upper lanes max = LowestValue(); for (size_t i = 0; i < N; ++i) { in_lanes[i] = static_cast(i); // no 8-bit T so no wraparound max = HWY_MAX(max, in_lanes[i]); } HWY_ASSERT_VEC_EQ(d, Set(d, max), MaxOfLanes(d, Load(d, in_lanes.get()))); // Bug #910: also check negative values max = LowestValue(); const T input_copy[] = {static_cast(-1), static_cast(-2), 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14}; size_t i = 0; for (; i < HWY_MIN(N, sizeof(input_copy) / sizeof(T)); ++i) { in_lanes[i] = input_copy[i]; max = HWY_MAX(max, in_lanes[i]); } // Pad with neutral element to full vector (so we can load) for (; i < N; ++i) { in_lanes[i] = max; } HWY_ASSERT_VEC_EQ(d, Set(d, max), MaxOfLanes(d, Load(d, in_lanes.get()))); } }; HWY_NOINLINE void TestAllMinMaxOfLanes() { const ForPartialVectors test_min; const ForPartialVectors test_max; ForUIF3264(test_min); ForUIF3264(test_max); ForUI16(test_min); ForUI16(test_max); } struct TestSumsOf8 { template HWY_NOINLINE void operator()(T /*unused*/, D d) { RandomState rng; const size_t N = Lanes(d); if (N < 8) return; const Repartition du64; auto in_lanes = AllocateAligned(N); auto sum_lanes = AllocateAligned(N / 8); for (size_t rep = 0; rep < 100; ++rep) { for (size_t i = 0; i < N; ++i) { in_lanes[i] = Random64(&rng) & 0xFF; } for (size_t idx_sum = 0; idx_sum < N / 8; ++idx_sum) { uint64_t sum = 0; for (size_t i = 0; i < 8; ++i) { sum += in_lanes[idx_sum * 8 + i]; } sum_lanes[idx_sum] = sum; } const Vec in = Load(d, in_lanes.get()); HWY_ASSERT_VEC_EQ(du64, sum_lanes.get(), SumsOf8(in)); } } }; HWY_NOINLINE void TestAllSumsOf8() { ForGEVectors<64, TestSumsOf8>()(uint8_t()); } // NOLINTNEXTLINE(google-readability-namespace-comments) } // namespace HWY_NAMESPACE } // namespace hwy HWY_AFTER_NAMESPACE(); #if HWY_ONCE namespace hwy { HWY_BEFORE_TEST(HwyReductionTest); HWY_EXPORT_AND_TEST_P(HwyReductionTest, TestAllSumOfLanes); HWY_EXPORT_AND_TEST_P(HwyReductionTest, TestAllMinMaxOfLanes); HWY_EXPORT_AND_TEST_P(HwyReductionTest, TestAllSumsOf8); } // namespace hwy #endif