From 6bf0a5cb5034a7e684dcc3500e841785237ce2dd Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Sun, 7 Apr 2024 19:32:43 +0200 Subject: Adding upstream version 1:115.7.0. Signed-off-by: Daniel Baumann --- third_party/highway/hwy/contrib/sort/sort_test.cc | 626 ++++++++++++++++++++++ 1 file changed, 626 insertions(+) create mode 100644 third_party/highway/hwy/contrib/sort/sort_test.cc (limited to 'third_party/highway/hwy/contrib/sort/sort_test.cc') diff --git a/third_party/highway/hwy/contrib/sort/sort_test.cc b/third_party/highway/hwy/contrib/sort/sort_test.cc new file mode 100644 index 0000000000..2d1f1d5169 --- /dev/null +++ b/third_party/highway/hwy/contrib/sort/sort_test.cc @@ -0,0 +1,626 @@ +// Copyright 2021 Google LLC +// SPDX-License-Identifier: Apache-2.0 +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef __STDC_FORMAT_MACROS +#define __STDC_FORMAT_MACROS // before inttypes.h +#endif +#include +#include +#include +#include // memcpy + +#include +#include + +// clang-format off +#undef HWY_TARGET_INCLUDE +#define HWY_TARGET_INCLUDE "hwy/contrib/sort/sort_test.cc" +#include "hwy/foreach_target.h" // IWYU pragma: keep + +#include "hwy/contrib/sort/vqsort.h" +// After foreach_target +#include "hwy/contrib/sort/algo-inl.h" +#include "hwy/contrib/sort/traits128-inl.h" +#include "hwy/contrib/sort/result-inl.h" +#include "hwy/contrib/sort/vqsort-inl.h" // BaseCase +#include "hwy/tests/test_util-inl.h" +// clang-format on + +HWY_BEFORE_NAMESPACE(); +namespace hwy { +namespace HWY_NAMESPACE { +namespace { + +using detail::OrderAscending; +using detail::OrderDescending; +using detail::SharedTraits; +using detail::TraitsLane; +#if VQSORT_ENABLED || HWY_IDE +using detail::OrderAscending128; +using detail::OrderAscendingKV128; +using detail::OrderAscendingKV64; +using detail::OrderDescending128; +using detail::OrderDescendingKV128; +using detail::OrderDescendingKV64; +using detail::Traits128; + +template +static HWY_NOINLINE void TestMedian3() { + using LaneType = typename Traits::LaneType; + using D = CappedTag; + SharedTraits st; + const D d; + using V = Vec; + for (uint32_t bits = 0; bits < 8; ++bits) { + const V v0 = Set(d, LaneType{(bits & (1u << 0)) ? 1u : 0u}); + const V v1 = Set(d, LaneType{(bits & (1u << 1)) ? 1u : 0u}); + const V v2 = Set(d, LaneType{(bits & (1u << 2)) ? 1u : 0u}); + const LaneType m = GetLane(detail::MedianOf3(st, v0, v1, v2)); + // If at least half(rounded up) of bits are 1, so is the median. + const size_t count = PopCount(bits); + HWY_ASSERT_EQ((count >= 2) ? static_cast(1) : 0, m); + } +} + +HWY_NOINLINE void TestAllMedian() { + TestMedian3 > >(); +} + +template +static HWY_NOINLINE void TestBaseCaseAscDesc() { + using LaneType = typename Traits::LaneType; + SharedTraits st; + const SortTag d; + const size_t N = Lanes(d); + const size_t base_case_num = SortConstants::BaseCaseNum(N); + const size_t N1 = st.LanesPerKey(); + + constexpr int kDebug = 0; + auto aligned_lanes = hwy::AllocateAligned(N + base_case_num + N); + auto buf = hwy::AllocateAligned(base_case_num + 2 * N); + + std::vector lengths; + lengths.push_back(HWY_MAX(1, N1)); + lengths.push_back(3 * N1); + lengths.push_back(base_case_num / 2); + lengths.push_back(base_case_num / 2 + N1); + lengths.push_back(base_case_num - N1); + lengths.push_back(base_case_num); + + std::vector misalignments; + misalignments.push_back(0); + misalignments.push_back(1); + if (N >= 6) misalignments.push_back(N / 2 - 1); + misalignments.push_back(N / 2); + misalignments.push_back(N / 2 + 1); + misalignments.push_back(HWY_MIN(2 * N / 3 + 3, size_t{N - 1})); + + for (bool asc : {false, true}) { + for (size_t len : lengths) { + for (size_t misalign : misalignments) { + LaneType* HWY_RESTRICT lanes = aligned_lanes.get() + misalign; + if (kDebug) { + printf("============%s asc %d N1 %d len %d misalign %d\n", + st.KeyString().c_str(), asc, static_cast(N1), + static_cast(len), static_cast(misalign)); + } + + for (size_t i = 0; i < misalign; ++i) { + aligned_lanes[i] = hwy::LowestValue(); + } + InputStats input_stats; + for (size_t i = 0; i < len; ++i) { + lanes[i] = asc ? static_cast(LaneType(i) + 1) + : static_cast(LaneType(len) - LaneType(i)); + input_stats.Notify(lanes[i]); + if (kDebug >= 2) { + printf("%3zu: %f\n", i, static_cast(lanes[i])); + } + } + for (size_t i = len; i < base_case_num + N; ++i) { + lanes[i] = hwy::LowestValue(); + } + + detail::BaseCase(d, st, lanes, lanes + len, len, buf.get()); + + if (kDebug >= 2) { + printf("out>>>>>>\n"); + for (size_t i = 0; i < len; ++i) { + printf("%3zu: %f\n", i, static_cast(lanes[i])); + } + } + + HWY_ASSERT(VerifySort(st, input_stats, lanes, len, "BaseAscDesc")); + for (size_t i = 0; i < misalign; ++i) { + if (aligned_lanes[i] != hwy::LowestValue()) + HWY_ABORT("Overrun misalign at %d\n", static_cast(i)); + } + for (size_t i = len; i < base_case_num + N; ++i) { + if (lanes[i] != hwy::LowestValue()) + HWY_ABORT("Overrun right at %d\n", static_cast(i)); + } + } // misalign + } // len + } // asc +} + +template +static HWY_NOINLINE void TestBaseCase01() { + using LaneType = typename Traits::LaneType; + SharedTraits st; + const SortTag d; + const size_t N = Lanes(d); + const size_t base_case_num = SortConstants::BaseCaseNum(N); + const size_t N1 = st.LanesPerKey(); + + constexpr int kDebug = 0; + auto lanes = hwy::AllocateAligned(base_case_num + N); + auto buf = hwy::AllocateAligned(base_case_num + 2 * N); + + std::vector lengths; + lengths.push_back(HWY_MAX(1, N1)); + lengths.push_back(3 * N1); + lengths.push_back(base_case_num / 2); + lengths.push_back(base_case_num / 2 + N1); + lengths.push_back(base_case_num - N1); + lengths.push_back(base_case_num); + + for (size_t len : lengths) { + if (kDebug) { + printf("============%s 01 N1 %d len %d\n", st.KeyString().c_str(), + static_cast(N1), static_cast(len)); + } + const uint64_t kMaxBits = AdjustedLog2Reps(HWY_MIN(len, size_t{14})); + for (uint64_t bits = 0; bits < ((1ull << kMaxBits) - 1); ++bits) { + InputStats input_stats; + for (size_t i = 0; i < len; ++i) { + lanes[i] = (i < 64 && (bits & (1ull << i))) ? 1 : 0; + input_stats.Notify(lanes[i]); + if (kDebug >= 2) { + printf("%3zu: %f\n", i, static_cast(lanes[i])); + } + } + for (size_t i = len; i < base_case_num + N; ++i) { + lanes[i] = hwy::LowestValue(); + } + + detail::BaseCase(d, st, lanes.get(), lanes.get() + len, len, buf.get()); + + if (kDebug >= 2) { + printf("out>>>>>>\n"); + for (size_t i = 0; i < len; ++i) { + printf("%3zu: %f\n", i, static_cast(lanes[i])); + } + } + + HWY_ASSERT(VerifySort(st, input_stats, lanes.get(), len, "Base01")); + for (size_t i = len; i < base_case_num + N; ++i) { + if (lanes[i] != hwy::LowestValue()) + HWY_ABORT("Overrun right at %d\n", static_cast(i)); + } + } // bits + } // len +} + +template +static HWY_NOINLINE void TestBaseCase() { + TestBaseCaseAscDesc(); + TestBaseCase01(); +} + +HWY_NOINLINE void TestAllBaseCase() { + // Workaround for stack overflow on MSVC debug. +#if defined(_MSC_VER) + return; +#endif + TestBaseCase > >(); + TestBaseCase > >(); + TestBaseCase >(); + TestBaseCase >(); +} + +template +static HWY_NOINLINE void VerifyPartition( + Traits st, typename Traits::LaneType* HWY_RESTRICT lanes, size_t left, + size_t border, size_t right, const size_t N1, + const typename Traits::LaneType* pivot) { + /* for (size_t i = left; i < right; ++i) { + if (i == border) printf("--\n"); + printf("%4zu: %3d\n", i, lanes[i]); + }*/ + + HWY_ASSERT(left % N1 == 0); + HWY_ASSERT(border % N1 == 0); + HWY_ASSERT(right % N1 == 0); + const bool asc = typename Traits::Order().IsAscending(); + for (size_t i = left; i < border; i += N1) { + if (st.Compare1(pivot, lanes + i)) { + HWY_ABORT( + "%s: asc %d left[%d] piv %.0f %.0f compares before %.0f %.0f " + "border %d", + st.KeyString().c_str(), asc, static_cast(i), + static_cast(pivot[1]), static_cast(pivot[0]), + static_cast(lanes[i + 1]), static_cast(lanes[i + 0]), + static_cast(border)); + } + } + for (size_t i = border; i < right; i += N1) { + if (!st.Compare1(pivot, lanes + i)) { + HWY_ABORT( + "%s: asc %d right[%d] piv %.0f %.0f compares after %.0f %.0f " + "border %d", + st.KeyString().c_str(), asc, static_cast(i), + static_cast(pivot[1]), static_cast(pivot[0]), + static_cast(lanes[i + 1]), static_cast(lanes[i]), + static_cast(border)); + } + } +} + +template +static HWY_NOINLINE void TestPartition() { + using LaneType = typename Traits::LaneType; + const SortTag d; + SharedTraits st; + const bool asc = typename Traits::Order().IsAscending(); + const size_t N = Lanes(d); + constexpr int kDebug = 0; + const size_t base_case_num = SortConstants::BaseCaseNum(N); + // left + len + align + const size_t total = 32 + (base_case_num + 4 * HWY_MAX(N, 4)) + 2 * N; + auto aligned_lanes = hwy::AllocateAligned(total); + auto buf = hwy::AllocateAligned(SortConstants::PartitionBufNum(N)); + + const size_t N1 = st.LanesPerKey(); + for (bool in_asc : {false, true}) { + for (int left_i : {0, 1, 4, 6, 7, 8, 12, 15, 22, 28, 30, 31}) { + const size_t left = static_cast(left_i) & ~(N1 - 1); + for (size_t ofs : {N, N + 1, N + 3, 2 * N, 2 * N + 2, 2 * N + 3, + 3 * N - 1, 4 * N - 3, 4 * N - 2}) { + const size_t len = (base_case_num + ofs) & ~(N1 - 1); + for (LaneType pivot1 : + {LaneType(0), LaneType(len / 3), LaneType(len / 2), + LaneType(2 * len / 3), LaneType(len)}) { + const LaneType pivot2[2] = {pivot1, 0}; + const auto pivot = st.SetKey(d, pivot2); + for (size_t misalign = 0; misalign < N; + misalign += st.LanesPerKey()) { + LaneType* HWY_RESTRICT lanes = aligned_lanes.get() + misalign; + const size_t right = left + len; + if (kDebug) { + printf( + "=========%s asc %d left %d len %d right %d piv %.0f %.0f\n", + st.KeyString().c_str(), asc, static_cast(left), + static_cast(len), static_cast(right), + static_cast(pivot2[1]), + static_cast(pivot2[0])); + } + + for (size_t i = 0; i < misalign; ++i) { + aligned_lanes[i] = hwy::LowestValue(); + } + for (size_t i = 0; i < left; ++i) { + lanes[i] = hwy::LowestValue(); + } + std::unordered_map counts; + for (size_t i = left; i < right; ++i) { + lanes[i] = static_cast( + in_asc ? LaneType(i + 1) - static_cast(left) + : static_cast(right) - LaneType(i)); + ++counts[lanes[i]]; + if (kDebug >= 2) { + printf("%3zu: %f\n", i, static_cast(lanes[i])); + } + } + for (size_t i = right; i < total - misalign; ++i) { + lanes[i] = hwy::LowestValue(); + } + + size_t border = + left + detail::Partition(d, st, lanes + left, right - left, + pivot, buf.get()); + + if (kDebug >= 2) { + printf("out>>>>>>\n"); + for (size_t i = left; i < right; ++i) { + printf("%3zu: %f\n", i, static_cast(lanes[i])); + } + for (size_t i = right; i < total - misalign; ++i) { + printf("%3zu: sentinel %f\n", i, static_cast(lanes[i])); + } + } + for (size_t i = left; i < right; ++i) { + --counts[lanes[i]]; + } + for (auto kv : counts) { + if (kv.second != 0) { + PrintValue(kv.first); + HWY_ABORT("Incorrect count %d\n", kv.second); + } + } + VerifyPartition(st, lanes, left, border, right, N1, pivot2); + for (size_t i = 0; i < misalign; ++i) { + if (aligned_lanes[i] != hwy::LowestValue()) + HWY_ABORT("Overrun misalign at %d\n", static_cast(i)); + } + for (size_t i = 0; i < left; ++i) { + if (lanes[i] != hwy::LowestValue()) + HWY_ABORT("Overrun left at %d\n", static_cast(i)); + } + for (size_t i = right; i < total - misalign; ++i) { + if (lanes[i] != hwy::LowestValue()) + HWY_ABORT("Overrun right at %d\n", static_cast(i)); + } + } // misalign + } // pivot + } // len + } // left + } // asc +} + +HWY_NOINLINE void TestAllPartition() { + TestPartition > >(); + TestPartition >(); + +#if !HWY_IS_DEBUG_BUILD + TestPartition > >(); + TestPartition > >(); + TestPartition > >(); +#if HWY_HAVE_FLOAT64 + TestPartition > >(); +#endif + TestPartition >(); +#endif +} + +// (used for sample selection for choosing a pivot) +template +static HWY_NOINLINE void TestRandomGenerator() { + static_assert(!hwy::IsSigned(), ""); + SortTag du; + const size_t N = Lanes(du); + + detail::Generator rng(&N, N); + + const size_t lanes_per_block = HWY_MAX(64 / sizeof(TU), N); // power of two + + for (uint32_t num_blocks = 2; num_blocks < 100000; + num_blocks = 3 * num_blocks / 2) { + // Generate some numbers and ensure all are in range + uint64_t sum = 0; + constexpr size_t kReps = 10000; + for (size_t rep = 0; rep < kReps; ++rep) { + const uint32_t bits = rng() & 0xFFFFFFFF; + const size_t index = detail::RandomChunkIndex(num_blocks, bits); + HWY_ASSERT(((index + 1) * lanes_per_block) <= + num_blocks * lanes_per_block); + + sum += index; + } + + // Also ensure the mean is near the middle of the range + const double expected = (num_blocks - 1) / 2.0; + const double actual = static_cast(sum) / kReps; + HWY_ASSERT(0.9 * expected <= actual && actual <= 1.1 * expected); + } +} + +HWY_NOINLINE void TestAllGenerator() { + TestRandomGenerator(); + TestRandomGenerator(); +} + +#else +static void TestAllMedian() {} +static void TestAllBaseCase() {} +static void TestAllPartition() {} +static void TestAllGenerator() {} +#endif // VQSORT_ENABLED + +// Remembers input, and compares results to that of a reference algorithm. +template +class CompareResults { + using LaneType = typename Traits::LaneType; + using KeyType = typename Traits::KeyType; + + public: + CompareResults(const LaneType* in, size_t num_lanes) { + copy_.resize(num_lanes); + memcpy(copy_.data(), in, num_lanes * sizeof(LaneType)); + } + + bool Verify(const LaneType* output) { +#if HAVE_PDQSORT + const Algo reference = Algo::kPDQ; +#else + const Algo reference = Algo::kStd; +#endif + SharedState shared; + using Order = typename Traits::Order; + const Traits st; + const size_t num_keys = copy_.size() / st.LanesPerKey(); + Run(reference, reinterpret_cast(copy_.data()), num_keys, + shared, /*thread=*/0); +#if VQSORT_PRINT >= 3 + fprintf(stderr, "\nExpected:\n"); + for (size_t i = 0; i < copy_.size(); ++i) { + PrintValue(copy_[i]); + } + fprintf(stderr, "\n"); +#endif + for (size_t i = 0; i < copy_.size(); ++i) { + if (copy_[i] != output[i]) { + if (sizeof(KeyType) == 16) { + fprintf(stderr, + "%s Asc %d mismatch at %d of %d: %" PRIu64 " %" PRIu64 "\n", + st.KeyString().c_str(), Order().IsAscending(), + static_cast(i), static_cast(copy_.size()), + static_cast(copy_[i]), + static_cast(output[i])); + } else { + fprintf(stderr, "Type %s Asc %d mismatch at %d of %d: ", + st.KeyString().c_str(), Order().IsAscending(), + static_cast(i), static_cast(copy_.size())); + PrintValue(copy_[i]); + PrintValue(output[i]); + fprintf(stderr, "\n"); + } + return false; + } + } + return true; + } + + private: + std::vector copy_; +}; + +std::vector AlgoForTest() { + return { +#if HAVE_AVX2SORT + Algo::kSEA, +#endif +#if HAVE_IPS4O + Algo::kIPS4O, +#endif +#if HAVE_PDQSORT + Algo::kPDQ, +#endif +#if HAVE_SORT512 + Algo::kSort512, +#endif + Algo::kHeap, Algo::kVQSort, + }; +} + +template +void TestSort(size_t num_lanes) { +// Workaround for stack overflow on clang-cl (/F 8388608 does not help). +#if defined(_MSC_VER) + return; +#endif + using Order = typename Traits::Order; + using LaneType = typename Traits::LaneType; + using KeyType = typename Traits::KeyType; + SharedState shared; + SharedTraits st; + + // Round up to a whole number of keys. + num_lanes += (st.Is128() && (num_lanes & 1)); + const size_t num_keys = num_lanes / st.LanesPerKey(); + + constexpr size_t kMaxMisalign = 16; + auto aligned = + hwy::AllocateAligned(kMaxMisalign + num_lanes + kMaxMisalign); + for (Algo algo : AlgoForTest()) { + for (Dist dist : AllDist()) { + for (size_t misalign : {size_t{0}, size_t{st.LanesPerKey()}, + size_t{3 * st.LanesPerKey()}, kMaxMisalign / 2}) { + LaneType* lanes = aligned.get() + misalign; + + // Set up red zones before/after the keys to sort + for (size_t i = 0; i < misalign; ++i) { + aligned[i] = hwy::LowestValue(); + } + for (size_t i = 0; i < kMaxMisalign; ++i) { + lanes[num_lanes + i] = hwy::HighestValue(); + } +#if HWY_IS_MSAN + __msan_poison(aligned.get(), misalign * sizeof(LaneType)); + __msan_poison(lanes + num_lanes, kMaxMisalign * sizeof(LaneType)); +#endif + InputStats input_stats = + GenerateInput(dist, lanes, num_lanes); + + CompareResults compare(lanes, num_lanes); + Run(algo, reinterpret_cast(lanes), num_keys, shared, + /*thread=*/0); + HWY_ASSERT(compare.Verify(lanes)); + HWY_ASSERT(VerifySort(st, input_stats, lanes, num_lanes, "TestSort")); + + // Check red zones +#if HWY_IS_MSAN + __msan_unpoison(aligned.get(), misalign * sizeof(LaneType)); + __msan_unpoison(lanes + num_lanes, kMaxMisalign * sizeof(LaneType)); +#endif + for (size_t i = 0; i < misalign; ++i) { + if (aligned[i] != hwy::LowestValue()) + HWY_ABORT("Overrun left at %d\n", static_cast(i)); + } + for (size_t i = num_lanes; i < num_lanes + kMaxMisalign; ++i) { + if (lanes[i] != hwy::HighestValue()) + HWY_ABORT("Overrun right at %d\n", static_cast(i)); + } + } // misalign + } // dist + } // algo +} + +void TestAllSort() { + for (int num : {129, 504, 3 * 1000, 34567}) { + const size_t num_lanes = AdjustedReps(static_cast(num)); + TestSort > >(num_lanes); + TestSort > >(num_lanes); + + TestSort > >(num_lanes); + TestSort > >(num_lanes); + + TestSort > >(num_lanes); + TestSort > >(num_lanes); + + // WARNING: for float types, SIMD comparisons will flush denormals to + // zero, causing mismatches with scalar sorts. In this test, we avoid + // generating denormal inputs. + TestSort > >(num_lanes); +#if HWY_HAVE_FLOAT64 // protects algo-inl's GenerateRandom + if (Sorter::HaveFloat64()) { + TestSort > >(num_lanes); + } +#endif + +// Our HeapSort does not support 128-bit keys. +#if VQSORT_ENABLED + TestSort >(num_lanes); + TestSort >(num_lanes); + + TestSort >(num_lanes); + TestSort >(num_lanes); + + TestSort >(num_lanes); + TestSort >(num_lanes); +#endif + } +} + +} // namespace +// NOLINTNEXTLINE(google-readability-namespace-comments) +} // namespace HWY_NAMESPACE +} // namespace hwy +HWY_AFTER_NAMESPACE(); + +#if HWY_ONCE + +namespace hwy { +namespace { +HWY_BEFORE_TEST(SortTest); +HWY_EXPORT_AND_TEST_P(SortTest, TestAllMedian); +HWY_EXPORT_AND_TEST_P(SortTest, TestAllBaseCase); +HWY_EXPORT_AND_TEST_P(SortTest, TestAllPartition); +HWY_EXPORT_AND_TEST_P(SortTest, TestAllGenerator); +HWY_EXPORT_AND_TEST_P(SortTest, TestAllSort); +} // namespace +} // namespace hwy + +#endif // HWY_ONCE -- cgit v1.2.3