diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 09:22:09 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 09:22:09 +0000 |
commit | 43a97878ce14b72f0981164f87f2e35e14151312 (patch) | |
tree | 620249daf56c0258faa40cbdcf9cfba06de2a846 /third_party/highway/hwy/tests/memory_test.cc | |
parent | Initial commit. (diff) | |
download | firefox-43a97878ce14b72f0981164f87f2e35e14151312.tar.xz firefox-43a97878ce14b72f0981164f87f2e35e14151312.zip |
Adding upstream version 110.0.1.upstream/110.0.1upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'third_party/highway/hwy/tests/memory_test.cc')
-rw-r--r-- | third_party/highway/hwy/tests/memory_test.cc | 343 |
1 files changed, 343 insertions, 0 deletions
diff --git a/third_party/highway/hwy/tests/memory_test.cc b/third_party/highway/hwy/tests/memory_test.cc new file mode 100644 index 0000000000..d17addf544 --- /dev/null +++ b/third_party/highway/hwy/tests/memory_test.cc @@ -0,0 +1,343 @@ +// Copyright 2019 Google LLC +// SPDX-License-Identifier: Apache-2.0 +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Ensure incompabilities with Windows macros (e.g. #define StoreFence) are +// detected. Must come before Highway headers. +#include "hwy/base.h" +#if defined(_WIN32) || defined(_WIN64) +#include <windows.h> +#endif + +#include <stddef.h> +#include <stdint.h> + +#include <algorithm> // std::fill + +#undef HWY_TARGET_INCLUDE +#define HWY_TARGET_INCLUDE "tests/memory_test.cc" +#include "hwy/cache_control.h" +#include "hwy/foreach_target.h" // IWYU pragma: keep +#include "hwy/highway.h" +#include "hwy/tests/test_util-inl.h" + +HWY_BEFORE_NAMESPACE(); +namespace hwy { +namespace HWY_NAMESPACE { + +struct TestLoadStore { + template <class T, class D> + HWY_NOINLINE void operator()(T /*unused*/, D d) { + const size_t N = Lanes(d); + const auto hi = Iota(d, static_cast<T>(1 + N)); + const auto lo = Iota(d, 1); + auto lanes = AllocateAligned<T>(2 * N); + Store(hi, d, &lanes[N]); + Store(lo, d, &lanes[0]); + + // Aligned load + const auto lo2 = Load(d, &lanes[0]); + HWY_ASSERT_VEC_EQ(d, lo2, lo); + + // Aligned store + auto lanes2 = AllocateAligned<T>(2 * N); + Store(lo2, d, &lanes2[0]); + Store(hi, d, &lanes2[N]); + for (size_t i = 0; i < 2 * N; ++i) { + HWY_ASSERT_EQ(lanes[i], lanes2[i]); + } + + // Unaligned load + const auto vu = LoadU(d, &lanes[1]); + auto lanes3 = AllocateAligned<T>(N); + Store(vu, d, lanes3.get()); + for (size_t i = 0; i < N; ++i) { + HWY_ASSERT_EQ(T(i + 2), lanes3[i]); + } + + // Unaligned store + StoreU(lo2, d, &lanes2[N / 2]); + size_t i = 0; + for (; i < N / 2; ++i) { + HWY_ASSERT_EQ(lanes[i], lanes2[i]); + } + for (; i < 3 * N / 2; ++i) { + HWY_ASSERT_EQ(T(i - N / 2 + 1), lanes2[i]); + } + // Subsequent values remain unchanged. + for (; i < 2 * N; ++i) { + HWY_ASSERT_EQ(T(i + 1), lanes2[i]); + } + } +}; + +HWY_NOINLINE void TestAllLoadStore() { + ForAllTypes(ForPartialVectors<TestLoadStore>()); +} + +struct TestSafeCopyN { + template <class T, class D> + HWY_NOINLINE void operator()(T /*unused*/, D d) { + const size_t N = Lanes(d); + const auto v = Iota(d, 1); + auto from = AllocateAligned<T>(N + 2); + auto to = AllocateAligned<T>(N + 2); + Store(v, d, from.get()); + + // 0: nothing changes + to[0] = T(); + SafeCopyN(0, d, from.get(), to.get()); + HWY_ASSERT_EQ(T(), to[0]); + + // 1: only first changes + to[1] = T(); + SafeCopyN(1, d, from.get(), to.get()); + HWY_ASSERT_EQ(static_cast<T>(1), to[0]); + HWY_ASSERT_EQ(T(), to[1]); + + // N-1: last does not change + to[N - 1] = T(); + SafeCopyN(N - 1, d, from.get(), to.get()); + HWY_ASSERT_EQ(T(), to[N - 1]); + // Also check preceding lanes + to[N - 1] = static_cast<T>(N); + HWY_ASSERT_VEC_EQ(d, to.get(), v); + + // N: all change + to[N] = T(); + SafeCopyN(N, d, from.get(), to.get()); + HWY_ASSERT_VEC_EQ(d, to.get(), v); + HWY_ASSERT_EQ(T(), to[N]); + + // N+1: subsequent lane does not change if using masked store + to[N + 1] = T(); + SafeCopyN(N + 1, d, from.get(), to.get()); + HWY_ASSERT_VEC_EQ(d, to.get(), v); +#if !HWY_MEM_OPS_MIGHT_FAULT + HWY_ASSERT_EQ(T(), to[N + 1]); +#endif + } +}; + +HWY_NOINLINE void TestAllSafeCopyN() { + ForAllTypes(ForPartialVectors<TestSafeCopyN>()); +} + +struct TestLoadDup128 { + template <class T, class D> + HWY_NOINLINE void operator()(T /*unused*/, D d) { + // Scalar does not define LoadDup128. +#if HWY_TARGET != HWY_SCALAR || HWY_IDE + constexpr size_t N128 = 16 / sizeof(T); + alignas(16) T lanes[N128]; + for (size_t i = 0; i < N128; ++i) { + lanes[i] = static_cast<T>(1 + i); + } + + const size_t N = Lanes(d); + auto expected = AllocateAligned<T>(N); + for (size_t i = 0; i < N; ++i) { + expected[i] = static_cast<T>(i % N128 + 1); + } + + HWY_ASSERT_VEC_EQ(d, expected.get(), LoadDup128(d, lanes)); +#else + (void)d; +#endif + } +}; + +HWY_NOINLINE void TestAllLoadDup128() { + ForAllTypes(ForGEVectors<128, TestLoadDup128>()); +} + +struct TestStream { + template <class T, class D> + HWY_NOINLINE void operator()(T /*unused*/, D d) { + const auto v = Iota(d, T(1)); + const size_t affected_bytes = + (Lanes(d) * sizeof(T) + HWY_STREAM_MULTIPLE - 1) & + ~size_t(HWY_STREAM_MULTIPLE - 1); + const size_t affected_lanes = affected_bytes / sizeof(T); + auto out = AllocateAligned<T>(2 * affected_lanes); + std::fill(out.get(), out.get() + 2 * affected_lanes, T(0)); + + Stream(v, d, out.get()); + FlushStream(); + const auto actual = Load(d, out.get()); + HWY_ASSERT_VEC_EQ(d, v, actual); + // Ensure Stream didn't modify more memory than expected + for (size_t i = affected_lanes; i < 2 * affected_lanes; ++i) { + HWY_ASSERT_EQ(T(0), out[i]); + } + } +}; + +HWY_NOINLINE void TestAllStream() { + const ForPartialVectors<TestStream> test; + // No u8,u16. + test(uint32_t()); + test(uint64_t()); + // No i8,i16. + test(int32_t()); + test(int64_t()); + ForFloatTypes(test); +} + +// Assumes little-endian byte order! +struct TestScatter { + template <class T, class D> + HWY_NOINLINE void operator()(T /*unused*/, D d) { + using Offset = MakeSigned<T>; + + const size_t N = Lanes(d); + const size_t range = 4 * N; // number of items to scatter + const size_t max_bytes = range * sizeof(T); // upper bound on offset + + RandomState rng; + + // Data to be scattered + auto bytes = AllocateAligned<uint8_t>(max_bytes); + for (size_t i = 0; i < max_bytes; ++i) { + bytes[i] = static_cast<uint8_t>(Random32(&rng) & 0xFF); + } + const auto data = Load(d, reinterpret_cast<const T*>(bytes.get())); + + // Scatter into these regions, ensure vector results match scalar + auto expected = AllocateAligned<T>(range); + auto actual = AllocateAligned<T>(range); + + const Rebind<Offset, D> d_offsets; + auto offsets = AllocateAligned<Offset>(N); // or indices + + for (size_t rep = 0; rep < 100; ++rep) { + // Byte offsets + std::fill(expected.get(), expected.get() + range, T(0)); + std::fill(actual.get(), actual.get() + range, T(0)); + for (size_t i = 0; i < N; ++i) { + // Must be aligned + offsets[i] = static_cast<Offset>((Random32(&rng) % range) * sizeof(T)); + CopyBytes<sizeof(T)>( + bytes.get() + i * sizeof(T), + reinterpret_cast<uint8_t*>(expected.get()) + offsets[i]); + } + const auto voffsets = Load(d_offsets, offsets.get()); + ScatterOffset(data, d, actual.get(), voffsets); + if (!BytesEqual(expected.get(), actual.get(), max_bytes)) { + Print(d, "Data", data); + Print(d_offsets, "Offsets", voffsets); + HWY_ASSERT(false); + } + + // Indices + std::fill(expected.get(), expected.get() + range, T(0)); + std::fill(actual.get(), actual.get() + range, T(0)); + for (size_t i = 0; i < N; ++i) { + offsets[i] = static_cast<Offset>(Random32(&rng) % range); + CopyBytes<sizeof(T)>(bytes.get() + i * sizeof(T), + &expected[size_t(offsets[i])]); + } + const auto vindices = Load(d_offsets, offsets.get()); + ScatterIndex(data, d, actual.get(), vindices); + if (!BytesEqual(expected.get(), actual.get(), max_bytes)) { + Print(d, "Data", data); + Print(d_offsets, "Indices", vindices); + HWY_ASSERT(false); + } + } + } +}; + +HWY_NOINLINE void TestAllScatter() { + ForUIF3264(ForPartialVectors<TestScatter>()); +} + +struct TestGather { + template <class T, class D> + HWY_NOINLINE void operator()(T /*unused*/, D d) { + using Offset = MakeSigned<T>; + + const size_t N = Lanes(d); + const size_t range = 4 * N; // number of items to gather + const size_t max_bytes = range * sizeof(T); // upper bound on offset + + RandomState rng; + + // Data to be gathered from + auto bytes = AllocateAligned<uint8_t>(max_bytes); + for (size_t i = 0; i < max_bytes; ++i) { + bytes[i] = static_cast<uint8_t>(Random32(&rng) & 0xFF); + } + + auto expected = AllocateAligned<T>(N); + auto offsets = AllocateAligned<Offset>(N); + auto indices = AllocateAligned<Offset>(N); + + for (size_t rep = 0; rep < 100; ++rep) { + // Offsets + for (size_t i = 0; i < N; ++i) { + // Must be aligned + offsets[i] = static_cast<Offset>((Random32(&rng) % range) * sizeof(T)); + CopyBytes<sizeof(T)>(bytes.get() + offsets[i], &expected[i]); + } + + const Rebind<Offset, D> d_offset; + const T* base = reinterpret_cast<const T*>(bytes.get()); + auto actual = GatherOffset(d, base, Load(d_offset, offsets.get())); + HWY_ASSERT_VEC_EQ(d, expected.get(), actual); + + // Indices + for (size_t i = 0; i < N; ++i) { + indices[i] = + static_cast<Offset>(Random32(&rng) % (max_bytes / sizeof(T))); + CopyBytes<sizeof(T)>(base + indices[i], &expected[i]); + } + actual = GatherIndex(d, base, Load(d_offset, indices.get())); + HWY_ASSERT_VEC_EQ(d, expected.get(), actual); + } + } +}; + +HWY_NOINLINE void TestAllGather() { + ForUIF3264(ForPartialVectors<TestGather>()); +} + +HWY_NOINLINE void TestAllCache() { + LoadFence(); + FlushStream(); + int test = 0; + Prefetch(&test); + FlushCacheline(&test); + Pause(); +} + +// NOLINTNEXTLINE(google-readability-namespace-comments) +} // namespace HWY_NAMESPACE +} // namespace hwy +HWY_AFTER_NAMESPACE(); + +#if HWY_ONCE + +namespace hwy { +HWY_BEFORE_TEST(HwyMemoryTest); +HWY_EXPORT_AND_TEST_P(HwyMemoryTest, TestAllLoadStore); +HWY_EXPORT_AND_TEST_P(HwyMemoryTest, TestAllSafeCopyN); +HWY_EXPORT_AND_TEST_P(HwyMemoryTest, TestAllLoadDup128); +HWY_EXPORT_AND_TEST_P(HwyMemoryTest, TestAllStream); +HWY_EXPORT_AND_TEST_P(HwyMemoryTest, TestAllScatter); +HWY_EXPORT_AND_TEST_P(HwyMemoryTest, TestAllGather); +HWY_EXPORT_AND_TEST_P(HwyMemoryTest, TestAllCache); +} // namespace hwy + +#endif |