From 6bf0a5cb5034a7e684dcc3500e841785237ce2dd Mon Sep 17 00:00:00 2001
From: Daniel Baumann
Date: Sun, 7 Apr 2024 19:32:43 +0200
Subject: Adding upstream version 1:115.7.0.

Signed-off-by: Daniel Baumann
---
 third_party/highway/hwy/tests/convert_test.cc | 643 ++++++++++++++++++++++++++
 1 file changed, 643 insertions(+)
 create mode 100644 third_party/highway/hwy/tests/convert_test.cc

(limited to 'third_party/highway/hwy/tests/convert_test.cc')

diff --git a/third_party/highway/hwy/tests/convert_test.cc b/third_party/highway/hwy/tests/convert_test.cc
new file mode 100644
index 0000000000..a7aea5fe9e
--- /dev/null
+++ b/third_party/highway/hwy/tests/convert_test.cc
@@ -0,0 +1,643 @@
// Copyright 2019 Google LLC
// SPDX-License-Identifier: Apache-2.0
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <stddef.h>
#include <stdint.h>
#include <string.h>

#include <cmath>  // std::isfinite

#include "hwy/base.h"

#undef HWY_TARGET_INCLUDE
#define HWY_TARGET_INCLUDE "tests/convert_test.cc"
#include "hwy/foreach_target.h"  // IWYU pragma: keep
#include "hwy/highway.h"
#include "hwy/tests/test_util-inl.h"

HWY_BEFORE_NAMESPACE();
namespace hwy {
namespace HWY_NAMESPACE {

// Cast and ensure bytes are the same. Called directly from TestAllBitCast or
// via TestBitCastFrom.
+template +struct TestBitCast { + template + HWY_NOINLINE void operator()(T /*unused*/, D d) { + const Repartition dto; + const size_t N = Lanes(d); + const size_t Nto = Lanes(dto); + if (N == 0 || Nto == 0) return; + HWY_ASSERT_EQ(N * sizeof(T), Nto * sizeof(ToT)); + const auto vf = Iota(d, 1); + const auto vt = BitCast(dto, vf); + // Must return the same bits + auto from_lanes = AllocateAligned(Lanes(d)); + auto to_lanes = AllocateAligned(Lanes(dto)); + Store(vf, d, from_lanes.get()); + Store(vt, dto, to_lanes.get()); + HWY_ASSERT( + BytesEqual(from_lanes.get(), to_lanes.get(), Lanes(d) * sizeof(T))); + } +}; + +// From D to all types. +struct TestBitCastFrom { + template + HWY_NOINLINE void operator()(T t, D d) { + TestBitCast()(t, d); + TestBitCast()(t, d); + TestBitCast()(t, d); +#if HWY_HAVE_INTEGER64 + TestBitCast()(t, d); +#endif + TestBitCast()(t, d); + TestBitCast()(t, d); + TestBitCast()(t, d); +#if HWY_HAVE_INTEGER64 + TestBitCast()(t, d); +#endif + TestBitCast()(t, d); +#if HWY_HAVE_FLOAT64 + TestBitCast()(t, d); +#endif + } +}; + +HWY_NOINLINE void TestAllBitCast() { + // For HWY_SCALAR and partial vectors, we can only cast to same-sized types: + // the former can't partition its single lane, and the latter can be smaller + // than a destination type. 
+ const ForPartialVectors> to_u8; + to_u8(uint8_t()); + to_u8(int8_t()); + + const ForPartialVectors> to_i8; + to_i8(uint8_t()); + to_i8(int8_t()); + + const ForPartialVectors> to_u16; + to_u16(uint16_t()); + to_u16(int16_t()); + + const ForPartialVectors> to_i16; + to_i16(uint16_t()); + to_i16(int16_t()); + + const ForPartialVectors> to_u32; + to_u32(uint32_t()); + to_u32(int32_t()); + to_u32(float()); + + const ForPartialVectors> to_i32; + to_i32(uint32_t()); + to_i32(int32_t()); + to_i32(float()); + +#if HWY_HAVE_INTEGER64 + const ForPartialVectors> to_u64; + to_u64(uint64_t()); + to_u64(int64_t()); +#if HWY_HAVE_FLOAT64 + to_u64(double()); +#endif + + const ForPartialVectors> to_i64; + to_i64(uint64_t()); + to_i64(int64_t()); +#if HWY_HAVE_FLOAT64 + to_i64(double()); +#endif +#endif // HWY_HAVE_INTEGER64 + + const ForPartialVectors> to_float; + to_float(uint32_t()); + to_float(int32_t()); + to_float(float()); + +#if HWY_HAVE_FLOAT64 + const ForPartialVectors> to_double; + to_double(double()); +#if HWY_HAVE_INTEGER64 + to_double(uint64_t()); + to_double(int64_t()); +#endif // HWY_HAVE_INTEGER64 +#endif // HWY_HAVE_FLOAT64 + +#if HWY_TARGET != HWY_SCALAR + // For non-scalar vectors, we can cast all types to all. 
+ ForAllTypes(ForGEVectors<64, TestBitCastFrom>()); +#endif +} + +template +struct TestPromoteTo { + template + HWY_NOINLINE void operator()(T /*unused*/, D from_d) { + static_assert(sizeof(T) < sizeof(ToT), "Input type must be narrower"); + const Rebind to_d; + + const size_t N = Lanes(from_d); + auto from = AllocateAligned(N); + auto expected = AllocateAligned(N); + + RandomState rng; + for (size_t rep = 0; rep < AdjustedReps(200); ++rep) { + for (size_t i = 0; i < N; ++i) { + const uint64_t bits = rng(); + CopyBytes(&bits, &from[i]); // not same size + expected[i] = from[i]; + } + + HWY_ASSERT_VEC_EQ(to_d, expected.get(), + PromoteTo(to_d, Load(from_d, from.get()))); + } + } +}; + +HWY_NOINLINE void TestAllPromoteTo() { + const ForPromoteVectors, 1> to_u16div2; + to_u16div2(uint8_t()); + + const ForPromoteVectors, 2> to_u32div4; + to_u32div4(uint8_t()); + + const ForPromoteVectors, 1> to_u32div2; + to_u32div2(uint16_t()); + + const ForPromoteVectors, 1> to_i16div2; + to_i16div2(uint8_t()); + to_i16div2(int8_t()); + + const ForPromoteVectors, 1> to_i32div2; + to_i32div2(uint16_t()); + to_i32div2(int16_t()); + + const ForPromoteVectors, 2> to_i32div4; + to_i32div4(uint8_t()); + to_i32div4(int8_t()); + + // Must test f16/bf16 separately because we can only load/store/convert them. + +#if HWY_HAVE_INTEGER64 + const ForPromoteVectors, 1> to_u64div2; + to_u64div2(uint32_t()); + + const ForPromoteVectors, 1> to_i64div2; + to_i64div2(int32_t()); +#endif + +#if HWY_HAVE_FLOAT64 + const ForPromoteVectors, 1> to_f64div2; + to_f64div2(int32_t()); + to_f64div2(float()); +#endif +} + +template +bool IsFinite(T t) { + return std::isfinite(t); +} +// Wrapper avoids calling std::isfinite for integer types (ambiguous). 
+template +bool IsFinite(T /*unused*/) { + return true; +} + +template +AlignedFreeUniquePtr F16TestCases(D d, size_t& padded) { + const float test_cases[] = { + // +/- 1 + 1.0f, -1.0f, + // +/- 0 + 0.0f, -0.0f, + // near 0 + 0.25f, -0.25f, + // +/- integer + 4.0f, -32.0f, + // positive near limit + 65472.0f, 65504.0f, + // negative near limit + -65472.0f, -65504.0f, + // positive +/- delta + 2.00390625f, 3.99609375f, + // negative +/- delta + -2.00390625f, -3.99609375f, + // No infinity/NaN - implementation-defined due to ARM. + }; + constexpr size_t kNumTestCases = sizeof(test_cases) / sizeof(test_cases[0]); + const size_t N = Lanes(d); + HWY_ASSERT(N != 0); + padded = RoundUpTo(kNumTestCases, N); // allow loading whole vectors + auto in = AllocateAligned(padded); + auto expected = AllocateAligned(padded); + size_t i = 0; + for (; i < kNumTestCases; ++i) { + in[i] = test_cases[i]; + } + for (; i < padded; ++i) { + in[i] = 0.0f; + } + return in; +} + +struct TestF16 { + template + HWY_NOINLINE void operator()(TF32 /*t*/, DF32 d32) { +#if HWY_HAVE_FLOAT16 + size_t padded; + const size_t N = Lanes(d32); // same count for f16 + HWY_ASSERT(N != 0); + auto in = F16TestCases(d32, padded); + using TF16 = float16_t; + const Rebind d16; + auto temp16 = AllocateAligned(N); + + for (size_t i = 0; i < padded; i += N) { + const auto loaded = Load(d32, &in[i]); + Store(DemoteTo(d16, loaded), d16, temp16.get()); + HWY_ASSERT_VEC_EQ(d32, loaded, PromoteTo(d32, Load(d16, temp16.get()))); + } +#else + (void)d32; +#endif + } +}; + +HWY_NOINLINE void TestAllF16() { ForDemoteVectors()(float()); } + +template +AlignedFreeUniquePtr BF16TestCases(D d, size_t& padded) { + const float test_cases[] = { + // +/- 1 + 1.0f, -1.0f, + // +/- 0 + 0.0f, -0.0f, + // near 0 + 0.25f, -0.25f, + // +/- integer + 4.0f, -32.0f, + // positive near limit + 3.389531389251535E38f, 1.99384199368e+38f, + // negative near limit + -3.389531389251535E38f, -1.99384199368e+38f, + // positive +/- delta + 2.015625f, 
3.984375f, + // negative +/- delta + -2.015625f, -3.984375f, + }; + constexpr size_t kNumTestCases = sizeof(test_cases) / sizeof(test_cases[0]); + const size_t N = Lanes(d); + HWY_ASSERT(N != 0); + padded = RoundUpTo(kNumTestCases, N); // allow loading whole vectors + auto in = AllocateAligned(padded); + auto expected = AllocateAligned(padded); + size_t i = 0; + for (; i < kNumTestCases; ++i) { + in[i] = test_cases[i]; + } + for (; i < padded; ++i) { + in[i] = 0.0f; + } + return in; +} + +struct TestBF16 { + template + HWY_NOINLINE void operator()(TF32 /*t*/, DF32 d32) { +#if !defined(HWY_EMULATE_SVE) + size_t padded; + auto in = BF16TestCases(d32, padded); + using TBF16 = bfloat16_t; +#if HWY_TARGET == HWY_SCALAR + const Rebind dbf16; // avoid 4/2 = 2 lanes +#else + const Repartition dbf16; +#endif + const Half dbf16_half; + const size_t N = Lanes(d32); + HWY_ASSERT(Lanes(dbf16_half) <= N); + auto temp16 = AllocateAligned(N); + + for (size_t i = 0; i < padded; i += N) { + const auto loaded = Load(d32, &in[i]); + const auto v16 = DemoteTo(dbf16_half, loaded); + Store(v16, dbf16_half, temp16.get()); + const auto v16_loaded = Load(dbf16_half, temp16.get()); + HWY_ASSERT_VEC_EQ(d32, loaded, PromoteTo(d32, v16_loaded)); + } +#else + (void)d32; +#endif + } +}; + +HWY_NOINLINE void TestAllBF16() { ForShrinkableVectors()(float()); } + +struct TestConvertU8 { + template + HWY_NOINLINE void operator()(T /*unused*/, const D du32) { + const Rebind du8; + const auto wrap = Set(du32, 0xFF); + HWY_ASSERT_VEC_EQ(du8, Iota(du8, 0), U8FromU32(And(Iota(du32, 0), wrap))); + HWY_ASSERT_VEC_EQ(du8, Iota(du8, 0x7F), + U8FromU32(And(Iota(du32, 0x7F), wrap))); + } +}; + +HWY_NOINLINE void TestAllConvertU8() { + ForDemoteVectors()(uint32_t()); +} + +template +constexpr bool IsSupportedTruncation() { + return (sizeof(To) < sizeof(From)) && + (Pow2(Rebind()) + 3 >= static_cast(CeilLog2(sizeof(To)))); +} + +struct TestTruncateTo { + template ()>* = nullptr> + HWY_NOINLINE void testTo(From, 
To, const D) { + // do nothing + } + + template ()>* = nullptr> + HWY_NOINLINE void testTo(From, To, const D d) { + constexpr uint32_t base = 0xFA578D00; + const Rebind dTo; + const auto src = Iota(d, static_cast(base)); + const auto expected = Iota(dTo, static_cast(base)); + const VFromD actual = TruncateTo(dTo, src); + HWY_ASSERT_VEC_EQ(dTo, expected, actual); + } + + template + HWY_NOINLINE void operator()(T from, const D d) { + testTo(from, uint8_t(), d); + testTo(from, uint16_t(), d); + testTo(from, uint32_t(), d); + } +}; + +HWY_NOINLINE void TestAllTruncate() { + ForUnsignedTypes(ForPartialVectors()); +} + +// Separate function to attempt to work around a compiler bug on ARM: when this +// is merged with TestIntFromFloat, outputs match a previous Iota(-(N+1)) input. +struct TestIntFromFloatHuge { + template + HWY_NOINLINE void operator()(TF /*unused*/, const DF df) { + // The ARMv7 manual says that float->int saturates, i.e. chooses the + // nearest representable value. This works correctly on armhf with GCC, but + // not with clang. For reasons unknown, MSVC also runs into an out-of-memory + // error here. +#if HWY_COMPILER_CLANG || HWY_COMPILER_MSVC + (void)df; +#else + using TI = MakeSigned; + const Rebind di; + + // Workaround for incorrect 32-bit GCC codegen for SSSE3 - Print-ing + // the expected lvalue also seems to prevent the issue. 
+ const size_t N = Lanes(df); + auto expected = AllocateAligned(N); + + // Huge positive + Store(Set(di, LimitsMax()), di, expected.get()); + HWY_ASSERT_VEC_EQ(di, expected.get(), ConvertTo(di, Set(df, TF(1E20)))); + + // Huge negative + Store(Set(di, LimitsMin()), di, expected.get()); + HWY_ASSERT_VEC_EQ(di, expected.get(), ConvertTo(di, Set(df, TF(-1E20)))); +#endif + } +}; + +class TestIntFromFloat { + template + static HWY_NOINLINE void TestPowers(TF /*unused*/, const DF df) { + using TI = MakeSigned; + const Rebind di; + constexpr size_t kBits = sizeof(TF) * 8; + + // Powers of two, plus offsets to set some mantissa bits. + const int64_t ofs_table[3] = {0LL, 3LL << (kBits / 2), 1LL << (kBits - 15)}; + for (int sign = 0; sign < 2; ++sign) { + for (size_t shift = 0; shift < kBits - 1; ++shift) { + for (int64_t ofs : ofs_table) { + const int64_t mag = (int64_t{1} << shift) + ofs; + const int64_t val = sign ? mag : -mag; + HWY_ASSERT_VEC_EQ(di, Set(di, static_cast(val)), + ConvertTo(di, Set(df, static_cast(val)))); + } + } + } + } + + template + static HWY_NOINLINE void TestRandom(TF /*unused*/, const DF df) { + using TI = MakeSigned; + const Rebind di; + const size_t N = Lanes(df); + + // TF does not have enough precision to represent TI. + const double min = static_cast(LimitsMin()); + const double max = static_cast(LimitsMax()); + + // Also check random values. 
+ auto from = AllocateAligned(N); + auto expected = AllocateAligned(N); + RandomState rng; + for (size_t rep = 0; rep < AdjustedReps(1000); ++rep) { + for (size_t i = 0; i < N; ++i) { + do { + const uint64_t bits = rng(); + CopyBytes(&bits, &from[i]); // not same size + } while (!std::isfinite(from[i])); + if (from[i] >= max) { + expected[i] = LimitsMax(); + } else if (from[i] <= min) { + expected[i] = LimitsMin(); + } else { + expected[i] = static_cast(from[i]); + } + } + + HWY_ASSERT_VEC_EQ(di, expected.get(), + ConvertTo(di, Load(df, from.get()))); + } + } + + public: + template + HWY_NOINLINE void operator()(TF tf, const DF df) { + using TI = MakeSigned; + const Rebind di; + const size_t N = Lanes(df); + + // Integer positive + HWY_ASSERT_VEC_EQ(di, Iota(di, TI(4)), ConvertTo(di, Iota(df, TF(4.0)))); + + // Integer negative + HWY_ASSERT_VEC_EQ(di, Iota(di, -TI(N)), ConvertTo(di, Iota(df, -TF(N)))); + + // Above positive + HWY_ASSERT_VEC_EQ(di, Iota(di, TI(2)), ConvertTo(di, Iota(df, TF(2.001)))); + + // Below positive + HWY_ASSERT_VEC_EQ(di, Iota(di, TI(3)), ConvertTo(di, Iota(df, TF(3.9999)))); + + const TF eps = static_cast(0.0001); + // Above negative + HWY_ASSERT_VEC_EQ(di, Iota(di, -TI(N)), + ConvertTo(di, Iota(df, -TF(N + 1) + eps))); + + // Below negative + HWY_ASSERT_VEC_EQ(di, Iota(di, -TI(N + 1)), + ConvertTo(di, Iota(df, -TF(N + 1) - eps))); + + TestPowers(tf, df); + TestRandom(tf, df); + } +}; + +HWY_NOINLINE void TestAllIntFromFloat() { + ForFloatTypes(ForPartialVectors()); + ForFloatTypes(ForPartialVectors()); +} + +struct TestFloatFromInt { + template + HWY_NOINLINE void operator()(TF /*unused*/, const DF df) { + using TI = MakeSigned; + const RebindToSigned di; + const size_t N = Lanes(df); + + // Integer positive + HWY_ASSERT_VEC_EQ(df, Iota(df, TF(4.0)), ConvertTo(df, Iota(di, TI(4)))); + + // Integer negative + HWY_ASSERT_VEC_EQ(df, Iota(df, -TF(N)), ConvertTo(df, Iota(di, -TI(N)))); + + // Max positive + HWY_ASSERT_VEC_EQ(df, Set(df, 
TF(LimitsMax())), + ConvertTo(df, Set(di, LimitsMax()))); + + // Min negative + HWY_ASSERT_VEC_EQ(df, Set(df, TF(LimitsMin())), + ConvertTo(df, Set(di, LimitsMin()))); + } +}; + +HWY_NOINLINE void TestAllFloatFromInt() { + ForFloatTypes(ForPartialVectors()); +} + +struct TestFloatFromUint { + template + HWY_NOINLINE void operator()(TF /*unused*/, const DF df) { + using TU = MakeUnsigned; + const RebindToUnsigned du; + + // Integer positive + HWY_ASSERT_VEC_EQ(df, Iota(df, TF(4.0)), ConvertTo(df, Iota(du, TU(4)))); + HWY_ASSERT_VEC_EQ(df, Iota(df, TF(65535.0)), + ConvertTo(df, Iota(du, 65535))); // 2^16-1 + if (sizeof(TF) > 4) { + HWY_ASSERT_VEC_EQ(df, Iota(df, TF(4294967295.0)), + ConvertTo(df, Iota(du, 4294967295ULL))); // 2^32-1 + } + + // Max positive + HWY_ASSERT_VEC_EQ(df, Set(df, TF(LimitsMax())), + ConvertTo(df, Set(du, LimitsMax()))); + + // Zero + HWY_ASSERT_VEC_EQ(df, Zero(df), ConvertTo(df, Zero(du))); + } +}; + +HWY_NOINLINE void TestAllFloatFromUint() { + ForFloatTypes(ForPartialVectors()); +} + +struct TestI32F64 { + template + HWY_NOINLINE void operator()(TF /*unused*/, const DF df) { + using TI = int32_t; + const Rebind di; + const size_t N = Lanes(df); + + // Integer positive + HWY_ASSERT_VEC_EQ(df, Iota(df, TF(4.0)), PromoteTo(df, Iota(di, TI(4)))); + + // Integer negative + HWY_ASSERT_VEC_EQ(df, Iota(df, -TF(N)), PromoteTo(df, Iota(di, -TI(N)))); + + // Above positive + HWY_ASSERT_VEC_EQ(df, Iota(df, TF(2.0)), PromoteTo(df, Iota(di, TI(2)))); + + // Below positive + HWY_ASSERT_VEC_EQ(df, Iota(df, TF(4.0)), PromoteTo(df, Iota(di, TI(4)))); + + // Above negative + HWY_ASSERT_VEC_EQ(df, Iota(df, TF(-4.0)), PromoteTo(df, Iota(di, TI(-4)))); + + // Below negative + HWY_ASSERT_VEC_EQ(df, Iota(df, TF(-2.0)), PromoteTo(df, Iota(di, TI(-2)))); + + // Max positive int + HWY_ASSERT_VEC_EQ(df, Set(df, TF(LimitsMax())), + PromoteTo(df, Set(di, LimitsMax()))); + + // Min negative int + HWY_ASSERT_VEC_EQ(df, Set(df, TF(LimitsMin())), + PromoteTo(df, Set(di, 
LimitsMin()))); + } +}; + +HWY_NOINLINE void TestAllI32F64() { +#if HWY_HAVE_FLOAT64 + ForDemoteVectors()(double()); +#endif +} + + +// NOLINTNEXTLINE(google-readability-namespace-comments) +} // namespace HWY_NAMESPACE +} // namespace hwy +HWY_AFTER_NAMESPACE(); + +#if HWY_ONCE + +namespace hwy { +HWY_BEFORE_TEST(HwyConvertTest); +HWY_EXPORT_AND_TEST_P(HwyConvertTest, TestAllBitCast); +HWY_EXPORT_AND_TEST_P(HwyConvertTest, TestAllPromoteTo); +HWY_EXPORT_AND_TEST_P(HwyConvertTest, TestAllF16); +HWY_EXPORT_AND_TEST_P(HwyConvertTest, TestAllBF16); +HWY_EXPORT_AND_TEST_P(HwyConvertTest, TestAllConvertU8); +HWY_EXPORT_AND_TEST_P(HwyConvertTest, TestAllTruncate); +HWY_EXPORT_AND_TEST_P(HwyConvertTest, TestAllIntFromFloat); +HWY_EXPORT_AND_TEST_P(HwyConvertTest, TestAllFloatFromInt); +HWY_EXPORT_AND_TEST_P(HwyConvertTest, TestAllFloatFromUint); +HWY_EXPORT_AND_TEST_P(HwyConvertTest, TestAllI32F64); +} // namespace hwy + +#endif -- cgit v1.2.3