summary refs log tree commit diff stats
path: root/third_party/highway/hwy/tests/blockwise_shift_test.cc
diff options
context:
space:
mode:
Diffstat (limited to 'third_party/highway/hwy/tests/blockwise_shift_test.cc')
-rw-r--r-- third_party/highway/hwy/tests/blockwise_shift_test.cc 270
1 files changed, 270 insertions, 0 deletions
diff --git a/third_party/highway/hwy/tests/blockwise_shift_test.cc b/third_party/highway/hwy/tests/blockwise_shift_test.cc
new file mode 100644
index 0000000000..4e5250841b
--- /dev/null
+++ b/third_party/highway/hwy/tests/blockwise_shift_test.cc
@@ -0,0 +1,270 @@
+// Copyright 2019 Google LLC
+// SPDX-License-Identifier: Apache-2.0
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <stddef.h>
+#include <stdint.h>
+#include <string.h> // memcpy
+
+#include <algorithm> // std::fill
+
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "tests/blockwise_shift_test.cc"
+#include "hwy/foreach_target.h" // IWYU pragma: keep
+#include "hwy/highway.h"
+#include "hwy/tests/test_util-inl.h"
+
+HWY_BEFORE_NAMESPACE();
+namespace hwy {
+namespace HWY_NAMESPACE {
+
+// Checks ShiftLeftBytes<1> and ShiftRightBytes<1>: zero stays zero, a lone
+// nonzero byte at the far end of the vector is shifted out, and a byte
+// pattern generated by Iota moves by one byte inside every block while a zero
+// byte is shifted in.
+struct TestShiftBytes {
+  template <class T, class D>
+  HWY_NOINLINE void operator()(T /*unused*/, D d) {
+    // Scalar does not define Shift*Bytes.
+#if HWY_TARGET != HWY_SCALAR || HWY_IDE
+    const Repartition<uint8_t, D> d_bytes;
+    const size_t num_bytes = Lanes(d_bytes);
+    const auto zero = Zero(d);
+
+    // An all-zero input must produce an all-zero output.
+    HWY_ASSERT_VEC_EQ(d, zero, ShiftLeftBytes<1>(zero));
+    HWY_ASSERT_VEC_EQ(d, zero, ShiftLeftBytes<1>(d, zero));
+    HWY_ASSERT_VEC_EQ(d, zero, ShiftRightBytes<1>(d, zero));
+
+    // Only the highest (respectively lowest) byte is set, so shifting one
+    // byte towards that end must leave nothing behind.
+    auto scratch = AllocateAligned<uint8_t>(num_bytes);
+    std::fill(scratch.get(), scratch.get() + num_bytes, 0);
+    scratch[num_bytes - 1] = 0x7F;
+    const auto only_top = BitCast(d, Load(d_bytes, scratch.get()));
+    scratch[num_bytes - 1] = 0;
+    scratch[0] = 0x7F;
+    const auto only_bottom = BitCast(d, Load(d_bytes, scratch.get()));
+    HWY_ASSERT_VEC_EQ(d, zero, ShiftLeftBytes<1>(only_top));
+    HWY_ASSERT_VEC_EQ(d, zero, ShiftLeftBytes<1>(d, only_top));
+    HWY_ASSERT_VEC_EQ(d, zero, ShiftRightBytes<1>(d, only_bottom));
+
+    // Reference input: byte values counting up from 1, stored to memory so
+    // the expected output can be assembled with plain byte copies.
+    const size_t num_lanes = Lanes(d);
+    auto input = AllocateAligned<T>(num_lanes);
+    const auto v = BitCast(d, Iota(d_bytes, 1));
+    Store(v, d, input.get());
+    const uint8_t* src = reinterpret_cast<const uint8_t*>(input.get());
+
+    auto expected = AllocateAligned<T>(num_lanes);
+    uint8_t* dst = reinterpret_cast<uint8_t*>(expected.get());
+
+    // Expectations are built per block: 16 bytes, or the whole vector if it
+    // is smaller than that.
+    const size_t bytes_per_block = HWY_MIN(num_bytes, 16);
+
+    // Left shift: the lowest byte of each block becomes zero and the rest of
+    // the block is the input moved up by one byte.
+    for (size_t first = 0; first < num_bytes; first += bytes_per_block) {
+      uint8_t* out_block = dst + first;
+      out_block[0] = 0;
+      memcpy(out_block + 1, src + first, bytes_per_block - 1);
+    }
+    HWY_ASSERT_VEC_EQ(d, expected.get(), ShiftLeftBytes<1>(v));
+    HWY_ASSERT_VEC_EQ(d, expected.get(), ShiftLeftBytes<1>(d, v));
+
+    // Right shift: the input moves down by one byte and the highest byte of
+    // each block becomes zero.
+    for (size_t first = 0; first < num_bytes; first += bytes_per_block) {
+      uint8_t* out_block = dst + first;
+      memcpy(out_block, src + first + 1, bytes_per_block - 1);
+      out_block[bytes_per_block - 1] = 0;
+    }
+    HWY_ASSERT_VEC_EQ(d, expected.get(), ShiftRightBytes<1>(d, v));
+#else
+    (void)d;
+#endif  // #if HWY_TARGET != HWY_SCALAR
+  }
+};
+
+// Runs TestShiftBytes via ForIntegerTypes and ForPartialVectors.
+HWY_NOINLINE void TestAllShiftBytes() {
+  const auto for_each_vector = ForPartialVectors<TestShiftBytes>();
+  ForIntegerTypes(for_each_vector);
+}
+
+// Checks ShiftLeftLanes: shifting by zero lanes returns the input unchanged,
+// and shifting by one lane moves every lane up by one within its block and
+// zero-fills the lowest lane of each block.
+struct TestShiftLeftLanes {
+  template <class T, class D>
+  HWY_NOINLINE void operator()(T /*unused*/, D d) {
+    // Scalar does not define Shift*Lanes.
+#if HWY_TARGET != HWY_SCALAR || HWY_IDE
+    const size_t num_lanes = Lanes(d);
+    const auto v = Iota(d, T(1));
+    // Single-lane vectors are skipped.
+    if (num_lanes == 1) return;
+
+    HWY_ASSERT_VEC_EQ(d, v, ShiftLeftLanes<0>(v));
+    HWY_ASSERT_VEC_EQ(d, v, ShiftLeftLanes<0>(d, v));
+
+    constexpr size_t kBlockLanes = 16 / sizeof(T);
+    auto want = AllocateAligned<T>(num_lanes);
+    for (size_t lane = 0; lane < num_lanes; ++lane) {
+      const bool starts_block = (lane % kBlockLanes) == 0;
+      if (starts_block) {
+        want[lane] = T(0);
+      } else {
+        // The input counts up from 1, so the lane below held the value
+        // `lane`.
+        want[lane] = T(lane);
+      }
+    }
+    HWY_ASSERT_VEC_EQ(d, want.get(), ShiftLeftLanes<1>(v));
+    HWY_ASSERT_VEC_EQ(d, want.get(), ShiftLeftLanes<1>(d, v));
+#else
+    (void)d;
+#endif  // #if HWY_TARGET != HWY_SCALAR
+  }
+};
+
+// Checks ShiftRightLanes: shifting by zero lanes returns the input unchanged,
+// and shifting by one lane moves every lane down by one within its block and
+// zero-fills the highest lane of each block (and the last lane of the
+// vector).
+struct TestShiftRightLanes {
+  template <class T, class D>
+  HWY_NOINLINE void operator()(T /*unused*/, D d) {
+    // Scalar does not define Shift*Lanes.
+#if HWY_TARGET != HWY_SCALAR || HWY_IDE
+    const size_t num_lanes = Lanes(d);
+    const auto v = Iota(d, T(1));
+    // Single-lane vectors are skipped.
+    if (num_lanes == 1) return;
+
+    HWY_ASSERT_VEC_EQ(d, v, ShiftRightLanes<0>(d, v));
+
+    constexpr size_t kBlockLanes = 16 / sizeof(T);
+    auto want = AllocateAligned<T>(num_lanes);
+    for (size_t lane = 0; lane < num_lanes; ++lane) {
+      const bool ends_block = (lane % kBlockLanes) == (kBlockLanes - 1);
+      const bool is_last_lane = lane + 1 >= num_lanes;
+      if (ends_block || is_last_lane) {
+        want[lane] = T(0);
+      } else {
+        // The input counts up from 1, so the lane above held `lane + 2`.
+        want[lane] = T(2 + lane);
+      }
+    }
+    HWY_ASSERT_VEC_EQ(d, want.get(), ShiftRightLanes<1>(d, v));
+#else
+    (void)d;
+#endif  // #if HWY_TARGET != HWY_SCALAR
+  }
+};
+
+// Runs TestShiftLeftLanes via ForAllTypes and ForPartialVectors.
+HWY_NOINLINE void TestAllShiftLeftLanes() {
+  const auto for_each_vector = ForPartialVectors<TestShiftLeftLanes>();
+  ForAllTypes(for_each_vector);
+}
+
+// Runs TestShiftRightLanes via ForAllTypes and ForPartialVectors.
+HWY_NOINLINE void TestAllShiftRightLanes() {
+  const auto for_each_vector = ForPartialVectors<TestShiftRightLanes>();
+  ForAllTypes(for_each_vector);
+}
+
+// Scalar does not define CombineShiftRightBytes.
+#if HWY_TARGET != HWY_SCALAR || HWY_IDE
+
+// Compares CombineShiftRightBytes<kBytes> with a byte-wise reference built
+// per 16-byte block from random inputs.
+template <int kBytes>
+struct TestCombineShiftRightBytes {
+  template <class T, class D>
+  HWY_NOINLINE void operator()(T, D d) {
+    constexpr size_t kBlockSize = 16;
+    static_assert(kBytes < kBlockSize, "Shift count is per block");
+    const Repartition<uint8_t, D> d_bytes;
+    const size_t num_bytes = Lanes(d_bytes);
+    // Vectors shorter than one full block are not exercised here.
+    if (num_bytes < 16) return;
+
+    auto upper = AllocateAligned<uint8_t>(num_bytes);
+    auto lower = AllocateAligned<uint8_t>(num_bytes);
+    auto want = AllocateAligned<uint8_t>(num_bytes);
+    // One block of `lo` followed by the matching block of `hi`.
+    uint8_t concat[2 * kBlockSize];
+
+    RandomState rng;
+    for (size_t rep = 0; rep < AdjustedReps(100); ++rep) {
+      // Fresh random bytes for both operands.
+      for (size_t idx = 0; idx < num_bytes; ++idx) {
+        upper[idx] = static_cast<uint8_t>(Random64(&rng) & 0xFF);
+        lower[idx] = static_cast<uint8_t>(Random64(&rng) & 0xFF);
+      }
+
+      // Reference: lay lo and hi out back to back, then read one block
+      // starting kBytes into that buffer.
+      for (size_t offset = 0; offset < num_bytes; offset += kBlockSize) {
+        CopyBytes<kBlockSize>(lower.get() + offset, concat);
+        CopyBytes<kBlockSize>(upper.get() + offset, concat + kBlockSize);
+        CopyBytes<kBlockSize>(concat + kBytes, want.get() + offset);
+      }
+
+      const auto hi = BitCast(d, Load(d_bytes, upper.get()));
+      const auto lo = BitCast(d, Load(d_bytes, lower.get()));
+      const auto expected = BitCast(d, Load(d_bytes, want.get()));
+      const auto actual = CombineShiftRightBytes<kBytes>(d, hi, lo);
+      HWY_ASSERT_VEC_EQ(d, expected, actual);
+    }
+  }
+};
+
+// Compares CombineShiftRightLanes<kLanes> with a byte-wise reference built
+// per 16-byte block from random inputs; the lane count is converted to a byte
+// offset of kLanes * sizeof(T).
+template <int kLanes>
+struct TestCombineShiftRightLanes {
+  template <class T, class D>
+  HWY_NOINLINE void operator()(T, D d) {
+    const Repartition<uint8_t, D> d_bytes;
+    const size_t num_bytes = Lanes(d_bytes);
+    // Vectors shorter than one full block are not exercised here.
+    if (num_bytes < 16) return;
+
+    auto upper = AllocateAligned<uint8_t>(num_bytes);
+    auto lower = AllocateAligned<uint8_t>(num_bytes);
+    auto want = AllocateAligned<uint8_t>(num_bytes);
+    constexpr size_t kBlockSize = 16;
+    // One block of `lo` followed by the matching block of `hi`.
+    uint8_t concat[2 * kBlockSize];
+
+    RandomState rng;
+    for (size_t rep = 0; rep < AdjustedReps(100); ++rep) {
+      // Fresh random bytes for both operands.
+      for (size_t idx = 0; idx < num_bytes; ++idx) {
+        upper[idx] = static_cast<uint8_t>(Random64(&rng) & 0xFF);
+        lower[idx] = static_cast<uint8_t>(Random64(&rng) & 0xFF);
+      }
+
+      // Reference: lay lo and hi out back to back, then read one block
+      // starting kLanes lanes into that buffer.
+      for (size_t offset = 0; offset < num_bytes; offset += kBlockSize) {
+        CopyBytes<kBlockSize>(lower.get() + offset, concat);
+        CopyBytes<kBlockSize>(upper.get() + offset, concat + kBlockSize);
+        CopyBytes<kBlockSize>(concat + kLanes * sizeof(T),
+                              want.get() + offset);
+      }
+
+      const auto hi = BitCast(d, Load(d_bytes, upper.get()));
+      const auto lo = BitCast(d, Load(d_bytes, lower.get()));
+      const auto expected = BitCast(d, Load(d_bytes, want.get()));
+      const auto actual = CombineShiftRightLanes<kLanes>(d, hi, lo);
+      HWY_ASSERT_VEC_EQ(d, expected, actual);
+    }
+  }
+};
+
+#endif // #if HWY_TARGET != HWY_SCALAR
+
+// Drives TestCombineShiftRightBytes and TestCombineShiftRightLanes with three
+// shift counts each: the largest that fits in one block of `d`, roughly half
+// of that, and the smallest (1).
+struct TestCombineShiftRight {
+  template <class T, class D>
+  HWY_NOINLINE void operator()(T t, D d) {
+// Scalar does not define CombineShiftRightBytes.
+#if HWY_TARGET != HWY_SCALAR || HWY_IDE
+    // Bytes per block: 16, or the whole vector if it can never be that large.
+    constexpr int kMaxBytes =
+        HWY_MIN(16, static_cast<int>(MaxLanes(d) * sizeof(T)));
+    constexpr int kMaxLanes = kMaxBytes / static_cast<int>(sizeof(T));
+    TestCombineShiftRightBytes<kMaxBytes - 1>()(t, d);
+    TestCombineShiftRightBytes<HWY_MAX(kMaxBytes / 2, 1)>()(t, d);
+    TestCombineShiftRightBytes<1>()(t, d);
+
+    TestCombineShiftRightLanes<kMaxLanes - 1>()(t, d);
+    // Clamp to 1, matching the byte variant above: one lane is the smallest
+    // valid shift count, and a lower bound of -1 could never take effect.
+    TestCombineShiftRightLanes<HWY_MAX(kMaxLanes / 2, 1)>()(t, d);
+    TestCombineShiftRightLanes<1>()(t, d);
+#else
+    (void)t;
+    (void)d;
+#endif
+  }
+};
+
+// Runs TestCombineShiftRight via ForAllTypes and ForShrinkableVectors.
+HWY_NOINLINE void TestAllCombineShiftRight() {
+  // Need at least 2 lanes.
+  const auto for_each_vector = ForShrinkableVectors<TestCombineShiftRight>();
+  ForAllTypes(for_each_vector);
+}
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+} // namespace HWY_NAMESPACE
+} // namespace hwy
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+
+namespace hwy {
+HWY_BEFORE_TEST(HwyBlockwiseShiftTest);  // Suite name used by every entry below.
+HWY_EXPORT_AND_TEST_P(HwyBlockwiseShiftTest, TestAllShiftBytes);  // ShiftLeftBytes / ShiftRightBytes
+HWY_EXPORT_AND_TEST_P(HwyBlockwiseShiftTest, TestAllShiftLeftLanes);  // ShiftLeftLanes
+HWY_EXPORT_AND_TEST_P(HwyBlockwiseShiftTest, TestAllShiftRightLanes);  // ShiftRightLanes
+HWY_EXPORT_AND_TEST_P(HwyBlockwiseShiftTest, TestAllCombineShiftRight);  // CombineShiftRightBytes / CombineShiftRightLanes
+} // namespace hwy
+
+#endif