1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
|
// Copyright 2020 Google LLC
// SPDX-License-Identifier: Apache-2.0
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Per-target definitions shared by ops/*.h and user code.
// IWYU pragma: begin_exports
// Export does not seem to be recursive, so re-export these (also in base.h)
#include <stddef.h>
#include "hwy/base.h"
// "IWYU pragma: keep" does not work for this include, so hide it from the IDE.
#if !HWY_IDE
#include <stdint.h>
#endif
#include "hwy/detect_compiler_arch.h"
// Separate header because foreach_target.h re-enables its include guard.
#include "hwy/ops/set_macros-inl.h"
// IWYU pragma: end_exports
#if HWY_IS_MSAN
#include <sanitizer/msan_interface.h>
#endif
// We are covered by the highway.h include guard, but generic_ops-inl.h
// includes this again #if HWY_IDE.
#if defined(HIGHWAY_HWY_OPS_SHARED_TOGGLE) == \
defined(HWY_TARGET_TOGGLE)
#ifdef HIGHWAY_HWY_OPS_SHARED_TOGGLE
#undef HIGHWAY_HWY_OPS_SHARED_TOGGLE
#else
#define HIGHWAY_HWY_OPS_SHARED_TOGGLE
#endif
HWY_BEFORE_NAMESPACE();
namespace hwy {
namespace HWY_NAMESPACE {
// NOTE: GCC generates incorrect code for vector arguments to non-inlined
// functions in two situations:
// - on Windows and GCC 10.3, passing by value crashes due to unaligned loads:
// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=54412.
// - on aarch64 and GCC 9.3.0 or 11.2.1, passing by value causes many (but not
// all) tests to fail.
//
// We therefore pass by const& only on GCC and (Windows or aarch64). This alias
// must be used for all vector/mask parameters of functions marked HWY_NOINLINE,
// and possibly also other functions that are not inlined.
#if HWY_COMPILER_GCC_ACTUAL && (HWY_OS_WIN || HWY_ARCH_ARM_A64)
template <class V>
using VecArg = const V&;
#else
template <class V>
using VecArg = V;
#endif
namespace detail {
// Returns N * 2^pow2. N is the number of lanes in a full vector and pow2 the
// desired fraction or multiple of it, see Simd<>. `pow2` is most often in
// [-3, 3] but can also be lower for user-specified fractions.
constexpr size_t ScaleByPower(size_t N, int pow2) {
return pow2 >= 0 ? (N << pow2) : (N >> (-pow2));
}
template <typename T>
HWY_INLINE void MaybeUnpoison(T* HWY_RESTRICT unaligned, size_t count) {
// Workaround for MSAN not marking compressstore as initialized (b/233326619)
#if HWY_IS_MSAN
__msan_unpoison(unaligned, count * sizeof(T));
#else
(void)unaligned;
(void)count;
#endif
}
} // namespace detail
// Highway operations are implemented as overloaded functions selected using a
// zero-sized tag type D := Simd<T, N, kPow2>. T denotes the lane type.
//
// N defines how many lanes are in a 'full' vector, typically equal to
// HWY_LANES(T) (which is the actual count on targets with vectors of known
// size, and an upper bound in case of scalable vectors), otherwise a
// user-specified limit at most that large.
//
// 2^kPow2 is a _subsequently_ applied scaling factor that indicates the
// desired fraction of a 'full' vector: 0 means full, -1 means half; 1,2,3
// means two/four/eight full vectors ganged together. The largest supported
// kPow2 is `HWY_MAX_POW2` and the aliases below take care of clamping
// user-specified values to that. Note that `Simd<T, 1, 0>` and `Simd<T, 2, -1>`
// have the same `MaxLanes` and `Lanes`.
//
// We can theoretically keep halving Lanes(), but recursive instantiations of
// kPow2 - 1 will eventually fail e.g. because -64 is not a valid shift count.
// Users must terminate such compile-time recursions at or above HWY_MIN_POW2.
//
// WARNING: do not use N directly because it may be a special representation of
// a fractional MaxLanes. This arises when we Rebind Simd<uint8_t, 1, 0> to
// Simd<uint32_t, ??, 2>. RVV requires that the last argument (kPow2) be two,
// but we want MaxLanes to be the same in both cases. Hence ?? is a
// fixed-point encoding of 1/4.
//
// Instead of referring to Simd<> directly, users create D via aliases:
// - ScalableTag<T> for a full vector;
// - ScalableTag<T, kPow2>() for a fraction/group, where `kPow2` is
// interpreted as `HWY_MIN(kPow2, HWY_MAX_POW2)`;
// - CappedTag<T, kLimit> for a vector with up to kLimit lanes; or
// - FixedTag<T, kNumLanes> for a vector with exactly kNumLanes lanes.
//
// Instead of N, use Lanes(D()) for the actual number of lanes at runtime and
// D().MaxLanes() for a constexpr upper bound. Both are powers of two.
template <typename Lane, size_t N, int kPow2>
struct Simd {
constexpr Simd() = default;
using T = Lane;
private:
static_assert(sizeof(Lane) <= 8, "Lanes are up to 64-bit");
// 20 bits are sufficient for any HWY_MAX_BYTES. This is the 'normal' value of
// N when kFrac == 0, otherwise it is one (see FracN).
static constexpr size_t kWhole = N & 0xFFFFF;
// Fractional part is in the bits above kWhole.
static constexpr int kFrac = static_cast<int>(N >> 20);
// Can be 8x larger because kPow2 may be as low as -3 (Rebind of a larger
// type to u8 results in fractions).
static_assert(kWhole <= 8 * HWY_MAX_N && kFrac <= 3, "Out of range");
static_assert(kFrac == 0 || kWhole == 1, "If frac, whole must be 1");
static_assert((kWhole & (kWhole - 1)) == 0 && kWhole != 0, "Not 2^x");
// Important to check this here because kPow2 <= -64 causes confusing
// compile errors (invalid shift count).
static_assert(kPow2 >= HWY_MIN_POW2, "Forgot kPow2 recursion terminator?");
// However, do NOT verify kPow2 <= HWY_MAX_POW2 - users should be able to
// Rebind<uint64_t, ScalableTag<uint8_t, 3>> in order to discover that its
// kPow2 is out of bounds.
public:
// Upper bound on the number of lanes (tight if !HWY_HAVE_SCALABLE). In the
// common case, N == kWhole, but if kFrac is nonzero, we deduct it from kPow2.
// E.g. Rebind<uint32_t, Simd<uint8_t, 1, 0>> is Simd<uint32_t, 0x200001, 2>.
// The resulting number of lanes is still 1 because this N represents 1/4
// (the ratio of the sizes). Note that RVV requires kPow2 to be the ratio of
// the sizes so that the correct LMUL overloads are chosen, even if N is
// small enough that it would fit in an LMUL=1 vector.
//
// Cannot be an enum because GCC warns when using enums and non-enums in the
// same expression. Cannot be a static constexpr function (MSVC limitation).
// Rounded up to one so this is a valid array length.
//
// Do not use this directly - only 'public' so it is visible from the accessor
// macro required by MSVC.
static constexpr size_t kPrivateLanes =
HWY_MAX(size_t{1}, detail::ScaleByPower(kWhole, kPow2 - kFrac));
constexpr size_t MaxLanes() const { return kPrivateLanes; }
constexpr size_t MaxBytes() const { return kPrivateLanes * sizeof(Lane); }
// For SFINAE on RVV.
constexpr int Pow2() const { return kPow2; }
// ------------------------------ Changing lane type or count
// Do not use any of these directly. Anything used from member typedefs cannot
// be made private, but functions only used within other functions can.
// Returns number of NewT lanes that fit within MaxBytes().
template <typename NewT>
static constexpr size_t RepartitionLanes() {
// Round up to correctly handle larger NewT.
return (kPrivateLanes * sizeof(T) + sizeof(NewT) - 1) / sizeof(NewT);
}
// Returns the new kPow2 required for lanes of type NewT.
template <typename NewT>
static constexpr int RebindPow2() {
return kPow2 +
((sizeof(NewT) >= sizeof(T))
? static_cast<int>(CeilLog2(sizeof(NewT) / sizeof(T)))
: -static_cast<int>(CeilLog2(sizeof(T) / sizeof(NewT))));
}
private:
// Returns 0 or whole NewN such that kNewMaxLanes = NewN * 2^kNewPow2.
template <int kNewPow2, size_t kNewMaxLanes>
static constexpr size_t WholeN() {
return detail::ScaleByPower(kNewMaxLanes, -kNewPow2);
}
// Returns fractional NewN such that kNewMaxLanes = NewN * 2^kNewPow2.
template <int kNewPow2, size_t kNewMaxLanes>
static constexpr size_t FracN() {
// Only reached if kNewPow2 > CeilLog2(kNewMaxLanes) >= 0 (else WholeN
// would not have been zero), but clamp to zero to avoid warnings. kFrac is
// the difference, stored in the upper bits of N, and we also set kWhole =
// 1 so that the new kPrivateLanes = kNewMaxLanes.
static_assert(HWY_MAX_N <= (size_t{1} << 20), "Change bit shift");
return static_cast<size_t>(
1 + (HWY_MAX(0, kNewPow2 - static_cast<int>(CeilLog2(kNewMaxLanes)))
<< 20));
}
public:
// Returns (whole or fractional) NewN, see above.
template <int kNewPow2, size_t kNewMaxLanes>
static constexpr size_t NewN() {
// We require a fraction if inverting kNewPow2 results in 0.
return WholeN<kNewPow2, kNewMaxLanes>() == 0
? FracN<kNewPow2, kNewMaxLanes>()
: WholeN<kNewPow2, kNewMaxLanes>();
}
// PromoteTo/DemoteTo() with another lane type, but same number of lanes.
template <typename NewT>
using Rebind =
Simd<NewT, NewN<RebindPow2<NewT>(), kPrivateLanes>(), RebindPow2<NewT>()>;
// Change lane type while keeping the same vector size, e.g. for MulEven.
template <typename NewT>
using Repartition =
Simd<NewT, NewN<kPow2, RepartitionLanes<NewT>()>(), kPow2>;
// Half the lanes while keeping the same lane type, e.g. for LowerHalf.
using Half = Simd<T, N, kPow2 - 1>;
// Twice the lanes while keeping the same lane type, e.g. for Combine.
using Twice = Simd<T, N, kPow2 + 1>;
};
namespace detail {
template <typename T, size_t N, int kPow2>
constexpr bool IsFull(Simd<T, N, kPow2> /* d */) {
return N == HWY_LANES(T) && kPow2 == 0;
}
// Struct wrappers enable validation of arguments via static_assert.
template <typename T, size_t N, int kPow2>
struct ClampNAndPow2 {
using type = Simd<T, HWY_MIN(N, HWY_MAX_N), HWY_MIN(kPow2, HWY_MAX_POW2)>;
};
template <typename T, int kPow2>
struct ScalableTagChecker {
using type = typename ClampNAndPow2<T, HWY_LANES(T), kPow2>::type;
};
template <typename T, size_t kLimit, int kPow2>
struct CappedTagChecker {
static_assert(kLimit != 0, "Does not make sense to have zero lanes");
// Safely handle non-power-of-two inputs by rounding down, which is allowed by
// CappedTag. Otherwise, Simd<T, 3, 0> would static_assert.
static constexpr size_t kLimitPow2 = size_t{1} << hwy::FloorLog2(kLimit);
static constexpr size_t N = HWY_MIN(kLimitPow2, HWY_LANES(T));
using type = typename ClampNAndPow2<T, N, kPow2>::type;
};
template <typename T, size_t kNumLanes>
struct FixedTagChecker {
static_assert(kNumLanes != 0, "Does not make sense to have zero lanes");
static_assert(kNumLanes <= HWY_LANES(T), "Too many lanes");
using type = Simd<T, kNumLanes, 0>;
};
} // namespace detail
// ------------------------------ Aliases for Simd<>
// Tag describing a full vector (kPow2 == 0: the most common usage, e.g. 1D
// loops where the application does not care about the vector size) or a
// fraction/multiple of one. Fractions (kPow2 < 0) are useful for arguments or
// return values of type promotion and demotion. User-specified kPow2 is
// interpreted as `HWY_MIN(kPow2, HWY_MAX_POW2)`.
template <typename T, int kPow2 = 0>
using ScalableTag = typename detail::ScalableTagChecker<T, kPow2>::type;
// Tag describing a vector with *up to* kLimit active lanes, even on targets
// with scalable vectors and HWY_SCALAR. The runtime lane count `Lanes(tag)` may
// be less than kLimit, and is 1 on HWY_SCALAR. This alias is typically used for
// 1D loops with a relatively low application-defined upper bound, e.g. for 8x8
// DCTs. However, it is better if data structures are designed to be
// vector-length-agnostic (e.g. a hybrid SoA where there are chunks of `M >=
// MaxLanes(d)` DC components followed by M AC1, .., and M AC63; this would
// enable vector-length-agnostic loops using ScalableTag). User-specified kPow2
// is interpreted as `HWY_MIN(kPow2, HWY_MAX_POW2)`.
template <typename T, size_t kLimit, int kPow2 = 0>
using CappedTag = typename detail::CappedTagChecker<T, kLimit, kPow2>::type;
#if !HWY_HAVE_SCALABLE
// If the vector size is known, and the app knows it does not want more than
// kLimit lanes, then capping can be beneficial. For example, AVX-512 has lower
// IPC and potentially higher costs for unaligned load/store vs. 256-bit AVX2.
template <typename T, size_t kLimit, int kPow2 = 0>
using CappedTagIfFixed = CappedTag<T, kLimit, kPow2>;
#else // HWY_HAVE_SCALABLE
// .. whereas on RVV/SVE, the cost of clamping Lanes() may exceed the benefit.
template <typename T, size_t kLimit, int kPow2 = 0>
using CappedTagIfFixed = ScalableTag<T, kPow2>;
#endif
// Alias for a tag describing a vector with *exactly* kNumLanes active lanes,
// even on targets with scalable vectors. Requires `kNumLanes` to be a power of
// two not exceeding `HWY_LANES(T)`.
//
// NOTE: if the application does not need to support HWY_SCALAR (+), use this
// instead of CappedTag to emphasize that there will be exactly kNumLanes lanes.
// This is useful for data structures that rely on exactly 128-bit SIMD, but
// these are discouraged because they cannot benefit from wider vectors.
// Instead, applications would ideally define a larger problem size and loop
// over it with the (unknown size) vectors from ScalableTag.
//
// + e.g. if the baseline is known to support SIMD, or the application requires
// ops such as TableLookupBytes not supported by HWY_SCALAR.
template <typename T, size_t kNumLanes>
using FixedTag = typename detail::FixedTagChecker<T, kNumLanes>::type;
// Convenience form for fixed sizes.
template <typename T>
using Full16 = Simd<T, 2 / sizeof(T), 0>;
template <typename T>
using Full32 = Simd<T, 4 / sizeof(T), 0>;
template <typename T>
using Full64 = Simd<T, 8 / sizeof(T), 0>;
template <typename T>
using Full128 = Simd<T, 16 / sizeof(T), 0>;
// ------------------------------ Accessors for Simd<>
// Lane type.
template <class D>
using TFromD = typename D::T;
// Upper bound on the number of lanes, typically used for SFINAE conditions and
// to allocate storage for targets with known vector sizes. Note: this may be a
// loose bound, instead use Lanes() as the actual size for AllocateAligned.
// MSVC workaround: use static constant directly instead of a function.
#define HWY_MAX_LANES_D(D) D::kPrivateLanes
// Non-macro form of HWY_MAX_LANES_D in case that is preferable. WARNING: the
// macro form may be required for MSVC, which has limitations on deducing
// arguments.
template <class D>
HWY_INLINE HWY_MAYBE_UNUSED constexpr size_t MaxLanes(D) {
return HWY_MAX_LANES_D(D);
}
#if !HWY_HAVE_SCALABLE
// If non-scalable, this is constexpr; otherwise the target's header defines a
// non-constexpr version of this function. This is the actual vector length,
// used when advancing loop counters.
template <class D>
HWY_INLINE HWY_MAYBE_UNUSED constexpr size_t Lanes(D) {
return HWY_MAX_LANES_D(D);
}
#endif // !HWY_HAVE_SCALABLE
// Tag for the same number of lanes as D, but with the LaneType T.
template <class T, class D>
using Rebind = typename D::template Rebind<T>;
template <class D>
using RebindToSigned = Rebind<MakeSigned<TFromD<D>>, D>;
template <class D>
using RebindToUnsigned = Rebind<MakeUnsigned<TFromD<D>>, D>;
template <class D>
using RebindToFloat = Rebind<MakeFloat<TFromD<D>>, D>;
// Tag for the same total size as D, but with the LaneType T.
template <class T, class D>
using Repartition = typename D::template Repartition<T>;
template <class D>
using RepartitionToWide = Repartition<MakeWide<TFromD<D>>, D>;
template <class D>
using RepartitionToNarrow = Repartition<MakeNarrow<TFromD<D>>, D>;
// Tag for the same lane type as D, but half the lanes.
template <class D>
using Half = typename D::Half;
// Tag for the same lane type as D, but twice the lanes.
template <class D>
using Twice = typename D::Twice;
// ------------------------------ Choosing overloads (SFINAE)
// Same as base.h macros but with a Simd<T, N, kPow2> argument instead of T.
#define HWY_IF_UNSIGNED_D(D) HWY_IF_UNSIGNED(TFromD<D>)
#define HWY_IF_SIGNED_D(D) HWY_IF_SIGNED(TFromD<D>)
#define HWY_IF_FLOAT_D(D) HWY_IF_FLOAT(TFromD<D>)
#define HWY_IF_NOT_FLOAT_D(D) HWY_IF_NOT_FLOAT(TFromD<D>)
#define HWY_IF_SPECIAL_FLOAT_D(D) HWY_IF_SPECIAL_FLOAT(TFromD<D>)
#define HWY_IF_NOT_SPECIAL_FLOAT_D(D) HWY_IF_NOT_SPECIAL_FLOAT(TFromD<D>)
#define HWY_IF_T_SIZE_D(D, bytes) HWY_IF_T_SIZE(TFromD<D>, bytes)
#define HWY_IF_NOT_T_SIZE_D(D, bytes) HWY_IF_NOT_T_SIZE(TFromD<D>, bytes)
#define HWY_IF_T_SIZE_ONE_OF_D(D, bit_array) \
HWY_IF_T_SIZE_ONE_OF(TFromD<D>, bit_array)
#define HWY_IF_LANES_D(D, lanes) HWY_IF_LANES(HWY_MAX_LANES_D(D), lanes)
#define HWY_IF_LANES_LE_D(D, lanes) HWY_IF_LANES_LE(HWY_MAX_LANES_D(D), lanes)
#define HWY_IF_LANES_GT_D(D, lanes) HWY_IF_LANES_GT(HWY_MAX_LANES_D(D), lanes)
#define HWY_IF_LANES_PER_BLOCK_D(D, lanes) \
HWY_IF_LANES_PER_BLOCK( \
TFromD<D>, HWY_MIN(HWY_MAX_LANES_D(D), 16 / sizeof(TFromD<D>)), lanes)
#define HWY_IF_POW2_LE_D(D, pow2) hwy::EnableIf<D().Pow2() <= pow2>* = nullptr
#define HWY_IF_POW2_GT_D(D, pow2) hwy::EnableIf<(D().Pow2() > pow2)>* = nullptr
#define HWY_IF_U8_D(D) hwy::EnableIf<IsSame<TFromD<D>, uint8_t>()>* = nullptr
#define HWY_IF_U16_D(D) hwy::EnableIf<IsSame<TFromD<D>, uint16_t>()>* = nullptr
#define HWY_IF_U32_D(D) hwy::EnableIf<IsSame<TFromD<D>, uint32_t>()>* = nullptr
#define HWY_IF_U64_D(D) hwy::EnableIf<IsSame<TFromD<D>, uint64_t>()>* = nullptr
#define HWY_IF_I8_D(D) hwy::EnableIf<IsSame<TFromD<D>, int8_t>()>* = nullptr
#define HWY_IF_I16_D(D) hwy::EnableIf<IsSame<TFromD<D>, int16_t>()>* = nullptr
#define HWY_IF_I32_D(D) hwy::EnableIf<IsSame<TFromD<D>, int32_t>()>* = nullptr
#define HWY_IF_I64_D(D) hwy::EnableIf<IsSame<TFromD<D>, int64_t>()>* = nullptr
// Use instead of HWY_IF_T_SIZE_D to avoid ambiguity with float/double
// overloads.
#define HWY_IF_UI32_D(D) \
hwy::EnableIf<IsSame<TFromD<D>, uint32_t>() || \
IsSame<TFromD<D>, int32_t>()>* = nullptr
#define HWY_IF_UI64_D(D) \
hwy::EnableIf<IsSame<TFromD<D>, uint64_t>() || \
IsSame<TFromD<D>, int64_t>()>* = nullptr
#define HWY_IF_BF16_D(D) \
hwy::EnableIf<IsSame<TFromD<D>, bfloat16_t>()>* = nullptr
#define HWY_IF_F16_D(D) hwy::EnableIf<IsSame<TFromD<D>, float16_t>()>* = nullptr
#define HWY_IF_F32_D(D) hwy::EnableIf<IsSame<TFromD<D>, float>()>* = nullptr
#define HWY_IF_F64_D(D) hwy::EnableIf<IsSame<TFromD<D>, double>()>* = nullptr
#define HWY_IF_V_SIZE_D(D, bytes) \
HWY_IF_V_SIZE(TFromD<D>, HWY_MAX_LANES_D(D), bytes)
#define HWY_IF_V_SIZE_LE_D(D, bytes) \
HWY_IF_V_SIZE_LE(TFromD<D>, HWY_MAX_LANES_D(D), bytes)
#define HWY_IF_V_SIZE_GT_D(D, bytes) \
HWY_IF_V_SIZE_GT(TFromD<D>, HWY_MAX_LANES_D(D), bytes)
// Same, but with a vector argument. ops/*-inl.h define their own TFromV.
#define HWY_IF_UNSIGNED_V(V) HWY_IF_UNSIGNED(TFromV<V>)
#define HWY_IF_SIGNED_V(V) HWY_IF_SIGNED(TFromV<V>)
#define HWY_IF_FLOAT_V(V) HWY_IF_FLOAT(TFromV<V>)
#define HWY_IF_NOT_FLOAT_V(V) HWY_IF_NOT_FLOAT(TFromV<V>)
#define HWY_IF_NOT_FLOAT_NOR_SPECIAL_V(V) \
HWY_IF_NOT_FLOAT_NOR_SPECIAL(TFromV<V>)
#define HWY_IF_T_SIZE_V(V, bytes) HWY_IF_T_SIZE(TFromV<V>, bytes)
#define HWY_IF_NOT_T_SIZE_V(V, bytes) HWY_IF_NOT_T_SIZE(TFromV<V>, bytes)
#define HWY_IF_T_SIZE_ONE_OF_V(V, bit_array) \
HWY_IF_T_SIZE_ONE_OF(TFromV<V>, bit_array)
#define HWY_MAX_LANES_V(V) HWY_MAX_LANES_D(DFromV<V>)
#define HWY_IF_V_SIZE_V(V, bytes) \
HWY_IF_V_SIZE(TFromV<V>, HWY_MAX_LANES_V(V), bytes)
#define HWY_IF_V_SIZE_LE_V(V, bytes) \
HWY_IF_V_SIZE_LE(TFromV<V>, HWY_MAX_LANES_V(V), bytes)
#define HWY_IF_V_SIZE_GT_V(V, bytes) \
HWY_IF_V_SIZE_GT(TFromV<V>, HWY_MAX_LANES_V(V), bytes)
// Old names (deprecated)
#define HWY_IF_LANE_SIZE_D(D, bytes) HWY_IF_T_SIZE_D(D, bytes)
#define HWY_IF_NOT_LANE_SIZE_D(D, bytes) HWY_IF_NOT_T_SIZE_D(D, bytes)
// NOLINTNEXTLINE(google-readability-namespace-comments)
} // namespace HWY_NAMESPACE
} // namespace hwy
HWY_AFTER_NAMESPACE();
#endif // HIGHWAY_HWY_OPS_SHARED_TOGGLE
|