diff options
Diffstat (limited to 'third_party/simde/simde/arm/sve/types.h')
-rw-r--r-- | third_party/simde/simde/arm/sve/types.h | 915 |
1 files changed, 915 insertions, 0 deletions
diff --git a/third_party/simde/simde/arm/sve/types.h b/third_party/simde/simde/arm/sve/types.h new file mode 100644 index 0000000000..f0579d96c8 --- /dev/null +++ b/third_party/simde/simde/arm/sve/types.h @@ -0,0 +1,915 @@ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2021 Evan Nemerson <evan@nemerson.com> + */ + +/* TODO: SVE2 is going to be a bit awkward with this setup. We currently + * either use SVE vectors or assume that the vector length is known at + * compile-time. For CPUs which provide SVE but not SVE2 we're going + * to be getting scalable vectors, so we may need to loop through them. + * + * Currently I'm thinking we'll have a separate function for non-SVE + * types. We can call that function in a loop from an SVE version, + * and we can call it once from a resolver. + * + * Unfortunately this is going to mean a lot of boilerplate for SVE, + * which already has several variants of a lot of functions (*_z, *_m, + * etc.), plus overloaded functions in C++ and generic selectors in C. + * + * Anyways, all this means that we're going to need to always define + * the portable types. + * + * The good news is that at least we don't have to deal with + * to/from_private functions; since the no-SVE versions will only be + * called with non-SVE params. */ + +#if !defined(SIMDE_ARM_SVE_TYPES_H) +#define SIMDE_ARM_SVE_TYPES_H + +#include "../../simde-common.h" +#include "../../simde-f16.h" + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +#if defined(SIMDE_VECTOR_SUBSCRIPT) + #define SIMDE_ARM_SVE_DECLARE_VECTOR(Element_Type, Name, Vector_Size) Element_Type Name SIMDE_VECTOR(Vector_Size) +#else + #define SIMDE_ARM_SVE_DECLARE_VECTOR(Element_Type, Name, Vector_Size) Element_Type Name[(Vector_Size) / sizeof(Element_Type)] +#endif + +#if defined(SIMDE_ARM_SVE_NATIVE) + typedef svbool_t simde_svbool_t; + typedef svint8_t simde_svint8_t; + typedef svint16_t simde_svint16_t; + typedef svint32_t simde_svint32_t; + typedef svint64_t simde_svint64_t; + typedef svuint8_t simde_svuint8_t; + typedef svuint16_t simde_svuint16_t; + typedef svuint32_t simde_svuint32_t; + typedef svuint64_t simde_svuint64_t; + #if defined(__ARM_FEATURE_SVE_BF16) + typedef svbfloat16_t simde_svbfloat16_t; + #endif + typedef svfloat16_t simde_svfloat16_t; + typedef svfloat32_t simde_svfloat32_t; + typedef svfloat64_t simde_svfloat64_t; + typedef float32_t simde_float32_t; + typedef float64_t simde_float64_t; +#else + #if SIMDE_NATURAL_VECTOR_SIZE > 0 + #define SIMDE_ARM_SVE_VECTOR_SIZE SIMDE_NATURAL_VECTOR_SIZE + #else + #define SIMDE_ARM_SVE_VECTOR_SIZE (128) + #endif + + typedef simde_float32 simde_float32_t; + typedef simde_float64 simde_float64_t; + + typedef union { + SIMDE_ARM_SVE_DECLARE_VECTOR(int8_t, values, (SIMDE_ARM_SVE_VECTOR_SIZE / 8)); + + #if defined(SIMDE_X86_AVX512BW_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512) + __m512i m512i; + #endif + #if defined(SIMDE_X86_AVX2_NATIVE) + __m256i m256i[(SIMDE_ARM_SVE_VECTOR_SIZE / 8) / sizeof(__m256i)]; + #endif + #if defined(SIMDE_X86_SSE2_NATIVE) + __m128i m128i[(SIMDE_ARM_SVE_VECTOR_SIZE / 8) / sizeof(__m128i)]; + #endif + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int8x16_t neon; + #endif + + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + SIMDE_POWER_ALTIVEC_VECTOR(signed char) altivec; + #endif + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t v128; + #endif + } simde_svint8_t; + + typedef union { + SIMDE_ARM_SVE_DECLARE_VECTOR(int16_t, values, (SIMDE_ARM_SVE_VECTOR_SIZE / 8)); + + #if defined(SIMDE_X86_AVX512BW_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512) + __m512i m512i; + #endif + #if defined(SIMDE_X86_AVX2_NATIVE) + __m256i m256i[(SIMDE_ARM_SVE_VECTOR_SIZE / 8) / sizeof(__m256i)]; + #endif + #if defined(SIMDE_X86_SSE2_NATIVE) + __m128i m128i[(SIMDE_ARM_SVE_VECTOR_SIZE / 8) / sizeof(__m128i)]; + #endif + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int16x8_t neon; + #endif + + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + SIMDE_POWER_ALTIVEC_VECTOR(signed short) altivec; + #endif + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t v128; + #endif + } simde_svint16_t; + + typedef union { + SIMDE_ARM_SVE_DECLARE_VECTOR(int32_t, values, (SIMDE_ARM_SVE_VECTOR_SIZE / 8)); + + #if defined(SIMDE_X86_AVX512BW_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512) + __m512i m512i; + #endif + #if defined(SIMDE_X86_AVX2_NATIVE) + __m256i m256i[(SIMDE_ARM_SVE_VECTOR_SIZE / 8) / sizeof(__m256i)]; + #endif + #if defined(SIMDE_X86_SSE2_NATIVE) + __m128i m128i[(SIMDE_ARM_SVE_VECTOR_SIZE / 8) / sizeof(__m128i)]; + #endif + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int32x4_t neon; + #endif + + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + SIMDE_POWER_ALTIVEC_VECTOR(signed int) altivec; + #endif + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t v128; + #endif + } simde_svint32_t; + + typedef union { + SIMDE_ARM_SVE_DECLARE_VECTOR(int64_t, values, (SIMDE_ARM_SVE_VECTOR_SIZE / 8)); + + #if defined(SIMDE_X86_AVX512BW_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512) + __m512i m512i; + #endif + #if defined(SIMDE_X86_AVX2_NATIVE) + __m256i m256i[(SIMDE_ARM_SVE_VECTOR_SIZE / 8) / sizeof(__m256i)]; + #endif + #if defined(SIMDE_X86_SSE2_NATIVE) + __m128i m128i[(SIMDE_ARM_SVE_VECTOR_SIZE / 8) / sizeof(__m128i)]; + #endif + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int64x2_t neon; + #endif + + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + SIMDE_POWER_ALTIVEC_VECTOR(signed long long int) altivec; + #endif + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t v128; + #endif + } simde_svint64_t; + + typedef union { + SIMDE_ARM_SVE_DECLARE_VECTOR(uint8_t, values, (SIMDE_ARM_SVE_VECTOR_SIZE / 8)); + + #if defined(SIMDE_X86_AVX512BW_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512) + __m512i m512i; + #endif + #if defined(SIMDE_X86_AVX2_NATIVE) + __m256i m256i[(SIMDE_ARM_SVE_VECTOR_SIZE / 8) / sizeof(__m256i)]; + #endif + #if defined(SIMDE_X86_SSE2_NATIVE) + __m128i m128i[(SIMDE_ARM_SVE_VECTOR_SIZE / 8) / sizeof(__m128i)]; + #endif + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint8x16_t neon; + #endif + + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) altivec; + #endif + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t v128; + #endif + } simde_svuint8_t; + + typedef union { + SIMDE_ARM_SVE_DECLARE_VECTOR(uint16_t, values, (SIMDE_ARM_SVE_VECTOR_SIZE / 8)); + + #if defined(SIMDE_X86_AVX512BW_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512) + __m512i m512i; + #endif + #if defined(SIMDE_X86_AVX2_NATIVE) + __m256i m256i[(SIMDE_ARM_SVE_VECTOR_SIZE / 8) / sizeof(__m256i)]; + #endif + #if defined(SIMDE_X86_SSE2_NATIVE) + __m128i m128i[(SIMDE_ARM_SVE_VECTOR_SIZE / 8) / sizeof(__m128i)]; + #endif + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint16x8_t neon; + #endif + + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) altivec; + #endif + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t v128; + #endif + } simde_svuint16_t; + + typedef union { + SIMDE_ARM_SVE_DECLARE_VECTOR(uint32_t, values, (SIMDE_ARM_SVE_VECTOR_SIZE / 8)); + + #if defined(SIMDE_X86_AVX512BW_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512) + __m512i m512i; + #endif + #if defined(SIMDE_X86_AVX2_NATIVE) + __m256i m256i[(SIMDE_ARM_SVE_VECTOR_SIZE / 8) / sizeof(__m256i)]; + #endif + #if defined(SIMDE_X86_SSE2_NATIVE) + __m128i m128i[(SIMDE_ARM_SVE_VECTOR_SIZE / 8) / sizeof(__m128i)]; + #endif + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint32x4_t neon; + #endif + + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec; + #endif + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t v128; + #endif + } simde_svuint32_t; + + typedef union { + SIMDE_ARM_SVE_DECLARE_VECTOR(uint64_t, values, (SIMDE_ARM_SVE_VECTOR_SIZE / 8)); + + #if defined(SIMDE_X86_AVX512BW_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512) + __m512i m512i; + #endif + #if defined(SIMDE_X86_AVX2_NATIVE) + __m256i m256i[(SIMDE_ARM_SVE_VECTOR_SIZE / 8) / sizeof(__m256i)]; + #endif + #if defined(SIMDE_X86_SSE2_NATIVE) + __m128i m128i[(SIMDE_ARM_SVE_VECTOR_SIZE / 8) / sizeof(__m128i)]; + #endif + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + uint64x2_t neon; + #endif + + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long int) altivec; + #endif + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t v128; + #endif + } simde_svuint64_t; + + typedef union { + SIMDE_ARM_SVE_DECLARE_VECTOR(uint16_t, values, (SIMDE_ARM_SVE_VECTOR_SIZE / 8)); + + #if defined(SIMDE_X86_AVX512BW_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512) + __m512i m512i; + #endif + #if defined(SIMDE_X86_AVX2_NATIVE) + __m256i m256i[(SIMDE_ARM_SVE_VECTOR_SIZE / 8) / sizeof(__m256i)]; + #endif + #if defined(SIMDE_X86_SSE2_NATIVE) + __m128i m128i[(SIMDE_ARM_SVE_VECTOR_SIZE / 8) / sizeof(__m128i)]; + #endif + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) + float16x8_t neon; + #endif + + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) altivec; + #endif + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t v128; + #endif + } simde_svfloat16_t; + + typedef union { + SIMDE_ARM_SVE_DECLARE_VECTOR(uint16_t, values, (SIMDE_ARM_SVE_VECTOR_SIZE / 8)); + + #if defined(SIMDE_X86_AVX512BW_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512) + __m512i m512i; + #endif + #if defined(SIMDE_X86_AVX2_NATIVE) + __m256i m256i[(SIMDE_ARM_SVE_VECTOR_SIZE / 8) / sizeof(__m256i)]; + #endif + #if defined(SIMDE_X86_SSE2_NATIVE) + __m128i m128i[(SIMDE_ARM_SVE_VECTOR_SIZE / 8) / sizeof(__m128i)]; + #endif + + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) altivec; + #endif + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t v128; + #endif + } simde_svbfloat16_t; + + typedef union { + SIMDE_ARM_SVE_DECLARE_VECTOR(simde_float32, values, (SIMDE_ARM_SVE_VECTOR_SIZE / 8)); + + #if defined(SIMDE_X86_AVX512BW_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512) + __m512 m512; + #endif + #if defined(SIMDE_X86_AVX_NATIVE) + __m256 m256[(SIMDE_ARM_SVE_VECTOR_SIZE / 8) / sizeof(__m256)]; + #endif + #if defined(SIMDE_X86_SSE_NATIVE) + __m128 m128[(SIMDE_ARM_SVE_VECTOR_SIZE / 8) / sizeof(__m128)]; + #endif + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + float32x4_t neon; + #endif + + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + SIMDE_POWER_ALTIVEC_VECTOR(float) altivec; + #endif + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t v128; + #endif + } simde_svfloat32_t; + + typedef union { + SIMDE_ARM_SVE_DECLARE_VECTOR(simde_float64, values, (SIMDE_ARM_SVE_VECTOR_SIZE / 8)); + + #if defined(SIMDE_X86_AVX512BW_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512) + __m512d m512d; + #endif + #if defined(SIMDE_X86_AVX2_NATIVE) + __m256d m256d[(SIMDE_ARM_SVE_VECTOR_SIZE / 8) / sizeof(__m256d)]; + #endif + #if defined(SIMDE_X86_SSE2_NATIVE) + __m128d m128d[(SIMDE_ARM_SVE_VECTOR_SIZE / 8) / sizeof(__m128d)]; + #endif + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + float64x2_t neon; + #endif + + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + SIMDE_POWER_ALTIVEC_VECTOR(double) altivec; + #endif + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t v128; + #endif + } simde_svfloat64_t; + + #if defined(SIMDE_X86_AVX512BW_NATIVE) && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) + typedef struct { + __mmask64 value; + int type; + } simde_svbool_t; + + #if defined(__BMI2__) + static const uint64_t simde_arm_sve_mask_bp_lo_ = UINT64_C(0x5555555555555555); + static const uint64_t simde_arm_sve_mask_bp_hi_ = UINT64_C(0xaaaaaaaaaaaaaaaa); + + SIMDE_FUNCTION_ATTRIBUTES + __mmask64 + simde_arm_sve_mmask32_to_mmask64(__mmask32 m) { + return HEDLEY_STATIC_CAST(__mmask64, + _pdep_u64(HEDLEY_STATIC_CAST(uint64_t, m), simde_arm_sve_mask_bp_lo_) | + _pdep_u64(HEDLEY_STATIC_CAST(uint64_t, m), simde_arm_sve_mask_bp_hi_)); + } + + SIMDE_FUNCTION_ATTRIBUTES + __mmask32 + simde_arm_sve_mmask16_to_mmask32(__mmask16 m) { + return HEDLEY_STATIC_CAST(__mmask32, + _pdep_u32(HEDLEY_STATIC_CAST(uint32_t, m), HEDLEY_STATIC_CAST(uint32_t, simde_arm_sve_mask_bp_lo_)) | + _pdep_u32(HEDLEY_STATIC_CAST(uint32_t, m), HEDLEY_STATIC_CAST(uint32_t, simde_arm_sve_mask_bp_hi_))); + } + + SIMDE_FUNCTION_ATTRIBUTES + __mmask16 + simde_arm_sve_mmask8_to_mmask16(__mmask8 m) { + return HEDLEY_STATIC_CAST(__mmask16, + _pdep_u32(HEDLEY_STATIC_CAST(uint32_t, m), HEDLEY_STATIC_CAST(uint32_t, simde_arm_sve_mask_bp_lo_)) | + _pdep_u32(HEDLEY_STATIC_CAST(uint32_t, m), HEDLEY_STATIC_CAST(uint32_t, simde_arm_sve_mask_bp_hi_))); + } + + SIMDE_FUNCTION_ATTRIBUTES + __mmask8 + simde_arm_sve_mmask4_to_mmask8(__mmask8 m) { + return HEDLEY_STATIC_CAST(__mmask8, + _pdep_u32(HEDLEY_STATIC_CAST(uint32_t, m), HEDLEY_STATIC_CAST(uint32_t, simde_arm_sve_mask_bp_lo_)) | + _pdep_u32(HEDLEY_STATIC_CAST(uint32_t, m), HEDLEY_STATIC_CAST(uint32_t, simde_arm_sve_mask_bp_hi_))); + } + + SIMDE_FUNCTION_ATTRIBUTES + __mmask32 + simde_arm_sve_mmask64_to_mmask32(__mmask64 m) { + return HEDLEY_STATIC_CAST(__mmask32, + _pext_u64(HEDLEY_STATIC_CAST(uint64_t, m), HEDLEY_STATIC_CAST(uint64_t, simde_arm_sve_mask_bp_lo_)) & + _pext_u64(HEDLEY_STATIC_CAST(uint64_t, m), HEDLEY_STATIC_CAST(uint64_t, simde_arm_sve_mask_bp_hi_))); + } + + SIMDE_FUNCTION_ATTRIBUTES + __mmask16 + simde_arm_sve_mmask32_to_mmask16(__mmask32 m) { + return HEDLEY_STATIC_CAST(__mmask16, + _pext_u32(HEDLEY_STATIC_CAST(uint32_t, m), HEDLEY_STATIC_CAST(uint32_t, simde_arm_sve_mask_bp_lo_)) & + _pext_u32(HEDLEY_STATIC_CAST(uint32_t, m), HEDLEY_STATIC_CAST(uint32_t, simde_arm_sve_mask_bp_hi_))); + } + + SIMDE_FUNCTION_ATTRIBUTES + __mmask8 + simde_arm_sve_mmask16_to_mmask8(__mmask16 m) { + return HEDLEY_STATIC_CAST(__mmask8, + _pext_u32(HEDLEY_STATIC_CAST(uint32_t, m), HEDLEY_STATIC_CAST(uint32_t, simde_arm_sve_mask_bp_lo_)) & + _pext_u32(HEDLEY_STATIC_CAST(uint32_t, m), HEDLEY_STATIC_CAST(uint32_t, simde_arm_sve_mask_bp_hi_))); + } + + SIMDE_FUNCTION_ATTRIBUTES + __mmask8 + simde_arm_sve_mmask8_to_mmask4(__mmask8 m) { + return HEDLEY_STATIC_CAST(__mmask8, + _pext_u32(HEDLEY_STATIC_CAST(uint32_t, m), HEDLEY_STATIC_CAST(uint32_t, simde_arm_sve_mask_bp_lo_)) & + _pext_u32(HEDLEY_STATIC_CAST(uint32_t, m), HEDLEY_STATIC_CAST(uint32_t, simde_arm_sve_mask_bp_hi_))); + } + #else + SIMDE_FUNCTION_ATTRIBUTES + __mmask64 + simde_arm_sve_mmask32_to_mmask64(__mmask32 m) { + uint64_t e = HEDLEY_STATIC_CAST(uint64_t, m); + uint64_t o = HEDLEY_STATIC_CAST(uint64_t, m); + + e = (e | (e << 16)) & UINT64_C(0x0000ffff0000ffff); + e = (e | (e << 8)) & UINT64_C(0x00ff00ff00ff00ff); + e = (e | (e << 4)) & UINT64_C(0x0f0f0f0f0f0f0f0f); + e = (e | (e << 2)) & UINT64_C(0x3333333333333333); + e = (e | (e << 1)) & UINT64_C(0x5555555555555555); + + o = (o | (o << 16)) & UINT64_C(0x0000ffff0000ffff); + o = (o | (o << 8)) & UINT64_C(0x00ff00ff00ff00ff); + o = (o | (o << 4)) & UINT64_C(0x0f0f0f0f0f0f0f0f); + o = (o | (o << 2)) & UINT64_C(0x3333333333333333); + o = (o | (o << 1)) & UINT64_C(0x5555555555555555); + + return HEDLEY_STATIC_CAST(__mmask64, e | (o << 1)); + } + + SIMDE_FUNCTION_ATTRIBUTES + __mmask32 + simde_arm_sve_mmask16_to_mmask32(__mmask16 m) { + uint32_t e = HEDLEY_STATIC_CAST(uint32_t, m); + uint32_t o = HEDLEY_STATIC_CAST(uint32_t, m); + + e = (e | (e << 8)) & UINT32_C(0x00FF00FF); + e = (e | (e << 4)) & UINT32_C(0x0F0F0F0F); + e = (e | (e << 2)) & UINT32_C(0x33333333); + e = (e | (e << 1)) & UINT32_C(0x55555555); + + o = (o | (o << 8)) & UINT32_C(0x00FF00FF); + o = (o | (o << 4)) & UINT32_C(0x0F0F0F0F); + o = (o | (o << 2)) & UINT32_C(0x33333333); + o = (o | (o << 1)) & UINT32_C(0x55555555); + + return HEDLEY_STATIC_CAST(__mmask32, e | (o << 1)); + } + + SIMDE_FUNCTION_ATTRIBUTES + __mmask16 + simde_arm_sve_mmask8_to_mmask16(__mmask8 m) { + uint16_t e = HEDLEY_STATIC_CAST(uint16_t, m); + uint16_t o = HEDLEY_STATIC_CAST(uint16_t, m); + + e = (e | (e << 4)) & UINT16_C(0x0f0f); + e = (e | (e << 2)) & UINT16_C(0x3333); + e = (e | (e << 1)) & UINT16_C(0x5555); + + o = (o | (o << 4)) & UINT16_C(0x0f0f); + o = (o | (o << 2)) & UINT16_C(0x3333); + o = (o | (o << 1)) & UINT16_C(0x5555); + + return HEDLEY_STATIC_CAST(uint16_t, e | (o << 1)); + } + + SIMDE_FUNCTION_ATTRIBUTES + __mmask8 + simde_arm_sve_mmask4_to_mmask8(__mmask8 m) { + uint8_t e = HEDLEY_STATIC_CAST(uint8_t, m); + uint8_t o = HEDLEY_STATIC_CAST(uint8_t, m); + + e = (e | (e << 2)) & UINT8_C(0x33); + e = (e | (e << 1)) & UINT8_C(0x55); + + o = (o | (o << 2)) & UINT8_C(0x33); + o = (o | (o << 1)) & UINT8_C(0x55); + + return HEDLEY_STATIC_CAST(uint8_t, e | (o << 1)); + } + + SIMDE_FUNCTION_ATTRIBUTES + __mmask32 + simde_arm_sve_mmask64_to_mmask32(__mmask64 m) { + uint64_t l = (HEDLEY_STATIC_CAST(uint64_t, m) ) & UINT64_C(0x5555555555555555); + l = (l | (l >> 1)) & UINT64_C(0x3333333333333333); + l = (l | (l >> 2)) & UINT64_C(0x0f0f0f0f0f0f0f0f); + l = (l | (l >> 4)) & UINT64_C(0x00ff00ff00ff00ff); + l = (l | (l >> 8)) & UINT64_C(0x0000ffff0000ffff); + + uint64_t h = (HEDLEY_STATIC_CAST(uint64_t, m) >> 1) & UINT64_C(0x5555555555555555); + h = (h | (h >> 1)) & UINT64_C(0x3333333333333333); + h = (h | (h >> 2)) & UINT64_C(0x0f0f0f0f0f0f0f0f); + h = (h | (h >> 4)) & UINT64_C(0x00ff00ff00ff00ff); + h = (h | (h >> 8)) & UINT64_C(0x0000ffff0000ffff); + + return HEDLEY_STATIC_CAST(uint32_t, l & h); + } + + SIMDE_FUNCTION_ATTRIBUTES + __mmask16 + simde_arm_sve_mmask32_to_mmask16(__mmask32 m) { + uint32_t l = (HEDLEY_STATIC_CAST(uint32_t, m) ) & UINT32_C(0x55555555); + l = (l | (l >> 1)) & UINT32_C(0x33333333); + l = (l | (l >> 2)) & UINT32_C(0x0f0f0f0f); + l = (l | (l >> 4)) & UINT32_C(0x00ff00ff); + l = (l | (l >> 8)) & UINT32_C(0x0000ffff); + + uint32_t h = (HEDLEY_STATIC_CAST(uint32_t, m) >> 1) & UINT32_C(0x55555555); + h = (h | (h >> 1)) & UINT32_C(0x33333333); + h = (h | (h >> 2)) & UINT32_C(0x0f0f0f0f); + h = (h | (h >> 4)) & UINT32_C(0x00ff00ff); + h = (h | (h >> 8)) & UINT32_C(0x0000ffff); + + return HEDLEY_STATIC_CAST(uint16_t, l & h); + } + + SIMDE_FUNCTION_ATTRIBUTES + __mmask8 + simde_arm_sve_mmask16_to_mmask8(__mmask16 m) { + uint16_t l = (HEDLEY_STATIC_CAST(uint16_t, m) ) & UINT16_C(0x5555); + l = (l | (l >> 1)) & UINT16_C(0x3333); + l = (l | (l >> 2)) & UINT16_C(0x0f0f); + l = (l | (l >> 4)) & UINT16_C(0x00ff); + + uint16_t h = (HEDLEY_STATIC_CAST(uint16_t, m) >> 1) & UINT16_C(0x5555); + h = (h | (h >> 1)) & UINT16_C(0x3333); + h = (h | (h >> 2)) & UINT16_C(0x0f0f); + h = (h | (h >> 4)) & UINT16_C(0x00ff); + + return HEDLEY_STATIC_CAST(uint8_t, l & h); + } + + SIMDE_FUNCTION_ATTRIBUTES + __mmask8 + simde_arm_sve_mmask8_to_mmask4(__mmask8 m) { + uint8_t l = (HEDLEY_STATIC_CAST(uint8_t, m) ) & UINT8_C(0x55); + l = (l | (l >> 1)) & UINT8_C(0x33); + l = (l | (l >> 2)) & UINT8_C(0x0f); + l = (l | (l >> 4)) & UINT8_C(0xff); + + uint8_t h = (HEDLEY_STATIC_CAST(uint8_t, m) >> 1) & UINT8_C(0x55); + h = (h | (h >> 1)) & UINT8_C(0x33); + h = (h | (h >> 2)) & UINT8_C(0x0f); + h = (h | (h >> 4)) & UINT8_C(0xff); + + return HEDLEY_STATIC_CAST(uint8_t, l & h); + } + #endif + + typedef enum { + SIMDE_ARM_SVE_SVBOOL_TYPE_MMASK64, + SIMDE_ARM_SVE_SVBOOL_TYPE_MMASK32, + SIMDE_ARM_SVE_SVBOOL_TYPE_MMASK16, + SIMDE_ARM_SVE_SVBOOL_TYPE_MMASK8, + #if SIMDE_ARM_SVE_VECTOR_SIZE < 512 + SIMDE_ARM_SVE_SVBOOL_TYPE_MMASK4, + #endif + } simde_svbool_mmask_type; + + HEDLEY_CONST HEDLEY_ALWAYS_INLINE + simde_svbool_t + simde_svbool_from_mmask64(__mmask64 mi) { + simde_svbool_t b; + + b.value = HEDLEY_STATIC_CAST(__mmask64, mi); + b.type = SIMDE_ARM_SVE_SVBOOL_TYPE_MMASK64; + + return b; + } + + SIMDE_FUNCTION_ATTRIBUTES HEDLEY_CONST + simde_svbool_t + simde_svbool_from_mmask32(__mmask32 mi) { + simde_svbool_t b; + + b.value = HEDLEY_STATIC_CAST(__mmask64, mi); + b.type = SIMDE_ARM_SVE_SVBOOL_TYPE_MMASK32; + + return b; + } + + SIMDE_FUNCTION_ATTRIBUTES HEDLEY_CONST + simde_svbool_t + simde_svbool_from_mmask16(__mmask16 mi) { + simde_svbool_t b; + + b.value = HEDLEY_STATIC_CAST(__mmask64, mi); + b.type = SIMDE_ARM_SVE_SVBOOL_TYPE_MMASK16; + + return b; + } + + SIMDE_FUNCTION_ATTRIBUTES HEDLEY_CONST + simde_svbool_t + simde_svbool_from_mmask8(__mmask8 mi) { + simde_svbool_t b; + + b.value = HEDLEY_STATIC_CAST(__mmask64, mi); + b.type = SIMDE_ARM_SVE_SVBOOL_TYPE_MMASK8; + + return b; + } + + #if SIMDE_ARM_SVE_VECTOR_SIZE < 512 + SIMDE_FUNCTION_ATTRIBUTES HEDLEY_CONST + simde_svbool_t + simde_svbool_from_mmask4(__mmask8 mi) { + simde_svbool_t b; + + b.value = HEDLEY_STATIC_CAST(__mmask64, mi); + b.type = SIMDE_ARM_SVE_SVBOOL_TYPE_MMASK4; + + return b; + } + + SIMDE_FUNCTION_ATTRIBUTES HEDLEY_CONST + __mmask8 + simde_svbool_to_mmask4(simde_svbool_t b) { + __mmask64 tmp = b.value; + + switch (b.type) { + case SIMDE_ARM_SVE_SVBOOL_TYPE_MMASK64: + tmp = HEDLEY_STATIC_CAST(__mmask64, simde_arm_sve_mmask64_to_mmask32(HEDLEY_STATIC_CAST(__mmask64, tmp))); + HEDLEY_FALL_THROUGH; + case SIMDE_ARM_SVE_SVBOOL_TYPE_MMASK32: + tmp = HEDLEY_STATIC_CAST(__mmask64, simde_arm_sve_mmask32_to_mmask16(HEDLEY_STATIC_CAST(__mmask32, tmp))); + HEDLEY_FALL_THROUGH; + case SIMDE_ARM_SVE_SVBOOL_TYPE_MMASK16: + tmp = HEDLEY_STATIC_CAST(__mmask64, simde_arm_sve_mmask16_to_mmask8(HEDLEY_STATIC_CAST(__mmask16, tmp))); + HEDLEY_FALL_THROUGH; + case SIMDE_ARM_SVE_SVBOOL_TYPE_MMASK8: + tmp = HEDLEY_STATIC_CAST(__mmask64, simde_arm_sve_mmask8_to_mmask4(HEDLEY_STATIC_CAST(__mmask8, tmp))); + } + + return HEDLEY_STATIC_CAST(__mmask8, tmp); + } + #endif + + SIMDE_FUNCTION_ATTRIBUTES HEDLEY_CONST + __mmask8 + simde_svbool_to_mmask8(simde_svbool_t b) { + __mmask64 tmp = b.value; + + switch (b.type) { + case SIMDE_ARM_SVE_SVBOOL_TYPE_MMASK64: + tmp = HEDLEY_STATIC_CAST(__mmask64, simde_arm_sve_mmask64_to_mmask32(HEDLEY_STATIC_CAST(__mmask64, tmp))); + HEDLEY_FALL_THROUGH; + case SIMDE_ARM_SVE_SVBOOL_TYPE_MMASK32: + tmp = HEDLEY_STATIC_CAST(__mmask64, simde_arm_sve_mmask32_to_mmask16(HEDLEY_STATIC_CAST(__mmask32, tmp))); + HEDLEY_FALL_THROUGH; + case SIMDE_ARM_SVE_SVBOOL_TYPE_MMASK16: + tmp = HEDLEY_STATIC_CAST(__mmask64, simde_arm_sve_mmask16_to_mmask8(HEDLEY_STATIC_CAST(__mmask16, tmp))); + HEDLEY_FALL_THROUGH; + case SIMDE_ARM_SVE_SVBOOL_TYPE_MMASK8: + break; + + #if SIMDE_ARM_SVE_VECTOR_SIZE < 512 + case SIMDE_ARM_SVE_SVBOOL_TYPE_MMASK4: + tmp = HEDLEY_STATIC_CAST(__mmask64, simde_arm_sve_mmask4_to_mmask8(HEDLEY_STATIC_CAST(__mmask8, tmp))); + #endif + } + + return HEDLEY_STATIC_CAST(__mmask8, tmp); + } + + SIMDE_FUNCTION_ATTRIBUTES HEDLEY_CONST + __mmask16 + simde_svbool_to_mmask16(simde_svbool_t b) { + __mmask64 tmp = b.value; + + switch (b.type) { + case SIMDE_ARM_SVE_SVBOOL_TYPE_MMASK64: + tmp = HEDLEY_STATIC_CAST(__mmask64, simde_arm_sve_mmask64_to_mmask32(HEDLEY_STATIC_CAST(__mmask64, tmp))); + HEDLEY_FALL_THROUGH; + case SIMDE_ARM_SVE_SVBOOL_TYPE_MMASK32: + tmp = HEDLEY_STATIC_CAST(__mmask64, simde_arm_sve_mmask32_to_mmask16(HEDLEY_STATIC_CAST(__mmask32, tmp))); + HEDLEY_FALL_THROUGH; + case SIMDE_ARM_SVE_SVBOOL_TYPE_MMASK16: + break; + + #if SIMDE_ARM_SVE_VECTOR_SIZE < 512 + case SIMDE_ARM_SVE_SVBOOL_TYPE_MMASK4: + tmp = HEDLEY_STATIC_CAST(__mmask64, simde_arm_sve_mmask4_to_mmask8(HEDLEY_STATIC_CAST(__mmask8, tmp))); + HEDLEY_FALL_THROUGH; + #endif + case SIMDE_ARM_SVE_SVBOOL_TYPE_MMASK8: + tmp = HEDLEY_STATIC_CAST(__mmask64, simde_arm_sve_mmask8_to_mmask16(HEDLEY_STATIC_CAST(__mmask8, tmp))); + } + + return HEDLEY_STATIC_CAST(__mmask16, tmp); + } + + SIMDE_FUNCTION_ATTRIBUTES HEDLEY_CONST + __mmask32 + simde_svbool_to_mmask32(simde_svbool_t b) { + __mmask64 tmp = b.value; + + switch (b.type) { + case SIMDE_ARM_SVE_SVBOOL_TYPE_MMASK64: + tmp = HEDLEY_STATIC_CAST(__mmask64, simde_arm_sve_mmask64_to_mmask32(HEDLEY_STATIC_CAST(__mmask64, tmp))); + HEDLEY_FALL_THROUGH; + case SIMDE_ARM_SVE_SVBOOL_TYPE_MMASK32: + break; + + #if SIMDE_ARM_SVE_VECTOR_SIZE < 512 + case SIMDE_ARM_SVE_SVBOOL_TYPE_MMASK4: + tmp = HEDLEY_STATIC_CAST(__mmask64, simde_arm_sve_mmask4_to_mmask8(HEDLEY_STATIC_CAST(__mmask8, tmp))); + HEDLEY_FALL_THROUGH; + #endif + case SIMDE_ARM_SVE_SVBOOL_TYPE_MMASK8: + tmp = HEDLEY_STATIC_CAST(__mmask64, simde_arm_sve_mmask8_to_mmask16(HEDLEY_STATIC_CAST(__mmask8, tmp))); + HEDLEY_FALL_THROUGH; + case SIMDE_ARM_SVE_SVBOOL_TYPE_MMASK16: + tmp = HEDLEY_STATIC_CAST(__mmask64, simde_arm_sve_mmask16_to_mmask32(HEDLEY_STATIC_CAST(__mmask16, tmp))); + } + + return HEDLEY_STATIC_CAST(__mmask32, tmp); + } + + SIMDE_FUNCTION_ATTRIBUTES HEDLEY_CONST + __mmask64 + simde_svbool_to_mmask64(simde_svbool_t b) { + __mmask64 tmp = b.value; + + switch (b.type) { + case SIMDE_ARM_SVE_SVBOOL_TYPE_MMASK64: + break; + + #if SIMDE_ARM_SVE_VECTOR_SIZE < 512 + case SIMDE_ARM_SVE_SVBOOL_TYPE_MMASK4: + tmp = HEDLEY_STATIC_CAST(__mmask64, simde_arm_sve_mmask4_to_mmask8(HEDLEY_STATIC_CAST(__mmask8, tmp))); + HEDLEY_FALL_THROUGH; + #endif + case SIMDE_ARM_SVE_SVBOOL_TYPE_MMASK8: + tmp = HEDLEY_STATIC_CAST(__mmask64, simde_arm_sve_mmask8_to_mmask16(HEDLEY_STATIC_CAST(__mmask8, tmp))); + HEDLEY_FALL_THROUGH; + case SIMDE_ARM_SVE_SVBOOL_TYPE_MMASK16: + tmp = HEDLEY_STATIC_CAST(__mmask64, simde_arm_sve_mmask16_to_mmask32(HEDLEY_STATIC_CAST(__mmask16, tmp))); + HEDLEY_FALL_THROUGH; + case SIMDE_ARM_SVE_SVBOOL_TYPE_MMASK32: + tmp = HEDLEY_STATIC_CAST(__mmask64, simde_arm_sve_mmask32_to_mmask64(HEDLEY_STATIC_CAST(__mmask32, tmp))); + } + + return HEDLEY_STATIC_CAST(__mmask64, tmp); + } + + /* TODO: we're going to need need svbool_to/from_svint* functions + * for when we can't implement a function using AVX-512. */ + #else + typedef union { + SIMDE_ARM_SVE_DECLARE_VECTOR( int8_t, values_i8, (SIMDE_ARM_SVE_VECTOR_SIZE / 8)); + SIMDE_ARM_SVE_DECLARE_VECTOR( int16_t, values_i16, (SIMDE_ARM_SVE_VECTOR_SIZE / 8)); + SIMDE_ARM_SVE_DECLARE_VECTOR( int32_t, values_i32, (SIMDE_ARM_SVE_VECTOR_SIZE / 8)); + SIMDE_ARM_SVE_DECLARE_VECTOR( int64_t, values_i64, (SIMDE_ARM_SVE_VECTOR_SIZE / 8)); + SIMDE_ARM_SVE_DECLARE_VECTOR( uint8_t, values_u8, (SIMDE_ARM_SVE_VECTOR_SIZE / 8)); + SIMDE_ARM_SVE_DECLARE_VECTOR(uint16_t, values_u16, (SIMDE_ARM_SVE_VECTOR_SIZE / 8)); + SIMDE_ARM_SVE_DECLARE_VECTOR(uint32_t, values_u32, (SIMDE_ARM_SVE_VECTOR_SIZE / 8)); + SIMDE_ARM_SVE_DECLARE_VECTOR(uint64_t, values_u64, (SIMDE_ARM_SVE_VECTOR_SIZE / 8)); + + #if defined(SIMDE_X86_AVX512BW_NATIVE) && (SIMDE_ARM_SVE_VECTOR_SIZE >= 512) + __m512i m512i; + #endif + #if defined(SIMDE_X86_AVX2_NATIVE) + __m256i m256i[(SIMDE_ARM_SVE_VECTOR_SIZE / 8) / sizeof(__m256i)]; + #endif + #if defined(SIMDE_X86_SSE2_NATIVE) + __m128i m128i[(SIMDE_ARM_SVE_VECTOR_SIZE / 8) / sizeof(__m128i)]; + #endif + + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + int8x16_t neon_i8; + int16x8_t neon_i16; + int32x4_t neon_i32; + int64x2_t neon_i64; + uint8x16_t neon_u8; + uint16x8_t neon_u16; + uint32x4_t neon_u32; + uint64x2_t neon_u64; + #endif + + #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + SIMDE_POWER_ALTIVEC_VECTOR(SIMDE_POWER_ALTIVEC_BOOL char) altivec_b8; + SIMDE_POWER_ALTIVEC_VECTOR(SIMDE_POWER_ALTIVEC_BOOL short) altivec_b16; + SIMDE_POWER_ALTIVEC_VECTOR(SIMDE_POWER_ALTIVEC_BOOL int) altivec_b32; + #endif + #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) + SIMDE_POWER_ALTIVEC_VECTOR(SIMDE_POWER_ALTIVEC_BOOL long long) altivec_b64; + #endif + + #if defined(SIMDE_WASM_SIMD128_NATIVE) + v128_t v128; + #endif + } simde_svbool_t; + + SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svbool_to_svint8, simde_svint8_t, simde_svbool_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svbool_from_svint8, simde_svbool_t, simde_svint8_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svbool_to_svint16, simde_svint16_t, simde_svbool_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_svbool_from_svint16, simde_svbool_t, simde_svint16_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svbool_to_svint32, simde_svint32_t, simde_svbool_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_svbool_from_svint32, simde_svbool_t, simde_svint32_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svbool_to_svint64, simde_svint64_t, simde_svbool_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_svbool_from_svint64, simde_svbool_t, simde_svint64_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svbool_to_svuint8, simde_svuint8_t, simde_svbool_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_svbool_from_svuint8, simde_svbool_t, simde_svuint8_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svbool_to_svuint16, simde_svuint16_t, simde_svbool_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_svbool_from_svuint16, simde_svbool_t, simde_svuint16_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svbool_to_svuint32, simde_svuint32_t, simde_svbool_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_svbool_from_svuint32, simde_svbool_t, simde_svuint32_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_( simde_svbool_to_svuint64, simde_svuint64_t, simde_svbool_t) + SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_svbool_from_svuint64, simde_svbool_t, simde_svuint64_t) + #endif + + #if defined(SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES) + typedef simde_svbool_t svbool_t; + typedef simde_svint8_t svint8_t; + typedef simde_svint16_t svint16_t; + typedef simde_svint32_t svint32_t; + typedef simde_svint64_t svint64_t; + typedef simde_svuint8_t svuint8_t; + typedef simde_svuint16_t svuint16_t; + typedef simde_svuint32_t svuint32_t; + typedef simde_svuint64_t svuint64_t; + typedef simde_svfloat16_t svfloat16_t; + typedef simde_svbfloat16_t svbfloat16_t; + typedef simde_svfloat32_t svfloat32_t; + typedef simde_svfloat64_t svfloat64_t; + #endif +#endif + +#if !defined(SIMDE_ARM_SVE_DEFAULT_UNDEFINED_SUFFIX) + #define SIMDE_ARM_SVE_DEFAULT_UNDEFINED_SUFFIX z +#endif +#define SIMDE_ARM_SVE_UNDEFINED_SYMBOL(name) HEDLEY_CONCAT3(name, _, SIMDE_ARM_SVE_DEFAULT_UNDEFINED_SUFFIX) + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +/* These are going to be used pretty much everywhere since they are + * used to create the loops SVE requires. Since we want to support + * only including the files you need instead of just using sve.h, + * it's helpful to pull these in here. While this file is called + * arm/sve/types.h, it might be better to think of it more as + * arm/sve/common.h. */ +#include "cnt.h" +#include "ld1.h" +#include "ptest.h" +#include "ptrue.h" +#include "st1.h" +#include "whilelt.h" + +#endif /* SIMDE_ARM_SVE_TYPES_H */ |