diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 19:33:14 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 19:33:14 +0000 |
commit | 36d22d82aa202bb199967e9512281e9a53db42c9 (patch) | |
tree | 105e8c98ddea1c1e4784a60a5a6410fa416be2de /security/nss/lib/freebl/verified/libintvector.h | |
parent | Initial commit. (diff) | |
download | firefox-esr-upstream.tar.xz firefox-esr-upstream.zip |
Adding upstream version 115.7.0esr.upstream/115.7.0esrupstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'security/nss/lib/freebl/verified/libintvector.h')
-rw-r--r-- | security/nss/lib/freebl/verified/libintvector.h | 915 |
1 files changed, 915 insertions, 0 deletions
diff --git a/security/nss/lib/freebl/verified/libintvector.h b/security/nss/lib/freebl/verified/libintvector.h new file mode 100644 index 0000000000..fab6a35d18 --- /dev/null +++ b/security/nss/lib/freebl/verified/libintvector.h @@ -0,0 +1,915 @@ +#ifndef __Vec_Intrin_H +#define __Vec_Intrin_H + +#include <sys/types.h> + +/* We include config.h here to ensure that the various feature-flags are + * properly brought into scope. Users can either run the configure script, or + * write a config.h themselves and put it under version control. */ +#if defined(__has_include) +#if __has_include("config.h") +#include "config.h" +#endif +#endif + +/* # DEBUGGING: + * ============ + * It is possible to debug the current definitions by using libintvector_debug.h + * See the include at the bottom of the file. */ + +#define Lib_IntVector_Intrinsics_bit_mask64(x) -((x)&1) + +#if defined(__x86_64__) || defined(_M_X64) + +#if defined(HACL_CAN_COMPILE_VEC128) + +#include <emmintrin.h> +#include <tmmintrin.h> +#include <smmintrin.h> + +typedef __m128i Lib_IntVector_Intrinsics_vec128; + +#define Lib_IntVector_Intrinsics_ni_aes_enc(x0, x1) \ + (_mm_aesenc_si128(x0, x1)) + +#define Lib_IntVector_Intrinsics_ni_aes_enc_last(x0, x1) \ + (_mm_aesenclast_si128(x0, x1)) + +#define Lib_IntVector_Intrinsics_ni_aes_keygen_assist(x0, x1) \ + (_mm_aeskeygenassist_si128(x0, x1)) + +#define Lib_IntVector_Intrinsics_ni_clmul(x0, x1, x2) \ + (_mm_clmulepi64_si128(x0, x1, x2)) + +#define Lib_IntVector_Intrinsics_vec128_xor(x0, x1) \ + (_mm_xor_si128(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec128_eq64(x0, x1) \ + (_mm_cmpeq_epi64(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec128_eq32(x0, x1) \ + (_mm_cmpeq_epi32(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec128_gt64(x0, x1) \ + (_mm_cmpgt_epi64(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec128_gt32(x0, x1) \ + (_mm_cmpgt_epi32(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec128_or(x0, x1) \ + (_mm_or_si128(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec128_and(x0, x1) \ + (_mm_and_si128(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec128_lognot(x0) \ + (_mm_xor_si128(x0, _mm_set1_epi32(-1))) + +#define Lib_IntVector_Intrinsics_vec128_shift_left(x0, x1) \ + (_mm_slli_si128(x0, (x1) / 8)) + +#define Lib_IntVector_Intrinsics_vec128_shift_right(x0, x1) \ + (_mm_srli_si128(x0, (x1) / 8)) + +#define Lib_IntVector_Intrinsics_vec128_shift_left64(x0, x1) \ + (_mm_slli_epi64(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec128_shift_right64(x0, x1) \ + (_mm_srli_epi64(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec128_shift_left32(x0, x1) \ + (_mm_slli_epi32(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec128_shift_right32(x0, x1) \ + (_mm_srli_epi32(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec128_rotate_left32_8(x0) \ + (_mm_shuffle_epi8(x0, _mm_set_epi8(14, 13, 12, 15, 10, 9, 8, 11, 6, 5, 4, 7, 2, 1, 0, 3))) + +#define Lib_IntVector_Intrinsics_vec128_rotate_left32_16(x0) \ + (_mm_shuffle_epi8(x0, _mm_set_epi8(13, 12, 15, 14, 9, 8, 11, 10, 5, 4, 7, 6, 1, 0, 3, 2))) + +#define Lib_IntVector_Intrinsics_vec128_rotate_left32_24(x0) \ + (_mm_shuffle_epi8(x0, _mm_set_epi8(12, 15, 14, 13, 8, 11, 10, 9, 4, 7, 6, 5, 0, 3, 2, 1))) + +#define Lib_IntVector_Intrinsics_vec128_rotate_left32(x0, x1) \ + (((x1) == 8 ? Lib_IntVector_Intrinsics_vec128_rotate_left32_8(x0) : ((x1) == 16 ? Lib_IntVector_Intrinsics_vec128_rotate_left32_16(x0) : ((x1) == 24 ? Lib_IntVector_Intrinsics_vec128_rotate_left32_24(x0) : _mm_xor_si128(_mm_slli_epi32(x0, x1), _mm_srli_epi32(x0, 32 - (x1))))))) + +#define Lib_IntVector_Intrinsics_vec128_rotate_right32(x0, x1) \ + (Lib_IntVector_Intrinsics_vec128_rotate_left32(x0, 32 - (x1))) + +#define Lib_IntVector_Intrinsics_vec128_shuffle32(x0, x1, x2, x3, x4) \ + (_mm_shuffle_epi32(x0, _MM_SHUFFLE(x4, x3, x2, x1))) + +#define Lib_IntVector_Intrinsics_vec128_shuffle64(x0, x1, x2) \ + (_mm_shuffle_epi32(x0, _MM_SHUFFLE(2 * x1 + 1, 2 * x1, 2 * x2 + 1, 2 * x2))) + +#define Lib_IntVector_Intrinsics_vec128_rotate_right_lanes32(x0, x1) \ + (_mm_shuffle_epi32(x0, _MM_SHUFFLE((x1 + 3) % 4, (x1 + 2) % 4, (x1 + 1) % 4, x1 % 4))) + +#define Lib_IntVector_Intrinsics_vec128_rotate_right_lanes64(x0, x1) \ + (_mm_shuffle_epi32(x0, _MM_SHUFFLE((2 * x1 + 3) % 4, (2 * x1 + 2) % 4, (2 * x1 + 1) % 4, (2 * x1) % 4))) + +#define Lib_IntVector_Intrinsics_vec128_load32_le(x0) \ + (_mm_loadu_si128((__m128i*)(x0))) + +#define Lib_IntVector_Intrinsics_vec128_load64_le(x0) \ + (_mm_loadu_si128((__m128i*)(x0))) + +#define Lib_IntVector_Intrinsics_vec128_store32_le(x0, x1) \ + (_mm_storeu_si128((__m128i*)(x0), x1)) + +#define Lib_IntVector_Intrinsics_vec128_store64_le(x0, x1) \ + (_mm_storeu_si128((__m128i*)(x0), x1)) + +#define Lib_IntVector_Intrinsics_vec128_load_be(x0) \ + (_mm_shuffle_epi8(_mm_loadu_si128((__m128i*)(x0)), _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15))) + +#define Lib_IntVector_Intrinsics_vec128_load32_be(x0) \ + (_mm_shuffle_epi8(_mm_loadu_si128((__m128i*)(x0)), _mm_set_epi8(12, 13, 14, 15, 8, 9, 10, 11, 4, 5, 6, 7, 0, 1, 2, 3))) + +#define Lib_IntVector_Intrinsics_vec128_load64_be(x0) \ + (_mm_shuffle_epi8(_mm_loadu_si128((__m128i*)(x0)), _mm_set_epi8(8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7))) + +#define Lib_IntVector_Intrinsics_vec128_store_be(x0, x1) \ + (_mm_storeu_si128((__m128i*)(x0), _mm_shuffle_epi8(x1, _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15)))) + +#define Lib_IntVector_Intrinsics_vec128_store32_be(x0, x1) \ + (_mm_storeu_si128((__m128i*)(x0), _mm_shuffle_epi8(x1, _mm_set_epi8(12, 13, 14, 15, 8, 9, 10, 11, 4, 5, 6, 7, 0, 1, 2, 3)))) + +#define Lib_IntVector_Intrinsics_vec128_store64_be(x0, x1) \ + (_mm_storeu_si128((__m128i*)(x0), _mm_shuffle_epi8(x1, _mm_set_epi8(8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7)))) + +#define Lib_IntVector_Intrinsics_vec128_insert8(x0, x1, x2) \ + (_mm_insert_epi8(x0, x1, x2)) + +#define Lib_IntVector_Intrinsics_vec128_insert32(x0, x1, x2) \ + (_mm_insert_epi32(x0, x1, x2)) + +#define Lib_IntVector_Intrinsics_vec128_insert64(x0, x1, x2) \ + (_mm_insert_epi64(x0, x1, x2)) + +#define Lib_IntVector_Intrinsics_vec128_extract8(x0, x1) \ + (_mm_extract_epi8(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec128_extract32(x0, x1) \ + (_mm_extract_epi32(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec128_extract64(x0, x1) \ + (_mm_extract_epi64(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec128_zero \ + (_mm_setzero_si128()) + +#define Lib_IntVector_Intrinsics_vec128_add64(x0, x1) \ + (_mm_add_epi64(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec128_sub64(x0, x1) \ + (_mm_sub_epi64(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec128_mul64(x0, x1) \ + (_mm_mul_epu32(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec128_smul64(x0, x1) \ + (_mm_mul_epu32(x0, _mm_set1_epi64x(x1))) + +#define Lib_IntVector_Intrinsics_vec128_add32(x0, x1) \ + (_mm_add_epi32(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec128_sub32(x0, x1) \ + (_mm_sub_epi32(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec128_mul32(x0, x1) \ + (_mm_mullo_epi32(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec128_smul32(x0, x1) \ + (_mm_mullo_epi32(x0, _mm_set1_epi32(x1))) + +#define Lib_IntVector_Intrinsics_vec128_load128(x) \ + ((__m128i)x) + +#define Lib_IntVector_Intrinsics_vec128_load64(x) \ + (_mm_set1_epi64x(x)) /* hi lo */ + +#define Lib_IntVector_Intrinsics_vec128_load64s(x0, x1) \ + (_mm_set_epi64x(x1, x0)) /* hi lo */ + +#define Lib_IntVector_Intrinsics_vec128_load32(x) \ + (_mm_set1_epi32(x)) + +#define Lib_IntVector_Intrinsics_vec128_load32s(x0, x1, x2, x3) \ + (_mm_set_epi32(x3, x2, x1, x0)) /* hi lo */ + +#define Lib_IntVector_Intrinsics_vec128_interleave_low32(x1, x2) \ + (_mm_unpacklo_epi32(x1, x2)) + +#define Lib_IntVector_Intrinsics_vec128_interleave_high32(x1, x2) \ + (_mm_unpackhi_epi32(x1, x2)) + +#define Lib_IntVector_Intrinsics_vec128_interleave_low64(x1, x2) \ + (_mm_unpacklo_epi64(x1, x2)) + +#define Lib_IntVector_Intrinsics_vec128_interleave_high64(x1, x2) \ + (_mm_unpackhi_epi64(x1, x2)) + +#endif /* HACL_CAN_COMPILE_VEC128 */ + +#if defined(HACL_CAN_COMPILE_VEC256) + +#include <immintrin.h> +#include <wmmintrin.h> + +typedef __m256i Lib_IntVector_Intrinsics_vec256; + +#define Lib_IntVector_Intrinsics_vec256_eq64(x0, x1) \ + (_mm256_cmpeq_epi64(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec256_eq32(x0, x1) \ + (_mm256_cmpeq_epi32(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec256_gt64(x0, x1) \ + (_mm256_cmpgt_epi64(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec256_gt32(x0, x1) \ + (_mm256_cmpgt_epi32(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec256_xor(x0, x1) \ + (_mm256_xor_si256(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec256_or(x0, x1) \ + (_mm256_or_si256(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec256_and(x0, x1) \ + (_mm256_and_si256(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec256_lognot(x0) \ + (_mm256_xor_si256(x0, _mm256_set1_epi32(-1))) + +#define Lib_IntVector_Intrinsics_vec256_shift_left(x0, x1) \ + (_mm256_slli_si256(x0, (x1) / 8)) + +#define Lib_IntVector_Intrinsics_vec256_shift_right(x0, x1) \ + (_mm256_srli_si256(x0, (x1) / 8)) + +#define Lib_IntVector_Intrinsics_vec256_shift_left64(x0, x1) \ + (_mm256_slli_epi64(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec256_shift_right64(x0, x1) \ + (_mm256_srli_epi64(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec256_shift_left32(x0, x1) \ + (_mm256_slli_epi32(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec256_shift_right32(x0, x1) \ + (_mm256_srli_epi32(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec256_rotate_left32_8(x0) \ + (_mm256_shuffle_epi8(x0, _mm256_set_epi8(14, 13, 12, 15, 10, 9, 8, 11, 6, 5, 4, 7, 2, 1, 0, 3, 14, 13, 12, 15, 10, 9, 8, 11, 6, 5, 4, 7, 2, 1, 0, 3))) + +#define Lib_IntVector_Intrinsics_vec256_rotate_left32_16(x0) \ + (_mm256_shuffle_epi8(x0, _mm256_set_epi8(13, 12, 15, 14, 9, 8, 11, 10, 5, 4, 7, 6, 1, 0, 3, 2, 13, 12, 15, 14, 9, 8, 11, 10, 5, 4, 7, 6, 1, 0, 3, 2))) + +#define Lib_IntVector_Intrinsics_vec256_rotate_left32_24(x0) \ + (_mm256_shuffle_epi8(x0, _mm256_set_epi8(12, 15, 14, 13, 8, 11, 10, 9, 4, 7, 6, 5, 0, 3, 2, 1, 12, 15, 14, 13, 8, 11, 10, 9, 4, 7, 6, 5, 0, 3, 2, 1))) + +#define Lib_IntVector_Intrinsics_vec256_rotate_left32(x0, x1) \ + ((x1 == 8 ? Lib_IntVector_Intrinsics_vec256_rotate_left32_8(x0) : (x1 == 16 ? Lib_IntVector_Intrinsics_vec256_rotate_left32_16(x0) : (x1 == 24 ? Lib_IntVector_Intrinsics_vec256_rotate_left32_24(x0) : _mm256_or_si256(_mm256_slli_epi32(x0, x1), _mm256_srli_epi32(x0, 32 - (x1))))))) + +#define Lib_IntVector_Intrinsics_vec256_rotate_right32(x0, x1) \ + (Lib_IntVector_Intrinsics_vec256_rotate_left32(x0, 32 - (x1))) + +#define Lib_IntVector_Intrinsics_vec256_rotate_right64_8(x0) \ + (_mm256_shuffle_epi8(x0, _mm256_set_epi8(8, 15, 14, 13, 12, 11, 10, 9, 0, 7, 6, 5, 4, 3, 2, 1, 8, 15, 14, 13, 12, 11, 10, 9, 0, 7, 6, 5, 4, 3, 2, 1))) + +#define Lib_IntVector_Intrinsics_vec256_rotate_right64_16(x0) \ + (_mm256_shuffle_epi8(x0, _mm256_set_epi8(9, 8, 15, 14, 13, 12, 11, 10, 1, 0, 7, 6, 5, 4, 3, 2, 9, 8, 15, 14, 13, 12, 11, 10, 1, 0, 7, 6, 5, 4, 3, 2))) + +#define Lib_IntVector_Intrinsics_vec256_rotate_right64_24(x0) \ + (_mm256_shuffle_epi8(x0, _mm256_set_epi8(10, 9, 8, 15, 14, 13, 12, 11, 2, 1, 0, 7, 6, 5, 4, 3, 10, 9, 8, 15, 14, 13, 12, 11, 2, 1, 0, 7, 6, 5, 4, 3))) + +#define Lib_IntVector_Intrinsics_vec256_rotate_right64_32(x0) \ + (_mm256_shuffle_epi8(x0, _mm256_set_epi8(11, 10, 9, 8, 15, 14, 13, 12, 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12, 3, 2, 1, 0, 7, 6, 5, 4))) + +#define Lib_IntVector_Intrinsics_vec256_rotate_right64_40(x0) \ + (_mm256_shuffle_epi8(x0, _mm256_set_epi8(12, 11, 10, 9, 8, 15, 14, 13, 4, 3, 2, 1, 0, 7, 6, 5, 12, 11, 10, 9, 8, 15, 14, 13, 4, 3, 2, 1, 0, 7, 6, 5))) + +#define Lib_IntVector_Intrinsics_vec256_rotate_right64_48(x0) \ + (_mm256_shuffle_epi8(x0, _mm256_set_epi8(13, 12, 11, 10, 9, 8, 15, 14, 5, 4, 3, 2, 1, 0, 7, 6, 13, 12, 11, 10, 9, 8, 15, 14, 5, 4, 3, 2, 1, 0, 7, 6))) + +#define Lib_IntVector_Intrinsics_vec256_rotate_right64_56(x0) \ + (_mm256_shuffle_epi8(x0, _mm256_set_epi8(14, 13, 12, 11, 10, 9, 8, 15, 6, 5, 4, 3, 2, 1, 0, 7, 14, 13, 12, 11, 10, 9, 8, 15, 6, 5, 4, 3, 2, 1, 0, 7))) + +#define Lib_IntVector_Intrinsics_vec256_rotate_right64(x0, x1) \ + ((x1 == 8 ? Lib_IntVector_Intrinsics_vec256_rotate_right64_8(x0) : (x1 == 16 ? Lib_IntVector_Intrinsics_vec256_rotate_right64_16(x0) : (x1 == 24 ? Lib_IntVector_Intrinsics_vec256_rotate_right64_24(x0) : (x1 == 32 ? Lib_IntVector_Intrinsics_vec256_rotate_right64_32(x0) : (x1 == 40 ? Lib_IntVector_Intrinsics_vec256_rotate_right64_40(x0) : (x1 == 48 ? Lib_IntVector_Intrinsics_vec256_rotate_right64_48(x0) : (x1 == 56 ? Lib_IntVector_Intrinsics_vec256_rotate_right64_56(x0) : _mm256_xor_si256(_mm256_srli_epi64((x0), (x1)), _mm256_slli_epi64((x0), (64 - (x1)))))))))))) + +#define Lib_IntVector_Intrinsics_vec256_rotate_left64(x0, x1) \ + (Lib_IntVector_Intrinsics_vec256_rotate_right64(x0, 64 - (x1))) + +#define Lib_IntVector_Intrinsics_vec256_shuffle64(x0, x1, x2, x3, x4) \ + (_mm256_permute4x64_epi64(x0, _MM_SHUFFLE(x4, x3, x2, x1))) + +#define Lib_IntVector_Intrinsics_vec256_shuffle32(x0, x1, x2, x3, x4, x5, x6, x7, x8) \ + (_mm256_permutevar8x32_epi32(x0, _mm256_set_epi32(x8, x7, x6, x5, x4, x3, x2, x1))) + +#define Lib_IntVector_Intrinsics_vec256_rotate_right_lanes32(x0, x1) \ + (_mm256_permutevar8x32_epi32(x0, _mm256_set_epi32((x1 + 7) % 8, (x1 + 6) % 8, (x1 + 5) % 8, (x1 + 4) % 8, (x1 + 3 % 8), (x1 + 2) % 8, (x1 + 1) % 8, x1 % 8))) + +#define Lib_IntVector_Intrinsics_vec256_rotate_right_lanes64(x0, x1) \ + (_mm256_permute4x64_epi64(x0, _MM_SHUFFLE((x1 + 3) % 4, (x1 + 2) % 4, (x1 + 1) % 4, x1 % 4))) + +#define Lib_IntVector_Intrinsics_vec256_load32_le(x0) \ + (_mm256_loadu_si256((__m256i*)(x0))) + +#define Lib_IntVector_Intrinsics_vec256_load64_le(x0) \ + (_mm256_loadu_si256((__m256i*)(x0))) + +#define Lib_IntVector_Intrinsics_vec256_load32_be(x0) \ + (_mm256_shuffle_epi8(_mm256_loadu_si256((__m256i*)(x0)), _mm256_set_epi8(12, 13, 14, 15, 8, 9, 10, 11, 4, 5, 6, 7, 0, 1, 2, 3, 12, 13, 14, 15, 8, 9, 10, 11, 4, 5, 6, 7, 0, 1, 2, 3))) + +#define Lib_IntVector_Intrinsics_vec256_load64_be(x0) \ + (_mm256_shuffle_epi8(_mm256_loadu_si256((__m256i*)(x0)), _mm256_set_epi8(8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7))) + +#define Lib_IntVector_Intrinsics_vec256_store32_le(x0, x1) \ + (_mm256_storeu_si256((__m256i*)(x0), x1)) + +#define Lib_IntVector_Intrinsics_vec256_store64_le(x0, x1) \ + (_mm256_storeu_si256((__m256i*)(x0), x1)) + +#define Lib_IntVector_Intrinsics_vec256_store32_be(x0, x1) \ + (_mm256_storeu_si256((__m256i*)(x0), _mm256_shuffle_epi8(x1, _mm256_set_epi8(12, 13, 14, 15, 8, 9, 10, 11, 4, 5, 6, 7, 0, 1, 2, 3, 12, 13, 14, 15, 8, 9, 10, 11, 4, 5, 6, 7, 0, 1, 2, 3)))) + +#define Lib_IntVector_Intrinsics_vec256_store64_be(x0, x1) \ + (_mm256_storeu_si256((__m256i*)(x0), _mm256_shuffle_epi8(x1, _mm256_set_epi8(8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7)))) + +#define Lib_IntVector_Intrinsics_vec256_insert8(x0, x1, x2) \ + (_mm256_insert_epi8(x0, x1, x2)) + +#define Lib_IntVector_Intrinsics_vec256_insert32(x0, x1, x2) \ + (_mm256_insert_epi32(x0, x1, x2)) + +#define Lib_IntVector_Intrinsics_vec256_insert64(x0, x1, x2) \ + (_mm256_insert_epi64(x0, x1, x2)) + +#define Lib_IntVector_Intrinsics_vec256_extract8(x0, x1) \ + (_mm256_extract_epi8(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec256_extract32(x0, x1) \ + (_mm256_extract_epi32(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec256_extract64(x0, x1) \ + (_mm256_extract_epi64(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec256_zero \ + (_mm256_setzero_si256()) + +#define Lib_IntVector_Intrinsics_vec256_add64(x0, x1) \ + (_mm256_add_epi64(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec256_sub64(x0, x1) \ + (_mm256_sub_epi64(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec256_mul64(x0, x1) \ + (_mm256_mul_epu32(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec256_smul64(x0, x1) \ + (_mm256_mul_epu32(x0, _mm256_set1_epi64x(x1))) + +#define Lib_IntVector_Intrinsics_vec256_add32(x0, x1) \ + (_mm256_add_epi32(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec256_sub32(x0, x1) \ + (_mm256_sub_epi32(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec256_mul32(x0, x1) \ + (_mm256_mullo_epi32(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec256_smul32(x0, x1) \ + (_mm256_mullo_epi32(x0, _mm256_set1_epi32(x1))) + +#define Lib_IntVector_Intrinsics_vec256_load64(x1) \ + (_mm256_set1_epi64x(x1)) /* hi lo */ + +#define Lib_IntVector_Intrinsics_vec256_load64s(x0, x1, x2, x3) \ + (_mm256_set_epi64x(x3, x2, x1, x0)) /* hi lo */ + +#define Lib_IntVector_Intrinsics_vec256_load32(x) \ + (_mm256_set1_epi32(x)) + +#define Lib_IntVector_Intrinsics_vec256_load32s(x0, x1, x2, x3, x4, x5, x6, x7) \ + (_mm256_set_epi32(x7, x6, x5, x4, x3, x2, x1, x0)) /* hi lo */ + +#define Lib_IntVector_Intrinsics_vec256_load128(x) \ + (_mm256_set_m128i((__m128i)x)) + +#define Lib_IntVector_Intrinsics_vec256_load128s(x0, x1) \ + (_mm256_set_m128i((__m128i)x1, (__m128i)x0)) + +#define Lib_IntVector_Intrinsics_vec256_interleave_low32(x1, x2) \ + (_mm256_unpacklo_epi32(x1, x2)) + +#define Lib_IntVector_Intrinsics_vec256_interleave_high32(x1, x2) \ + (_mm256_unpackhi_epi32(x1, x2)) + +#define Lib_IntVector_Intrinsics_vec256_interleave_low64(x1, x2) \ + (_mm256_unpacklo_epi64(x1, x2)) + +#define Lib_IntVector_Intrinsics_vec256_interleave_high64(x1, x2) \ + (_mm256_unpackhi_epi64(x1, x2)) + +#define Lib_IntVector_Intrinsics_vec256_interleave_low128(x1, x2) \ + (_mm256_permute2x128_si256(x1, x2, 0x20)) + +#define Lib_IntVector_Intrinsics_vec256_interleave_high128(x1, x2) \ + (_mm256_permute2x128_si256(x1, x2, 0x31)) + +#endif /* HACL_CAN_COMPILE_VEC256 */ + +#elif (defined(__aarch64__) || defined(_M_ARM64) || defined(__arm__) || defined(_M_ARM)) && !defined(__ARM_32BIT_STATE) + +#if defined(HACL_CAN_COMPILE_VEC128) + +#include <arm_neon.h> + +typedef uint32x4_t Lib_IntVector_Intrinsics_vec128; + +#define Lib_IntVector_Intrinsics_vec128_xor(x0, x1) \ + (veorq_u32(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec128_eq64(x0, x1) \ + (vceqq_u32(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec128_eq32(x0, x1) \ + (vceqq_u32(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec128_gt32(x0, x1) \ + (vcgtq_u32(x0, x1)) + +#define high32(x0) \ + (vmovn_u64(vshrq_n_u64(vreinterpretq_u64_u32(x0), 32))) + +#define low32(x0) \ + (vmovn_u64(vreinterpretq_u64_u32(x0))) + +#define Lib_IntVector_Intrinsics_vec128_gt64(x0, x1) \ + (vreinterpretq_u32_u64(vmovl_u32(vorr_u32(vcgt_u32(high32(x0), high32(x1)), vand_u32(vceq_u32(high32(x0), high32(x1)), vcgt_u32(low32(x0), low32(x1))))))) + +#define Lib_IntVector_Intrinsics_vec128_or(x0, x1) \ + (vorrq_u32(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec128_and(x0, x1) \ + (vandq_u32(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec128_lognot(x0) \ + (vmvnq_u32(x0)) + +#define Lib_IntVector_Intrinsics_vec128_shift_left(x0, x1) \ + (vextq_u32(x0, vdupq_n_u8(0), 16 - (x1) / 8)) + +#define Lib_IntVector_Intrinsics_vec128_shift_right(x0, x1) \ + (vextq_u32(x0, vdupq_n_u8(0), (x1) / 8)) + +#define Lib_IntVector_Intrinsics_vec128_shift_left64(x0, x1) \ + (vreinterpretq_u32_u64(vshlq_n_u64(vreinterpretq_u64_u32(x0), x1))) + +#define Lib_IntVector_Intrinsics_vec128_shift_right64(x0, x1) \ + (vreinterpretq_u32_u64(vshrq_n_u64(vreinterpretq_u64_u32(x0), x1))) + +#define Lib_IntVector_Intrinsics_vec128_shift_left32(x0, x1) \ + (vshlq_n_u32(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec128_shift_right32(x0, x1) \ + (vshrq_n_u32(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec128_rotate_left32_16(x1) \ + (vreinterpretq_u32_u16(vrev32q_u16(vreinterpretq_u16_u32(x1)))) + +#define Lib_IntVector_Intrinsics_vec128_rotate_left32(x0, x1) \ + (((x1) == 16 ? Lib_IntVector_Intrinsics_vec128_rotate_left32_16(x0) : vsriq_n_u32(vshlq_n_u32((x0), (x1)), (x0), 32 - (x1)))) + +#define Lib_IntVector_Intrinsics_vec128_rotate_right32_16(x1) \ + (vreinterpretq_u32_u16(vrev32q_u16(vreinterpretq_u16_u32(x1)))) + +#define Lib_IntVector_Intrinsics_vec128_rotate_right32(x0, x1) \ + (((x1) == 16 ? Lib_IntVector_Intrinsics_vec128_rotate_right32_16(x0) : vsriq_n_u32(vshlq_n_u32((x0), 32 - (x1)), (x0), (x1)))) + +#define Lib_IntVector_Intrinsics_vec128_rotate_right_lanes32(x0, x1) \ + (vextq_u32(x0, x0, x1)) + +#define Lib_IntVector_Intrinsics_vec128_rotate_right_lanes64(x0, x1) \ + (vextq_u64(x0, x0, x1)) + +/* +#define Lib_IntVector_Intrinsics_vec128_shuffle32(x0, x1, x2, x3, x4) \ + (_mm_shuffle_epi32(x0, _MM_SHUFFLE(x1,x2,x3,x4))) + +#define Lib_IntVector_Intrinsics_vec128_shuffle64(x0, x1, x2) \ + (_mm_shuffle_epi32(x0, _MM_SHUFFLE(2*x1+1,2*x1,2*x2+1,2*x2))) +*/ + +#define Lib_IntVector_Intrinsics_vec128_load32_le(x0) \ + (vld1q_u32((const uint32_t*)(x0))) + +#define Lib_IntVector_Intrinsics_vec128_load64_le(x0) \ + (vld1q_u32((const uint32_t*)(x0))) + +#define Lib_IntVector_Intrinsics_vec128_store32_le(x0, x1) \ + (vst1q_u32((uint32_t*)(x0), (x1))) + +#define Lib_IntVector_Intrinsics_vec128_store64_le(x0, x1) \ + (vst1q_u32((uint32_t*)(x0), (x1))) + +/* +#define Lib_IntVector_Intrinsics_vec128_load_be(x0) \ + ( Lib_IntVector_Intrinsics_vec128 l = vrev64q_u8(vld1q_u32((uint32_t*)(x0))); + +*/ + +#define Lib_IntVector_Intrinsics_vec128_load32_be(x0) \ + (vreinterpretq_u32_u8(vrev32q_u8(vreinterpretq_u8_u32(vld1q_u32((const uint32_t*)(x0)))))) + +#define Lib_IntVector_Intrinsics_vec128_load64_be(x0) \ + (vreinterpretq_u32_u8(vrev64q_u8(vreinterpretq_u8_u32(vld1q_u32((const uint32_t*)(x0)))))) + +/* +#define Lib_IntVector_Intrinsics_vec128_store_be(x0, x1) \ + (_mm_storeu_si128((__m128i*)(x0), _mm_shuffle_epi8(x1, _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15)))) +*/ + +#define Lib_IntVector_Intrinsics_vec128_store32_be(x0, x1) \ + (vst1q_u32((uint32_t*)(x0), (vreinterpretq_u32_u8(vrev32q_u8(vreinterpretq_u8_u32(x1)))))) + +#define Lib_IntVector_Intrinsics_vec128_store64_be(x0, x1) \ + (vst1q_u32((uint32_t*)(x0), (vreinterpretq_u32_u8(vrev64q_u8(vreinterpretq_u8_u32(x1)))))) + +#define Lib_IntVector_Intrinsics_vec128_insert8(x0, x1, x2) \ + (vsetq_lane_u8(x1, x0, x2)) + +#define Lib_IntVector_Intrinsics_vec128_insert32(x0, x1, x2) \ + (vsetq_lane_u32(x1, x0, x2)) + +#define Lib_IntVector_Intrinsics_vec128_insert64(x0, x1, x2) \ + (vreinterpretq_u32_u64(vsetq_lane_u64(x1, vreinterpretq_u64_u32(x0), x2))) + +#define Lib_IntVector_Intrinsics_vec128_extract8(x0, x1) \ + (vgetq_lane_u8(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec128_extract32(x0, x1) \ + (vgetq_lane_u32(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec128_extract64(x0, x1) \ + (vgetq_lane_u64(vreinterpretq_u64_u32(x0), x1)) + +#define Lib_IntVector_Intrinsics_vec128_zero \ + (vdupq_n_u32(0)) + +#define Lib_IntVector_Intrinsics_vec128_add64(x0, x1) \ + (vreinterpretq_u32_u64(vaddq_u64(vreinterpretq_u64_u32(x0), vreinterpretq_u64_u32(x1)))) + +#define Lib_IntVector_Intrinsics_vec128_sub64(x0, x1) \ + (vreinterpretq_u32_u64(vsubq_u64(vreinterpretq_u64_u32(x0), vreinterpretq_u64_u32(x1)))) + +#define Lib_IntVector_Intrinsics_vec128_mul64(x0, x1) \ + (vreinterpretq_u32_u64(vmull_u32(vmovn_u64(vreinterpretq_u64_u32(x0)), vmovn_u64(vreinterpretq_u64_u32(x1))))) + +#define Lib_IntVector_Intrinsics_vec128_smul64(x0, x1) \ + (vreinterpretq_u32_u64(vmull_n_u32(vmovn_u64(vreinterpretq_u64_u32(x0)), (uint32_t)x1))) + +#define Lib_IntVector_Intrinsics_vec128_add32(x0, x1) \ + (vaddq_u32(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec128_sub32(x0, x1) \ + (vsubq_u32(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec128_mul32(x0, x1) \ + (vmulq_lane_u32(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec128_smul32(x0, x1) \ + (vmulq_lane_u32(x0, vdupq_n_u32(x1))) + +#define Lib_IntVector_Intrinsics_vec128_load128(x) \ + ((uint32x4_t)(x)) + +#define Lib_IntVector_Intrinsics_vec128_load64(x) \ + (vreinterpretq_u32_u64(vdupq_n_u64(x))) /* hi lo */ + +#define Lib_IntVector_Intrinsics_vec128_load32(x) \ + (vdupq_n_u32(x)) /* hi lo */ + +static inline Lib_IntVector_Intrinsics_vec128 +Lib_IntVector_Intrinsics_vec128_load64s(uint64_t x1, uint64_t x2) +{ + const uint64_t a[2] = { x1, x2 }; + return vreinterpretq_u32_u64(vld1q_u64(a)); +} + +static inline Lib_IntVector_Intrinsics_vec128 +Lib_IntVector_Intrinsics_vec128_load32s(uint32_t x1, uint32_t x2, uint32_t x3, uint32_t x4) +{ + const uint32_t a[4] = { x1, x2, x3, x4 }; + return vld1q_u32(a); +} + +#define Lib_IntVector_Intrinsics_vec128_interleave_low32(x1, x2) \ + (vzip1q_u32(x1, x2)) + +#define Lib_IntVector_Intrinsics_vec128_interleave_high32(x1, x2) \ + (vzip2q_u32(x1, x2)) + +#define Lib_IntVector_Intrinsics_vec128_interleave_low64(x1, x2) \ + (vreinterpretq_u32_u64(vzip1q_u64(vreinterpretq_u64_u32(x1), vreinterpretq_u64_u32(x2)))) + +#define Lib_IntVector_Intrinsics_vec128_interleave_high64(x1, x2) \ + (vreinterpretq_u32_u64(vzip2q_u64(vreinterpretq_u64_u32(x1), vreinterpretq_u64_u32(x2)))) + +#endif /* HACL_CAN_COMPILE_VEC128 */ + +/* IBM z architecture */ +#elif defined(__s390x__) /* this flag is for GCC only */ + +#if defined(HACL_CAN_COMPILE_VEC128) + +#include <stdint.h> +#include <vecintrin.h> + +/* The main vector 128 type + * We can't use uint8_t, uint32_t, uint64_t... instead of unsigned char, + * unsigned int, unsigned long long: the compiler complains that the parameter + * combination is invalid. */ +typedef unsigned char vector128_8 __attribute__((vector_size(16))); +typedef unsigned int vector128_32 __attribute__((vector_size(16))); +typedef unsigned long long vector128_64 __attribute__((vector_size(16))); + +typedef vector128_8 Lib_IntVector_Intrinsics_vec128; +typedef vector128_8 vector128; + +#define Lib_IntVector_Intrinsics_vec128_load32_le(x) \ + (vector128)((vector128_32)vec_revb(*((vector128_32*)(const uint8_t*)(x)))) + +#define Lib_IntVector_Intrinsics_vec128_load32_be(x) \ + (vector128)(*((vector128_32*)(const uint8_t*)(x))) + +#define Lib_IntVector_Intrinsics_vec128_load64_le(x) \ + (vector128)((vector128_64)vec_revb(*((vector128_64*)(const uint8_t*)(x)))) + +static inline void +Lib_IntVector_Intrinsics_vec128_store32_le(const uint8_t *x0, vector128 x1) +{ + *((vector128_32 *)x0) = vec_revb((vector128_32)x1); +} + +static inline void +Lib_IntVector_Intrinsics_vec128_store32_be(const uint8_t *x0, vector128 x1) +{ + *((vector128_32 *)x0) = (vector128_32)x1; +} + +static inline void +Lib_IntVector_Intrinsics_vec128_store64_le(const uint8_t *x0, vector128 x1) +{ + *((vector128_64 *)x0) = vec_revb((vector128_64)x1); +} + +#define Lib_IntVector_Intrinsics_vec128_add32(x0, x1) \ + ((vector128)((vector128_32)(((vector128_32)(x0)) + ((vector128_32)(x1))))) + +#define Lib_IntVector_Intrinsics_vec128_add64(x0, x1) \ + ((vector128)((vector128_64)(((vector128_64)(x0)) + ((vector128_64)(x1))))) + +#define Lib_IntVector_Intrinsics_vec128_and(x0, x1) \ + ((vector128)(vec_and((vector128)(x0), (vector128)(x1)))) + +#define Lib_IntVector_Intrinsics_vec128_eq32(x0, x1) \ + ((vector128)(vec_cmpeq(((vector128_32)(x0)), ((vector128_32)(x1))))) + +#define Lib_IntVector_Intrinsics_vec128_eq64(x0, x1) \ + ((vector128)(vec_cmpeq(((vector128_64)(x0)), ((vector128_64)(x1))))) + +#define Lib_IntVector_Intrinsics_vec128_extract32(x0, x1) \ + ((unsigned int)(vec_extract((vector128_32)(x0), x1))) + +#define Lib_IntVector_Intrinsics_vec128_extract64(x0, x1) \ + ((unsigned long long)(vec_extract((vector128_64)(x0), x1))) + +#define Lib_IntVector_Intrinsics_vec128_gt32(x0, x1) \ + ((vector128)((vector128_32)(((vector128_32)(x0)) > ((vector128_32)(x1))))) + +#define Lib_IntVector_Intrinsics_vec128_gt64(x0, x1) \ + ((vector128)((vector128_64)(((vector128_64)(x0)) > ((vector128_64)(x1))))) + +#define Lib_IntVector_Intrinsics_vec128_insert32(x0, x1, x2) \ + ((vector128)((vector128_32)vec_insert((unsigned int)(x1), (vector128_32)(x0), x2))) + +#define Lib_IntVector_Intrinsics_vec128_insert64(x0, x1, x2) \ + ((vector128)((vector128_64)vec_insert((unsigned long long)(x1), (vector128_64)(x0), x2))) + +#define Lib_IntVector_Intrinsics_vec128_interleave_high32(x0, x1) \ + ((vector128)((vector128_32)vec_mergel((vector128_32)(x0), (vector128_32)(x1)))) + +#define Lib_IntVector_Intrinsics_vec128_interleave_high64(x0, x1) \ + ((vector128)((vector128_64)vec_mergel((vector128_64)(x0), (vector128_64)(x1)))) + +#define Lib_IntVector_Intrinsics_vec128_interleave_low32(x0, x1) \ + ((vector128)((vector128_32)vec_mergeh((vector128_32)(x0), (vector128_32)(x1)))) + +#define Lib_IntVector_Intrinsics_vec128_interleave_low64(x0, x1) \ + ((vector128)((vector128_64)vec_mergeh((vector128_64)(x0), (vector128_64)(x1)))) + +#define Lib_IntVector_Intrinsics_vec128_load32(x) \ + ((vector128)((vector128_32){ (unsigned int)(x), (unsigned int)(x), \ + (unsigned int)(x), (unsigned int)(x) })) + +#define Lib_IntVector_Intrinsics_vec128_load32s(x0, x1, x2, x3) \ + ((vector128)((vector128_32){ (unsigned int)(x0), (unsigned int)(x1), (unsigned int)(x2), (unsigned int)(x3) })) + +#define Lib_IntVector_Intrinsics_vec128_load64(x) \ + ((vector128)((vector128_64)vec_load_pair((unsigned long long)(x), (unsigned long long)(x)))) + +#define Lib_IntVector_Intrinsics_vec128_lognot(x0) \ + ((vector128)(vec_xor((vector128)(x0), (vector128)vec_splat_u32(-1)))) + +#define Lib_IntVector_Intrinsics_vec128_mul64(x0, x1) \ + ((vector128)(vec_mulo((vector128_32)(x0), \ + (vector128_32)(x1)))) + +#define Lib_IntVector_Intrinsics_vec128_or(x0, x1) \ + ((vector128)(vec_or((vector128)(x0), (vector128)(x1)))) + +#define Lib_IntVector_Intrinsics_vec128_rotate_left32(x0, x1) \ + ((vector128)(vec_rli((vector128_32)(x0), (unsigned long)(x1)))) + +#define Lib_IntVector_Intrinsics_vec128_rotate_right32(x0, x1) \ + (Lib_IntVector_Intrinsics_vec128_rotate_left32(x0, (uint32_t)(32 - (x1)))) + +#define Lib_IntVector_Intrinsics_vec128_rotate_right_lanes32(x0, x1) \ + ((vector128)(vec_sld((vector128)(x0), (vector128)(x0), (x1 % 4) * 4))) + +#define Lib_IntVector_Intrinsics_vec128_shift_left64(x0, x1) \ + (((vector128)((vector128_64)vec_rli((vector128_64)(x0), (unsigned long)(x1)))) & \ + ((vector128)((vector128_64){ 0xffffffffffffffff << (x1), 0xffffffffffffffff << (x1) }))) + +#define Lib_IntVector_Intrinsics_vec128_shift_right64(x0, x1) \ + (((vector128)((vector128_64)vec_rli((vector128_64)(x0), (unsigned long)(64 - (x1))))) & \ + ((vector128)((vector128_64){ 0xffffffffffffffff >> (x1), 0xffffffffffffffff >> (x1) }))) + +#define Lib_IntVector_Intrinsics_vec128_shift_right32(x0, x1) \ + (((vector128)((vector128_32)vec_rli((vector128_32)(x0), (unsigned int)(32 - (x1))))) & \ + ((vector128)((vector128_32){ 0xffffffff >> (x1), 0xffffffff >> (x1), \ + 0xffffffff >> (x1), 0xffffffff >> (x1) }))) + +/* Doesn't work with vec_splat_u64 */ +#define Lib_IntVector_Intrinsics_vec128_smul64(x0, x1) \ + ((vector128)(Lib_IntVector_Intrinsics_vec128_mul64(x0, ((vector128_64){ (unsigned long long)(x1), (unsigned long long)(x1) })))) + +#define Lib_IntVector_Intrinsics_vec128_sub64(x0, x1) \ + ((vector128)((vector128_64)(x0) - (vector128_64)(x1))) + +static inline vector128 +Lib_IntVector_Intrinsics_vec128_xor(vector128 x0, vector128 x1) +{ + return ((vector128)(vec_xor((vector128)(x0), (vector128)(x1)))); +} + +#define Lib_IntVector_Intrinsics_vec128_zero \ + ((vector128){}) + +#endif /* HACL_CAN_COMPILE_VEC128 */ + +#elif defined(__powerpc64__) // PowerPC 64 - this flag is for GCC only + +#if defined(HACL_CAN_COMPILE_VEC128) + +#include <altivec.h> +#include <string.h> // for memcpy +#include <stdint.h> + +// The main vector 128 type +// We can't use uint8_t, uint32_t, uint64_t... instead of unsigned char, +// unsigned int, unsigned long long: the compiler complains that the parameter +// combination is invalid. +typedef vector unsigned char vector128_8; +typedef vector unsigned int vector128_32; +typedef vector unsigned long long vector128_64; + +typedef vector128_8 Lib_IntVector_Intrinsics_vec128; +typedef vector128_8 vector128; + +#define Lib_IntVector_Intrinsics_vec128_load32_le(x) \ + ((vector128)((vector128_32)(vec_xl(0, (const unsigned int*)((const uint8_t*)(x)))))) + +#define Lib_IntVector_Intrinsics_vec128_load64_le(x) \ + ((vector128)((vector128_64)(vec_xl(0, (const unsigned long long*)((const uint8_t*)(x)))))) + +#define Lib_IntVector_Intrinsics_vec128_store32_le(x0, x1) \ + (vec_xst((vector128_32)(x1), 0, (unsigned int*)((uint8_t*)(x0)))) + +#define Lib_IntVector_Intrinsics_vec128_store64_le(x0, x1) \ + (vec_xst((vector128_64)(x1), 0, (unsigned long long*)((uint8_t*)(x0)))) + +#define Lib_IntVector_Intrinsics_vec128_add32(x0, x1) \ + ((vector128)((vector128_32)(((vector128_32)(x0)) + ((vector128_32)(x1))))) + +#define Lib_IntVector_Intrinsics_vec128_add64(x0, x1) \ + ((vector128)((vector128_64)(((vector128_64)(x0)) + ((vector128_64)(x1))))) + +#define Lib_IntVector_Intrinsics_vec128_and(x0, x1) \ + ((vector128)(vec_and((vector128)(x0), (vector128)(x1)))) + +#define Lib_IntVector_Intrinsics_vec128_eq32(x0, x1) \ + ((vector128)(vec_cmpeq(((vector128_32)(x0)), ((vector128_32)(x1))))) + +#define Lib_IntVector_Intrinsics_vec128_eq64(x0, x1) \ + ((vector128)(vec_cmpeq(((vector128_64)(x0)), ((vector128_64)(x1))))) + +#define Lib_IntVector_Intrinsics_vec128_extract32(x0, x1) \ + ((unsigned int)(vec_extract((vector128_32)(x0), x1))) + +#define Lib_IntVector_Intrinsics_vec128_extract64(x0, x1) \ + ((unsigned long long)(vec_extract((vector128_64)(x0), x1))) + +#define Lib_IntVector_Intrinsics_vec128_gt32(x0, x1) \ + ((vector128)((vector128_32)(((vector128_32)(x0)) > ((vector128_32)(x1))))) + +#define Lib_IntVector_Intrinsics_vec128_gt64(x0, x1) \ + ((vector128)((vector128_64)(((vector128_64)(x0)) > ((vector128_64)(x1))))) + +#define Lib_IntVector_Intrinsics_vec128_insert32(x0, x1, x2) \ + ((vector128)((vector128_32)vec_insert((unsigned int)(x1), (vector128_32)(x0), x2))) + +#define Lib_IntVector_Intrinsics_vec128_insert64(x0, x1, x2) \ + ((vector128)((vector128_64)vec_insert((unsigned long long)(x1), (vector128_64)(x0), x2))) + +#define Lib_IntVector_Intrinsics_vec128_interleave_high32(x0, x1) \ + ((vector128)((vector128_32)vec_mergel((vector128_32)(x0), (vector128_32)(x1)))) + +#define Lib_IntVector_Intrinsics_vec128_interleave_high64(x0, x1) \ + ((vector128)((vector128_64)vec_mergel((vector128_64)(x0), (vector128_64)(x1)))) + +#define Lib_IntVector_Intrinsics_vec128_interleave_low32(x0, x1) \ + ((vector128)((vector128_32)vec_mergeh((vector128_32)(x0), (vector128_32)(x1)))) + +#define Lib_IntVector_Intrinsics_vec128_interleave_low64(x0, x1) \ + ((vector128)((vector128_64)vec_mergeh((vector128_64)(x0), (vector128_64)(x1)))) + +#define Lib_IntVector_Intrinsics_vec128_load32(x) \ + ((vector128)((vector128_32){ (unsigned int)(x), (unsigned int)(x), \ + (unsigned int)(x), (unsigned int)(x) })) + +#define Lib_IntVector_Intrinsics_vec128_load32s(x0, x1, x2, x3) \ + ((vector128)((vector128_32){ (unsigned int)(x0), (unsigned int)(x1), (unsigned int)(x2), (unsigned int)(x3) })) + +#define Lib_IntVector_Intrinsics_vec128_load64(x) \ + ((vector128)((vector128_64){ (unsigned long long)(x), (unsigned long long)(x) })) + +#define Lib_IntVector_Intrinsics_vec128_lognot(x0) \ + ((vector128)(vec_xor((vector128)(x0), (vector128)vec_splat_u32(-1)))) + +#define Lib_IntVector_Intrinsics_vec128_mul64(x0, x1) \ + ((vector128)(vec_mule((vector128_32)(x0), \ + (vector128_32)(x1)))) + +#define Lib_IntVector_Intrinsics_vec128_or(x0, x1) \ + ((vector128)(vec_or((vector128)(x0), (vector128)(x1)))) + +#define Lib_IntVector_Intrinsics_vec128_rotate_left32(x0, x1) \ + ((vector128)(vec_rl((vector128_32)(x0), (vector128_32){ (unsigned int)(x1), (unsigned int)(x1), (unsigned int)(x1), (unsigned int)(x1) }))) + +#define Lib_IntVector_Intrinsics_vec128_rotate_right32(x0, x1) \ + (Lib_IntVector_Intrinsics_vec128_rotate_left32(x0, (uint32_t)(32 - (x1)))) + +#define Lib_IntVector_Intrinsics_vec128_rotate_right_lanes32(x0, x1) \ + ((vector128)(vec_sld((vector128)(x0), (vector128)(x0), ((4 - (x1)) % 4) * 4))) + +#define Lib_IntVector_Intrinsics_vec128_shift_left64(x0, x1) \ + ((vector128)((vector128_64)vec_sl((vector128_64)(x0), (vector128_64){ (unsigned long)(x1), (unsigned long)(x1) }))) + +#define Lib_IntVector_Intrinsics_vec128_shift_right64(x0, x1) \ + ((vector128)((vector128_64)vec_sr((vector128_64)(x0), (vector128_64){ (unsigned long)(x1), (unsigned long)(x1) }))) + +// Doesn't work with vec_splat_u64 +#define Lib_IntVector_Intrinsics_vec128_smul64(x0, x1) \ + ((vector128)(Lib_IntVector_Intrinsics_vec128_mul64(x0, ((vector128_64){ (unsigned long long)(x1), (unsigned long long)(x1) })))) + +#define Lib_IntVector_Intrinsics_vec128_sub64(x0, x1) \ + ((vector128)((vector128_64)(x0) - (vector128_64)(x1))) + +#define Lib_IntVector_Intrinsics_vec128_xor(x0, x1) \ + ((vector128)(vec_xor((vector128)(x0), (vector128)(x1)))) + +#define Lib_IntVector_Intrinsics_vec128_zero \ + ((vector128){}) + +#endif /* HACL_CAN_COMPILE_VEC128 */ + +#endif // PowerPC64 + +// DEBUGGING: +// If libintvector_debug.h exists, use it to debug the current implementations. +// Note that some flags must be enabled for the debugging to be effective: +// see libintvector_debug.h for more details. +#if defined(__has_include) +#if __has_include("libintvector_debug.h") +#include "libintvector_debug.h" +#endif +#endif + +#endif // __Vec_Intrin_H |