summary | refs | log | tree | commit | diff | stats
path: root/security/nss/lib/freebl/verified
diff options
context:
space:
mode:
Diffstat (limited to 'security/nss/lib/freebl/verified')
-rw-r--r-- security/nss/lib/freebl/verified/Hacl_Chacha20.c 217
-rw-r--r-- security/nss/lib/freebl/verified/Hacl_Chacha20.h 55
-rw-r--r-- security/nss/lib/freebl/verified/Hacl_Chacha20Poly1305_128.c 1174
-rw-r--r-- security/nss/lib/freebl/verified/Hacl_Chacha20Poly1305_128.h 60
-rw-r--r-- security/nss/lib/freebl/verified/Hacl_Chacha20Poly1305_256.c 1176
-rw-r--r-- security/nss/lib/freebl/verified/Hacl_Chacha20Poly1305_256.h 60
-rw-r--r-- security/nss/lib/freebl/verified/Hacl_Chacha20Poly1305_32.c 584
-rw-r--r-- security/nss/lib/freebl/verified/Hacl_Chacha20Poly1305_32.h 59
-rw-r--r-- security/nss/lib/freebl/verified/Hacl_Chacha20_Vec128.c 744
-rw-r--r-- security/nss/lib/freebl/verified/Hacl_Chacha20_Vec128.h 55
-rw-r--r-- security/nss/lib/freebl/verified/Hacl_Chacha20_Vec256.c 876
-rw-r--r-- security/nss/lib/freebl/verified/Hacl_Chacha20_Vec256.h 55
-rw-r--r-- security/nss/lib/freebl/verified/Hacl_Curve25519_51.c 904
-rw-r--r-- security/nss/lib/freebl/verified/Hacl_Curve25519_51.h 41
-rw-r--r-- security/nss/lib/freebl/verified/Hacl_Kremlib.h 51
-rw-r--r-- security/nss/lib/freebl/verified/Hacl_Poly1305_128.c 1624
-rw-r--r-- security/nss/lib/freebl/verified/Hacl_Poly1305_128.h 66
-rw-r--r-- security/nss/lib/freebl/verified/Hacl_Poly1305_256.c 2095
-rw-r--r-- security/nss/lib/freebl/verified/Hacl_Poly1305_256.h 66
-rw-r--r-- security/nss/lib/freebl/verified/Hacl_Poly1305_32.c 574
-rw-r--r-- security/nss/lib/freebl/verified/Hacl_Poly1305_32.h 49
-rw-r--r-- security/nss/lib/freebl/verified/kremlin/include/kremlin/internal/callconv.h 46
-rw-r--r-- security/nss/lib/freebl/verified/kremlin/include/kremlin/internal/compat.h 32
-rw-r--r-- security/nss/lib/freebl/verified/kremlin/include/kremlin/internal/target.h 113
-rw-r--r-- security/nss/lib/freebl/verified/kremlin/include/kremlin/internal/types.h 99
-rw-r--r-- security/nss/lib/freebl/verified/kremlin/include/kremlin/lowstar_endianness.h 242
-rw-r--r-- security/nss/lib/freebl/verified/kremlin/kremlib/dist/minimal/FStar_UInt128.h 76
-rw-r--r-- security/nss/lib/freebl/verified/kremlin/kremlib/dist/minimal/FStar_UInt128_Verified.h 329
-rw-r--r-- security/nss/lib/freebl/verified/kremlin/kremlib/dist/minimal/FStar_UInt_8_16_32_64.h 203
-rw-r--r-- security/nss/lib/freebl/verified/kremlin/kremlib/dist/minimal/LowStar_Endianness.h 27
-rw-r--r-- security/nss/lib/freebl/verified/kremlin/kremlib/dist/minimal/fstar_uint128_gcc64.h 220
-rw-r--r-- security/nss/lib/freebl/verified/kremlin/kremlib/dist/minimal/fstar_uint128_msvc.h 528
-rw-r--r-- security/nss/lib/freebl/verified/kremlin/kremlib/dist/minimal/fstar_uint128_struct_endianness.h 84
-rw-r--r-- security/nss/lib/freebl/verified/libintvector.h 586
34 files changed, 13170 insertions, 0 deletions
diff --git a/security/nss/lib/freebl/verified/Hacl_Chacha20.c b/security/nss/lib/freebl/verified/Hacl_Chacha20.c
new file mode 100644
index 0000000000..2e552472bd
--- /dev/null
+++ b/security/nss/lib/freebl/verified/Hacl_Chacha20.c
@@ -0,0 +1,217 @@
+/* MIT License
+ *
+ * Copyright (c) 2016-2020 INRIA, CMU and Microsoft Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "Hacl_Chacha20.h"
+
+/*
+ * Exported table of the four ChaCha20 constant words. Stored as little-endian
+ * bytes, the words spell the ASCII string "expand 32-byte k". The values are
+ * the same as the file-local chacha20_constants table defined further down.
+ */
+const uint32_t
+ Hacl_Impl_Chacha20_Vec_chacha20_constants[4U] =
+ { (uint32_t)0x61707865U, (uint32_t)0x3320646eU, (uint32_t)0x79622d32U, (uint32_t)0x6b206574U };
+
+/*
+ * One add / xor / rotate step of the quarter round:
+ *
+ *     sum   = st[x] + st[y]
+ *     st[x] = sum
+ *     st[z] = rotate_left(st[z] ^ sum, rot)
+ *
+ * All three words are read before anything is written back, and st[x] is
+ * stored before st[z]. `rot` must be in 1..31 (callers below use 16, 12, 8
+ * and 7).
+ */
+static inline void
+quarter_round_step(uint32_t *st, uint32_t x, uint32_t y, uint32_t z, uint32_t rot)
+{
+    uint32_t sum = st[x] + st[y];
+    uint32_t mix = st[z] ^ sum;
+    st[x] = sum;
+    st[z] = mix << rot | mix >> ((uint32_t)32U - rot);
+}
+
+/*
+ * ChaCha quarter round on the four state words st[a], st[b], st[c], st[d].
+ *
+ * st      - state array, updated in place
+ * a,b,c,d - indices into `st` of the four words to mix
+ *
+ * The round is four add/xor/rotate steps that alternate between the
+ * (a, b, d) and (c, d, b) triples with rotation amounts 16, 12, 8 and 7.
+ */
+static inline void
+quarter_round(uint32_t *st, uint32_t a, uint32_t b, uint32_t c, uint32_t d)
+{
+    quarter_round_step(st, a, b, d, (uint32_t)16U);
+    quarter_round_step(st, c, d, b, (uint32_t)12U);
+    quarter_round_step(st, a, b, d, (uint32_t)8U);
+    quarter_round_step(st, c, d, b, (uint32_t)7U);
+}
+
+/*
+ * One double round over the 16-word state `st`: eight quarter rounds applied
+ * in a fixed order. Viewing the state as a 4x4 row-major matrix, the first
+ * four index groups are its columns and the last four are its diagonals.
+ */
+static inline void
+double_round(uint32_t *st)
+{
+    static const uint32_t groups[8U][4U] = {
+        { 0U, 4U, 8U, 12U },
+        { 1U, 5U, 9U, 13U },
+        { 2U, 6U, 10U, 14U },
+        { 3U, 7U, 11U, 15U },
+        { 0U, 5U, 10U, 15U },
+        { 1U, 6U, 11U, 12U },
+        { 2U, 7U, 8U, 13U },
+        { 3U, 4U, 9U, 14U }
+    };
+    for (uint32_t g = (uint32_t)0U; g < (uint32_t)8U; g++) {
+        quarter_round(st, groups[g][0U], groups[g][1U], groups[g][2U], groups[g][3U]);
+    }
+}
+
+/*
+ * Applies ten double rounds (that is, 20 rounds) to the 16-word state `st`,
+ * in place.
+ */
+static inline void
+rounds(uint32_t *st)
+{
+    for (uint32_t pass = (uint32_t)0U; pass < (uint32_t)10U; pass++) {
+        double_round(st);
+    }
+}
+
+/*
+ * Computes one 16-word key-stream block.
+ *
+ * k   - output, 16 words
+ * ctx - input state, 16 words; not modified
+ * ctr - block offset added to the counter held in state word 12
+ */
+static inline void
+chacha20_core(uint32_t *k, uint32_t *ctx, uint32_t ctr)
+{
+    /* Start from a copy of the context with the counter word advanced. */
+    memcpy(k, ctx, (uint32_t)16U * sizeof(ctx[0U]));
+    k[12U] += ctr;
+    rounds(k);
+    /* Add the input state back into the permuted words. */
+    for (uint32_t w = (uint32_t)0U; w < (uint32_t)16U; w++) {
+        k[w] += ctx[w];
+    }
+    /* ctx[12] does not contain `ctr`, so the offset is added once more here. */
+    k[12U] += ctr;
+}
+
+/*
+ * File-local ChaCha20 constant words, copied into state words 0..3 by
+ * chacha20_init. Stored as little-endian bytes they spell the ASCII string
+ * "expand 32-byte k".
+ */
+static const uint32_t
+ chacha20_constants[4U] =
+ { (uint32_t)0x61707865U, (uint32_t)0x3320646eU, (uint32_t)0x79622d32U, (uint32_t)0x6b206574U };
+
+/*
+ * Fills the 16-word state `ctx`:
+ *
+ *   words 0..3   - chacha20_constants
+ *   words 4..11  - eight 32-bit words read from `k` with load32_le (32 bytes)
+ *   word  12     - `ctr`
+ *   words 13..15 - three 32-bit words read from `n` with load32_le (12 bytes)
+ */
+static inline void
+chacha20_init(uint32_t *ctx, uint8_t *k, uint8_t *n, uint32_t ctr)
+{
+    uint32_t *key_words = ctx + (uint32_t)4U;
+    uint32_t *nonce_words = ctx + (uint32_t)13U;
+
+    for (uint32_t i = (uint32_t)0U; i < (uint32_t)4U; i++) {
+        ctx[i] = chacha20_constants[i];
+    }
+    for (uint32_t i = (uint32_t)0U; i < (uint32_t)8U; i++) {
+        key_words[i] = load32_le(k + i * (uint32_t)4U);
+    }
+    ctx[12U] = ctr;
+    for (uint32_t i = (uint32_t)0U; i < (uint32_t)3U; i++) {
+        nonce_words[i] = load32_le(n + i * (uint32_t)4U);
+    }
+}
+
+/*
+ * XORs one 64-byte block of `text` with the key-stream block for offset
+ * `incr` and writes the 64-byte result to `out`.
+ *
+ * The whole input block is read into a local buffer before any byte of `out`
+ * is written; chacha20_encrypt_last relies on this when it passes the same
+ * buffer as both `out` and `text`.
+ */
+static inline void
+chacha20_encrypt_block(uint32_t *ctx, uint8_t *out, uint32_t incr, uint8_t *text)
+{
+    uint32_t keystream[16U] = { 0U };
+    uint32_t block[16U] = { 0U };
+
+    chacha20_core(keystream, ctx, incr);
+    for (uint32_t i = (uint32_t)0U; i < (uint32_t)16U; i++) {
+        uint32_t word = load32_le(text + i * (uint32_t)4U);
+        block[i] = word ^ keystream[i];
+    }
+    for (uint32_t i = (uint32_t)0U; i < (uint32_t)16U; i++) {
+        store32_le(out + i * (uint32_t)4U, block[i]);
+    }
+}
+
+/*
+ * Processes a final partial block of `len` bytes.
+ *
+ * The input is copied into a zero-filled 64-byte stack buffer, processed in
+ * place as a full block, and only the first `len` bytes are copied to `out`.
+ * `len` must not exceed 64, the size of that buffer; the caller in this file
+ * passes the total length modulo 64.
+ */
+static inline void
+chacha20_encrypt_last(uint32_t *ctx, uint32_t len, uint8_t *out, uint32_t incr, uint8_t *text)
+{
+    uint8_t padded[64U] = { 0U };
+
+    memcpy(padded, text, len * sizeof(text[0U]));
+    chacha20_encrypt_block(ctx, padded, incr, padded);
+    memcpy(out, padded, len * sizeof(padded[0U]));
+}
+
+/*
+ * Processes `len` bytes of `text` into `out` using the state `ctx`.
+ *
+ * Each complete 64-byte block at index i is handled by
+ * chacha20_encrypt_block with block offset i. Any remaining len % 64 bytes
+ * go through chacha20_encrypt_last with the next block offset.
+ */
+static inline void
+chacha20_update(uint32_t *ctx, uint32_t len, uint8_t *out, uint8_t *text)
+{
+    uint32_t full_blocks = len / (uint32_t)64U;
+    uint32_t tail_len = len % (uint32_t)64U;
+
+    for (uint32_t blk = (uint32_t)0U; blk < full_blocks; blk++) {
+        uint32_t off = blk * (uint32_t)64U;
+        chacha20_encrypt_block(ctx, out + off, blk, text + off);
+    }
+    if (tail_len != (uint32_t)0U) {
+        uint32_t off = full_blocks * (uint32_t)64U;
+        chacha20_encrypt_last(ctx, tail_len, out + off, full_blocks, text + off);
+    }
+}
+
+/*
+ * Encrypts `len` bytes of `text` into `out`.
+ *
+ * len  - number of bytes to process; `len` bytes are written to `out`
+ * out  - destination buffer
+ * text - input buffer
+ * key  - key bytes; chacha20_init reads 32 bytes from it
+ * n    - nonce bytes; chacha20_init reads 12 bytes from it
+ * ctr  - initial counter value, placed in state word 12; the 64-byte block
+ *        at index i is processed with block offset i added to it
+ *
+ * The state is built in a local 16-word array and is not returned.
+ */
+void
+Hacl_Chacha20_chacha20_encrypt(
+ uint32_t len,
+ uint8_t *out,
+ uint8_t *text,
+ uint8_t *key,
+ uint8_t *n,
+ uint32_t ctr)
+{
+ uint32_t ctx[16U] = { 0U };
+ chacha20_init(ctx, key, n, ctr);
+ chacha20_update(ctx, len, out, text);
+}
+
+/*
+ * Decrypts `len` bytes of `cipher` into `out`.
+ *
+ * Performs exactly the same steps as Hacl_Chacha20_chacha20_encrypt, with
+ * `cipher` as the input buffer: the state is initialised from `key`, `n` and
+ * `ctr`, then chacha20_update processes the data.
+ *
+ * len    - number of bytes to process; `len` bytes are written to `out`
+ * out    - destination buffer
+ * cipher - input buffer
+ * key    - key bytes; chacha20_init reads 32 bytes from it
+ * n      - nonce bytes; chacha20_init reads 12 bytes from it
+ * ctr    - initial counter value, placed in state word 12
+ */
+void
+Hacl_Chacha20_chacha20_decrypt(
+ uint32_t len,
+ uint8_t *out,
+ uint8_t *cipher,
+ uint8_t *key,
+ uint8_t *n,
+ uint32_t ctr)
+{
+ uint32_t ctx[16U] = { 0U };
+ chacha20_init(ctx, key, n, ctr);
+ chacha20_update(ctx, len, out, cipher);
+}
diff --git a/security/nss/lib/freebl/verified/Hacl_Chacha20.h b/security/nss/lib/freebl/verified/Hacl_Chacha20.h
new file mode 100644
index 0000000000..bd54a315e4
--- /dev/null
+++ b/security/nss/lib/freebl/verified/Hacl_Chacha20.h
@@ -0,0 +1,55 @@
+/* MIT License
+ *
+ * Copyright (c) 2016-2020 INRIA, CMU and Microsoft Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "kremlin/internal/types.h"
+#include "kremlin/lowstar_endianness.h"
+#include <string.h>
+#include <stdbool.h>
+
+#ifndef __Hacl_Chacha20_H
+#define __Hacl_Chacha20_H
+
+#include "Hacl_Kremlib.h"
+
+/* Table of the four ChaCha20 constant words; defined in Hacl_Chacha20.c. */
+extern const uint32_t Hacl_Impl_Chacha20_Vec_chacha20_constants[4U];
+
+/*
+ * Encrypts `len` bytes of `text` into `out`.
+ *
+ * len  - number of bytes to process; `len` bytes are written to `out`
+ * out  - destination buffer
+ * text - input buffer
+ * key  - key bytes; the implementation reads 32 bytes
+ * n    - nonce bytes; the implementation reads 12 bytes
+ * ctr  - initial counter value for the first 64-byte block
+ */
+void
+Hacl_Chacha20_chacha20_encrypt(
+ uint32_t len,
+ uint8_t *out,
+ uint8_t *text,
+ uint8_t *key,
+ uint8_t *n,
+ uint32_t ctr);
+
+/*
+ * Decrypts `len` bytes of `cipher` into `out`. The implementation performs
+ * the same steps as Hacl_Chacha20_chacha20_encrypt with `cipher` as input.
+ *
+ * len    - number of bytes to process; `len` bytes are written to `out`
+ * out    - destination buffer
+ * cipher - input buffer
+ * key    - key bytes; the implementation reads 32 bytes
+ * n      - nonce bytes; the implementation reads 12 bytes
+ * ctr    - initial counter value for the first 64-byte block
+ */
+void
+Hacl_Chacha20_chacha20_decrypt(
+ uint32_t len,
+ uint8_t *out,
+ uint8_t *cipher,
+ uint8_t *key,
+ uint8_t *n,
+ uint32_t ctr);
+
+#define __Hacl_Chacha20_H_DEFINED
+#endif
diff --git a/security/nss/lib/freebl/verified/Hacl_Chacha20Poly1305_128.c b/security/nss/lib/freebl/verified/Hacl_Chacha20Poly1305_128.c
new file mode 100644
index 0000000000..1b98e18af6
--- /dev/null
+++ b/security/nss/lib/freebl/verified/Hacl_Chacha20Poly1305_128.c
@@ -0,0 +1,1174 @@
+/* MIT License
+ *
+ * Copyright (c) 2016-2020 INRIA, CMU and Microsoft Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "Hacl_Chacha20Poly1305_128.h"
+
+static inline void
+poly1305_padded_128(Lib_IntVector_Intrinsics_vec128 *ctx, uint32_t len, uint8_t *text)
+{
+ uint32_t n = len / (uint32_t)16U;
+ uint32_t r = len % (uint32_t)16U;
+ uint8_t *blocks = text;
+ uint8_t *rem = text + n * (uint32_t)16U;
+ Lib_IntVector_Intrinsics_vec128 *pre0 = ctx + (uint32_t)5U;
+ Lib_IntVector_Intrinsics_vec128 *acc0 = ctx;
+ uint32_t sz_block = (uint32_t)32U;
+ uint32_t len0 = n * (uint32_t)16U / sz_block * sz_block;
+ uint8_t *t00 = blocks;
+ if (len0 > (uint32_t)0U) {
+ uint32_t bs = (uint32_t)32U;
+ uint8_t *text0 = t00;
+ Hacl_Impl_Poly1305_Field32xN_128_load_acc2(acc0, text0);
+ uint32_t len1 = len0 - bs;
+ uint8_t *text1 = t00 + bs;
+ uint32_t nb = len1 / bs;
+ for (uint32_t i = (uint32_t)0U; i < nb; i++) {
+ uint8_t *block = text1 + i * bs;
+ Lib_IntVector_Intrinsics_vec128 e[5U];
+ for (uint32_t _i = 0U; _i < (uint32_t)5U; ++_i)
+ e[_i] = Lib_IntVector_Intrinsics_vec128_zero;
+ Lib_IntVector_Intrinsics_vec128 b1 = Lib_IntVector_Intrinsics_vec128_load_le(block);
+ Lib_IntVector_Intrinsics_vec128
+ b2 = Lib_IntVector_Intrinsics_vec128_load_le(block + (uint32_t)16U);
+ Lib_IntVector_Intrinsics_vec128 lo = Lib_IntVector_Intrinsics_vec128_interleave_low64(b1, b2);
+ Lib_IntVector_Intrinsics_vec128
+ hi = Lib_IntVector_Intrinsics_vec128_interleave_high64(b1, b2);
+ Lib_IntVector_Intrinsics_vec128
+ f00 =
+ Lib_IntVector_Intrinsics_vec128_and(lo,
+ Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU));
+ Lib_IntVector_Intrinsics_vec128
+ f15 =
+ Lib_IntVector_Intrinsics_vec128_and(Lib_IntVector_Intrinsics_vec128_shift_right64(lo,
+ (uint32_t)26U),
+ Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU));
+ Lib_IntVector_Intrinsics_vec128
+ f25 =
+ Lib_IntVector_Intrinsics_vec128_or(Lib_IntVector_Intrinsics_vec128_shift_right64(lo,
+ (uint32_t)52U),
+ Lib_IntVector_Intrinsics_vec128_shift_left64(Lib_IntVector_Intrinsics_vec128_and(hi,
+ Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3fffU)),
+ (uint32_t)12U));
+ Lib_IntVector_Intrinsics_vec128
+ f30 =
+ Lib_IntVector_Intrinsics_vec128_and(Lib_IntVector_Intrinsics_vec128_shift_right64(hi,
+ (uint32_t)14U),
+ Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU));
+ Lib_IntVector_Intrinsics_vec128
+ f40 = Lib_IntVector_Intrinsics_vec128_shift_right64(hi, (uint32_t)40U);
+ Lib_IntVector_Intrinsics_vec128 f0 = f00;
+ Lib_IntVector_Intrinsics_vec128 f1 = f15;
+ Lib_IntVector_Intrinsics_vec128 f2 = f25;
+ Lib_IntVector_Intrinsics_vec128 f3 = f30;
+ Lib_IntVector_Intrinsics_vec128 f41 = f40;
+ e[0U] = f0;
+ e[1U] = f1;
+ e[2U] = f2;
+ e[3U] = f3;
+ e[4U] = f41;
+ uint64_t b = (uint64_t)0x1000000U;
+ Lib_IntVector_Intrinsics_vec128 mask = Lib_IntVector_Intrinsics_vec128_load64(b);
+ Lib_IntVector_Intrinsics_vec128 f4 = e[4U];
+ e[4U] = Lib_IntVector_Intrinsics_vec128_or(f4, mask);
+ Lib_IntVector_Intrinsics_vec128 *rn = pre0 + (uint32_t)10U;
+ Lib_IntVector_Intrinsics_vec128 *rn5 = pre0 + (uint32_t)15U;
+ Lib_IntVector_Intrinsics_vec128 r0 = rn[0U];
+ Lib_IntVector_Intrinsics_vec128 r1 = rn[1U];
+ Lib_IntVector_Intrinsics_vec128 r2 = rn[2U];
+ Lib_IntVector_Intrinsics_vec128 r3 = rn[3U];
+ Lib_IntVector_Intrinsics_vec128 r4 = rn[4U];
+ Lib_IntVector_Intrinsics_vec128 r51 = rn5[1U];
+ Lib_IntVector_Intrinsics_vec128 r52 = rn5[2U];
+ Lib_IntVector_Intrinsics_vec128 r53 = rn5[3U];
+ Lib_IntVector_Intrinsics_vec128 r54 = rn5[4U];
+ Lib_IntVector_Intrinsics_vec128 f10 = acc0[0U];
+ Lib_IntVector_Intrinsics_vec128 f110 = acc0[1U];
+ Lib_IntVector_Intrinsics_vec128 f120 = acc0[2U];
+ Lib_IntVector_Intrinsics_vec128 f130 = acc0[3U];
+ Lib_IntVector_Intrinsics_vec128 f140 = acc0[4U];
+ Lib_IntVector_Intrinsics_vec128 a0 = Lib_IntVector_Intrinsics_vec128_mul64(r0, f10);
+ Lib_IntVector_Intrinsics_vec128 a1 = Lib_IntVector_Intrinsics_vec128_mul64(r1, f10);
+ Lib_IntVector_Intrinsics_vec128 a2 = Lib_IntVector_Intrinsics_vec128_mul64(r2, f10);
+ Lib_IntVector_Intrinsics_vec128 a3 = Lib_IntVector_Intrinsics_vec128_mul64(r3, f10);
+ Lib_IntVector_Intrinsics_vec128 a4 = Lib_IntVector_Intrinsics_vec128_mul64(r4, f10);
+ Lib_IntVector_Intrinsics_vec128
+ a01 =
+ Lib_IntVector_Intrinsics_vec128_add64(a0,
+ Lib_IntVector_Intrinsics_vec128_mul64(r54, f110));
+ Lib_IntVector_Intrinsics_vec128
+ a11 =
+ Lib_IntVector_Intrinsics_vec128_add64(a1,
+ Lib_IntVector_Intrinsics_vec128_mul64(r0, f110));
+ Lib_IntVector_Intrinsics_vec128
+ a21 =
+ Lib_IntVector_Intrinsics_vec128_add64(a2,
+ Lib_IntVector_Intrinsics_vec128_mul64(r1, f110));
+ Lib_IntVector_Intrinsics_vec128
+ a31 =
+ Lib_IntVector_Intrinsics_vec128_add64(a3,
+ Lib_IntVector_Intrinsics_vec128_mul64(r2, f110));
+ Lib_IntVector_Intrinsics_vec128
+ a41 =
+ Lib_IntVector_Intrinsics_vec128_add64(a4,
+ Lib_IntVector_Intrinsics_vec128_mul64(r3, f110));
+ Lib_IntVector_Intrinsics_vec128
+ a02 =
+ Lib_IntVector_Intrinsics_vec128_add64(a01,
+ Lib_IntVector_Intrinsics_vec128_mul64(r53, f120));
+ Lib_IntVector_Intrinsics_vec128
+ a12 =
+ Lib_IntVector_Intrinsics_vec128_add64(a11,
+ Lib_IntVector_Intrinsics_vec128_mul64(r54, f120));
+ Lib_IntVector_Intrinsics_vec128
+ a22 =
+ Lib_IntVector_Intrinsics_vec128_add64(a21,
+ Lib_IntVector_Intrinsics_vec128_mul64(r0, f120));
+ Lib_IntVector_Intrinsics_vec128
+ a32 =
+ Lib_IntVector_Intrinsics_vec128_add64(a31,
+ Lib_IntVector_Intrinsics_vec128_mul64(r1, f120));
+ Lib_IntVector_Intrinsics_vec128
+ a42 =
+ Lib_IntVector_Intrinsics_vec128_add64(a41,
+ Lib_IntVector_Intrinsics_vec128_mul64(r2, f120));
+ Lib_IntVector_Intrinsics_vec128
+ a03 =
+ Lib_IntVector_Intrinsics_vec128_add64(a02,
+ Lib_IntVector_Intrinsics_vec128_mul64(r52, f130));
+ Lib_IntVector_Intrinsics_vec128
+ a13 =
+ Lib_IntVector_Intrinsics_vec128_add64(a12,
+ Lib_IntVector_Intrinsics_vec128_mul64(r53, f130));
+ Lib_IntVector_Intrinsics_vec128
+ a23 =
+ Lib_IntVector_Intrinsics_vec128_add64(a22,
+ Lib_IntVector_Intrinsics_vec128_mul64(r54, f130));
+ Lib_IntVector_Intrinsics_vec128
+ a33 =
+ Lib_IntVector_Intrinsics_vec128_add64(a32,
+ Lib_IntVector_Intrinsics_vec128_mul64(r0, f130));
+ Lib_IntVector_Intrinsics_vec128
+ a43 =
+ Lib_IntVector_Intrinsics_vec128_add64(a42,
+ Lib_IntVector_Intrinsics_vec128_mul64(r1, f130));
+ Lib_IntVector_Intrinsics_vec128
+ a04 =
+ Lib_IntVector_Intrinsics_vec128_add64(a03,
+ Lib_IntVector_Intrinsics_vec128_mul64(r51, f140));
+ Lib_IntVector_Intrinsics_vec128
+ a14 =
+ Lib_IntVector_Intrinsics_vec128_add64(a13,
+ Lib_IntVector_Intrinsics_vec128_mul64(r52, f140));
+ Lib_IntVector_Intrinsics_vec128
+ a24 =
+ Lib_IntVector_Intrinsics_vec128_add64(a23,
+ Lib_IntVector_Intrinsics_vec128_mul64(r53, f140));
+ Lib_IntVector_Intrinsics_vec128
+ a34 =
+ Lib_IntVector_Intrinsics_vec128_add64(a33,
+ Lib_IntVector_Intrinsics_vec128_mul64(r54, f140));
+ Lib_IntVector_Intrinsics_vec128
+ a44 =
+ Lib_IntVector_Intrinsics_vec128_add64(a43,
+ Lib_IntVector_Intrinsics_vec128_mul64(r0, f140));
+ Lib_IntVector_Intrinsics_vec128 t01 = a04;
+ Lib_IntVector_Intrinsics_vec128 t1 = a14;
+ Lib_IntVector_Intrinsics_vec128 t2 = a24;
+ Lib_IntVector_Intrinsics_vec128 t3 = a34;
+ Lib_IntVector_Intrinsics_vec128 t4 = a44;
+ Lib_IntVector_Intrinsics_vec128
+ mask26 = Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU);
+ Lib_IntVector_Intrinsics_vec128
+ z0 = Lib_IntVector_Intrinsics_vec128_shift_right64(t01, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec128
+ z1 = Lib_IntVector_Intrinsics_vec128_shift_right64(t3, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec128 x0 = Lib_IntVector_Intrinsics_vec128_and(t01, mask26);
+ Lib_IntVector_Intrinsics_vec128 x3 = Lib_IntVector_Intrinsics_vec128_and(t3, mask26);
+ Lib_IntVector_Intrinsics_vec128 x1 = Lib_IntVector_Intrinsics_vec128_add64(t1, z0);
+ Lib_IntVector_Intrinsics_vec128 x4 = Lib_IntVector_Intrinsics_vec128_add64(t4, z1);
+ Lib_IntVector_Intrinsics_vec128
+ z01 = Lib_IntVector_Intrinsics_vec128_shift_right64(x1, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec128
+ z11 = Lib_IntVector_Intrinsics_vec128_shift_right64(x4, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec128
+ t = Lib_IntVector_Intrinsics_vec128_shift_left64(z11, (uint32_t)2U);
+ Lib_IntVector_Intrinsics_vec128 z12 = Lib_IntVector_Intrinsics_vec128_add64(z11, t);
+ Lib_IntVector_Intrinsics_vec128 x11 = Lib_IntVector_Intrinsics_vec128_and(x1, mask26);
+ Lib_IntVector_Intrinsics_vec128 x41 = Lib_IntVector_Intrinsics_vec128_and(x4, mask26);
+ Lib_IntVector_Intrinsics_vec128 x2 = Lib_IntVector_Intrinsics_vec128_add64(t2, z01);
+ Lib_IntVector_Intrinsics_vec128 x01 = Lib_IntVector_Intrinsics_vec128_add64(x0, z12);
+ Lib_IntVector_Intrinsics_vec128
+ z02 = Lib_IntVector_Intrinsics_vec128_shift_right64(x2, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec128
+ z13 = Lib_IntVector_Intrinsics_vec128_shift_right64(x01, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec128 x21 = Lib_IntVector_Intrinsics_vec128_and(x2, mask26);
+ Lib_IntVector_Intrinsics_vec128 x02 = Lib_IntVector_Intrinsics_vec128_and(x01, mask26);
+ Lib_IntVector_Intrinsics_vec128 x31 = Lib_IntVector_Intrinsics_vec128_add64(x3, z02);
+ Lib_IntVector_Intrinsics_vec128 x12 = Lib_IntVector_Intrinsics_vec128_add64(x11, z13);
+ Lib_IntVector_Intrinsics_vec128
+ z03 = Lib_IntVector_Intrinsics_vec128_shift_right64(x31, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec128 x32 = Lib_IntVector_Intrinsics_vec128_and(x31, mask26);
+ Lib_IntVector_Intrinsics_vec128 x42 = Lib_IntVector_Intrinsics_vec128_add64(x41, z03);
+ Lib_IntVector_Intrinsics_vec128 o00 = x02;
+ Lib_IntVector_Intrinsics_vec128 o10 = x12;
+ Lib_IntVector_Intrinsics_vec128 o20 = x21;
+ Lib_IntVector_Intrinsics_vec128 o30 = x32;
+ Lib_IntVector_Intrinsics_vec128 o40 = x42;
+ acc0[0U] = o00;
+ acc0[1U] = o10;
+ acc0[2U] = o20;
+ acc0[3U] = o30;
+ acc0[4U] = o40;
+ Lib_IntVector_Intrinsics_vec128 f100 = acc0[0U];
+ Lib_IntVector_Intrinsics_vec128 f11 = acc0[1U];
+ Lib_IntVector_Intrinsics_vec128 f12 = acc0[2U];
+ Lib_IntVector_Intrinsics_vec128 f13 = acc0[3U];
+ Lib_IntVector_Intrinsics_vec128 f14 = acc0[4U];
+ Lib_IntVector_Intrinsics_vec128 f20 = e[0U];
+ Lib_IntVector_Intrinsics_vec128 f21 = e[1U];
+ Lib_IntVector_Intrinsics_vec128 f22 = e[2U];
+ Lib_IntVector_Intrinsics_vec128 f23 = e[3U];
+ Lib_IntVector_Intrinsics_vec128 f24 = e[4U];
+ Lib_IntVector_Intrinsics_vec128 o0 = Lib_IntVector_Intrinsics_vec128_add64(f100, f20);
+ Lib_IntVector_Intrinsics_vec128 o1 = Lib_IntVector_Intrinsics_vec128_add64(f11, f21);
+ Lib_IntVector_Intrinsics_vec128 o2 = Lib_IntVector_Intrinsics_vec128_add64(f12, f22);
+ Lib_IntVector_Intrinsics_vec128 o3 = Lib_IntVector_Intrinsics_vec128_add64(f13, f23);
+ Lib_IntVector_Intrinsics_vec128 o4 = Lib_IntVector_Intrinsics_vec128_add64(f14, f24);
+ acc0[0U] = o0;
+ acc0[1U] = o1;
+ acc0[2U] = o2;
+ acc0[3U] = o3;
+ acc0[4U] = o4;
+ }
+ Hacl_Impl_Poly1305_Field32xN_128_fmul_r2_normalize(acc0, pre0);
+ }
+ uint32_t len1 = n * (uint32_t)16U - len0;
+ uint8_t *t10 = blocks + len0;
+ uint32_t nb = len1 / (uint32_t)16U;
+ uint32_t rem1 = len1 % (uint32_t)16U;
+ for (uint32_t i = (uint32_t)0U; i < nb; i++) {
+ uint8_t *block = t10 + i * (uint32_t)16U;
+ Lib_IntVector_Intrinsics_vec128 e[5U];
+ for (uint32_t _i = 0U; _i < (uint32_t)5U; ++_i)
+ e[_i] = Lib_IntVector_Intrinsics_vec128_zero;
+ uint64_t u0 = load64_le(block);
+ uint64_t lo = u0;
+ uint64_t u = load64_le(block + (uint32_t)8U);
+ uint64_t hi = u;
+ Lib_IntVector_Intrinsics_vec128 f0 = Lib_IntVector_Intrinsics_vec128_load64(lo);
+ Lib_IntVector_Intrinsics_vec128 f1 = Lib_IntVector_Intrinsics_vec128_load64(hi);
+ Lib_IntVector_Intrinsics_vec128
+ f010 =
+ Lib_IntVector_Intrinsics_vec128_and(f0,
+ Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU));
+ Lib_IntVector_Intrinsics_vec128
+ f110 =
+ Lib_IntVector_Intrinsics_vec128_and(Lib_IntVector_Intrinsics_vec128_shift_right64(f0,
+ (uint32_t)26U),
+ Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU));
+ Lib_IntVector_Intrinsics_vec128
+ f20 =
+ Lib_IntVector_Intrinsics_vec128_or(Lib_IntVector_Intrinsics_vec128_shift_right64(f0,
+ (uint32_t)52U),
+ Lib_IntVector_Intrinsics_vec128_shift_left64(Lib_IntVector_Intrinsics_vec128_and(f1,
+ Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3fffU)),
+ (uint32_t)12U));
+ Lib_IntVector_Intrinsics_vec128
+ f30 =
+ Lib_IntVector_Intrinsics_vec128_and(Lib_IntVector_Intrinsics_vec128_shift_right64(f1,
+ (uint32_t)14U),
+ Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU));
+ Lib_IntVector_Intrinsics_vec128
+ f40 = Lib_IntVector_Intrinsics_vec128_shift_right64(f1, (uint32_t)40U);
+ Lib_IntVector_Intrinsics_vec128 f01 = f010;
+ Lib_IntVector_Intrinsics_vec128 f111 = f110;
+ Lib_IntVector_Intrinsics_vec128 f2 = f20;
+ Lib_IntVector_Intrinsics_vec128 f3 = f30;
+ Lib_IntVector_Intrinsics_vec128 f41 = f40;
+ e[0U] = f01;
+ e[1U] = f111;
+ e[2U] = f2;
+ e[3U] = f3;
+ e[4U] = f41;
+ uint64_t b = (uint64_t)0x1000000U;
+ Lib_IntVector_Intrinsics_vec128 mask = Lib_IntVector_Intrinsics_vec128_load64(b);
+ Lib_IntVector_Intrinsics_vec128 f4 = e[4U];
+ e[4U] = Lib_IntVector_Intrinsics_vec128_or(f4, mask);
+ Lib_IntVector_Intrinsics_vec128 *r1 = pre0;
+ Lib_IntVector_Intrinsics_vec128 *r5 = pre0 + (uint32_t)5U;
+ Lib_IntVector_Intrinsics_vec128 r0 = r1[0U];
+ Lib_IntVector_Intrinsics_vec128 r11 = r1[1U];
+ Lib_IntVector_Intrinsics_vec128 r2 = r1[2U];
+ Lib_IntVector_Intrinsics_vec128 r3 = r1[3U];
+ Lib_IntVector_Intrinsics_vec128 r4 = r1[4U];
+ Lib_IntVector_Intrinsics_vec128 r51 = r5[1U];
+ Lib_IntVector_Intrinsics_vec128 r52 = r5[2U];
+ Lib_IntVector_Intrinsics_vec128 r53 = r5[3U];
+ Lib_IntVector_Intrinsics_vec128 r54 = r5[4U];
+ Lib_IntVector_Intrinsics_vec128 f10 = e[0U];
+ Lib_IntVector_Intrinsics_vec128 f11 = e[1U];
+ Lib_IntVector_Intrinsics_vec128 f12 = e[2U];
+ Lib_IntVector_Intrinsics_vec128 f13 = e[3U];
+ Lib_IntVector_Intrinsics_vec128 f14 = e[4U];
+ Lib_IntVector_Intrinsics_vec128 a0 = acc0[0U];
+ Lib_IntVector_Intrinsics_vec128 a1 = acc0[1U];
+ Lib_IntVector_Intrinsics_vec128 a2 = acc0[2U];
+ Lib_IntVector_Intrinsics_vec128 a3 = acc0[3U];
+ Lib_IntVector_Intrinsics_vec128 a4 = acc0[4U];
+ Lib_IntVector_Intrinsics_vec128 a01 = Lib_IntVector_Intrinsics_vec128_add64(a0, f10);
+ Lib_IntVector_Intrinsics_vec128 a11 = Lib_IntVector_Intrinsics_vec128_add64(a1, f11);
+ Lib_IntVector_Intrinsics_vec128 a21 = Lib_IntVector_Intrinsics_vec128_add64(a2, f12);
+ Lib_IntVector_Intrinsics_vec128 a31 = Lib_IntVector_Intrinsics_vec128_add64(a3, f13);
+ Lib_IntVector_Intrinsics_vec128 a41 = Lib_IntVector_Intrinsics_vec128_add64(a4, f14);
+ Lib_IntVector_Intrinsics_vec128 a02 = Lib_IntVector_Intrinsics_vec128_mul64(r0, a01);
+ Lib_IntVector_Intrinsics_vec128 a12 = Lib_IntVector_Intrinsics_vec128_mul64(r11, a01);
+ Lib_IntVector_Intrinsics_vec128 a22 = Lib_IntVector_Intrinsics_vec128_mul64(r2, a01);
+ Lib_IntVector_Intrinsics_vec128 a32 = Lib_IntVector_Intrinsics_vec128_mul64(r3, a01);
+ Lib_IntVector_Intrinsics_vec128 a42 = Lib_IntVector_Intrinsics_vec128_mul64(r4, a01);
+ Lib_IntVector_Intrinsics_vec128
+ a03 =
+ Lib_IntVector_Intrinsics_vec128_add64(a02,
+ Lib_IntVector_Intrinsics_vec128_mul64(r54, a11));
+ Lib_IntVector_Intrinsics_vec128
+ a13 =
+ Lib_IntVector_Intrinsics_vec128_add64(a12,
+ Lib_IntVector_Intrinsics_vec128_mul64(r0, a11));
+ Lib_IntVector_Intrinsics_vec128
+ a23 =
+ Lib_IntVector_Intrinsics_vec128_add64(a22,
+ Lib_IntVector_Intrinsics_vec128_mul64(r11, a11));
+ Lib_IntVector_Intrinsics_vec128
+ a33 =
+ Lib_IntVector_Intrinsics_vec128_add64(a32,
+ Lib_IntVector_Intrinsics_vec128_mul64(r2, a11));
+ Lib_IntVector_Intrinsics_vec128
+ a43 =
+ Lib_IntVector_Intrinsics_vec128_add64(a42,
+ Lib_IntVector_Intrinsics_vec128_mul64(r3, a11));
+ Lib_IntVector_Intrinsics_vec128
+ a04 =
+ Lib_IntVector_Intrinsics_vec128_add64(a03,
+ Lib_IntVector_Intrinsics_vec128_mul64(r53, a21));
+ Lib_IntVector_Intrinsics_vec128
+ a14 =
+ Lib_IntVector_Intrinsics_vec128_add64(a13,
+ Lib_IntVector_Intrinsics_vec128_mul64(r54, a21));
+ Lib_IntVector_Intrinsics_vec128
+ a24 =
+ Lib_IntVector_Intrinsics_vec128_add64(a23,
+ Lib_IntVector_Intrinsics_vec128_mul64(r0, a21));
+ Lib_IntVector_Intrinsics_vec128
+ a34 =
+ Lib_IntVector_Intrinsics_vec128_add64(a33,
+ Lib_IntVector_Intrinsics_vec128_mul64(r11, a21));
+ Lib_IntVector_Intrinsics_vec128
+ a44 =
+ Lib_IntVector_Intrinsics_vec128_add64(a43,
+ Lib_IntVector_Intrinsics_vec128_mul64(r2, a21));
+ Lib_IntVector_Intrinsics_vec128
+ a05 =
+ Lib_IntVector_Intrinsics_vec128_add64(a04,
+ Lib_IntVector_Intrinsics_vec128_mul64(r52, a31));
+ Lib_IntVector_Intrinsics_vec128
+ a15 =
+ Lib_IntVector_Intrinsics_vec128_add64(a14,
+ Lib_IntVector_Intrinsics_vec128_mul64(r53, a31));
+ Lib_IntVector_Intrinsics_vec128
+ a25 =
+ Lib_IntVector_Intrinsics_vec128_add64(a24,
+ Lib_IntVector_Intrinsics_vec128_mul64(r54, a31));
+ Lib_IntVector_Intrinsics_vec128
+ a35 =
+ Lib_IntVector_Intrinsics_vec128_add64(a34,
+ Lib_IntVector_Intrinsics_vec128_mul64(r0, a31));
+ Lib_IntVector_Intrinsics_vec128
+ a45 =
+ Lib_IntVector_Intrinsics_vec128_add64(a44,
+ Lib_IntVector_Intrinsics_vec128_mul64(r11, a31));
+ Lib_IntVector_Intrinsics_vec128
+ a06 =
+ Lib_IntVector_Intrinsics_vec128_add64(a05,
+ Lib_IntVector_Intrinsics_vec128_mul64(r51, a41));
+ Lib_IntVector_Intrinsics_vec128
+ a16 =
+ Lib_IntVector_Intrinsics_vec128_add64(a15,
+ Lib_IntVector_Intrinsics_vec128_mul64(r52, a41));
+ Lib_IntVector_Intrinsics_vec128
+ a26 =
+ Lib_IntVector_Intrinsics_vec128_add64(a25,
+ Lib_IntVector_Intrinsics_vec128_mul64(r53, a41));
+ Lib_IntVector_Intrinsics_vec128
+ a36 =
+ Lib_IntVector_Intrinsics_vec128_add64(a35,
+ Lib_IntVector_Intrinsics_vec128_mul64(r54, a41));
+ Lib_IntVector_Intrinsics_vec128
+ a46 =
+ Lib_IntVector_Intrinsics_vec128_add64(a45,
+ Lib_IntVector_Intrinsics_vec128_mul64(r0, a41));
+ Lib_IntVector_Intrinsics_vec128 t01 = a06;
+ Lib_IntVector_Intrinsics_vec128 t11 = a16;
+ Lib_IntVector_Intrinsics_vec128 t2 = a26;
+ Lib_IntVector_Intrinsics_vec128 t3 = a36;
+ Lib_IntVector_Intrinsics_vec128 t4 = a46;
+ Lib_IntVector_Intrinsics_vec128
+ mask26 = Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU);
+ Lib_IntVector_Intrinsics_vec128
+ z0 = Lib_IntVector_Intrinsics_vec128_shift_right64(t01, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec128
+ z1 = Lib_IntVector_Intrinsics_vec128_shift_right64(t3, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec128 x0 = Lib_IntVector_Intrinsics_vec128_and(t01, mask26);
+ Lib_IntVector_Intrinsics_vec128 x3 = Lib_IntVector_Intrinsics_vec128_and(t3, mask26);
+ Lib_IntVector_Intrinsics_vec128 x1 = Lib_IntVector_Intrinsics_vec128_add64(t11, z0);
+ Lib_IntVector_Intrinsics_vec128 x4 = Lib_IntVector_Intrinsics_vec128_add64(t4, z1);
+ Lib_IntVector_Intrinsics_vec128
+ z01 = Lib_IntVector_Intrinsics_vec128_shift_right64(x1, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec128
+ z11 = Lib_IntVector_Intrinsics_vec128_shift_right64(x4, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec128
+ t = Lib_IntVector_Intrinsics_vec128_shift_left64(z11, (uint32_t)2U);
+ Lib_IntVector_Intrinsics_vec128 z12 = Lib_IntVector_Intrinsics_vec128_add64(z11, t);
+ Lib_IntVector_Intrinsics_vec128 x11 = Lib_IntVector_Intrinsics_vec128_and(x1, mask26);
+ Lib_IntVector_Intrinsics_vec128 x41 = Lib_IntVector_Intrinsics_vec128_and(x4, mask26);
+ Lib_IntVector_Intrinsics_vec128 x2 = Lib_IntVector_Intrinsics_vec128_add64(t2, z01);
+ Lib_IntVector_Intrinsics_vec128 x01 = Lib_IntVector_Intrinsics_vec128_add64(x0, z12);
+ Lib_IntVector_Intrinsics_vec128
+ z02 = Lib_IntVector_Intrinsics_vec128_shift_right64(x2, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec128
+ z13 = Lib_IntVector_Intrinsics_vec128_shift_right64(x01, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec128 x21 = Lib_IntVector_Intrinsics_vec128_and(x2, mask26);
+ Lib_IntVector_Intrinsics_vec128 x02 = Lib_IntVector_Intrinsics_vec128_and(x01, mask26);
+ Lib_IntVector_Intrinsics_vec128 x31 = Lib_IntVector_Intrinsics_vec128_add64(x3, z02);
+ Lib_IntVector_Intrinsics_vec128 x12 = Lib_IntVector_Intrinsics_vec128_add64(x11, z13);
+ Lib_IntVector_Intrinsics_vec128
+ z03 = Lib_IntVector_Intrinsics_vec128_shift_right64(x31, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec128 x32 = Lib_IntVector_Intrinsics_vec128_and(x31, mask26);
+ Lib_IntVector_Intrinsics_vec128 x42 = Lib_IntVector_Intrinsics_vec128_add64(x41, z03);
+ Lib_IntVector_Intrinsics_vec128 o0 = x02;
+ Lib_IntVector_Intrinsics_vec128 o1 = x12;
+ Lib_IntVector_Intrinsics_vec128 o2 = x21;
+ Lib_IntVector_Intrinsics_vec128 o3 = x32;
+ Lib_IntVector_Intrinsics_vec128 o4 = x42;
+ acc0[0U] = o0;
+ acc0[1U] = o1;
+ acc0[2U] = o2;
+ acc0[3U] = o3;
+ acc0[4U] = o4;
+ }
+ if (rem1 > (uint32_t)0U) {
+ uint8_t *last = t10 + nb * (uint32_t)16U;
+ Lib_IntVector_Intrinsics_vec128 e[5U];
+ for (uint32_t _i = 0U; _i < (uint32_t)5U; ++_i)
+ e[_i] = Lib_IntVector_Intrinsics_vec128_zero;
+ uint8_t tmp[16U] = { 0U };
+ memcpy(tmp, last, rem1 * sizeof(last[0U]));
+ uint64_t u0 = load64_le(tmp);
+ uint64_t lo = u0;
+ uint64_t u = load64_le(tmp + (uint32_t)8U);
+ uint64_t hi = u;
+ Lib_IntVector_Intrinsics_vec128 f0 = Lib_IntVector_Intrinsics_vec128_load64(lo);
+ Lib_IntVector_Intrinsics_vec128 f1 = Lib_IntVector_Intrinsics_vec128_load64(hi);
+ Lib_IntVector_Intrinsics_vec128
+ f010 =
+ Lib_IntVector_Intrinsics_vec128_and(f0,
+ Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU));
+ Lib_IntVector_Intrinsics_vec128
+ f110 =
+ Lib_IntVector_Intrinsics_vec128_and(Lib_IntVector_Intrinsics_vec128_shift_right64(f0,
+ (uint32_t)26U),
+ Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU));
+ Lib_IntVector_Intrinsics_vec128
+ f20 =
+ Lib_IntVector_Intrinsics_vec128_or(Lib_IntVector_Intrinsics_vec128_shift_right64(f0,
+ (uint32_t)52U),
+ Lib_IntVector_Intrinsics_vec128_shift_left64(Lib_IntVector_Intrinsics_vec128_and(f1,
+ Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3fffU)),
+ (uint32_t)12U));
+ Lib_IntVector_Intrinsics_vec128
+ f30 =
+ Lib_IntVector_Intrinsics_vec128_and(Lib_IntVector_Intrinsics_vec128_shift_right64(f1,
+ (uint32_t)14U),
+ Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU));
+ Lib_IntVector_Intrinsics_vec128
+ f40 = Lib_IntVector_Intrinsics_vec128_shift_right64(f1, (uint32_t)40U);
+ Lib_IntVector_Intrinsics_vec128 f01 = f010;
+ Lib_IntVector_Intrinsics_vec128 f111 = f110;
+ Lib_IntVector_Intrinsics_vec128 f2 = f20;
+ Lib_IntVector_Intrinsics_vec128 f3 = f30;
+ Lib_IntVector_Intrinsics_vec128 f4 = f40;
+ e[0U] = f01;
+ e[1U] = f111;
+ e[2U] = f2;
+ e[3U] = f3;
+ e[4U] = f4;
+ uint64_t b = (uint64_t)1U << rem1 * (uint32_t)8U % (uint32_t)26U;
+ Lib_IntVector_Intrinsics_vec128 mask = Lib_IntVector_Intrinsics_vec128_load64(b);
+ Lib_IntVector_Intrinsics_vec128 fi = e[rem1 * (uint32_t)8U / (uint32_t)26U];
+ e[rem1 * (uint32_t)8U / (uint32_t)26U] = Lib_IntVector_Intrinsics_vec128_or(fi, mask);
+ Lib_IntVector_Intrinsics_vec128 *r1 = pre0;
+ Lib_IntVector_Intrinsics_vec128 *r5 = pre0 + (uint32_t)5U;
+ Lib_IntVector_Intrinsics_vec128 r0 = r1[0U];
+ Lib_IntVector_Intrinsics_vec128 r11 = r1[1U];
+ Lib_IntVector_Intrinsics_vec128 r2 = r1[2U];
+ Lib_IntVector_Intrinsics_vec128 r3 = r1[3U];
+ Lib_IntVector_Intrinsics_vec128 r4 = r1[4U];
+ Lib_IntVector_Intrinsics_vec128 r51 = r5[1U];
+ Lib_IntVector_Intrinsics_vec128 r52 = r5[2U];
+ Lib_IntVector_Intrinsics_vec128 r53 = r5[3U];
+ Lib_IntVector_Intrinsics_vec128 r54 = r5[4U];
+ Lib_IntVector_Intrinsics_vec128 f10 = e[0U];
+ Lib_IntVector_Intrinsics_vec128 f11 = e[1U];
+ Lib_IntVector_Intrinsics_vec128 f12 = e[2U];
+ Lib_IntVector_Intrinsics_vec128 f13 = e[3U];
+ Lib_IntVector_Intrinsics_vec128 f14 = e[4U];
+ Lib_IntVector_Intrinsics_vec128 a0 = acc0[0U];
+ Lib_IntVector_Intrinsics_vec128 a1 = acc0[1U];
+ Lib_IntVector_Intrinsics_vec128 a2 = acc0[2U];
+ Lib_IntVector_Intrinsics_vec128 a3 = acc0[3U];
+ Lib_IntVector_Intrinsics_vec128 a4 = acc0[4U];
+ Lib_IntVector_Intrinsics_vec128 a01 = Lib_IntVector_Intrinsics_vec128_add64(a0, f10);
+ Lib_IntVector_Intrinsics_vec128 a11 = Lib_IntVector_Intrinsics_vec128_add64(a1, f11);
+ Lib_IntVector_Intrinsics_vec128 a21 = Lib_IntVector_Intrinsics_vec128_add64(a2, f12);
+ Lib_IntVector_Intrinsics_vec128 a31 = Lib_IntVector_Intrinsics_vec128_add64(a3, f13);
+ Lib_IntVector_Intrinsics_vec128 a41 = Lib_IntVector_Intrinsics_vec128_add64(a4, f14);
+ Lib_IntVector_Intrinsics_vec128 a02 = Lib_IntVector_Intrinsics_vec128_mul64(r0, a01);
+ Lib_IntVector_Intrinsics_vec128 a12 = Lib_IntVector_Intrinsics_vec128_mul64(r11, a01);
+ Lib_IntVector_Intrinsics_vec128 a22 = Lib_IntVector_Intrinsics_vec128_mul64(r2, a01);
+ Lib_IntVector_Intrinsics_vec128 a32 = Lib_IntVector_Intrinsics_vec128_mul64(r3, a01);
+ Lib_IntVector_Intrinsics_vec128 a42 = Lib_IntVector_Intrinsics_vec128_mul64(r4, a01);
+ Lib_IntVector_Intrinsics_vec128
+ a03 =
+ Lib_IntVector_Intrinsics_vec128_add64(a02,
+ Lib_IntVector_Intrinsics_vec128_mul64(r54, a11));
+ Lib_IntVector_Intrinsics_vec128
+ a13 =
+ Lib_IntVector_Intrinsics_vec128_add64(a12,
+ Lib_IntVector_Intrinsics_vec128_mul64(r0, a11));
+ Lib_IntVector_Intrinsics_vec128
+ a23 =
+ Lib_IntVector_Intrinsics_vec128_add64(a22,
+ Lib_IntVector_Intrinsics_vec128_mul64(r11, a11));
+ Lib_IntVector_Intrinsics_vec128
+ a33 =
+ Lib_IntVector_Intrinsics_vec128_add64(a32,
+ Lib_IntVector_Intrinsics_vec128_mul64(r2, a11));
+ Lib_IntVector_Intrinsics_vec128
+ a43 =
+ Lib_IntVector_Intrinsics_vec128_add64(a42,
+ Lib_IntVector_Intrinsics_vec128_mul64(r3, a11));
+ Lib_IntVector_Intrinsics_vec128
+ a04 =
+ Lib_IntVector_Intrinsics_vec128_add64(a03,
+ Lib_IntVector_Intrinsics_vec128_mul64(r53, a21));
+ Lib_IntVector_Intrinsics_vec128
+ a14 =
+ Lib_IntVector_Intrinsics_vec128_add64(a13,
+ Lib_IntVector_Intrinsics_vec128_mul64(r54, a21));
+ Lib_IntVector_Intrinsics_vec128
+ a24 =
+ Lib_IntVector_Intrinsics_vec128_add64(a23,
+ Lib_IntVector_Intrinsics_vec128_mul64(r0, a21));
+ Lib_IntVector_Intrinsics_vec128
+ a34 =
+ Lib_IntVector_Intrinsics_vec128_add64(a33,
+ Lib_IntVector_Intrinsics_vec128_mul64(r11, a21));
+ Lib_IntVector_Intrinsics_vec128
+ a44 =
+ Lib_IntVector_Intrinsics_vec128_add64(a43,
+ Lib_IntVector_Intrinsics_vec128_mul64(r2, a21));
+ Lib_IntVector_Intrinsics_vec128
+ a05 =
+ Lib_IntVector_Intrinsics_vec128_add64(a04,
+ Lib_IntVector_Intrinsics_vec128_mul64(r52, a31));
+ Lib_IntVector_Intrinsics_vec128
+ a15 =
+ Lib_IntVector_Intrinsics_vec128_add64(a14,
+ Lib_IntVector_Intrinsics_vec128_mul64(r53, a31));
+ Lib_IntVector_Intrinsics_vec128
+ a25 =
+ Lib_IntVector_Intrinsics_vec128_add64(a24,
+ Lib_IntVector_Intrinsics_vec128_mul64(r54, a31));
+ Lib_IntVector_Intrinsics_vec128
+ a35 =
+ Lib_IntVector_Intrinsics_vec128_add64(a34,
+ Lib_IntVector_Intrinsics_vec128_mul64(r0, a31));
+ Lib_IntVector_Intrinsics_vec128
+ a45 =
+ Lib_IntVector_Intrinsics_vec128_add64(a44,
+ Lib_IntVector_Intrinsics_vec128_mul64(r11, a31));
+ Lib_IntVector_Intrinsics_vec128
+ a06 =
+ Lib_IntVector_Intrinsics_vec128_add64(a05,
+ Lib_IntVector_Intrinsics_vec128_mul64(r51, a41));
+ Lib_IntVector_Intrinsics_vec128
+ a16 =
+ Lib_IntVector_Intrinsics_vec128_add64(a15,
+ Lib_IntVector_Intrinsics_vec128_mul64(r52, a41));
+ Lib_IntVector_Intrinsics_vec128
+ a26 =
+ Lib_IntVector_Intrinsics_vec128_add64(a25,
+ Lib_IntVector_Intrinsics_vec128_mul64(r53, a41));
+ Lib_IntVector_Intrinsics_vec128
+ a36 =
+ Lib_IntVector_Intrinsics_vec128_add64(a35,
+ Lib_IntVector_Intrinsics_vec128_mul64(r54, a41));
+ Lib_IntVector_Intrinsics_vec128
+ a46 =
+ Lib_IntVector_Intrinsics_vec128_add64(a45,
+ Lib_IntVector_Intrinsics_vec128_mul64(r0, a41));
+ Lib_IntVector_Intrinsics_vec128 t01 = a06;
+ Lib_IntVector_Intrinsics_vec128 t11 = a16;
+ Lib_IntVector_Intrinsics_vec128 t2 = a26;
+ Lib_IntVector_Intrinsics_vec128 t3 = a36;
+ Lib_IntVector_Intrinsics_vec128 t4 = a46;
+ Lib_IntVector_Intrinsics_vec128
+ mask26 = Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU);
+ Lib_IntVector_Intrinsics_vec128
+ z0 = Lib_IntVector_Intrinsics_vec128_shift_right64(t01, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec128
+ z1 = Lib_IntVector_Intrinsics_vec128_shift_right64(t3, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec128 x0 = Lib_IntVector_Intrinsics_vec128_and(t01, mask26);
+ Lib_IntVector_Intrinsics_vec128 x3 = Lib_IntVector_Intrinsics_vec128_and(t3, mask26);
+ Lib_IntVector_Intrinsics_vec128 x1 = Lib_IntVector_Intrinsics_vec128_add64(t11, z0);
+ Lib_IntVector_Intrinsics_vec128 x4 = Lib_IntVector_Intrinsics_vec128_add64(t4, z1);
+ Lib_IntVector_Intrinsics_vec128
+ z01 = Lib_IntVector_Intrinsics_vec128_shift_right64(x1, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec128
+ z11 = Lib_IntVector_Intrinsics_vec128_shift_right64(x4, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec128
+ t = Lib_IntVector_Intrinsics_vec128_shift_left64(z11, (uint32_t)2U);
+ Lib_IntVector_Intrinsics_vec128 z12 = Lib_IntVector_Intrinsics_vec128_add64(z11, t);
+ Lib_IntVector_Intrinsics_vec128 x11 = Lib_IntVector_Intrinsics_vec128_and(x1, mask26);
+ Lib_IntVector_Intrinsics_vec128 x41 = Lib_IntVector_Intrinsics_vec128_and(x4, mask26);
+ Lib_IntVector_Intrinsics_vec128 x2 = Lib_IntVector_Intrinsics_vec128_add64(t2, z01);
+ Lib_IntVector_Intrinsics_vec128 x01 = Lib_IntVector_Intrinsics_vec128_add64(x0, z12);
+ Lib_IntVector_Intrinsics_vec128
+ z02 = Lib_IntVector_Intrinsics_vec128_shift_right64(x2, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec128
+ z13 = Lib_IntVector_Intrinsics_vec128_shift_right64(x01, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec128 x21 = Lib_IntVector_Intrinsics_vec128_and(x2, mask26);
+ Lib_IntVector_Intrinsics_vec128 x02 = Lib_IntVector_Intrinsics_vec128_and(x01, mask26);
+ Lib_IntVector_Intrinsics_vec128 x31 = Lib_IntVector_Intrinsics_vec128_add64(x3, z02);
+ Lib_IntVector_Intrinsics_vec128 x12 = Lib_IntVector_Intrinsics_vec128_add64(x11, z13);
+ Lib_IntVector_Intrinsics_vec128
+ z03 = Lib_IntVector_Intrinsics_vec128_shift_right64(x31, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec128 x32 = Lib_IntVector_Intrinsics_vec128_and(x31, mask26);
+ Lib_IntVector_Intrinsics_vec128 x42 = Lib_IntVector_Intrinsics_vec128_add64(x41, z03);
+ Lib_IntVector_Intrinsics_vec128 o0 = x02;
+ Lib_IntVector_Intrinsics_vec128 o1 = x12;
+ Lib_IntVector_Intrinsics_vec128 o2 = x21;
+ Lib_IntVector_Intrinsics_vec128 o3 = x32;
+ Lib_IntVector_Intrinsics_vec128 o4 = x42;
+ acc0[0U] = o0;
+ acc0[1U] = o1;
+ acc0[2U] = o2;
+ acc0[3U] = o3;
+ acc0[4U] = o4;
+ }
+ uint8_t tmp[16U] = { 0U };
+ memcpy(tmp, rem, r * sizeof(rem[0U]));
+ if (r > (uint32_t)0U) {
+ Lib_IntVector_Intrinsics_vec128 *pre = ctx + (uint32_t)5U;
+ Lib_IntVector_Intrinsics_vec128 *acc = ctx;
+ Lib_IntVector_Intrinsics_vec128 e[5U];
+ for (uint32_t _i = 0U; _i < (uint32_t)5U; ++_i)
+ e[_i] = Lib_IntVector_Intrinsics_vec128_zero;
+ uint64_t u0 = load64_le(tmp);
+ uint64_t lo = u0;
+ uint64_t u = load64_le(tmp + (uint32_t)8U);
+ uint64_t hi = u;
+ Lib_IntVector_Intrinsics_vec128 f0 = Lib_IntVector_Intrinsics_vec128_load64(lo);
+ Lib_IntVector_Intrinsics_vec128 f1 = Lib_IntVector_Intrinsics_vec128_load64(hi);
+ Lib_IntVector_Intrinsics_vec128
+ f010 =
+ Lib_IntVector_Intrinsics_vec128_and(f0,
+ Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU));
+ Lib_IntVector_Intrinsics_vec128
+ f110 =
+ Lib_IntVector_Intrinsics_vec128_and(Lib_IntVector_Intrinsics_vec128_shift_right64(f0,
+ (uint32_t)26U),
+ Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU));
+ Lib_IntVector_Intrinsics_vec128
+ f20 =
+ Lib_IntVector_Intrinsics_vec128_or(Lib_IntVector_Intrinsics_vec128_shift_right64(f0,
+ (uint32_t)52U),
+ Lib_IntVector_Intrinsics_vec128_shift_left64(Lib_IntVector_Intrinsics_vec128_and(f1,
+ Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3fffU)),
+ (uint32_t)12U));
+ Lib_IntVector_Intrinsics_vec128
+ f30 =
+ Lib_IntVector_Intrinsics_vec128_and(Lib_IntVector_Intrinsics_vec128_shift_right64(f1,
+ (uint32_t)14U),
+ Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU));
+ Lib_IntVector_Intrinsics_vec128
+ f40 = Lib_IntVector_Intrinsics_vec128_shift_right64(f1, (uint32_t)40U);
+ Lib_IntVector_Intrinsics_vec128 f01 = f010;
+ Lib_IntVector_Intrinsics_vec128 f111 = f110;
+ Lib_IntVector_Intrinsics_vec128 f2 = f20;
+ Lib_IntVector_Intrinsics_vec128 f3 = f30;
+ Lib_IntVector_Intrinsics_vec128 f41 = f40;
+ e[0U] = f01;
+ e[1U] = f111;
+ e[2U] = f2;
+ e[3U] = f3;
+ e[4U] = f41;
+ uint64_t b = (uint64_t)0x1000000U;
+ Lib_IntVector_Intrinsics_vec128 mask = Lib_IntVector_Intrinsics_vec128_load64(b);
+ Lib_IntVector_Intrinsics_vec128 f4 = e[4U];
+ e[4U] = Lib_IntVector_Intrinsics_vec128_or(f4, mask);
+ Lib_IntVector_Intrinsics_vec128 *r1 = pre;
+ Lib_IntVector_Intrinsics_vec128 *r5 = pre + (uint32_t)5U;
+ Lib_IntVector_Intrinsics_vec128 r0 = r1[0U];
+ Lib_IntVector_Intrinsics_vec128 r11 = r1[1U];
+ Lib_IntVector_Intrinsics_vec128 r2 = r1[2U];
+ Lib_IntVector_Intrinsics_vec128 r3 = r1[3U];
+ Lib_IntVector_Intrinsics_vec128 r4 = r1[4U];
+ Lib_IntVector_Intrinsics_vec128 r51 = r5[1U];
+ Lib_IntVector_Intrinsics_vec128 r52 = r5[2U];
+ Lib_IntVector_Intrinsics_vec128 r53 = r5[3U];
+ Lib_IntVector_Intrinsics_vec128 r54 = r5[4U];
+ Lib_IntVector_Intrinsics_vec128 f10 = e[0U];
+ Lib_IntVector_Intrinsics_vec128 f11 = e[1U];
+ Lib_IntVector_Intrinsics_vec128 f12 = e[2U];
+ Lib_IntVector_Intrinsics_vec128 f13 = e[3U];
+ Lib_IntVector_Intrinsics_vec128 f14 = e[4U];
+ Lib_IntVector_Intrinsics_vec128 a0 = acc[0U];
+ Lib_IntVector_Intrinsics_vec128 a1 = acc[1U];
+ Lib_IntVector_Intrinsics_vec128 a2 = acc[2U];
+ Lib_IntVector_Intrinsics_vec128 a3 = acc[3U];
+ Lib_IntVector_Intrinsics_vec128 a4 = acc[4U];
+ Lib_IntVector_Intrinsics_vec128 a01 = Lib_IntVector_Intrinsics_vec128_add64(a0, f10);
+ Lib_IntVector_Intrinsics_vec128 a11 = Lib_IntVector_Intrinsics_vec128_add64(a1, f11);
+ Lib_IntVector_Intrinsics_vec128 a21 = Lib_IntVector_Intrinsics_vec128_add64(a2, f12);
+ Lib_IntVector_Intrinsics_vec128 a31 = Lib_IntVector_Intrinsics_vec128_add64(a3, f13);
+ Lib_IntVector_Intrinsics_vec128 a41 = Lib_IntVector_Intrinsics_vec128_add64(a4, f14);
+ Lib_IntVector_Intrinsics_vec128 a02 = Lib_IntVector_Intrinsics_vec128_mul64(r0, a01);
+ Lib_IntVector_Intrinsics_vec128 a12 = Lib_IntVector_Intrinsics_vec128_mul64(r11, a01);
+ Lib_IntVector_Intrinsics_vec128 a22 = Lib_IntVector_Intrinsics_vec128_mul64(r2, a01);
+ Lib_IntVector_Intrinsics_vec128 a32 = Lib_IntVector_Intrinsics_vec128_mul64(r3, a01);
+ Lib_IntVector_Intrinsics_vec128 a42 = Lib_IntVector_Intrinsics_vec128_mul64(r4, a01);
+ Lib_IntVector_Intrinsics_vec128
+ a03 =
+ Lib_IntVector_Intrinsics_vec128_add64(a02,
+ Lib_IntVector_Intrinsics_vec128_mul64(r54, a11));
+ Lib_IntVector_Intrinsics_vec128
+ a13 =
+ Lib_IntVector_Intrinsics_vec128_add64(a12,
+ Lib_IntVector_Intrinsics_vec128_mul64(r0, a11));
+ Lib_IntVector_Intrinsics_vec128
+ a23 =
+ Lib_IntVector_Intrinsics_vec128_add64(a22,
+ Lib_IntVector_Intrinsics_vec128_mul64(r11, a11));
+ Lib_IntVector_Intrinsics_vec128
+ a33 =
+ Lib_IntVector_Intrinsics_vec128_add64(a32,
+ Lib_IntVector_Intrinsics_vec128_mul64(r2, a11));
+ Lib_IntVector_Intrinsics_vec128
+ a43 =
+ Lib_IntVector_Intrinsics_vec128_add64(a42,
+ Lib_IntVector_Intrinsics_vec128_mul64(r3, a11));
+ Lib_IntVector_Intrinsics_vec128
+ a04 =
+ Lib_IntVector_Intrinsics_vec128_add64(a03,
+ Lib_IntVector_Intrinsics_vec128_mul64(r53, a21));
+ Lib_IntVector_Intrinsics_vec128
+ a14 =
+ Lib_IntVector_Intrinsics_vec128_add64(a13,
+ Lib_IntVector_Intrinsics_vec128_mul64(r54, a21));
+ Lib_IntVector_Intrinsics_vec128
+ a24 =
+ Lib_IntVector_Intrinsics_vec128_add64(a23,
+ Lib_IntVector_Intrinsics_vec128_mul64(r0, a21));
+ Lib_IntVector_Intrinsics_vec128
+ a34 =
+ Lib_IntVector_Intrinsics_vec128_add64(a33,
+ Lib_IntVector_Intrinsics_vec128_mul64(r11, a21));
+ Lib_IntVector_Intrinsics_vec128
+ a44 =
+ Lib_IntVector_Intrinsics_vec128_add64(a43,
+ Lib_IntVector_Intrinsics_vec128_mul64(r2, a21));
+ Lib_IntVector_Intrinsics_vec128
+ a05 =
+ Lib_IntVector_Intrinsics_vec128_add64(a04,
+ Lib_IntVector_Intrinsics_vec128_mul64(r52, a31));
+ Lib_IntVector_Intrinsics_vec128
+ a15 =
+ Lib_IntVector_Intrinsics_vec128_add64(a14,
+ Lib_IntVector_Intrinsics_vec128_mul64(r53, a31));
+ Lib_IntVector_Intrinsics_vec128
+ a25 =
+ Lib_IntVector_Intrinsics_vec128_add64(a24,
+ Lib_IntVector_Intrinsics_vec128_mul64(r54, a31));
+ Lib_IntVector_Intrinsics_vec128
+ a35 =
+ Lib_IntVector_Intrinsics_vec128_add64(a34,
+ Lib_IntVector_Intrinsics_vec128_mul64(r0, a31));
+ Lib_IntVector_Intrinsics_vec128
+ a45 =
+ Lib_IntVector_Intrinsics_vec128_add64(a44,
+ Lib_IntVector_Intrinsics_vec128_mul64(r11, a31));
+ Lib_IntVector_Intrinsics_vec128
+ a06 =
+ Lib_IntVector_Intrinsics_vec128_add64(a05,
+ Lib_IntVector_Intrinsics_vec128_mul64(r51, a41));
+ Lib_IntVector_Intrinsics_vec128
+ a16 =
+ Lib_IntVector_Intrinsics_vec128_add64(a15,
+ Lib_IntVector_Intrinsics_vec128_mul64(r52, a41));
+ Lib_IntVector_Intrinsics_vec128
+ a26 =
+ Lib_IntVector_Intrinsics_vec128_add64(a25,
+ Lib_IntVector_Intrinsics_vec128_mul64(r53, a41));
+ Lib_IntVector_Intrinsics_vec128
+ a36 =
+ Lib_IntVector_Intrinsics_vec128_add64(a35,
+ Lib_IntVector_Intrinsics_vec128_mul64(r54, a41));
+ Lib_IntVector_Intrinsics_vec128
+ a46 =
+ Lib_IntVector_Intrinsics_vec128_add64(a45,
+ Lib_IntVector_Intrinsics_vec128_mul64(r0, a41));
+ Lib_IntVector_Intrinsics_vec128 t0 = a06;
+ Lib_IntVector_Intrinsics_vec128 t1 = a16;
+ Lib_IntVector_Intrinsics_vec128 t2 = a26;
+ Lib_IntVector_Intrinsics_vec128 t3 = a36;
+ Lib_IntVector_Intrinsics_vec128 t4 = a46;
+ Lib_IntVector_Intrinsics_vec128
+ mask26 = Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU);
+ Lib_IntVector_Intrinsics_vec128
+ z0 = Lib_IntVector_Intrinsics_vec128_shift_right64(t0, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec128
+ z1 = Lib_IntVector_Intrinsics_vec128_shift_right64(t3, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec128 x0 = Lib_IntVector_Intrinsics_vec128_and(t0, mask26);
+ Lib_IntVector_Intrinsics_vec128 x3 = Lib_IntVector_Intrinsics_vec128_and(t3, mask26);
+ Lib_IntVector_Intrinsics_vec128 x1 = Lib_IntVector_Intrinsics_vec128_add64(t1, z0);
+ Lib_IntVector_Intrinsics_vec128 x4 = Lib_IntVector_Intrinsics_vec128_add64(t4, z1);
+ Lib_IntVector_Intrinsics_vec128
+ z01 = Lib_IntVector_Intrinsics_vec128_shift_right64(x1, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec128
+ z11 = Lib_IntVector_Intrinsics_vec128_shift_right64(x4, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec128
+ t = Lib_IntVector_Intrinsics_vec128_shift_left64(z11, (uint32_t)2U);
+ Lib_IntVector_Intrinsics_vec128 z12 = Lib_IntVector_Intrinsics_vec128_add64(z11, t);
+ Lib_IntVector_Intrinsics_vec128 x11 = Lib_IntVector_Intrinsics_vec128_and(x1, mask26);
+ Lib_IntVector_Intrinsics_vec128 x41 = Lib_IntVector_Intrinsics_vec128_and(x4, mask26);
+ Lib_IntVector_Intrinsics_vec128 x2 = Lib_IntVector_Intrinsics_vec128_add64(t2, z01);
+ Lib_IntVector_Intrinsics_vec128 x01 = Lib_IntVector_Intrinsics_vec128_add64(x0, z12);
+ Lib_IntVector_Intrinsics_vec128
+ z02 = Lib_IntVector_Intrinsics_vec128_shift_right64(x2, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec128
+ z13 = Lib_IntVector_Intrinsics_vec128_shift_right64(x01, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec128 x21 = Lib_IntVector_Intrinsics_vec128_and(x2, mask26);
+ Lib_IntVector_Intrinsics_vec128 x02 = Lib_IntVector_Intrinsics_vec128_and(x01, mask26);
+ Lib_IntVector_Intrinsics_vec128 x31 = Lib_IntVector_Intrinsics_vec128_add64(x3, z02);
+ Lib_IntVector_Intrinsics_vec128 x12 = Lib_IntVector_Intrinsics_vec128_add64(x11, z13);
+ Lib_IntVector_Intrinsics_vec128
+ z03 = Lib_IntVector_Intrinsics_vec128_shift_right64(x31, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec128 x32 = Lib_IntVector_Intrinsics_vec128_and(x31, mask26);
+ Lib_IntVector_Intrinsics_vec128 x42 = Lib_IntVector_Intrinsics_vec128_add64(x41, z03);
+ Lib_IntVector_Intrinsics_vec128 o0 = x02;
+ Lib_IntVector_Intrinsics_vec128 o1 = x12;
+ Lib_IntVector_Intrinsics_vec128 o2 = x21;
+ Lib_IntVector_Intrinsics_vec128 o3 = x32;
+ Lib_IntVector_Intrinsics_vec128 o4 = x42;
+ acc[0U] = o0;
+ acc[1U] = o1;
+ acc[2U] = o2;
+ acc[3U] = o3;
+ acc[4U] = o4;
+ return;
+ }
+}
+
+static inline void /* Poly1305 over aad, then m (both via poly1305_padded_128), then one 16-byte block holding the two lengths; result goes to out. */
+poly1305_do_128(
+ uint8_t *k, /* key bytes handed to Hacl_Poly1305_128_poly1305_init and ..._finish */
+ uint32_t aadlen, /* length passed along with aad; also encoded in the final block */
+ uint8_t *aad, /* first input fed to poly1305_padded_128 */
+ uint32_t mlen, /* length passed along with m; also encoded in the final block */
+ uint8_t *m, /* second input fed to poly1305_padded_128 */
+ uint8_t *out) /* destination written by Hacl_Poly1305_128_poly1305_finish */
+{
+ Lib_IntVector_Intrinsics_vec128 ctx[25U]; /* ctx[0..4] is used as the accumulator, ctx + 5 onward as key-derived data (see pre/acc below) */
+ for (uint32_t _i = 0U; _i < (uint32_t)25U; ++_i)
+ ctx[_i] = Lib_IntVector_Intrinsics_vec128_zero;
+ uint8_t block[16U] = { 0U }; /* final block: aadlen || mlen */
+ Hacl_Poly1305_128_poly1305_init(ctx, k);
+ poly1305_padded_128(ctx, aadlen, aad);
+ poly1305_padded_128(ctx, mlen, m);
+ store64_le(block, (uint64_t)aadlen); /* bytes 0..7: aadlen widened to 64 bits, little-endian */
+ store64_le(block + (uint32_t)8U, (uint64_t)mlen); /* bytes 8..15: mlen widened to 64 bits, little-endian */
+ Lib_IntVector_Intrinsics_vec128 *pre = ctx + (uint32_t)5U;
+ Lib_IntVector_Intrinsics_vec128 *acc = ctx;
+ Lib_IntVector_Intrinsics_vec128 e[5U]; /* the length block re-expressed as five 26-bit limbs */
+ for (uint32_t _i = 0U; _i < (uint32_t)5U; ++_i)
+ e[_i] = Lib_IntVector_Intrinsics_vec128_zero;
+ uint64_t u0 = load64_le(block);
+ uint64_t lo = u0;
+ uint64_t u = load64_le(block + (uint32_t)8U);
+ uint64_t hi = u;
+ Lib_IntVector_Intrinsics_vec128 f0 = Lib_IntVector_Intrinsics_vec128_load64(lo);
+ Lib_IntVector_Intrinsics_vec128 f1 = Lib_IntVector_Intrinsics_vec128_load64(hi);
+ Lib_IntVector_Intrinsics_vec128
+ f010 = /* limb 0: bits 0..25 of lo */
+ Lib_IntVector_Intrinsics_vec128_and(f0,
+ Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU));
+ Lib_IntVector_Intrinsics_vec128
+ f110 = /* limb 1: bits 26..51 of lo */
+ Lib_IntVector_Intrinsics_vec128_and(Lib_IntVector_Intrinsics_vec128_shift_right64(f0,
+ (uint32_t)26U),
+ Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU));
+ Lib_IntVector_Intrinsics_vec128
+ f20 = /* limb 2 straddles the halves: top 12 bits of lo joined with the low 14 bits of hi */
+ Lib_IntVector_Intrinsics_vec128_or(Lib_IntVector_Intrinsics_vec128_shift_right64(f0,
+ (uint32_t)52U),
+ Lib_IntVector_Intrinsics_vec128_shift_left64(Lib_IntVector_Intrinsics_vec128_and(f1,
+ Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3fffU)),
+ (uint32_t)12U));
+ Lib_IntVector_Intrinsics_vec128
+ f30 = /* limb 3: bits 14..39 of hi */
+ Lib_IntVector_Intrinsics_vec128_and(Lib_IntVector_Intrinsics_vec128_shift_right64(f1,
+ (uint32_t)14U),
+ Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU));
+ Lib_IntVector_Intrinsics_vec128
+ f40 = Lib_IntVector_Intrinsics_vec128_shift_right64(f1, (uint32_t)40U); /* limb 4: remaining 24 bits of hi */
+ Lib_IntVector_Intrinsics_vec128 f01 = f010;
+ Lib_IntVector_Intrinsics_vec128 f111 = f110;
+ Lib_IntVector_Intrinsics_vec128 f2 = f20;
+ Lib_IntVector_Intrinsics_vec128 f3 = f30;
+ Lib_IntVector_Intrinsics_vec128 f41 = f40;
+ e[0U] = f01;
+ e[1U] = f111;
+ e[2U] = f2;
+ e[3U] = f3;
+ e[4U] = f41;
+ uint64_t b = (uint64_t)0x1000000U; /* bit 24 of limb 4 = bit 4*26 + 24 = 128, one above the 16 block bytes */
+ Lib_IntVector_Intrinsics_vec128 mask = Lib_IntVector_Intrinsics_vec128_load64(b);
+ Lib_IntVector_Intrinsics_vec128 f4 = e[4U];
+ e[4U] = Lib_IntVector_Intrinsics_vec128_or(f4, mask);
+ Lib_IntVector_Intrinsics_vec128 *r = pre;
+ Lib_IntVector_Intrinsics_vec128 *r5 = pre + (uint32_t)5U; /* presumably the limbs of 5*r prepared by poly1305_init; init is not visible here, confirm */
+ Lib_IntVector_Intrinsics_vec128 r0 = r[0U];
+ Lib_IntVector_Intrinsics_vec128 r1 = r[1U];
+ Lib_IntVector_Intrinsics_vec128 r2 = r[2U];
+ Lib_IntVector_Intrinsics_vec128 r3 = r[3U];
+ Lib_IntVector_Intrinsics_vec128 r4 = r[4U];
+ Lib_IntVector_Intrinsics_vec128 r51 = r5[1U]; /* r5[0U] is never read in this function */
+ Lib_IntVector_Intrinsics_vec128 r52 = r5[2U];
+ Lib_IntVector_Intrinsics_vec128 r53 = r5[3U];
+ Lib_IntVector_Intrinsics_vec128 r54 = r5[4U];
+ Lib_IntVector_Intrinsics_vec128 f10 = e[0U];
+ Lib_IntVector_Intrinsics_vec128 f11 = e[1U];
+ Lib_IntVector_Intrinsics_vec128 f12 = e[2U];
+ Lib_IntVector_Intrinsics_vec128 f13 = e[3U];
+ Lib_IntVector_Intrinsics_vec128 f14 = e[4U];
+ Lib_IntVector_Intrinsics_vec128 a0 = acc[0U];
+ Lib_IntVector_Intrinsics_vec128 a1 = acc[1U];
+ Lib_IntVector_Intrinsics_vec128 a2 = acc[2U];
+ Lib_IntVector_Intrinsics_vec128 a3 = acc[3U];
+ Lib_IntVector_Intrinsics_vec128 a4 = acc[4U];
+ Lib_IntVector_Intrinsics_vec128 a01 = Lib_IntVector_Intrinsics_vec128_add64(a0, f10); /* a01..a41: accumulator plus block, limb by limb */
+ Lib_IntVector_Intrinsics_vec128 a11 = Lib_IntVector_Intrinsics_vec128_add64(a1, f11);
+ Lib_IntVector_Intrinsics_vec128 a21 = Lib_IntVector_Intrinsics_vec128_add64(a2, f12);
+ Lib_IntVector_Intrinsics_vec128 a31 = Lib_IntVector_Intrinsics_vec128_add64(a3, f13);
+ Lib_IntVector_Intrinsics_vec128 a41 = Lib_IntVector_Intrinsics_vec128_add64(a4, f14);
+ Lib_IntVector_Intrinsics_vec128 a02 = Lib_IntVector_Intrinsics_vec128_mul64(r0, a01); /* a02..a46: limb-wise product with r; terms whose limb indices sum to 5 or more use the r5 values */
+ Lib_IntVector_Intrinsics_vec128 a12 = Lib_IntVector_Intrinsics_vec128_mul64(r1, a01);
+ Lib_IntVector_Intrinsics_vec128 a22 = Lib_IntVector_Intrinsics_vec128_mul64(r2, a01);
+ Lib_IntVector_Intrinsics_vec128 a32 = Lib_IntVector_Intrinsics_vec128_mul64(r3, a01);
+ Lib_IntVector_Intrinsics_vec128 a42 = Lib_IntVector_Intrinsics_vec128_mul64(r4, a01);
+ Lib_IntVector_Intrinsics_vec128
+ a03 =
+ Lib_IntVector_Intrinsics_vec128_add64(a02,
+ Lib_IntVector_Intrinsics_vec128_mul64(r54, a11));
+ Lib_IntVector_Intrinsics_vec128
+ a13 =
+ Lib_IntVector_Intrinsics_vec128_add64(a12,
+ Lib_IntVector_Intrinsics_vec128_mul64(r0, a11));
+ Lib_IntVector_Intrinsics_vec128
+ a23 =
+ Lib_IntVector_Intrinsics_vec128_add64(a22,
+ Lib_IntVector_Intrinsics_vec128_mul64(r1, a11));
+ Lib_IntVector_Intrinsics_vec128
+ a33 =
+ Lib_IntVector_Intrinsics_vec128_add64(a32,
+ Lib_IntVector_Intrinsics_vec128_mul64(r2, a11));
+ Lib_IntVector_Intrinsics_vec128
+ a43 =
+ Lib_IntVector_Intrinsics_vec128_add64(a42,
+ Lib_IntVector_Intrinsics_vec128_mul64(r3, a11));
+ Lib_IntVector_Intrinsics_vec128
+ a04 =
+ Lib_IntVector_Intrinsics_vec128_add64(a03,
+ Lib_IntVector_Intrinsics_vec128_mul64(r53, a21));
+ Lib_IntVector_Intrinsics_vec128
+ a14 =
+ Lib_IntVector_Intrinsics_vec128_add64(a13,
+ Lib_IntVector_Intrinsics_vec128_mul64(r54, a21));
+ Lib_IntVector_Intrinsics_vec128
+ a24 =
+ Lib_IntVector_Intrinsics_vec128_add64(a23,
+ Lib_IntVector_Intrinsics_vec128_mul64(r0, a21));
+ Lib_IntVector_Intrinsics_vec128
+ a34 =
+ Lib_IntVector_Intrinsics_vec128_add64(a33,
+ Lib_IntVector_Intrinsics_vec128_mul64(r1, a21));
+ Lib_IntVector_Intrinsics_vec128
+ a44 =
+ Lib_IntVector_Intrinsics_vec128_add64(a43,
+ Lib_IntVector_Intrinsics_vec128_mul64(r2, a21));
+ Lib_IntVector_Intrinsics_vec128
+ a05 =
+ Lib_IntVector_Intrinsics_vec128_add64(a04,
+ Lib_IntVector_Intrinsics_vec128_mul64(r52, a31));
+ Lib_IntVector_Intrinsics_vec128
+ a15 =
+ Lib_IntVector_Intrinsics_vec128_add64(a14,
+ Lib_IntVector_Intrinsics_vec128_mul64(r53, a31));
+ Lib_IntVector_Intrinsics_vec128
+ a25 =
+ Lib_IntVector_Intrinsics_vec128_add64(a24,
+ Lib_IntVector_Intrinsics_vec128_mul64(r54, a31));
+ Lib_IntVector_Intrinsics_vec128
+ a35 =
+ Lib_IntVector_Intrinsics_vec128_add64(a34,
+ Lib_IntVector_Intrinsics_vec128_mul64(r0, a31));
+ Lib_IntVector_Intrinsics_vec128
+ a45 =
+ Lib_IntVector_Intrinsics_vec128_add64(a44,
+ Lib_IntVector_Intrinsics_vec128_mul64(r1, a31));
+ Lib_IntVector_Intrinsics_vec128
+ a06 =
+ Lib_IntVector_Intrinsics_vec128_add64(a05,
+ Lib_IntVector_Intrinsics_vec128_mul64(r51, a41));
+ Lib_IntVector_Intrinsics_vec128
+ a16 =
+ Lib_IntVector_Intrinsics_vec128_add64(a15,
+ Lib_IntVector_Intrinsics_vec128_mul64(r52, a41));
+ Lib_IntVector_Intrinsics_vec128
+ a26 =
+ Lib_IntVector_Intrinsics_vec128_add64(a25,
+ Lib_IntVector_Intrinsics_vec128_mul64(r53, a41));
+ Lib_IntVector_Intrinsics_vec128
+ a36 =
+ Lib_IntVector_Intrinsics_vec128_add64(a35,
+ Lib_IntVector_Intrinsics_vec128_mul64(r54, a41));
+ Lib_IntVector_Intrinsics_vec128
+ a46 =
+ Lib_IntVector_Intrinsics_vec128_add64(a45,
+ Lib_IntVector_Intrinsics_vec128_mul64(r0, a41));
+ Lib_IntVector_Intrinsics_vec128 t0 = a06; /* t0..t4: un-carried product limbs */
+ Lib_IntVector_Intrinsics_vec128 t1 = a16;
+ Lib_IntVector_Intrinsics_vec128 t2 = a26;
+ Lib_IntVector_Intrinsics_vec128 t3 = a36;
+ Lib_IntVector_Intrinsics_vec128 t4 = a46;
+ Lib_IntVector_Intrinsics_vec128
+ mask26 = Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU); /* keeps the low 26 bits of a limb */
+ Lib_IntVector_Intrinsics_vec128
+ z0 = Lib_IntVector_Intrinsics_vec128_shift_right64(t0, (uint32_t)26U); /* carry out of limb 0 */
+ Lib_IntVector_Intrinsics_vec128
+ z1 = Lib_IntVector_Intrinsics_vec128_shift_right64(t3, (uint32_t)26U); /* carry out of limb 3 */
+ Lib_IntVector_Intrinsics_vec128 x0 = Lib_IntVector_Intrinsics_vec128_and(t0, mask26);
+ Lib_IntVector_Intrinsics_vec128 x3 = Lib_IntVector_Intrinsics_vec128_and(t3, mask26);
+ Lib_IntVector_Intrinsics_vec128 x1 = Lib_IntVector_Intrinsics_vec128_add64(t1, z0);
+ Lib_IntVector_Intrinsics_vec128 x4 = Lib_IntVector_Intrinsics_vec128_add64(t4, z1);
+ Lib_IntVector_Intrinsics_vec128
+ z01 = Lib_IntVector_Intrinsics_vec128_shift_right64(x1, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec128
+ z11 = Lib_IntVector_Intrinsics_vec128_shift_right64(x4, (uint32_t)26U); /* carry out of the top limb */
+ Lib_IntVector_Intrinsics_vec128
+ t = Lib_IntVector_Intrinsics_vec128_shift_left64(z11, (uint32_t)2U);
+ Lib_IntVector_Intrinsics_vec128 z12 = Lib_IntVector_Intrinsics_vec128_add64(z11, t); /* z11 + 4*z11 = 5*z11, folded back into limb 0 below */
+ Lib_IntVector_Intrinsics_vec128 x11 = Lib_IntVector_Intrinsics_vec128_and(x1, mask26);
+ Lib_IntVector_Intrinsics_vec128 x41 = Lib_IntVector_Intrinsics_vec128_and(x4, mask26);
+ Lib_IntVector_Intrinsics_vec128 x2 = Lib_IntVector_Intrinsics_vec128_add64(t2, z01);
+ Lib_IntVector_Intrinsics_vec128 x01 = Lib_IntVector_Intrinsics_vec128_add64(x0, z12);
+ Lib_IntVector_Intrinsics_vec128
+ z02 = Lib_IntVector_Intrinsics_vec128_shift_right64(x2, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec128
+ z13 = Lib_IntVector_Intrinsics_vec128_shift_right64(x01, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec128 x21 = Lib_IntVector_Intrinsics_vec128_and(x2, mask26);
+ Lib_IntVector_Intrinsics_vec128 x02 = Lib_IntVector_Intrinsics_vec128_and(x01, mask26);
+ Lib_IntVector_Intrinsics_vec128 x31 = Lib_IntVector_Intrinsics_vec128_add64(x3, z02);
+ Lib_IntVector_Intrinsics_vec128 x12 = Lib_IntVector_Intrinsics_vec128_add64(x11, z13);
+ Lib_IntVector_Intrinsics_vec128
+ z03 = Lib_IntVector_Intrinsics_vec128_shift_right64(x31, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec128 x32 = Lib_IntVector_Intrinsics_vec128_and(x31, mask26);
+ Lib_IntVector_Intrinsics_vec128 x42 = Lib_IntVector_Intrinsics_vec128_add64(x41, z03); /* last carry is added to limb 4 without a further mask */
+ Lib_IntVector_Intrinsics_vec128 o0 = x02;
+ Lib_IntVector_Intrinsics_vec128 o1 = x12;
+ Lib_IntVector_Intrinsics_vec128 o2 = x21;
+ Lib_IntVector_Intrinsics_vec128 o3 = x32;
+ Lib_IntVector_Intrinsics_vec128 o4 = x42;
+ acc[0U] = o0; /* write the carried limbs back into ctx[0..4] */
+ acc[1U] = o1;
+ acc[2U] = o2;
+ acc[3U] = o3;
+ acc[4U] = o4;
+ Hacl_Poly1305_128_poly1305_finish(out, k, ctx);
+}
+
+void /* ChaCha20-Poly1305 seal: writes the encryption of m to cipher and the tag over aad and cipher to mac. */
+Hacl_Chacha20Poly1305_128_aead_encrypt(
+    uint8_t *k,      /* key argument forwarded to the ChaCha20 routine */
+    uint8_t *n,      /* nonce argument forwarded to the ChaCha20 routine */
+    uint32_t aadlen, /* length of aad */
+    uint8_t *aad,    /* additional data; only the tag computation reads it */
+    uint32_t mlen,   /* length used for both m and cipher */
+    uint8_t *m,      /* plaintext input */
+    uint8_t *cipher, /* ciphertext output */
+    uint8_t *mac)    /* tag output */
+{
+    uint8_t poly_key_block[64U] = { 0U }; /* all-zero input, overwritten in place below */
+    /* Payload is processed with counter 1; the counter-0 block is kept for the MAC key. */
+    Hacl_Chacha20_Vec128_chacha20_encrypt_128(mlen, cipher, m, k, n, (uint32_t)1U);
+    Hacl_Chacha20_Vec128_chacha20_encrypt_128((uint32_t)64U, poly_key_block, poly_key_block, k, n, (uint32_t)0U);
+    poly1305_do_128(poly_key_block, aadlen, aad, mlen, cipher, mac);
+}
+
+uint32_t /* ChaCha20-Poly1305 open: returns 0 and fills m when mac verifies, otherwise returns 1. */
+Hacl_Chacha20Poly1305_128_aead_decrypt(
+    uint8_t *k,      /* key argument forwarded to the ChaCha20 routine */
+    uint8_t *n,      /* nonce argument forwarded to the ChaCha20 routine */
+    uint32_t aadlen, /* length of aad */
+    uint8_t *aad,    /* additional data; only the tag computation reads it */
+    uint32_t mlen,   /* length used for both cipher and m */
+    uint8_t *m,      /* plaintext output, written only on success */
+    uint8_t *cipher, /* ciphertext input */
+    uint8_t *mac)    /* tag to check; 16 bytes are read */
+{
+    uint8_t expected_tag[16U] = { 0U };
+    uint8_t poly_key_block[64U] = { 0U };
+    uint8_t all_equal = (uint8_t)255U; /* must still be 255 after the loop for acceptance */
+    /* The counter-0 keystream block serves as the MAC key material. */
+    Hacl_Chacha20_Vec128_chacha20_encrypt_128((uint32_t)64U, poly_key_block, poly_key_block, k, n, (uint32_t)0U);
+    poly1305_do_128(poly_key_block, aadlen, aad, mlen, cipher, expected_tag);
+    /* Fold all 16 per-byte masks together; the loop never exits early. */
+    for (uint32_t idx = (uint32_t)0U; idx < (uint32_t)16U; idx++) {
+        all_equal &= FStar_UInt8_eq_mask(expected_tag[idx], mac[idx]);
+    }
+    if (all_equal != (uint8_t)255U) {
+        return (uint32_t)1U; /* rejected: m is not written */
+    }
+    /* Accepted: decrypt the payload with counter 1. */
+    Hacl_Chacha20_Vec128_chacha20_encrypt_128(mlen, m, cipher, k, n, (uint32_t)1U);
+    return (uint32_t)0U;
+}
diff --git a/security/nss/lib/freebl/verified/Hacl_Chacha20Poly1305_128.h b/security/nss/lib/freebl/verified/Hacl_Chacha20Poly1305_128.h
new file mode 100644
index 0000000000..2c4d46bce4
--- /dev/null
+++ b/security/nss/lib/freebl/verified/Hacl_Chacha20Poly1305_128.h
@@ -0,0 +1,60 @@
+/* MIT License
+ *
+ * Copyright (c) 2016-2020 INRIA, CMU and Microsoft Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "libintvector.h"
+#include "kremlin/internal/types.h"
+#include "kremlin/lowstar_endianness.h"
+#include <string.h>
+#include <stdbool.h>
+
+#ifndef __Hacl_Chacha20Poly1305_128_H
+#define __Hacl_Chacha20Poly1305_128_H
+
+#include "Hacl_Kremlib.h"
+#include "Hacl_Chacha20_Vec128.h"
+#include "Hacl_Poly1305_128.h"
+/*
+ * AEAD encryption. The definition in Hacl_Chacha20Poly1305_128.c runs the
+ * ChaCha20 routine over mlen bytes with the (cipher, m) buffers and then
+ * calls poly1305_do_128 over (aad, aadlen) and (cipher, mlen) with `mac`
+ * as its last argument.
+ *
+ * k, n:        forwarded to the ChaCha20 routine; presumably key and nonce
+ *              (sizes are not stated in this header - TODO confirm).
+ * aadlen, aad: additional data passed to poly1305_do_128.
+ * mlen:        byte length used for both `m` and `cipher`.
+ * mac:         presumably receives the tag; the decrypt routine below reads
+ *              16 bytes of its `mac` argument - confirm the size here.
+ */
+void
+Hacl_Chacha20Poly1305_128_aead_encrypt(
+ uint8_t *k,
+ uint8_t *n,
+ uint32_t aadlen,
+ uint8_t *aad,
+ uint32_t mlen,
+ uint8_t *m,
+ uint8_t *cipher,
+ uint8_t *mac);
+/*
+ * AEAD decryption with tag check. The definition in
+ * Hacl_Chacha20Poly1305_128.c recomputes a 16-byte tag over (aad, aadlen)
+ * and (cipher, mlen), compares all 16 bytes with `mac`, and calls the
+ * ChaCha20 routine with the (m, cipher) buffers only when they match.
+ *
+ * k, n:        forwarded to the ChaCha20 routine; presumably key and nonce
+ *              (sizes are not stated in this header - TODO confirm).
+ * aadlen, aad: additional data covered by the tag.
+ * mlen:        byte length used for both `cipher` and `m`.
+ * mac:         tag to verify; 16 bytes are read.
+ *
+ * Returns 0 when the tag matched, 1 when it did not; in the latter case
+ * the ChaCha20 call involving `m` is skipped.
+ */
+uint32_t
+Hacl_Chacha20Poly1305_128_aead_decrypt(
+ uint8_t *k,
+ uint8_t *n,
+ uint32_t aadlen,
+ uint8_t *aad,
+ uint32_t mlen,
+ uint8_t *m,
+ uint8_t *cipher,
+ uint8_t *mac);
+
+#define __Hacl_Chacha20Poly1305_128_H_DEFINED
+#endif
diff --git a/security/nss/lib/freebl/verified/Hacl_Chacha20Poly1305_256.c b/security/nss/lib/freebl/verified/Hacl_Chacha20Poly1305_256.c
new file mode 100644
index 0000000000..efbccbfddb
--- /dev/null
+++ b/security/nss/lib/freebl/verified/Hacl_Chacha20Poly1305_256.c
@@ -0,0 +1,1176 @@
+/* MIT License
+ *
+ * Copyright (c) 2016-2020 INRIA, CMU and Microsoft Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "Hacl_Chacha20Poly1305_256.h"
+
+static inline void
+poly1305_padded_256(Lib_IntVector_Intrinsics_vec256 *ctx, uint32_t len, uint8_t *text)
+{
+ uint32_t n = len / (uint32_t)16U;
+ uint32_t r = len % (uint32_t)16U;
+ uint8_t *blocks = text;
+ uint8_t *rem = text + n * (uint32_t)16U;
+ Lib_IntVector_Intrinsics_vec256 *pre0 = ctx + (uint32_t)5U;
+ Lib_IntVector_Intrinsics_vec256 *acc0 = ctx;
+ uint32_t sz_block = (uint32_t)64U;
+ uint32_t len0 = n * (uint32_t)16U / sz_block * sz_block;
+ uint8_t *t00 = blocks;
+ if (len0 > (uint32_t)0U) {
+ uint32_t bs = (uint32_t)64U;
+ uint8_t *text0 = t00;
+ Hacl_Impl_Poly1305_Field32xN_256_load_acc4(acc0, text0);
+ uint32_t len1 = len0 - bs;
+ uint8_t *text1 = t00 + bs;
+ uint32_t nb = len1 / bs;
+ for (uint32_t i = (uint32_t)0U; i < nb; i++) {
+ uint8_t *block = text1 + i * bs;
+ Lib_IntVector_Intrinsics_vec256 e[5U];
+ for (uint32_t _i = 0U; _i < (uint32_t)5U; ++_i)
+ e[_i] = Lib_IntVector_Intrinsics_vec256_zero;
+ Lib_IntVector_Intrinsics_vec256 lo = Lib_IntVector_Intrinsics_vec256_load_le(block);
+ Lib_IntVector_Intrinsics_vec256
+ hi = Lib_IntVector_Intrinsics_vec256_load_le(block + (uint32_t)32U);
+ Lib_IntVector_Intrinsics_vec256
+ mask260 = Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU);
+ Lib_IntVector_Intrinsics_vec256
+ m0 = Lib_IntVector_Intrinsics_vec256_interleave_low128(lo, hi);
+ Lib_IntVector_Intrinsics_vec256
+ m1 = Lib_IntVector_Intrinsics_vec256_interleave_high128(lo, hi);
+ Lib_IntVector_Intrinsics_vec256
+ m2 = Lib_IntVector_Intrinsics_vec256_shift_right(m0, (uint32_t)48U);
+ Lib_IntVector_Intrinsics_vec256
+ m3 = Lib_IntVector_Intrinsics_vec256_shift_right(m1, (uint32_t)48U);
+ Lib_IntVector_Intrinsics_vec256
+ m4 = Lib_IntVector_Intrinsics_vec256_interleave_high64(m0, m1);
+ Lib_IntVector_Intrinsics_vec256
+ t010 = Lib_IntVector_Intrinsics_vec256_interleave_low64(m0, m1);
+ Lib_IntVector_Intrinsics_vec256
+ t30 = Lib_IntVector_Intrinsics_vec256_interleave_low64(m2, m3);
+ Lib_IntVector_Intrinsics_vec256
+ t20 = Lib_IntVector_Intrinsics_vec256_shift_right64(t30, (uint32_t)4U);
+ Lib_IntVector_Intrinsics_vec256 o20 = Lib_IntVector_Intrinsics_vec256_and(t20, mask260);
+ Lib_IntVector_Intrinsics_vec256
+ t10 = Lib_IntVector_Intrinsics_vec256_shift_right64(t010, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec256 o10 = Lib_IntVector_Intrinsics_vec256_and(t10, mask260);
+ Lib_IntVector_Intrinsics_vec256 o5 = Lib_IntVector_Intrinsics_vec256_and(t010, mask260);
+ Lib_IntVector_Intrinsics_vec256
+ t31 = Lib_IntVector_Intrinsics_vec256_shift_right64(t30, (uint32_t)30U);
+ Lib_IntVector_Intrinsics_vec256 o30 = Lib_IntVector_Intrinsics_vec256_and(t31, mask260);
+ Lib_IntVector_Intrinsics_vec256
+ o40 = Lib_IntVector_Intrinsics_vec256_shift_right64(m4, (uint32_t)40U);
+ Lib_IntVector_Intrinsics_vec256 o00 = o5;
+ Lib_IntVector_Intrinsics_vec256 o11 = o10;
+ Lib_IntVector_Intrinsics_vec256 o21 = o20;
+ Lib_IntVector_Intrinsics_vec256 o31 = o30;
+ Lib_IntVector_Intrinsics_vec256 o41 = o40;
+ e[0U] = o00;
+ e[1U] = o11;
+ e[2U] = o21;
+ e[3U] = o31;
+ e[4U] = o41;
+ uint64_t b = (uint64_t)0x1000000U;
+ Lib_IntVector_Intrinsics_vec256 mask = Lib_IntVector_Intrinsics_vec256_load64(b);
+ Lib_IntVector_Intrinsics_vec256 f4 = e[4U];
+ e[4U] = Lib_IntVector_Intrinsics_vec256_or(f4, mask);
+ Lib_IntVector_Intrinsics_vec256 *rn = pre0 + (uint32_t)10U;
+ Lib_IntVector_Intrinsics_vec256 *rn5 = pre0 + (uint32_t)15U;
+ Lib_IntVector_Intrinsics_vec256 r0 = rn[0U];
+ Lib_IntVector_Intrinsics_vec256 r1 = rn[1U];
+ Lib_IntVector_Intrinsics_vec256 r2 = rn[2U];
+ Lib_IntVector_Intrinsics_vec256 r3 = rn[3U];
+ Lib_IntVector_Intrinsics_vec256 r4 = rn[4U];
+ Lib_IntVector_Intrinsics_vec256 r51 = rn5[1U];
+ Lib_IntVector_Intrinsics_vec256 r52 = rn5[2U];
+ Lib_IntVector_Intrinsics_vec256 r53 = rn5[3U];
+ Lib_IntVector_Intrinsics_vec256 r54 = rn5[4U];
+ Lib_IntVector_Intrinsics_vec256 f10 = acc0[0U];
+ Lib_IntVector_Intrinsics_vec256 f110 = acc0[1U];
+ Lib_IntVector_Intrinsics_vec256 f120 = acc0[2U];
+ Lib_IntVector_Intrinsics_vec256 f130 = acc0[3U];
+ Lib_IntVector_Intrinsics_vec256 f140 = acc0[4U];
+ Lib_IntVector_Intrinsics_vec256 a0 = Lib_IntVector_Intrinsics_vec256_mul64(r0, f10);
+ Lib_IntVector_Intrinsics_vec256 a1 = Lib_IntVector_Intrinsics_vec256_mul64(r1, f10);
+ Lib_IntVector_Intrinsics_vec256 a2 = Lib_IntVector_Intrinsics_vec256_mul64(r2, f10);
+ Lib_IntVector_Intrinsics_vec256 a3 = Lib_IntVector_Intrinsics_vec256_mul64(r3, f10);
+ Lib_IntVector_Intrinsics_vec256 a4 = Lib_IntVector_Intrinsics_vec256_mul64(r4, f10);
+ Lib_IntVector_Intrinsics_vec256
+ a01 =
+ Lib_IntVector_Intrinsics_vec256_add64(a0,
+ Lib_IntVector_Intrinsics_vec256_mul64(r54, f110));
+ Lib_IntVector_Intrinsics_vec256
+ a11 =
+ Lib_IntVector_Intrinsics_vec256_add64(a1,
+ Lib_IntVector_Intrinsics_vec256_mul64(r0, f110));
+ Lib_IntVector_Intrinsics_vec256
+ a21 =
+ Lib_IntVector_Intrinsics_vec256_add64(a2,
+ Lib_IntVector_Intrinsics_vec256_mul64(r1, f110));
+ Lib_IntVector_Intrinsics_vec256
+ a31 =
+ Lib_IntVector_Intrinsics_vec256_add64(a3,
+ Lib_IntVector_Intrinsics_vec256_mul64(r2, f110));
+ Lib_IntVector_Intrinsics_vec256
+ a41 =
+ Lib_IntVector_Intrinsics_vec256_add64(a4,
+ Lib_IntVector_Intrinsics_vec256_mul64(r3, f110));
+ Lib_IntVector_Intrinsics_vec256
+ a02 =
+ Lib_IntVector_Intrinsics_vec256_add64(a01,
+ Lib_IntVector_Intrinsics_vec256_mul64(r53, f120));
+ Lib_IntVector_Intrinsics_vec256
+ a12 =
+ Lib_IntVector_Intrinsics_vec256_add64(a11,
+ Lib_IntVector_Intrinsics_vec256_mul64(r54, f120));
+ Lib_IntVector_Intrinsics_vec256
+ a22 =
+ Lib_IntVector_Intrinsics_vec256_add64(a21,
+ Lib_IntVector_Intrinsics_vec256_mul64(r0, f120));
+ Lib_IntVector_Intrinsics_vec256
+ a32 =
+ Lib_IntVector_Intrinsics_vec256_add64(a31,
+ Lib_IntVector_Intrinsics_vec256_mul64(r1, f120));
+ Lib_IntVector_Intrinsics_vec256
+ a42 =
+ Lib_IntVector_Intrinsics_vec256_add64(a41,
+ Lib_IntVector_Intrinsics_vec256_mul64(r2, f120));
+ Lib_IntVector_Intrinsics_vec256
+ a03 =
+ Lib_IntVector_Intrinsics_vec256_add64(a02,
+ Lib_IntVector_Intrinsics_vec256_mul64(r52, f130));
+ Lib_IntVector_Intrinsics_vec256
+ a13 =
+ Lib_IntVector_Intrinsics_vec256_add64(a12,
+ Lib_IntVector_Intrinsics_vec256_mul64(r53, f130));
+ Lib_IntVector_Intrinsics_vec256
+ a23 =
+ Lib_IntVector_Intrinsics_vec256_add64(a22,
+ Lib_IntVector_Intrinsics_vec256_mul64(r54, f130));
+ Lib_IntVector_Intrinsics_vec256
+ a33 =
+ Lib_IntVector_Intrinsics_vec256_add64(a32,
+ Lib_IntVector_Intrinsics_vec256_mul64(r0, f130));
+ Lib_IntVector_Intrinsics_vec256
+ a43 =
+ Lib_IntVector_Intrinsics_vec256_add64(a42,
+ Lib_IntVector_Intrinsics_vec256_mul64(r1, f130));
+ Lib_IntVector_Intrinsics_vec256
+ a04 =
+ Lib_IntVector_Intrinsics_vec256_add64(a03,
+ Lib_IntVector_Intrinsics_vec256_mul64(r51, f140));
+ Lib_IntVector_Intrinsics_vec256
+ a14 =
+ Lib_IntVector_Intrinsics_vec256_add64(a13,
+ Lib_IntVector_Intrinsics_vec256_mul64(r52, f140));
+ Lib_IntVector_Intrinsics_vec256
+ a24 =
+ Lib_IntVector_Intrinsics_vec256_add64(a23,
+ Lib_IntVector_Intrinsics_vec256_mul64(r53, f140));
+ Lib_IntVector_Intrinsics_vec256
+ a34 =
+ Lib_IntVector_Intrinsics_vec256_add64(a33,
+ Lib_IntVector_Intrinsics_vec256_mul64(r54, f140));
+ Lib_IntVector_Intrinsics_vec256
+ a44 =
+ Lib_IntVector_Intrinsics_vec256_add64(a43,
+ Lib_IntVector_Intrinsics_vec256_mul64(r0, f140));
+ Lib_IntVector_Intrinsics_vec256 t01 = a04;
+ Lib_IntVector_Intrinsics_vec256 t1 = a14;
+ Lib_IntVector_Intrinsics_vec256 t2 = a24;
+ Lib_IntVector_Intrinsics_vec256 t3 = a34;
+ Lib_IntVector_Intrinsics_vec256 t4 = a44;
+ Lib_IntVector_Intrinsics_vec256
+ mask26 = Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU);
+ Lib_IntVector_Intrinsics_vec256
+ z0 = Lib_IntVector_Intrinsics_vec256_shift_right64(t01, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec256
+ z1 = Lib_IntVector_Intrinsics_vec256_shift_right64(t3, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec256 x0 = Lib_IntVector_Intrinsics_vec256_and(t01, mask26);
+ Lib_IntVector_Intrinsics_vec256 x3 = Lib_IntVector_Intrinsics_vec256_and(t3, mask26);
+ Lib_IntVector_Intrinsics_vec256 x1 = Lib_IntVector_Intrinsics_vec256_add64(t1, z0);
+ Lib_IntVector_Intrinsics_vec256 x4 = Lib_IntVector_Intrinsics_vec256_add64(t4, z1);
+ Lib_IntVector_Intrinsics_vec256
+ z01 = Lib_IntVector_Intrinsics_vec256_shift_right64(x1, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec256
+ z11 = Lib_IntVector_Intrinsics_vec256_shift_right64(x4, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec256
+ t = Lib_IntVector_Intrinsics_vec256_shift_left64(z11, (uint32_t)2U);
+ Lib_IntVector_Intrinsics_vec256 z12 = Lib_IntVector_Intrinsics_vec256_add64(z11, t);
+ Lib_IntVector_Intrinsics_vec256 x11 = Lib_IntVector_Intrinsics_vec256_and(x1, mask26);
+ Lib_IntVector_Intrinsics_vec256 x41 = Lib_IntVector_Intrinsics_vec256_and(x4, mask26);
+ Lib_IntVector_Intrinsics_vec256 x2 = Lib_IntVector_Intrinsics_vec256_add64(t2, z01);
+ Lib_IntVector_Intrinsics_vec256 x01 = Lib_IntVector_Intrinsics_vec256_add64(x0, z12);
+ Lib_IntVector_Intrinsics_vec256
+ z02 = Lib_IntVector_Intrinsics_vec256_shift_right64(x2, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec256
+ z13 = Lib_IntVector_Intrinsics_vec256_shift_right64(x01, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec256 x21 = Lib_IntVector_Intrinsics_vec256_and(x2, mask26);
+ Lib_IntVector_Intrinsics_vec256 x02 = Lib_IntVector_Intrinsics_vec256_and(x01, mask26);
+ Lib_IntVector_Intrinsics_vec256 x31 = Lib_IntVector_Intrinsics_vec256_add64(x3, z02);
+ Lib_IntVector_Intrinsics_vec256 x12 = Lib_IntVector_Intrinsics_vec256_add64(x11, z13);
+ Lib_IntVector_Intrinsics_vec256
+ z03 = Lib_IntVector_Intrinsics_vec256_shift_right64(x31, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec256 x32 = Lib_IntVector_Intrinsics_vec256_and(x31, mask26);
+ Lib_IntVector_Intrinsics_vec256 x42 = Lib_IntVector_Intrinsics_vec256_add64(x41, z03);
+ Lib_IntVector_Intrinsics_vec256 o01 = x02;
+ Lib_IntVector_Intrinsics_vec256 o12 = x12;
+ Lib_IntVector_Intrinsics_vec256 o22 = x21;
+ Lib_IntVector_Intrinsics_vec256 o32 = x32;
+ Lib_IntVector_Intrinsics_vec256 o42 = x42;
+ acc0[0U] = o01;
+ acc0[1U] = o12;
+ acc0[2U] = o22;
+ acc0[3U] = o32;
+ acc0[4U] = o42;
+ Lib_IntVector_Intrinsics_vec256 f100 = acc0[0U];
+ Lib_IntVector_Intrinsics_vec256 f11 = acc0[1U];
+ Lib_IntVector_Intrinsics_vec256 f12 = acc0[2U];
+ Lib_IntVector_Intrinsics_vec256 f13 = acc0[3U];
+ Lib_IntVector_Intrinsics_vec256 f14 = acc0[4U];
+ Lib_IntVector_Intrinsics_vec256 f20 = e[0U];
+ Lib_IntVector_Intrinsics_vec256 f21 = e[1U];
+ Lib_IntVector_Intrinsics_vec256 f22 = e[2U];
+ Lib_IntVector_Intrinsics_vec256 f23 = e[3U];
+ Lib_IntVector_Intrinsics_vec256 f24 = e[4U];
+ Lib_IntVector_Intrinsics_vec256 o0 = Lib_IntVector_Intrinsics_vec256_add64(f100, f20);
+ Lib_IntVector_Intrinsics_vec256 o1 = Lib_IntVector_Intrinsics_vec256_add64(f11, f21);
+ Lib_IntVector_Intrinsics_vec256 o2 = Lib_IntVector_Intrinsics_vec256_add64(f12, f22);
+ Lib_IntVector_Intrinsics_vec256 o3 = Lib_IntVector_Intrinsics_vec256_add64(f13, f23);
+ Lib_IntVector_Intrinsics_vec256 o4 = Lib_IntVector_Intrinsics_vec256_add64(f14, f24);
+ acc0[0U] = o0;
+ acc0[1U] = o1;
+ acc0[2U] = o2;
+ acc0[3U] = o3;
+ acc0[4U] = o4;
+ }
+ Hacl_Impl_Poly1305_Field32xN_256_fmul_r4_normalize(acc0, pre0);
+ }
+ uint32_t len1 = n * (uint32_t)16U - len0;
+ uint8_t *t10 = blocks + len0;
+ uint32_t nb = len1 / (uint32_t)16U;
+ uint32_t rem1 = len1 % (uint32_t)16U;
+ for (uint32_t i = (uint32_t)0U; i < nb; i++) {
+ uint8_t *block = t10 + i * (uint32_t)16U;
+ Lib_IntVector_Intrinsics_vec256 e[5U];
+ for (uint32_t _i = 0U; _i < (uint32_t)5U; ++_i)
+ e[_i] = Lib_IntVector_Intrinsics_vec256_zero;
+ uint64_t u0 = load64_le(block);
+ uint64_t lo = u0;
+ uint64_t u = load64_le(block + (uint32_t)8U);
+ uint64_t hi = u;
+ Lib_IntVector_Intrinsics_vec256 f0 = Lib_IntVector_Intrinsics_vec256_load64(lo);
+ Lib_IntVector_Intrinsics_vec256 f1 = Lib_IntVector_Intrinsics_vec256_load64(hi);
+ Lib_IntVector_Intrinsics_vec256
+ f010 =
+ Lib_IntVector_Intrinsics_vec256_and(f0,
+ Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU));
+ Lib_IntVector_Intrinsics_vec256
+ f110 =
+ Lib_IntVector_Intrinsics_vec256_and(Lib_IntVector_Intrinsics_vec256_shift_right64(f0,
+ (uint32_t)26U),
+ Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU));
+ Lib_IntVector_Intrinsics_vec256
+ f20 =
+ Lib_IntVector_Intrinsics_vec256_or(Lib_IntVector_Intrinsics_vec256_shift_right64(f0,
+ (uint32_t)52U),
+ Lib_IntVector_Intrinsics_vec256_shift_left64(Lib_IntVector_Intrinsics_vec256_and(f1,
+ Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3fffU)),
+ (uint32_t)12U));
+ Lib_IntVector_Intrinsics_vec256
+ f30 =
+ Lib_IntVector_Intrinsics_vec256_and(Lib_IntVector_Intrinsics_vec256_shift_right64(f1,
+ (uint32_t)14U),
+ Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU));
+ Lib_IntVector_Intrinsics_vec256
+ f40 = Lib_IntVector_Intrinsics_vec256_shift_right64(f1, (uint32_t)40U);
+ Lib_IntVector_Intrinsics_vec256 f01 = f010;
+ Lib_IntVector_Intrinsics_vec256 f111 = f110;
+ Lib_IntVector_Intrinsics_vec256 f2 = f20;
+ Lib_IntVector_Intrinsics_vec256 f3 = f30;
+ Lib_IntVector_Intrinsics_vec256 f41 = f40;
+ e[0U] = f01;
+ e[1U] = f111;
+ e[2U] = f2;
+ e[3U] = f3;
+ e[4U] = f41;
+ uint64_t b = (uint64_t)0x1000000U;
+ Lib_IntVector_Intrinsics_vec256 mask = Lib_IntVector_Intrinsics_vec256_load64(b);
+ Lib_IntVector_Intrinsics_vec256 f4 = e[4U];
+ e[4U] = Lib_IntVector_Intrinsics_vec256_or(f4, mask);
+ Lib_IntVector_Intrinsics_vec256 *r1 = pre0;
+ Lib_IntVector_Intrinsics_vec256 *r5 = pre0 + (uint32_t)5U;
+ Lib_IntVector_Intrinsics_vec256 r0 = r1[0U];
+ Lib_IntVector_Intrinsics_vec256 r11 = r1[1U];
+ Lib_IntVector_Intrinsics_vec256 r2 = r1[2U];
+ Lib_IntVector_Intrinsics_vec256 r3 = r1[3U];
+ Lib_IntVector_Intrinsics_vec256 r4 = r1[4U];
+ Lib_IntVector_Intrinsics_vec256 r51 = r5[1U];
+ Lib_IntVector_Intrinsics_vec256 r52 = r5[2U];
+ Lib_IntVector_Intrinsics_vec256 r53 = r5[3U];
+ Lib_IntVector_Intrinsics_vec256 r54 = r5[4U];
+ Lib_IntVector_Intrinsics_vec256 f10 = e[0U];
+ Lib_IntVector_Intrinsics_vec256 f11 = e[1U];
+ Lib_IntVector_Intrinsics_vec256 f12 = e[2U];
+ Lib_IntVector_Intrinsics_vec256 f13 = e[3U];
+ Lib_IntVector_Intrinsics_vec256 f14 = e[4U];
+ Lib_IntVector_Intrinsics_vec256 a0 = acc0[0U];
+ Lib_IntVector_Intrinsics_vec256 a1 = acc0[1U];
+ Lib_IntVector_Intrinsics_vec256 a2 = acc0[2U];
+ Lib_IntVector_Intrinsics_vec256 a3 = acc0[3U];
+ Lib_IntVector_Intrinsics_vec256 a4 = acc0[4U];
+ Lib_IntVector_Intrinsics_vec256 a01 = Lib_IntVector_Intrinsics_vec256_add64(a0, f10);
+ Lib_IntVector_Intrinsics_vec256 a11 = Lib_IntVector_Intrinsics_vec256_add64(a1, f11);
+ Lib_IntVector_Intrinsics_vec256 a21 = Lib_IntVector_Intrinsics_vec256_add64(a2, f12);
+ Lib_IntVector_Intrinsics_vec256 a31 = Lib_IntVector_Intrinsics_vec256_add64(a3, f13);
+ Lib_IntVector_Intrinsics_vec256 a41 = Lib_IntVector_Intrinsics_vec256_add64(a4, f14);
+ Lib_IntVector_Intrinsics_vec256 a02 = Lib_IntVector_Intrinsics_vec256_mul64(r0, a01);
+ Lib_IntVector_Intrinsics_vec256 a12 = Lib_IntVector_Intrinsics_vec256_mul64(r11, a01);
+ Lib_IntVector_Intrinsics_vec256 a22 = Lib_IntVector_Intrinsics_vec256_mul64(r2, a01);
+ Lib_IntVector_Intrinsics_vec256 a32 = Lib_IntVector_Intrinsics_vec256_mul64(r3, a01);
+ Lib_IntVector_Intrinsics_vec256 a42 = Lib_IntVector_Intrinsics_vec256_mul64(r4, a01);
+ Lib_IntVector_Intrinsics_vec256
+ a03 =
+ Lib_IntVector_Intrinsics_vec256_add64(a02,
+ Lib_IntVector_Intrinsics_vec256_mul64(r54, a11));
+ Lib_IntVector_Intrinsics_vec256
+ a13 =
+ Lib_IntVector_Intrinsics_vec256_add64(a12,
+ Lib_IntVector_Intrinsics_vec256_mul64(r0, a11));
+ Lib_IntVector_Intrinsics_vec256
+ a23 =
+ Lib_IntVector_Intrinsics_vec256_add64(a22,
+ Lib_IntVector_Intrinsics_vec256_mul64(r11, a11));
+ Lib_IntVector_Intrinsics_vec256
+ a33 =
+ Lib_IntVector_Intrinsics_vec256_add64(a32,
+ Lib_IntVector_Intrinsics_vec256_mul64(r2, a11));
+ Lib_IntVector_Intrinsics_vec256
+ a43 =
+ Lib_IntVector_Intrinsics_vec256_add64(a42,
+ Lib_IntVector_Intrinsics_vec256_mul64(r3, a11));
+ Lib_IntVector_Intrinsics_vec256
+ a04 =
+ Lib_IntVector_Intrinsics_vec256_add64(a03,
+ Lib_IntVector_Intrinsics_vec256_mul64(r53, a21));
+ Lib_IntVector_Intrinsics_vec256
+ a14 =
+ Lib_IntVector_Intrinsics_vec256_add64(a13,
+ Lib_IntVector_Intrinsics_vec256_mul64(r54, a21));
+ Lib_IntVector_Intrinsics_vec256
+ a24 =
+ Lib_IntVector_Intrinsics_vec256_add64(a23,
+ Lib_IntVector_Intrinsics_vec256_mul64(r0, a21));
+ Lib_IntVector_Intrinsics_vec256
+ a34 =
+ Lib_IntVector_Intrinsics_vec256_add64(a33,
+ Lib_IntVector_Intrinsics_vec256_mul64(r11, a21));
+ Lib_IntVector_Intrinsics_vec256
+ a44 =
+ Lib_IntVector_Intrinsics_vec256_add64(a43,
+ Lib_IntVector_Intrinsics_vec256_mul64(r2, a21));
+ Lib_IntVector_Intrinsics_vec256
+ a05 =
+ Lib_IntVector_Intrinsics_vec256_add64(a04,
+ Lib_IntVector_Intrinsics_vec256_mul64(r52, a31));
+ Lib_IntVector_Intrinsics_vec256
+ a15 =
+ Lib_IntVector_Intrinsics_vec256_add64(a14,
+ Lib_IntVector_Intrinsics_vec256_mul64(r53, a31));
+ Lib_IntVector_Intrinsics_vec256
+ a25 =
+ Lib_IntVector_Intrinsics_vec256_add64(a24,
+ Lib_IntVector_Intrinsics_vec256_mul64(r54, a31));
+ Lib_IntVector_Intrinsics_vec256
+ a35 =
+ Lib_IntVector_Intrinsics_vec256_add64(a34,
+ Lib_IntVector_Intrinsics_vec256_mul64(r0, a31));
+ Lib_IntVector_Intrinsics_vec256
+ a45 =
+ Lib_IntVector_Intrinsics_vec256_add64(a44,
+ Lib_IntVector_Intrinsics_vec256_mul64(r11, a31));
+ Lib_IntVector_Intrinsics_vec256
+ a06 =
+ Lib_IntVector_Intrinsics_vec256_add64(a05,
+ Lib_IntVector_Intrinsics_vec256_mul64(r51, a41));
+ Lib_IntVector_Intrinsics_vec256
+ a16 =
+ Lib_IntVector_Intrinsics_vec256_add64(a15,
+ Lib_IntVector_Intrinsics_vec256_mul64(r52, a41));
+ Lib_IntVector_Intrinsics_vec256
+ a26 =
+ Lib_IntVector_Intrinsics_vec256_add64(a25,
+ Lib_IntVector_Intrinsics_vec256_mul64(r53, a41));
+ Lib_IntVector_Intrinsics_vec256
+ a36 =
+ Lib_IntVector_Intrinsics_vec256_add64(a35,
+ Lib_IntVector_Intrinsics_vec256_mul64(r54, a41));
+ Lib_IntVector_Intrinsics_vec256
+ a46 =
+ Lib_IntVector_Intrinsics_vec256_add64(a45,
+ Lib_IntVector_Intrinsics_vec256_mul64(r0, a41));
+ Lib_IntVector_Intrinsics_vec256 t01 = a06;
+ Lib_IntVector_Intrinsics_vec256 t11 = a16;
+ Lib_IntVector_Intrinsics_vec256 t2 = a26;
+ Lib_IntVector_Intrinsics_vec256 t3 = a36;
+ Lib_IntVector_Intrinsics_vec256 t4 = a46;
+ Lib_IntVector_Intrinsics_vec256
+ mask26 = Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU);
+ Lib_IntVector_Intrinsics_vec256
+ z0 = Lib_IntVector_Intrinsics_vec256_shift_right64(t01, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec256
+ z1 = Lib_IntVector_Intrinsics_vec256_shift_right64(t3, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec256 x0 = Lib_IntVector_Intrinsics_vec256_and(t01, mask26);
+ Lib_IntVector_Intrinsics_vec256 x3 = Lib_IntVector_Intrinsics_vec256_and(t3, mask26);
+ Lib_IntVector_Intrinsics_vec256 x1 = Lib_IntVector_Intrinsics_vec256_add64(t11, z0);
+ Lib_IntVector_Intrinsics_vec256 x4 = Lib_IntVector_Intrinsics_vec256_add64(t4, z1);
+ Lib_IntVector_Intrinsics_vec256
+ z01 = Lib_IntVector_Intrinsics_vec256_shift_right64(x1, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec256
+ z11 = Lib_IntVector_Intrinsics_vec256_shift_right64(x4, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec256
+ t = Lib_IntVector_Intrinsics_vec256_shift_left64(z11, (uint32_t)2U);
+ Lib_IntVector_Intrinsics_vec256 z12 = Lib_IntVector_Intrinsics_vec256_add64(z11, t);
+ Lib_IntVector_Intrinsics_vec256 x11 = Lib_IntVector_Intrinsics_vec256_and(x1, mask26);
+ Lib_IntVector_Intrinsics_vec256 x41 = Lib_IntVector_Intrinsics_vec256_and(x4, mask26);
+ Lib_IntVector_Intrinsics_vec256 x2 = Lib_IntVector_Intrinsics_vec256_add64(t2, z01);
+ Lib_IntVector_Intrinsics_vec256 x01 = Lib_IntVector_Intrinsics_vec256_add64(x0, z12);
+ Lib_IntVector_Intrinsics_vec256
+ z02 = Lib_IntVector_Intrinsics_vec256_shift_right64(x2, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec256
+ z13 = Lib_IntVector_Intrinsics_vec256_shift_right64(x01, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec256 x21 = Lib_IntVector_Intrinsics_vec256_and(x2, mask26);
+ Lib_IntVector_Intrinsics_vec256 x02 = Lib_IntVector_Intrinsics_vec256_and(x01, mask26);
+ Lib_IntVector_Intrinsics_vec256 x31 = Lib_IntVector_Intrinsics_vec256_add64(x3, z02);
+ Lib_IntVector_Intrinsics_vec256 x12 = Lib_IntVector_Intrinsics_vec256_add64(x11, z13);
+ Lib_IntVector_Intrinsics_vec256
+ z03 = Lib_IntVector_Intrinsics_vec256_shift_right64(x31, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec256 x32 = Lib_IntVector_Intrinsics_vec256_and(x31, mask26);
+ Lib_IntVector_Intrinsics_vec256 x42 = Lib_IntVector_Intrinsics_vec256_add64(x41, z03);
+ Lib_IntVector_Intrinsics_vec256 o0 = x02;
+ Lib_IntVector_Intrinsics_vec256 o1 = x12;
+ Lib_IntVector_Intrinsics_vec256 o2 = x21;
+ Lib_IntVector_Intrinsics_vec256 o3 = x32;
+ Lib_IntVector_Intrinsics_vec256 o4 = x42;
+ acc0[0U] = o0;
+ acc0[1U] = o1;
+ acc0[2U] = o2;
+ acc0[3U] = o3;
+ acc0[4U] = o4;
+ }
+ if (rem1 > (uint32_t)0U) {
+ uint8_t *last = t10 + nb * (uint32_t)16U;
+ Lib_IntVector_Intrinsics_vec256 e[5U];
+ for (uint32_t _i = 0U; _i < (uint32_t)5U; ++_i)
+ e[_i] = Lib_IntVector_Intrinsics_vec256_zero;
+ uint8_t tmp[16U] = { 0U };
+ memcpy(tmp, last, rem1 * sizeof(last[0U]));
+ uint64_t u0 = load64_le(tmp);
+ uint64_t lo = u0;
+ uint64_t u = load64_le(tmp + (uint32_t)8U);
+ uint64_t hi = u;
+ Lib_IntVector_Intrinsics_vec256 f0 = Lib_IntVector_Intrinsics_vec256_load64(lo);
+ Lib_IntVector_Intrinsics_vec256 f1 = Lib_IntVector_Intrinsics_vec256_load64(hi);
+ Lib_IntVector_Intrinsics_vec256
+ f010 =
+ Lib_IntVector_Intrinsics_vec256_and(f0,
+ Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU));
+ Lib_IntVector_Intrinsics_vec256
+ f110 =
+ Lib_IntVector_Intrinsics_vec256_and(Lib_IntVector_Intrinsics_vec256_shift_right64(f0,
+ (uint32_t)26U),
+ Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU));
+ Lib_IntVector_Intrinsics_vec256
+ f20 =
+ Lib_IntVector_Intrinsics_vec256_or(Lib_IntVector_Intrinsics_vec256_shift_right64(f0,
+ (uint32_t)52U),
+ Lib_IntVector_Intrinsics_vec256_shift_left64(Lib_IntVector_Intrinsics_vec256_and(f1,
+ Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3fffU)),
+ (uint32_t)12U));
+ Lib_IntVector_Intrinsics_vec256
+ f30 =
+ Lib_IntVector_Intrinsics_vec256_and(Lib_IntVector_Intrinsics_vec256_shift_right64(f1,
+ (uint32_t)14U),
+ Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU));
+ Lib_IntVector_Intrinsics_vec256
+ f40 = Lib_IntVector_Intrinsics_vec256_shift_right64(f1, (uint32_t)40U);
+ Lib_IntVector_Intrinsics_vec256 f01 = f010;
+ Lib_IntVector_Intrinsics_vec256 f111 = f110;
+ Lib_IntVector_Intrinsics_vec256 f2 = f20;
+ Lib_IntVector_Intrinsics_vec256 f3 = f30;
+ Lib_IntVector_Intrinsics_vec256 f4 = f40;
+ e[0U] = f01;
+ e[1U] = f111;
+ e[2U] = f2;
+ e[3U] = f3;
+ e[4U] = f4;
+ uint64_t b = (uint64_t)1U << rem1 * (uint32_t)8U % (uint32_t)26U;
+ Lib_IntVector_Intrinsics_vec256 mask = Lib_IntVector_Intrinsics_vec256_load64(b);
+ Lib_IntVector_Intrinsics_vec256 fi = e[rem1 * (uint32_t)8U / (uint32_t)26U];
+ e[rem1 * (uint32_t)8U / (uint32_t)26U] = Lib_IntVector_Intrinsics_vec256_or(fi, mask);
+ Lib_IntVector_Intrinsics_vec256 *r1 = pre0;
+ Lib_IntVector_Intrinsics_vec256 *r5 = pre0 + (uint32_t)5U;
+ Lib_IntVector_Intrinsics_vec256 r0 = r1[0U];
+ Lib_IntVector_Intrinsics_vec256 r11 = r1[1U];
+ Lib_IntVector_Intrinsics_vec256 r2 = r1[2U];
+ Lib_IntVector_Intrinsics_vec256 r3 = r1[3U];
+ Lib_IntVector_Intrinsics_vec256 r4 = r1[4U];
+ Lib_IntVector_Intrinsics_vec256 r51 = r5[1U];
+ Lib_IntVector_Intrinsics_vec256 r52 = r5[2U];
+ Lib_IntVector_Intrinsics_vec256 r53 = r5[3U];
+ Lib_IntVector_Intrinsics_vec256 r54 = r5[4U];
+ Lib_IntVector_Intrinsics_vec256 f10 = e[0U];
+ Lib_IntVector_Intrinsics_vec256 f11 = e[1U];
+ Lib_IntVector_Intrinsics_vec256 f12 = e[2U];
+ Lib_IntVector_Intrinsics_vec256 f13 = e[3U];
+ Lib_IntVector_Intrinsics_vec256 f14 = e[4U];
+ Lib_IntVector_Intrinsics_vec256 a0 = acc0[0U];
+ Lib_IntVector_Intrinsics_vec256 a1 = acc0[1U];
+ Lib_IntVector_Intrinsics_vec256 a2 = acc0[2U];
+ Lib_IntVector_Intrinsics_vec256 a3 = acc0[3U];
+ Lib_IntVector_Intrinsics_vec256 a4 = acc0[4U];
+ Lib_IntVector_Intrinsics_vec256 a01 = Lib_IntVector_Intrinsics_vec256_add64(a0, f10);
+ Lib_IntVector_Intrinsics_vec256 a11 = Lib_IntVector_Intrinsics_vec256_add64(a1, f11);
+ Lib_IntVector_Intrinsics_vec256 a21 = Lib_IntVector_Intrinsics_vec256_add64(a2, f12);
+ Lib_IntVector_Intrinsics_vec256 a31 = Lib_IntVector_Intrinsics_vec256_add64(a3, f13);
+ Lib_IntVector_Intrinsics_vec256 a41 = Lib_IntVector_Intrinsics_vec256_add64(a4, f14);
+ Lib_IntVector_Intrinsics_vec256 a02 = Lib_IntVector_Intrinsics_vec256_mul64(r0, a01);
+ Lib_IntVector_Intrinsics_vec256 a12 = Lib_IntVector_Intrinsics_vec256_mul64(r11, a01);
+ Lib_IntVector_Intrinsics_vec256 a22 = Lib_IntVector_Intrinsics_vec256_mul64(r2, a01);
+ Lib_IntVector_Intrinsics_vec256 a32 = Lib_IntVector_Intrinsics_vec256_mul64(r3, a01);
+ Lib_IntVector_Intrinsics_vec256 a42 = Lib_IntVector_Intrinsics_vec256_mul64(r4, a01);
+ Lib_IntVector_Intrinsics_vec256
+ a03 =
+ Lib_IntVector_Intrinsics_vec256_add64(a02,
+ Lib_IntVector_Intrinsics_vec256_mul64(r54, a11));
+ Lib_IntVector_Intrinsics_vec256
+ a13 =
+ Lib_IntVector_Intrinsics_vec256_add64(a12,
+ Lib_IntVector_Intrinsics_vec256_mul64(r0, a11));
+ Lib_IntVector_Intrinsics_vec256
+ a23 =
+ Lib_IntVector_Intrinsics_vec256_add64(a22,
+ Lib_IntVector_Intrinsics_vec256_mul64(r11, a11));
+ Lib_IntVector_Intrinsics_vec256
+ a33 =
+ Lib_IntVector_Intrinsics_vec256_add64(a32,
+ Lib_IntVector_Intrinsics_vec256_mul64(r2, a11));
+ Lib_IntVector_Intrinsics_vec256
+ a43 =
+ Lib_IntVector_Intrinsics_vec256_add64(a42,
+ Lib_IntVector_Intrinsics_vec256_mul64(r3, a11));
+ Lib_IntVector_Intrinsics_vec256
+ a04 =
+ Lib_IntVector_Intrinsics_vec256_add64(a03,
+ Lib_IntVector_Intrinsics_vec256_mul64(r53, a21));
+ Lib_IntVector_Intrinsics_vec256
+ a14 =
+ Lib_IntVector_Intrinsics_vec256_add64(a13,
+ Lib_IntVector_Intrinsics_vec256_mul64(r54, a21));
+ Lib_IntVector_Intrinsics_vec256
+ a24 =
+ Lib_IntVector_Intrinsics_vec256_add64(a23,
+ Lib_IntVector_Intrinsics_vec256_mul64(r0, a21));
+ Lib_IntVector_Intrinsics_vec256
+ a34 =
+ Lib_IntVector_Intrinsics_vec256_add64(a33,
+ Lib_IntVector_Intrinsics_vec256_mul64(r11, a21));
+ Lib_IntVector_Intrinsics_vec256
+ a44 =
+ Lib_IntVector_Intrinsics_vec256_add64(a43,
+ Lib_IntVector_Intrinsics_vec256_mul64(r2, a21));
+ Lib_IntVector_Intrinsics_vec256
+ a05 =
+ Lib_IntVector_Intrinsics_vec256_add64(a04,
+ Lib_IntVector_Intrinsics_vec256_mul64(r52, a31));
+ Lib_IntVector_Intrinsics_vec256
+ a15 =
+ Lib_IntVector_Intrinsics_vec256_add64(a14,
+ Lib_IntVector_Intrinsics_vec256_mul64(r53, a31));
+ Lib_IntVector_Intrinsics_vec256
+ a25 =
+ Lib_IntVector_Intrinsics_vec256_add64(a24,
+ Lib_IntVector_Intrinsics_vec256_mul64(r54, a31));
+ Lib_IntVector_Intrinsics_vec256
+ a35 =
+ Lib_IntVector_Intrinsics_vec256_add64(a34,
+ Lib_IntVector_Intrinsics_vec256_mul64(r0, a31));
+ Lib_IntVector_Intrinsics_vec256
+ a45 =
+ Lib_IntVector_Intrinsics_vec256_add64(a44,
+ Lib_IntVector_Intrinsics_vec256_mul64(r11, a31));
+ Lib_IntVector_Intrinsics_vec256
+ a06 =
+ Lib_IntVector_Intrinsics_vec256_add64(a05,
+ Lib_IntVector_Intrinsics_vec256_mul64(r51, a41));
+ Lib_IntVector_Intrinsics_vec256
+ a16 =
+ Lib_IntVector_Intrinsics_vec256_add64(a15,
+ Lib_IntVector_Intrinsics_vec256_mul64(r52, a41));
+ Lib_IntVector_Intrinsics_vec256
+ a26 =
+ Lib_IntVector_Intrinsics_vec256_add64(a25,
+ Lib_IntVector_Intrinsics_vec256_mul64(r53, a41));
+ Lib_IntVector_Intrinsics_vec256
+ a36 =
+ Lib_IntVector_Intrinsics_vec256_add64(a35,
+ Lib_IntVector_Intrinsics_vec256_mul64(r54, a41));
+ Lib_IntVector_Intrinsics_vec256
+ a46 =
+ Lib_IntVector_Intrinsics_vec256_add64(a45,
+ Lib_IntVector_Intrinsics_vec256_mul64(r0, a41));
+ Lib_IntVector_Intrinsics_vec256 t01 = a06;
+ Lib_IntVector_Intrinsics_vec256 t11 = a16;
+ Lib_IntVector_Intrinsics_vec256 t2 = a26;
+ Lib_IntVector_Intrinsics_vec256 t3 = a36;
+ Lib_IntVector_Intrinsics_vec256 t4 = a46;
+ Lib_IntVector_Intrinsics_vec256
+ mask26 = Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU);
+ Lib_IntVector_Intrinsics_vec256
+ z0 = Lib_IntVector_Intrinsics_vec256_shift_right64(t01, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec256
+ z1 = Lib_IntVector_Intrinsics_vec256_shift_right64(t3, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec256 x0 = Lib_IntVector_Intrinsics_vec256_and(t01, mask26);
+ Lib_IntVector_Intrinsics_vec256 x3 = Lib_IntVector_Intrinsics_vec256_and(t3, mask26);
+ Lib_IntVector_Intrinsics_vec256 x1 = Lib_IntVector_Intrinsics_vec256_add64(t11, z0);
+ Lib_IntVector_Intrinsics_vec256 x4 = Lib_IntVector_Intrinsics_vec256_add64(t4, z1);
+ Lib_IntVector_Intrinsics_vec256
+ z01 = Lib_IntVector_Intrinsics_vec256_shift_right64(x1, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec256
+ z11 = Lib_IntVector_Intrinsics_vec256_shift_right64(x4, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec256
+ t = Lib_IntVector_Intrinsics_vec256_shift_left64(z11, (uint32_t)2U);
+ Lib_IntVector_Intrinsics_vec256 z12 = Lib_IntVector_Intrinsics_vec256_add64(z11, t);
+ Lib_IntVector_Intrinsics_vec256 x11 = Lib_IntVector_Intrinsics_vec256_and(x1, mask26);
+ Lib_IntVector_Intrinsics_vec256 x41 = Lib_IntVector_Intrinsics_vec256_and(x4, mask26);
+ Lib_IntVector_Intrinsics_vec256 x2 = Lib_IntVector_Intrinsics_vec256_add64(t2, z01);
+ Lib_IntVector_Intrinsics_vec256 x01 = Lib_IntVector_Intrinsics_vec256_add64(x0, z12);
+ Lib_IntVector_Intrinsics_vec256
+ z02 = Lib_IntVector_Intrinsics_vec256_shift_right64(x2, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec256
+ z13 = Lib_IntVector_Intrinsics_vec256_shift_right64(x01, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec256 x21 = Lib_IntVector_Intrinsics_vec256_and(x2, mask26);
+ Lib_IntVector_Intrinsics_vec256 x02 = Lib_IntVector_Intrinsics_vec256_and(x01, mask26);
+ Lib_IntVector_Intrinsics_vec256 x31 = Lib_IntVector_Intrinsics_vec256_add64(x3, z02);
+ Lib_IntVector_Intrinsics_vec256 x12 = Lib_IntVector_Intrinsics_vec256_add64(x11, z13);
+ Lib_IntVector_Intrinsics_vec256
+ z03 = Lib_IntVector_Intrinsics_vec256_shift_right64(x31, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec256 x32 = Lib_IntVector_Intrinsics_vec256_and(x31, mask26);
+ Lib_IntVector_Intrinsics_vec256 x42 = Lib_IntVector_Intrinsics_vec256_add64(x41, z03);
+ Lib_IntVector_Intrinsics_vec256 o0 = x02;
+ Lib_IntVector_Intrinsics_vec256 o1 = x12;
+ Lib_IntVector_Intrinsics_vec256 o2 = x21;
+ Lib_IntVector_Intrinsics_vec256 o3 = x32;
+ Lib_IntVector_Intrinsics_vec256 o4 = x42;
+ acc0[0U] = o0;
+ acc0[1U] = o1;
+ acc0[2U] = o2;
+ acc0[3U] = o3;
+ acc0[4U] = o4;
+ }
+ uint8_t tmp[16U] = { 0U };
+ memcpy(tmp, rem, r * sizeof(rem[0U]));
+ if (r > (uint32_t)0U) {
+ Lib_IntVector_Intrinsics_vec256 *pre = ctx + (uint32_t)5U;
+ Lib_IntVector_Intrinsics_vec256 *acc = ctx;
+ Lib_IntVector_Intrinsics_vec256 e[5U];
+ for (uint32_t _i = 0U; _i < (uint32_t)5U; ++_i)
+ e[_i] = Lib_IntVector_Intrinsics_vec256_zero;
+ uint64_t u0 = load64_le(tmp);
+ uint64_t lo = u0;
+ uint64_t u = load64_le(tmp + (uint32_t)8U);
+ uint64_t hi = u;
+ Lib_IntVector_Intrinsics_vec256 f0 = Lib_IntVector_Intrinsics_vec256_load64(lo);
+ Lib_IntVector_Intrinsics_vec256 f1 = Lib_IntVector_Intrinsics_vec256_load64(hi);
+ Lib_IntVector_Intrinsics_vec256
+ f010 =
+ Lib_IntVector_Intrinsics_vec256_and(f0,
+ Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU));
+ Lib_IntVector_Intrinsics_vec256
+ f110 =
+ Lib_IntVector_Intrinsics_vec256_and(Lib_IntVector_Intrinsics_vec256_shift_right64(f0,
+ (uint32_t)26U),
+ Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU));
+ Lib_IntVector_Intrinsics_vec256
+ f20 =
+ Lib_IntVector_Intrinsics_vec256_or(Lib_IntVector_Intrinsics_vec256_shift_right64(f0,
+ (uint32_t)52U),
+ Lib_IntVector_Intrinsics_vec256_shift_left64(Lib_IntVector_Intrinsics_vec256_and(f1,
+ Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3fffU)),
+ (uint32_t)12U));
+ Lib_IntVector_Intrinsics_vec256
+ f30 =
+ Lib_IntVector_Intrinsics_vec256_and(Lib_IntVector_Intrinsics_vec256_shift_right64(f1,
+ (uint32_t)14U),
+ Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU));
+ Lib_IntVector_Intrinsics_vec256
+ f40 = Lib_IntVector_Intrinsics_vec256_shift_right64(f1, (uint32_t)40U);
+ Lib_IntVector_Intrinsics_vec256 f01 = f010;
+ Lib_IntVector_Intrinsics_vec256 f111 = f110;
+ Lib_IntVector_Intrinsics_vec256 f2 = f20;
+ Lib_IntVector_Intrinsics_vec256 f3 = f30;
+ Lib_IntVector_Intrinsics_vec256 f41 = f40;
+ e[0U] = f01;
+ e[1U] = f111;
+ e[2U] = f2;
+ e[3U] = f3;
+ e[4U] = f41;
+ uint64_t b = (uint64_t)0x1000000U;
+ Lib_IntVector_Intrinsics_vec256 mask = Lib_IntVector_Intrinsics_vec256_load64(b);
+ Lib_IntVector_Intrinsics_vec256 f4 = e[4U];
+ e[4U] = Lib_IntVector_Intrinsics_vec256_or(f4, mask);
+ Lib_IntVector_Intrinsics_vec256 *r1 = pre;
+ Lib_IntVector_Intrinsics_vec256 *r5 = pre + (uint32_t)5U;
+ Lib_IntVector_Intrinsics_vec256 r0 = r1[0U];
+ Lib_IntVector_Intrinsics_vec256 r11 = r1[1U];
+ Lib_IntVector_Intrinsics_vec256 r2 = r1[2U];
+ Lib_IntVector_Intrinsics_vec256 r3 = r1[3U];
+ Lib_IntVector_Intrinsics_vec256 r4 = r1[4U];
+ Lib_IntVector_Intrinsics_vec256 r51 = r5[1U];
+ Lib_IntVector_Intrinsics_vec256 r52 = r5[2U];
+ Lib_IntVector_Intrinsics_vec256 r53 = r5[3U];
+ Lib_IntVector_Intrinsics_vec256 r54 = r5[4U];
+ Lib_IntVector_Intrinsics_vec256 f10 = e[0U];
+ Lib_IntVector_Intrinsics_vec256 f11 = e[1U];
+ Lib_IntVector_Intrinsics_vec256 f12 = e[2U];
+ Lib_IntVector_Intrinsics_vec256 f13 = e[3U];
+ Lib_IntVector_Intrinsics_vec256 f14 = e[4U];
+ Lib_IntVector_Intrinsics_vec256 a0 = acc[0U];
+ Lib_IntVector_Intrinsics_vec256 a1 = acc[1U];
+ Lib_IntVector_Intrinsics_vec256 a2 = acc[2U];
+ Lib_IntVector_Intrinsics_vec256 a3 = acc[3U];
+ Lib_IntVector_Intrinsics_vec256 a4 = acc[4U];
+ Lib_IntVector_Intrinsics_vec256 a01 = Lib_IntVector_Intrinsics_vec256_add64(a0, f10);
+ Lib_IntVector_Intrinsics_vec256 a11 = Lib_IntVector_Intrinsics_vec256_add64(a1, f11);
+ Lib_IntVector_Intrinsics_vec256 a21 = Lib_IntVector_Intrinsics_vec256_add64(a2, f12);
+ Lib_IntVector_Intrinsics_vec256 a31 = Lib_IntVector_Intrinsics_vec256_add64(a3, f13);
+ Lib_IntVector_Intrinsics_vec256 a41 = Lib_IntVector_Intrinsics_vec256_add64(a4, f14);
+ Lib_IntVector_Intrinsics_vec256 a02 = Lib_IntVector_Intrinsics_vec256_mul64(r0, a01);
+ Lib_IntVector_Intrinsics_vec256 a12 = Lib_IntVector_Intrinsics_vec256_mul64(r11, a01);
+ Lib_IntVector_Intrinsics_vec256 a22 = Lib_IntVector_Intrinsics_vec256_mul64(r2, a01);
+ Lib_IntVector_Intrinsics_vec256 a32 = Lib_IntVector_Intrinsics_vec256_mul64(r3, a01);
+ Lib_IntVector_Intrinsics_vec256 a42 = Lib_IntVector_Intrinsics_vec256_mul64(r4, a01);
+ Lib_IntVector_Intrinsics_vec256
+ a03 =
+ Lib_IntVector_Intrinsics_vec256_add64(a02,
+ Lib_IntVector_Intrinsics_vec256_mul64(r54, a11));
+ Lib_IntVector_Intrinsics_vec256
+ a13 =
+ Lib_IntVector_Intrinsics_vec256_add64(a12,
+ Lib_IntVector_Intrinsics_vec256_mul64(r0, a11));
+ Lib_IntVector_Intrinsics_vec256
+ a23 =
+ Lib_IntVector_Intrinsics_vec256_add64(a22,
+ Lib_IntVector_Intrinsics_vec256_mul64(r11, a11));
+ Lib_IntVector_Intrinsics_vec256
+ a33 =
+ Lib_IntVector_Intrinsics_vec256_add64(a32,
+ Lib_IntVector_Intrinsics_vec256_mul64(r2, a11));
+ Lib_IntVector_Intrinsics_vec256
+ a43 =
+ Lib_IntVector_Intrinsics_vec256_add64(a42,
+ Lib_IntVector_Intrinsics_vec256_mul64(r3, a11));
+ Lib_IntVector_Intrinsics_vec256
+ a04 =
+ Lib_IntVector_Intrinsics_vec256_add64(a03,
+ Lib_IntVector_Intrinsics_vec256_mul64(r53, a21));
+ Lib_IntVector_Intrinsics_vec256
+ a14 =
+ Lib_IntVector_Intrinsics_vec256_add64(a13,
+ Lib_IntVector_Intrinsics_vec256_mul64(r54, a21));
+ Lib_IntVector_Intrinsics_vec256
+ a24 =
+ Lib_IntVector_Intrinsics_vec256_add64(a23,
+ Lib_IntVector_Intrinsics_vec256_mul64(r0, a21));
+ Lib_IntVector_Intrinsics_vec256
+ a34 =
+ Lib_IntVector_Intrinsics_vec256_add64(a33,
+ Lib_IntVector_Intrinsics_vec256_mul64(r11, a21));
+ Lib_IntVector_Intrinsics_vec256
+ a44 =
+ Lib_IntVector_Intrinsics_vec256_add64(a43,
+ Lib_IntVector_Intrinsics_vec256_mul64(r2, a21));
+ Lib_IntVector_Intrinsics_vec256
+ a05 =
+ Lib_IntVector_Intrinsics_vec256_add64(a04,
+ Lib_IntVector_Intrinsics_vec256_mul64(r52, a31));
+ Lib_IntVector_Intrinsics_vec256
+ a15 =
+ Lib_IntVector_Intrinsics_vec256_add64(a14,
+ Lib_IntVector_Intrinsics_vec256_mul64(r53, a31));
+ Lib_IntVector_Intrinsics_vec256
+ a25 =
+ Lib_IntVector_Intrinsics_vec256_add64(a24,
+ Lib_IntVector_Intrinsics_vec256_mul64(r54, a31));
+ Lib_IntVector_Intrinsics_vec256
+ a35 =
+ Lib_IntVector_Intrinsics_vec256_add64(a34,
+ Lib_IntVector_Intrinsics_vec256_mul64(r0, a31));
+ Lib_IntVector_Intrinsics_vec256
+ a45 =
+ Lib_IntVector_Intrinsics_vec256_add64(a44,
+ Lib_IntVector_Intrinsics_vec256_mul64(r11, a31));
+ Lib_IntVector_Intrinsics_vec256
+ a06 =
+ Lib_IntVector_Intrinsics_vec256_add64(a05,
+ Lib_IntVector_Intrinsics_vec256_mul64(r51, a41));
+ Lib_IntVector_Intrinsics_vec256
+ a16 =
+ Lib_IntVector_Intrinsics_vec256_add64(a15,
+ Lib_IntVector_Intrinsics_vec256_mul64(r52, a41));
+ Lib_IntVector_Intrinsics_vec256
+ a26 =
+ Lib_IntVector_Intrinsics_vec256_add64(a25,
+ Lib_IntVector_Intrinsics_vec256_mul64(r53, a41));
+ Lib_IntVector_Intrinsics_vec256
+ a36 =
+ Lib_IntVector_Intrinsics_vec256_add64(a35,
+ Lib_IntVector_Intrinsics_vec256_mul64(r54, a41));
+ Lib_IntVector_Intrinsics_vec256
+ a46 =
+ Lib_IntVector_Intrinsics_vec256_add64(a45,
+ Lib_IntVector_Intrinsics_vec256_mul64(r0, a41));
+ Lib_IntVector_Intrinsics_vec256 t0 = a06;
+ Lib_IntVector_Intrinsics_vec256 t1 = a16;
+ Lib_IntVector_Intrinsics_vec256 t2 = a26;
+ Lib_IntVector_Intrinsics_vec256 t3 = a36;
+ Lib_IntVector_Intrinsics_vec256 t4 = a46;
+ Lib_IntVector_Intrinsics_vec256
+ mask26 = Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU);
+ Lib_IntVector_Intrinsics_vec256
+ z0 = Lib_IntVector_Intrinsics_vec256_shift_right64(t0, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec256
+ z1 = Lib_IntVector_Intrinsics_vec256_shift_right64(t3, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec256 x0 = Lib_IntVector_Intrinsics_vec256_and(t0, mask26);
+ Lib_IntVector_Intrinsics_vec256 x3 = Lib_IntVector_Intrinsics_vec256_and(t3, mask26);
+ Lib_IntVector_Intrinsics_vec256 x1 = Lib_IntVector_Intrinsics_vec256_add64(t1, z0);
+ Lib_IntVector_Intrinsics_vec256 x4 = Lib_IntVector_Intrinsics_vec256_add64(t4, z1);
+ Lib_IntVector_Intrinsics_vec256
+ z01 = Lib_IntVector_Intrinsics_vec256_shift_right64(x1, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec256
+ z11 = Lib_IntVector_Intrinsics_vec256_shift_right64(x4, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec256
+ t = Lib_IntVector_Intrinsics_vec256_shift_left64(z11, (uint32_t)2U);
+ Lib_IntVector_Intrinsics_vec256 z12 = Lib_IntVector_Intrinsics_vec256_add64(z11, t);
+ Lib_IntVector_Intrinsics_vec256 x11 = Lib_IntVector_Intrinsics_vec256_and(x1, mask26);
+ Lib_IntVector_Intrinsics_vec256 x41 = Lib_IntVector_Intrinsics_vec256_and(x4, mask26);
+ Lib_IntVector_Intrinsics_vec256 x2 = Lib_IntVector_Intrinsics_vec256_add64(t2, z01);
+ Lib_IntVector_Intrinsics_vec256 x01 = Lib_IntVector_Intrinsics_vec256_add64(x0, z12);
+ Lib_IntVector_Intrinsics_vec256
+ z02 = Lib_IntVector_Intrinsics_vec256_shift_right64(x2, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec256
+ z13 = Lib_IntVector_Intrinsics_vec256_shift_right64(x01, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec256 x21 = Lib_IntVector_Intrinsics_vec256_and(x2, mask26);
+ Lib_IntVector_Intrinsics_vec256 x02 = Lib_IntVector_Intrinsics_vec256_and(x01, mask26);
+ Lib_IntVector_Intrinsics_vec256 x31 = Lib_IntVector_Intrinsics_vec256_add64(x3, z02);
+ Lib_IntVector_Intrinsics_vec256 x12 = Lib_IntVector_Intrinsics_vec256_add64(x11, z13);
+ Lib_IntVector_Intrinsics_vec256
+ z03 = Lib_IntVector_Intrinsics_vec256_shift_right64(x31, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec256 x32 = Lib_IntVector_Intrinsics_vec256_and(x31, mask26);
+ Lib_IntVector_Intrinsics_vec256 x42 = Lib_IntVector_Intrinsics_vec256_add64(x41, z03);
+ Lib_IntVector_Intrinsics_vec256 o0 = x02;
+ Lib_IntVector_Intrinsics_vec256 o1 = x12;
+ Lib_IntVector_Intrinsics_vec256 o2 = x21;
+ Lib_IntVector_Intrinsics_vec256 o3 = x32;
+ Lib_IntVector_Intrinsics_vec256 o4 = x42;
+ acc[0U] = o0;
+ acc[1U] = o1;
+ acc[2U] = o2;
+ acc[3U] = o3;
+ acc[4U] = o4;
+ return;
+ }
+}
+
+/*
+ * Runs one complete Poly1305 computation over an AEAD input and stores the
+ * result through `out`.
+ *
+ * Steps, in order:
+ *   1. A zeroed 25-vector context is initialised from `k` with
+ *      Hacl_Poly1305_256_poly1305_init.
+ *   2. `aad` (aadlen bytes) and then `m` (mlen bytes) are absorbed with
+ *      poly1305_padded_256.
+ *   3. A final 16-byte block holding aadlen and mlen, each widened to 64 bits
+ *      and written with store64_le, is absorbed inline below.
+ *   4. Hacl_Poly1305_256_poly1305_finish(out, k, ctx) produces the output.
+ *
+ * k      - key material handed to the init and finish routines.
+ * aadlen - number of bytes readable at `aad`.
+ * aad    - additional authenticated data.
+ * mlen   - number of bytes readable at `m`.
+ * m      - second authenticated buffer (the AEAD callers in this file pass
+ *          the ciphertext here).
+ * out    - destination written by the finish routine.
+ *          NOTE(review): presumably a 16-byte tag - confirm against
+ *          Hacl_Poly1305_256_poly1305_finish.
+ */
+static inline void
+poly1305_do_256(
+ uint8_t *k,
+ uint32_t aadlen,
+ uint8_t *aad,
+ uint32_t mlen,
+ uint8_t *m,
+ uint8_t *out)
+{
+ Lib_IntVector_Intrinsics_vec256 ctx[25U];
+ for (uint32_t _i = 0U; _i < (uint32_t)25U; ++_i)
+ ctx[_i] = Lib_IntVector_Intrinsics_vec256_zero;
+ uint8_t block[16U] = { 0U };
+ Hacl_Poly1305_256_poly1305_init(ctx, k);
+ poly1305_padded_256(ctx, aadlen, aad);
+ poly1305_padded_256(ctx, mlen, m);
+ /* Length block: bytes 0..7 hold aadlen, bytes 8..15 hold mlen. */
+ store64_le(block, (uint64_t)aadlen);
+ store64_le(block + (uint32_t)8U, (uint64_t)mlen);
+ /*
+  * Context layout used here: ctx[0..4] is the accumulator, ctx[5..9] is read
+  * as r and ctx[10..14] as r5 (only r5[1..4] are used below).
+  * NOTE(review): the remaining ctx entries are not touched in this function;
+  * presumably they serve the init/padded routines - confirm.
+  */
+ Lib_IntVector_Intrinsics_vec256 *pre = ctx + (uint32_t)5U;
+ Lib_IntVector_Intrinsics_vec256 *acc = ctx;
+ Lib_IntVector_Intrinsics_vec256 e[5U];
+ for (uint32_t _i = 0U; _i < (uint32_t)5U; ++_i)
+ e[_i] = Lib_IntVector_Intrinsics_vec256_zero;
+ /* Reload the length block as two 64-bit words (lo = bytes 0..7, hi = bytes 8..15). */
+ uint64_t u0 = load64_le(block);
+ uint64_t lo = u0;
+ uint64_t u = load64_le(block + (uint32_t)8U);
+ uint64_t hi = u;
+ /* NOTE(review): vec256_load64 presumably broadcasts the scalar to every lane - confirm. */
+ Lib_IntVector_Intrinsics_vec256 f0 = Lib_IntVector_Intrinsics_vec256_load64(lo);
+ Lib_IntVector_Intrinsics_vec256 f1 = Lib_IntVector_Intrinsics_vec256_load64(hi);
+ /*
+  * Split the 128-bit value hi:lo into five limbs of 26 bits each:
+  *   limb0 = bits 0..25, limb1 = bits 26..51,
+  *   limb2 = bits 52..77 (12 bits from lo, 14 bits from hi),
+  *   limb3 = bits 78..103, limb4 = bits 104..127.
+  */
+ Lib_IntVector_Intrinsics_vec256
+ f010 =
+ Lib_IntVector_Intrinsics_vec256_and(f0,
+ Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU));
+ Lib_IntVector_Intrinsics_vec256
+ f110 =
+ Lib_IntVector_Intrinsics_vec256_and(Lib_IntVector_Intrinsics_vec256_shift_right64(f0,
+ (uint32_t)26U),
+ Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU));
+ Lib_IntVector_Intrinsics_vec256
+ f20 =
+ Lib_IntVector_Intrinsics_vec256_or(Lib_IntVector_Intrinsics_vec256_shift_right64(f0,
+ (uint32_t)52U),
+ Lib_IntVector_Intrinsics_vec256_shift_left64(Lib_IntVector_Intrinsics_vec256_and(f1,
+ Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3fffU)),
+ (uint32_t)12U));
+ Lib_IntVector_Intrinsics_vec256
+ f30 =
+ Lib_IntVector_Intrinsics_vec256_and(Lib_IntVector_Intrinsics_vec256_shift_right64(f1,
+ (uint32_t)14U),
+ Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU));
+ Lib_IntVector_Intrinsics_vec256
+ f40 = Lib_IntVector_Intrinsics_vec256_shift_right64(f1, (uint32_t)40U);
+ Lib_IntVector_Intrinsics_vec256 f01 = f010;
+ Lib_IntVector_Intrinsics_vec256 f111 = f110;
+ Lib_IntVector_Intrinsics_vec256 f2 = f20;
+ Lib_IntVector_Intrinsics_vec256 f3 = f30;
+ Lib_IntVector_Intrinsics_vec256 f41 = f40;
+ e[0U] = f01;
+ e[1U] = f111;
+ e[2U] = f2;
+ e[3U] = f3;
+ e[4U] = f41;
+ /* 0x1000000 is bit 24 of limb 4, i.e. 2^(4*26 + 24) = 2^128: the marker bit of a full 16-byte block. */
+ uint64_t b = (uint64_t)0x1000000U;
+ Lib_IntVector_Intrinsics_vec256 mask = Lib_IntVector_Intrinsics_vec256_load64(b);
+ Lib_IntVector_Intrinsics_vec256 f4 = e[4U];
+ e[4U] = Lib_IntVector_Intrinsics_vec256_or(f4, mask);
+ Lib_IntVector_Intrinsics_vec256 *r = pre;
+ Lib_IntVector_Intrinsics_vec256 *r5 = pre + (uint32_t)5U;
+ Lib_IntVector_Intrinsics_vec256 r0 = r[0U];
+ Lib_IntVector_Intrinsics_vec256 r1 = r[1U];
+ Lib_IntVector_Intrinsics_vec256 r2 = r[2U];
+ Lib_IntVector_Intrinsics_vec256 r3 = r[3U];
+ Lib_IntVector_Intrinsics_vec256 r4 = r[4U];
+ /* NOTE(review): r5[i] presumably holds 5 * r[i], precomputed by the init routine - confirm. */
+ Lib_IntVector_Intrinsics_vec256 r51 = r5[1U];
+ Lib_IntVector_Intrinsics_vec256 r52 = r5[2U];
+ Lib_IntVector_Intrinsics_vec256 r53 = r5[3U];
+ Lib_IntVector_Intrinsics_vec256 r54 = r5[4U];
+ Lib_IntVector_Intrinsics_vec256 f10 = e[0U];
+ Lib_IntVector_Intrinsics_vec256 f11 = e[1U];
+ Lib_IntVector_Intrinsics_vec256 f12 = e[2U];
+ Lib_IntVector_Intrinsics_vec256 f13 = e[3U];
+ Lib_IntVector_Intrinsics_vec256 f14 = e[4U];
+ Lib_IntVector_Intrinsics_vec256 a0 = acc[0U];
+ Lib_IntVector_Intrinsics_vec256 a1 = acc[1U];
+ Lib_IntVector_Intrinsics_vec256 a2 = acc[2U];
+ Lib_IntVector_Intrinsics_vec256 a3 = acc[3U];
+ Lib_IntVector_Intrinsics_vec256 a4 = acc[4U];
+ /* Step 1: add the block limbs to the accumulator limbs (no carry handling yet). */
+ Lib_IntVector_Intrinsics_vec256 a01 = Lib_IntVector_Intrinsics_vec256_add64(a0, f10);
+ Lib_IntVector_Intrinsics_vec256 a11 = Lib_IntVector_Intrinsics_vec256_add64(a1, f11);
+ Lib_IntVector_Intrinsics_vec256 a21 = Lib_IntVector_Intrinsics_vec256_add64(a2, f12);
+ Lib_IntVector_Intrinsics_vec256 a31 = Lib_IntVector_Intrinsics_vec256_add64(a3, f13);
+ Lib_IntVector_Intrinsics_vec256 a41 = Lib_IntVector_Intrinsics_vec256_add64(a4, f14);
+ /*
+  * Step 2: multiply (acc + block) by r, one input limb at a time. Each group
+  * of five statements adds the partial products of one limb (a01, a11, a21,
+  * a31, a41 in turn) into the five output columns; products that would land
+  * above limb 4 are taken from the r5 values and wrapped into the low columns.
+  */
+ Lib_IntVector_Intrinsics_vec256 a02 = Lib_IntVector_Intrinsics_vec256_mul64(r0, a01);
+ Lib_IntVector_Intrinsics_vec256 a12 = Lib_IntVector_Intrinsics_vec256_mul64(r1, a01);
+ Lib_IntVector_Intrinsics_vec256 a22 = Lib_IntVector_Intrinsics_vec256_mul64(r2, a01);
+ Lib_IntVector_Intrinsics_vec256 a32 = Lib_IntVector_Intrinsics_vec256_mul64(r3, a01);
+ Lib_IntVector_Intrinsics_vec256 a42 = Lib_IntVector_Intrinsics_vec256_mul64(r4, a01);
+ Lib_IntVector_Intrinsics_vec256
+ a03 =
+ Lib_IntVector_Intrinsics_vec256_add64(a02,
+ Lib_IntVector_Intrinsics_vec256_mul64(r54, a11));
+ Lib_IntVector_Intrinsics_vec256
+ a13 =
+ Lib_IntVector_Intrinsics_vec256_add64(a12,
+ Lib_IntVector_Intrinsics_vec256_mul64(r0, a11));
+ Lib_IntVector_Intrinsics_vec256
+ a23 =
+ Lib_IntVector_Intrinsics_vec256_add64(a22,
+ Lib_IntVector_Intrinsics_vec256_mul64(r1, a11));
+ Lib_IntVector_Intrinsics_vec256
+ a33 =
+ Lib_IntVector_Intrinsics_vec256_add64(a32,
+ Lib_IntVector_Intrinsics_vec256_mul64(r2, a11));
+ Lib_IntVector_Intrinsics_vec256
+ a43 =
+ Lib_IntVector_Intrinsics_vec256_add64(a42,
+ Lib_IntVector_Intrinsics_vec256_mul64(r3, a11));
+ Lib_IntVector_Intrinsics_vec256
+ a04 =
+ Lib_IntVector_Intrinsics_vec256_add64(a03,
+ Lib_IntVector_Intrinsics_vec256_mul64(r53, a21));
+ Lib_IntVector_Intrinsics_vec256
+ a14 =
+ Lib_IntVector_Intrinsics_vec256_add64(a13,
+ Lib_IntVector_Intrinsics_vec256_mul64(r54, a21));
+ Lib_IntVector_Intrinsics_vec256
+ a24 =
+ Lib_IntVector_Intrinsics_vec256_add64(a23,
+ Lib_IntVector_Intrinsics_vec256_mul64(r0, a21));
+ Lib_IntVector_Intrinsics_vec256
+ a34 =
+ Lib_IntVector_Intrinsics_vec256_add64(a33,
+ Lib_IntVector_Intrinsics_vec256_mul64(r1, a21));
+ Lib_IntVector_Intrinsics_vec256
+ a44 =
+ Lib_IntVector_Intrinsics_vec256_add64(a43,
+ Lib_IntVector_Intrinsics_vec256_mul64(r2, a21));
+ Lib_IntVector_Intrinsics_vec256
+ a05 =
+ Lib_IntVector_Intrinsics_vec256_add64(a04,
+ Lib_IntVector_Intrinsics_vec256_mul64(r52, a31));
+ Lib_IntVector_Intrinsics_vec256
+ a15 =
+ Lib_IntVector_Intrinsics_vec256_add64(a14,
+ Lib_IntVector_Intrinsics_vec256_mul64(r53, a31));
+ Lib_IntVector_Intrinsics_vec256
+ a25 =
+ Lib_IntVector_Intrinsics_vec256_add64(a24,
+ Lib_IntVector_Intrinsics_vec256_mul64(r54, a31));
+ Lib_IntVector_Intrinsics_vec256
+ a35 =
+ Lib_IntVector_Intrinsics_vec256_add64(a34,
+ Lib_IntVector_Intrinsics_vec256_mul64(r0, a31));
+ Lib_IntVector_Intrinsics_vec256
+ a45 =
+ Lib_IntVector_Intrinsics_vec256_add64(a44,
+ Lib_IntVector_Intrinsics_vec256_mul64(r1, a31));
+ Lib_IntVector_Intrinsics_vec256
+ a06 =
+ Lib_IntVector_Intrinsics_vec256_add64(a05,
+ Lib_IntVector_Intrinsics_vec256_mul64(r51, a41));
+ Lib_IntVector_Intrinsics_vec256
+ a16 =
+ Lib_IntVector_Intrinsics_vec256_add64(a15,
+ Lib_IntVector_Intrinsics_vec256_mul64(r52, a41));
+ Lib_IntVector_Intrinsics_vec256
+ a26 =
+ Lib_IntVector_Intrinsics_vec256_add64(a25,
+ Lib_IntVector_Intrinsics_vec256_mul64(r53, a41));
+ Lib_IntVector_Intrinsics_vec256
+ a36 =
+ Lib_IntVector_Intrinsics_vec256_add64(a35,
+ Lib_IntVector_Intrinsics_vec256_mul64(r54, a41));
+ Lib_IntVector_Intrinsics_vec256
+ a46 =
+ Lib_IntVector_Intrinsics_vec256_add64(a45,
+ Lib_IntVector_Intrinsics_vec256_mul64(r0, a41));
+ Lib_IntVector_Intrinsics_vec256 t0 = a06;
+ Lib_IntVector_Intrinsics_vec256 t1 = a16;
+ Lib_IntVector_Intrinsics_vec256 t2 = a26;
+ Lib_IntVector_Intrinsics_vec256 t3 = a36;
+ Lib_IntVector_Intrinsics_vec256 t4 = a46;
+ /*
+  * Step 3: carry propagation. Two interleaved chains run in parallel
+  * (limb0 -> limb1 -> limb2 -> limb3 -> limb4 and limb3 -> limb4 -> limb0 ->
+  * limb1); every limb is masked back to 26 bits after its carry is taken,
+  * except the last link of each chain (x12, x42), which is stored unmasked.
+  */
+ Lib_IntVector_Intrinsics_vec256
+ mask26 = Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU);
+ Lib_IntVector_Intrinsics_vec256
+ z0 = Lib_IntVector_Intrinsics_vec256_shift_right64(t0, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec256
+ z1 = Lib_IntVector_Intrinsics_vec256_shift_right64(t3, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec256 x0 = Lib_IntVector_Intrinsics_vec256_and(t0, mask26);
+ Lib_IntVector_Intrinsics_vec256 x3 = Lib_IntVector_Intrinsics_vec256_and(t3, mask26);
+ Lib_IntVector_Intrinsics_vec256 x1 = Lib_IntVector_Intrinsics_vec256_add64(t1, z0);
+ Lib_IntVector_Intrinsics_vec256 x4 = Lib_IntVector_Intrinsics_vec256_add64(t4, z1);
+ Lib_IntVector_Intrinsics_vec256
+ z01 = Lib_IntVector_Intrinsics_vec256_shift_right64(x1, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec256
+ z11 = Lib_IntVector_Intrinsics_vec256_shift_right64(x4, (uint32_t)26U);
+ /* z12 = z11 + 4*z11 = 5*z11: the carry out of limb 4 re-enters limb 0 scaled by 5, consistent with reduction modulo 2^130 - 5. */
+ Lib_IntVector_Intrinsics_vec256
+ t = Lib_IntVector_Intrinsics_vec256_shift_left64(z11, (uint32_t)2U);
+ Lib_IntVector_Intrinsics_vec256 z12 = Lib_IntVector_Intrinsics_vec256_add64(z11, t);
+ Lib_IntVector_Intrinsics_vec256 x11 = Lib_IntVector_Intrinsics_vec256_and(x1, mask26);
+ Lib_IntVector_Intrinsics_vec256 x41 = Lib_IntVector_Intrinsics_vec256_and(x4, mask26);
+ Lib_IntVector_Intrinsics_vec256 x2 = Lib_IntVector_Intrinsics_vec256_add64(t2, z01);
+ Lib_IntVector_Intrinsics_vec256 x01 = Lib_IntVector_Intrinsics_vec256_add64(x0, z12);
+ Lib_IntVector_Intrinsics_vec256
+ z02 = Lib_IntVector_Intrinsics_vec256_shift_right64(x2, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec256
+ z13 = Lib_IntVector_Intrinsics_vec256_shift_right64(x01, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec256 x21 = Lib_IntVector_Intrinsics_vec256_and(x2, mask26);
+ Lib_IntVector_Intrinsics_vec256 x02 = Lib_IntVector_Intrinsics_vec256_and(x01, mask26);
+ Lib_IntVector_Intrinsics_vec256 x31 = Lib_IntVector_Intrinsics_vec256_add64(x3, z02);
+ Lib_IntVector_Intrinsics_vec256 x12 = Lib_IntVector_Intrinsics_vec256_add64(x11, z13);
+ Lib_IntVector_Intrinsics_vec256
+ z03 = Lib_IntVector_Intrinsics_vec256_shift_right64(x31, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec256 x32 = Lib_IntVector_Intrinsics_vec256_and(x31, mask26);
+ Lib_IntVector_Intrinsics_vec256 x42 = Lib_IntVector_Intrinsics_vec256_add64(x41, z03);
+ Lib_IntVector_Intrinsics_vec256 o0 = x02;
+ Lib_IntVector_Intrinsics_vec256 o1 = x12;
+ Lib_IntVector_Intrinsics_vec256 o2 = x21;
+ Lib_IntVector_Intrinsics_vec256 o3 = x32;
+ Lib_IntVector_Intrinsics_vec256 o4 = x42;
+ /* Store the updated accumulator back into ctx[0..4] before finishing. */
+ acc[0U] = o0;
+ acc[1U] = o1;
+ acc[2U] = o2;
+ acc[3U] = o3;
+ acc[4U] = o4;
+ Hacl_Poly1305_256_poly1305_finish(out, k, ctx);
+}
+
+/*
+ * ChaCha20-Poly1305 encryption (256-bit vector backend).
+ *
+ * The message is run through Hacl_Chacha20_Vec256_chacha20_encrypt_256 with
+ * the trailing counter argument set to 1, a 64-byte zero buffer is run through
+ * the same routine with the counter argument set to 0, and that buffer is then
+ * used as the key for poly1305_do_256 over `aad` and the freshly written
+ * `cipher`.
+ *
+ * k      - cipher key, forwarded unchanged to the ChaCha20 routine.
+ * n      - nonce, forwarded unchanged to the ChaCha20 routine.
+ * aadlen - number of bytes at `aad`.
+ * aad    - additional data that is authenticated but not encrypted.
+ * mlen   - number of bytes at `m`; the same count is written to `cipher`.
+ * m      - input message.
+ * cipher - output buffer for the encrypted message.
+ * mac    - output buffer for the authenticator written by poly1305_do_256.
+ *
+ * NOTE(review): the ChaCha20 helper is assumed to take
+ * (len, out, in, key, nonce, counter) - confirm against
+ * Hacl_Chacha20_Vec256.h.
+ */
+void
+Hacl_Chacha20Poly1305_256_aead_encrypt(
+    uint8_t *k,
+    uint8_t *n,
+    uint32_t aadlen,
+    uint8_t *aad,
+    uint32_t mlen,
+    uint8_t *m,
+    uint8_t *cipher,
+    uint8_t *mac)
+{
+    /* Zero block that is transformed in place into the authenticator key. */
+    uint8_t mac_key_block[64U] = { 0U };
+
+    /* Message first (counter argument 1), key block second (counter argument 0). */
+    Hacl_Chacha20_Vec256_chacha20_encrypt_256(mlen, cipher, m, k, n, (uint32_t)1U);
+    Hacl_Chacha20_Vec256_chacha20_encrypt_256((uint32_t)64U,
+                                              mac_key_block,
+                                              mac_key_block,
+                                              k,
+                                              n,
+                                              (uint32_t)0U);
+
+    /* Authenticate aad followed by the ciphertext that was just produced. */
+    poly1305_do_256(mac_key_block, aadlen, aad, mlen, cipher, mac);
+}
+
+/*
+ * ChaCha20-Poly1305 decryption (256-bit vector backend).
+ *
+ * Recomputes the authenticator over `aad` and `cipher`, compares it with the
+ * 16 bytes at `mac`, and only when all 16 bytes agree runs the ChaCha20
+ * routine over `cipher` to fill `m`.
+ *
+ * k      - cipher key, forwarded unchanged to the ChaCha20 routine.
+ * n      - nonce, forwarded unchanged to the ChaCha20 routine.
+ * aadlen - number of bytes at `aad`.
+ * aad    - additional authenticated data.
+ * mlen   - number of bytes at `cipher`; the same count is written to `m`.
+ * m      - output buffer for the decrypted message; this function does not
+ *          write to it when the authenticator does not match.
+ * cipher - input ciphertext.
+ * mac    - authenticator to check; 16 bytes are read.
+ *
+ * Returns 0 when the authenticator matched and `m` was written, 1 otherwise.
+ *
+ * NOTE(review): FStar_UInt8_eq_mask presumably yields 0xff for equal bytes
+ * and 0x00 otherwise, without branching on the data - confirm in the Kremlin
+ * support headers.
+ */
+uint32_t
+Hacl_Chacha20Poly1305_256_aead_decrypt(
+    uint8_t *k,
+    uint8_t *n,
+    uint32_t aadlen,
+    uint8_t *aad,
+    uint32_t mlen,
+    uint8_t *m,
+    uint8_t *cipher,
+    uint8_t *mac)
+{
+    uint8_t expected_mac[16U] = { 0U };
+    uint8_t mac_key_block[64U] = { 0U };
+
+    /* Derive the authenticator key block (counter argument 0), then the expected mac. */
+    Hacl_Chacha20_Vec256_chacha20_encrypt_256((uint32_t)64U,
+                                              mac_key_block,
+                                              mac_key_block,
+                                              k,
+                                              n,
+                                              (uint32_t)0U);
+    poly1305_do_256(mac_key_block, aadlen, aad, mlen, cipher, expected_mac);
+
+    /* AND all 16 per-byte masks together; always walks the full length. */
+    uint8_t all_match = (uint8_t)255U;
+    for (uint32_t pos = (uint32_t)0U; pos < (uint32_t)16U; pos++) {
+        all_match &= FStar_UInt8_eq_mask(expected_mac[pos], mac[pos]);
+    }
+
+    /* Any differing byte clears the accumulator: reject and leave m alone. */
+    if (all_match != (uint8_t)255U) {
+        return (uint32_t)1U;
+    }
+
+    Hacl_Chacha20_Vec256_chacha20_encrypt_256(mlen, m, cipher, k, n, (uint32_t)1U);
+    return (uint32_t)0U;
+}
diff --git a/security/nss/lib/freebl/verified/Hacl_Chacha20Poly1305_256.h b/security/nss/lib/freebl/verified/Hacl_Chacha20Poly1305_256.h
new file mode 100644
index 0000000000..550cc0564c
--- /dev/null
+++ b/security/nss/lib/freebl/verified/Hacl_Chacha20Poly1305_256.h
@@ -0,0 +1,60 @@
+/* MIT License
+ *
+ * Copyright (c) 2016-2020 INRIA, CMU and Microsoft Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "libintvector.h"
+#include "kremlin/internal/types.h"
+#include "kremlin/lowstar_endianness.h"
+#include <string.h>
+#include <stdbool.h>
+
+#ifndef __Hacl_Chacha20Poly1305_256_H
+#define __Hacl_Chacha20Poly1305_256_H
+
+#include "Hacl_Kremlib.h"
+#include "Hacl_Chacha20_Vec256.h"
+#include "Hacl_Poly1305_256.h"
+
+/*
+ * ChaCha20-Poly1305 AEAD encryption, 256-bit vector implementation.
+ *
+ * k      - cipher key.     NOTE(review): presumably 32 bytes - confirm.
+ * n      - nonce.          NOTE(review): presumably 12 bytes - confirm.
+ * aadlen - length in bytes of `aad`.
+ * aad    - additional data to authenticate (not encrypted).
+ * mlen   - length in bytes of `m`; `cipher` receives the same number of bytes.
+ * m      - message to encrypt.
+ * cipher - output: encrypted message.
+ * mac    - output: authenticator.
+ *          NOTE(review): presumably 16 bytes, matching what the decrypt
+ *          routine reads - confirm.
+ */
+void
+Hacl_Chacha20Poly1305_256_aead_encrypt(
+ uint8_t *k,
+ uint8_t *n,
+ uint32_t aadlen,
+ uint8_t *aad,
+ uint32_t mlen,
+ uint8_t *m,
+ uint8_t *cipher,
+ uint8_t *mac);
+
+/*
+ * ChaCha20-Poly1305 AEAD decryption, 256-bit vector implementation.
+ *
+ * k      - cipher key.     NOTE(review): presumably 32 bytes - confirm.
+ * n      - nonce.          NOTE(review): presumably 12 bytes - confirm.
+ * aadlen - length in bytes of `aad`.
+ * aad    - additional authenticated data.
+ * mlen   - length in bytes of `cipher`; `m` receives the same number of bytes.
+ * m      - output: decrypted message, written only when the authenticator
+ *          matches.
+ * cipher - ciphertext to authenticate and decrypt.
+ * mac    - authenticator to verify; 16 bytes are read.
+ *
+ * Returns 0 on success and 1 when the authenticator does not match.
+ */
+uint32_t
+Hacl_Chacha20Poly1305_256_aead_decrypt(
+ uint8_t *k,
+ uint8_t *n,
+ uint32_t aadlen,
+ uint8_t *aad,
+ uint32_t mlen,
+ uint8_t *m,
+ uint8_t *cipher,
+ uint8_t *mac);
+
+#define __Hacl_Chacha20Poly1305_256_H_DEFINED
+#endif
diff --git a/security/nss/lib/freebl/verified/Hacl_Chacha20Poly1305_32.c b/security/nss/lib/freebl/verified/Hacl_Chacha20Poly1305_32.c
new file mode 100644
index 0000000000..760e3d548a
--- /dev/null
+++ b/security/nss/lib/freebl/verified/Hacl_Chacha20Poly1305_32.c
@@ -0,0 +1,584 @@
+/* MIT License
+ *
+ * Copyright (c) 2016-2020 INRIA, CMU and Microsoft Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "Hacl_Chacha20Poly1305_32.h"
+
+static inline void
+poly1305_padded_32(uint64_t *ctx, uint32_t len, uint8_t *text)
+{
+ uint32_t n = len / (uint32_t)16U;
+ uint32_t r = len % (uint32_t)16U;
+ uint8_t *blocks = text;
+ uint8_t *rem = text + n * (uint32_t)16U;
+ uint64_t *pre0 = ctx + (uint32_t)5U;
+ uint64_t *acc0 = ctx;
+ uint32_t nb = n * (uint32_t)16U / (uint32_t)16U;
+ uint32_t rem1 = n * (uint32_t)16U % (uint32_t)16U;
+ for (uint32_t i = (uint32_t)0U; i < nb; i++) {
+ uint8_t *block = blocks + i * (uint32_t)16U;
+ uint64_t e[5U] = { 0U };
+ uint64_t u0 = load64_le(block);
+ uint64_t lo = u0;
+ uint64_t u = load64_le(block + (uint32_t)8U);
+ uint64_t hi = u;
+ uint64_t f0 = lo;
+ uint64_t f1 = hi;
+ uint64_t f010 = f0 & (uint64_t)0x3ffffffU;
+ uint64_t f110 = f0 >> (uint32_t)26U & (uint64_t)0x3ffffffU;
+ uint64_t f20 = f0 >> (uint32_t)52U | (f1 & (uint64_t)0x3fffU) << (uint32_t)12U;
+ uint64_t f30 = f1 >> (uint32_t)14U & (uint64_t)0x3ffffffU;
+ uint64_t f40 = f1 >> (uint32_t)40U;
+ uint64_t f01 = f010;
+ uint64_t f111 = f110;
+ uint64_t f2 = f20;
+ uint64_t f3 = f30;
+ uint64_t f41 = f40;
+ e[0U] = f01;
+ e[1U] = f111;
+ e[2U] = f2;
+ e[3U] = f3;
+ e[4U] = f41;
+ uint64_t b = (uint64_t)0x1000000U;
+ uint64_t mask = b;
+ uint64_t f4 = e[4U];
+ e[4U] = f4 | mask;
+ uint64_t *r1 = pre0;
+ uint64_t *r5 = pre0 + (uint32_t)5U;
+ uint64_t r0 = r1[0U];
+ uint64_t r11 = r1[1U];
+ uint64_t r2 = r1[2U];
+ uint64_t r3 = r1[3U];
+ uint64_t r4 = r1[4U];
+ uint64_t r51 = r5[1U];
+ uint64_t r52 = r5[2U];
+ uint64_t r53 = r5[3U];
+ uint64_t r54 = r5[4U];
+ uint64_t f10 = e[0U];
+ uint64_t f11 = e[1U];
+ uint64_t f12 = e[2U];
+ uint64_t f13 = e[3U];
+ uint64_t f14 = e[4U];
+ uint64_t a0 = acc0[0U];
+ uint64_t a1 = acc0[1U];
+ uint64_t a2 = acc0[2U];
+ uint64_t a3 = acc0[3U];
+ uint64_t a4 = acc0[4U];
+ uint64_t a01 = a0 + f10;
+ uint64_t a11 = a1 + f11;
+ uint64_t a21 = a2 + f12;
+ uint64_t a31 = a3 + f13;
+ uint64_t a41 = a4 + f14;
+ uint64_t a02 = r0 * a01;
+ uint64_t a12 = r11 * a01;
+ uint64_t a22 = r2 * a01;
+ uint64_t a32 = r3 * a01;
+ uint64_t a42 = r4 * a01;
+ uint64_t a03 = a02 + r54 * a11;
+ uint64_t a13 = a12 + r0 * a11;
+ uint64_t a23 = a22 + r11 * a11;
+ uint64_t a33 = a32 + r2 * a11;
+ uint64_t a43 = a42 + r3 * a11;
+ uint64_t a04 = a03 + r53 * a21;
+ uint64_t a14 = a13 + r54 * a21;
+ uint64_t a24 = a23 + r0 * a21;
+ uint64_t a34 = a33 + r11 * a21;
+ uint64_t a44 = a43 + r2 * a21;
+ uint64_t a05 = a04 + r52 * a31;
+ uint64_t a15 = a14 + r53 * a31;
+ uint64_t a25 = a24 + r54 * a31;
+ uint64_t a35 = a34 + r0 * a31;
+ uint64_t a45 = a44 + r11 * a31;
+ uint64_t a06 = a05 + r51 * a41;
+ uint64_t a16 = a15 + r52 * a41;
+ uint64_t a26 = a25 + r53 * a41;
+ uint64_t a36 = a35 + r54 * a41;
+ uint64_t a46 = a45 + r0 * a41;
+ uint64_t t0 = a06;
+ uint64_t t1 = a16;
+ uint64_t t2 = a26;
+ uint64_t t3 = a36;
+ uint64_t t4 = a46;
+ uint64_t mask26 = (uint64_t)0x3ffffffU;
+ uint64_t z0 = t0 >> (uint32_t)26U;
+ uint64_t z1 = t3 >> (uint32_t)26U;
+ uint64_t x0 = t0 & mask26;
+ uint64_t x3 = t3 & mask26;
+ uint64_t x1 = t1 + z0;
+ uint64_t x4 = t4 + z1;
+ uint64_t z01 = x1 >> (uint32_t)26U;
+ uint64_t z11 = x4 >> (uint32_t)26U;
+ uint64_t t = z11 << (uint32_t)2U;
+ uint64_t z12 = z11 + t;
+ uint64_t x11 = x1 & mask26;
+ uint64_t x41 = x4 & mask26;
+ uint64_t x2 = t2 + z01;
+ uint64_t x01 = x0 + z12;
+ uint64_t z02 = x2 >> (uint32_t)26U;
+ uint64_t z13 = x01 >> (uint32_t)26U;
+ uint64_t x21 = x2 & mask26;
+ uint64_t x02 = x01 & mask26;
+ uint64_t x31 = x3 + z02;
+ uint64_t x12 = x11 + z13;
+ uint64_t z03 = x31 >> (uint32_t)26U;
+ uint64_t x32 = x31 & mask26;
+ uint64_t x42 = x41 + z03;
+ uint64_t o0 = x02;
+ uint64_t o1 = x12;
+ uint64_t o2 = x21;
+ uint64_t o3 = x32;
+ uint64_t o4 = x42;
+ acc0[0U] = o0;
+ acc0[1U] = o1;
+ acc0[2U] = o2;
+ acc0[3U] = o3;
+ acc0[4U] = o4;
+ }
+ if (rem1 > (uint32_t)0U) {
+ uint8_t *last = blocks + nb * (uint32_t)16U;
+ uint64_t e[5U] = { 0U };
+ uint8_t tmp[16U] = { 0U };
+ memcpy(tmp, last, rem1 * sizeof(last[0U]));
+ uint64_t u0 = load64_le(tmp);
+ uint64_t lo = u0;
+ uint64_t u = load64_le(tmp + (uint32_t)8U);
+ uint64_t hi = u;
+ uint64_t f0 = lo;
+ uint64_t f1 = hi;
+ uint64_t f010 = f0 & (uint64_t)0x3ffffffU;
+ uint64_t f110 = f0 >> (uint32_t)26U & (uint64_t)0x3ffffffU;
+ uint64_t f20 = f0 >> (uint32_t)52U | (f1 & (uint64_t)0x3fffU) << (uint32_t)12U;
+ uint64_t f30 = f1 >> (uint32_t)14U & (uint64_t)0x3ffffffU;
+ uint64_t f40 = f1 >> (uint32_t)40U;
+ uint64_t f01 = f010;
+ uint64_t f111 = f110;
+ uint64_t f2 = f20;
+ uint64_t f3 = f30;
+ uint64_t f4 = f40;
+ e[0U] = f01;
+ e[1U] = f111;
+ e[2U] = f2;
+ e[3U] = f3;
+ e[4U] = f4;
+ uint64_t b = (uint64_t)1U << rem1 * (uint32_t)8U % (uint32_t)26U;
+ uint64_t mask = b;
+ uint64_t fi = e[rem1 * (uint32_t)8U / (uint32_t)26U];
+ e[rem1 * (uint32_t)8U / (uint32_t)26U] = fi | mask;
+ uint64_t *r1 = pre0;
+ uint64_t *r5 = pre0 + (uint32_t)5U;
+ uint64_t r0 = r1[0U];
+ uint64_t r11 = r1[1U];
+ uint64_t r2 = r1[2U];
+ uint64_t r3 = r1[3U];
+ uint64_t r4 = r1[4U];
+ uint64_t r51 = r5[1U];
+ uint64_t r52 = r5[2U];
+ uint64_t r53 = r5[3U];
+ uint64_t r54 = r5[4U];
+ uint64_t f10 = e[0U];
+ uint64_t f11 = e[1U];
+ uint64_t f12 = e[2U];
+ uint64_t f13 = e[3U];
+ uint64_t f14 = e[4U];
+ uint64_t a0 = acc0[0U];
+ uint64_t a1 = acc0[1U];
+ uint64_t a2 = acc0[2U];
+ uint64_t a3 = acc0[3U];
+ uint64_t a4 = acc0[4U];
+ uint64_t a01 = a0 + f10;
+ uint64_t a11 = a1 + f11;
+ uint64_t a21 = a2 + f12;
+ uint64_t a31 = a3 + f13;
+ uint64_t a41 = a4 + f14;
+ uint64_t a02 = r0 * a01;
+ uint64_t a12 = r11 * a01;
+ uint64_t a22 = r2 * a01;
+ uint64_t a32 = r3 * a01;
+ uint64_t a42 = r4 * a01;
+ uint64_t a03 = a02 + r54 * a11;
+ uint64_t a13 = a12 + r0 * a11;
+ uint64_t a23 = a22 + r11 * a11;
+ uint64_t a33 = a32 + r2 * a11;
+ uint64_t a43 = a42 + r3 * a11;
+ uint64_t a04 = a03 + r53 * a21;
+ uint64_t a14 = a13 + r54 * a21;
+ uint64_t a24 = a23 + r0 * a21;
+ uint64_t a34 = a33 + r11 * a21;
+ uint64_t a44 = a43 + r2 * a21;
+ uint64_t a05 = a04 + r52 * a31;
+ uint64_t a15 = a14 + r53 * a31;
+ uint64_t a25 = a24 + r54 * a31;
+ uint64_t a35 = a34 + r0 * a31;
+ uint64_t a45 = a44 + r11 * a31;
+ uint64_t a06 = a05 + r51 * a41;
+ uint64_t a16 = a15 + r52 * a41;
+ uint64_t a26 = a25 + r53 * a41;
+ uint64_t a36 = a35 + r54 * a41;
+ uint64_t a46 = a45 + r0 * a41;
+ uint64_t t0 = a06;
+ uint64_t t1 = a16;
+ uint64_t t2 = a26;
+ uint64_t t3 = a36;
+ uint64_t t4 = a46;
+ uint64_t mask26 = (uint64_t)0x3ffffffU;
+ uint64_t z0 = t0 >> (uint32_t)26U;
+ uint64_t z1 = t3 >> (uint32_t)26U;
+ uint64_t x0 = t0 & mask26;
+ uint64_t x3 = t3 & mask26;
+ uint64_t x1 = t1 + z0;
+ uint64_t x4 = t4 + z1;
+ uint64_t z01 = x1 >> (uint32_t)26U;
+ uint64_t z11 = x4 >> (uint32_t)26U;
+ uint64_t t = z11 << (uint32_t)2U;
+ uint64_t z12 = z11 + t;
+ uint64_t x11 = x1 & mask26;
+ uint64_t x41 = x4 & mask26;
+ uint64_t x2 = t2 + z01;
+ uint64_t x01 = x0 + z12;
+ uint64_t z02 = x2 >> (uint32_t)26U;
+ uint64_t z13 = x01 >> (uint32_t)26U;
+ uint64_t x21 = x2 & mask26;
+ uint64_t x02 = x01 & mask26;
+ uint64_t x31 = x3 + z02;
+ uint64_t x12 = x11 + z13;
+ uint64_t z03 = x31 >> (uint32_t)26U;
+ uint64_t x32 = x31 & mask26;
+ uint64_t x42 = x41 + z03;
+ uint64_t o0 = x02;
+ uint64_t o1 = x12;
+ uint64_t o2 = x21;
+ uint64_t o3 = x32;
+ uint64_t o4 = x42;
+ acc0[0U] = o0;
+ acc0[1U] = o1;
+ acc0[2U] = o2;
+ acc0[3U] = o3;
+ acc0[4U] = o4;
+ }
+ uint8_t tmp[16U] = { 0U };
+ memcpy(tmp, rem, r * sizeof(rem[0U]));
+ if (r > (uint32_t)0U) {
+ uint64_t *pre = ctx + (uint32_t)5U;
+ uint64_t *acc = ctx;
+ uint64_t e[5U] = { 0U };
+ uint64_t u0 = load64_le(tmp);
+ uint64_t lo = u0;
+ uint64_t u = load64_le(tmp + (uint32_t)8U);
+ uint64_t hi = u;
+ uint64_t f0 = lo;
+ uint64_t f1 = hi;
+ uint64_t f010 = f0 & (uint64_t)0x3ffffffU;
+ uint64_t f110 = f0 >> (uint32_t)26U & (uint64_t)0x3ffffffU;
+ uint64_t f20 = f0 >> (uint32_t)52U | (f1 & (uint64_t)0x3fffU) << (uint32_t)12U;
+ uint64_t f30 = f1 >> (uint32_t)14U & (uint64_t)0x3ffffffU;
+ uint64_t f40 = f1 >> (uint32_t)40U;
+ uint64_t f01 = f010;
+ uint64_t f111 = f110;
+ uint64_t f2 = f20;
+ uint64_t f3 = f30;
+ uint64_t f41 = f40;
+ e[0U] = f01;
+ e[1U] = f111;
+ e[2U] = f2;
+ e[3U] = f3;
+ e[4U] = f41;
+ uint64_t b = (uint64_t)0x1000000U;
+ uint64_t mask = b;
+ uint64_t f4 = e[4U];
+ e[4U] = f4 | mask;
+ uint64_t *r1 = pre;
+ uint64_t *r5 = pre + (uint32_t)5U;
+ uint64_t r0 = r1[0U];
+ uint64_t r11 = r1[1U];
+ uint64_t r2 = r1[2U];
+ uint64_t r3 = r1[3U];
+ uint64_t r4 = r1[4U];
+ uint64_t r51 = r5[1U];
+ uint64_t r52 = r5[2U];
+ uint64_t r53 = r5[3U];
+ uint64_t r54 = r5[4U];
+ uint64_t f10 = e[0U];
+ uint64_t f11 = e[1U];
+ uint64_t f12 = e[2U];
+ uint64_t f13 = e[3U];
+ uint64_t f14 = e[4U];
+ uint64_t a0 = acc[0U];
+ uint64_t a1 = acc[1U];
+ uint64_t a2 = acc[2U];
+ uint64_t a3 = acc[3U];
+ uint64_t a4 = acc[4U];
+ uint64_t a01 = a0 + f10;
+ uint64_t a11 = a1 + f11;
+ uint64_t a21 = a2 + f12;
+ uint64_t a31 = a3 + f13;
+ uint64_t a41 = a4 + f14;
+ uint64_t a02 = r0 * a01;
+ uint64_t a12 = r11 * a01;
+ uint64_t a22 = r2 * a01;
+ uint64_t a32 = r3 * a01;
+ uint64_t a42 = r4 * a01;
+ uint64_t a03 = a02 + r54 * a11;
+ uint64_t a13 = a12 + r0 * a11;
+ uint64_t a23 = a22 + r11 * a11;
+ uint64_t a33 = a32 + r2 * a11;
+ uint64_t a43 = a42 + r3 * a11;
+ uint64_t a04 = a03 + r53 * a21;
+ uint64_t a14 = a13 + r54 * a21;
+ uint64_t a24 = a23 + r0 * a21;
+ uint64_t a34 = a33 + r11 * a21;
+ uint64_t a44 = a43 + r2 * a21;
+ uint64_t a05 = a04 + r52 * a31;
+ uint64_t a15 = a14 + r53 * a31;
+ uint64_t a25 = a24 + r54 * a31;
+ uint64_t a35 = a34 + r0 * a31;
+ uint64_t a45 = a44 + r11 * a31;
+ uint64_t a06 = a05 + r51 * a41;
+ uint64_t a16 = a15 + r52 * a41;
+ uint64_t a26 = a25 + r53 * a41;
+ uint64_t a36 = a35 + r54 * a41;
+ uint64_t a46 = a45 + r0 * a41;
+ uint64_t t0 = a06;
+ uint64_t t1 = a16;
+ uint64_t t2 = a26;
+ uint64_t t3 = a36;
+ uint64_t t4 = a46;
+ uint64_t mask26 = (uint64_t)0x3ffffffU;
+ uint64_t z0 = t0 >> (uint32_t)26U;
+ uint64_t z1 = t3 >> (uint32_t)26U;
+ uint64_t x0 = t0 & mask26;
+ uint64_t x3 = t3 & mask26;
+ uint64_t x1 = t1 + z0;
+ uint64_t x4 = t4 + z1;
+ uint64_t z01 = x1 >> (uint32_t)26U;
+ uint64_t z11 = x4 >> (uint32_t)26U;
+ uint64_t t = z11 << (uint32_t)2U;
+ uint64_t z12 = z11 + t;
+ uint64_t x11 = x1 & mask26;
+ uint64_t x41 = x4 & mask26;
+ uint64_t x2 = t2 + z01;
+ uint64_t x01 = x0 + z12;
+ uint64_t z02 = x2 >> (uint32_t)26U;
+ uint64_t z13 = x01 >> (uint32_t)26U;
+ uint64_t x21 = x2 & mask26;
+ uint64_t x02 = x01 & mask26;
+ uint64_t x31 = x3 + z02;
+ uint64_t x12 = x11 + z13;
+ uint64_t z03 = x31 >> (uint32_t)26U;
+ uint64_t x32 = x31 & mask26;
+ uint64_t x42 = x41 + z03;
+ uint64_t o0 = x02;
+ uint64_t o1 = x12;
+ uint64_t o2 = x21;
+ uint64_t o3 = x32;
+ uint64_t o4 = x42;
+ acc[0U] = o0;
+ acc[1U] = o1;
+ acc[2U] = o2;
+ acc[3U] = o3;
+ acc[4U] = o4;
+ return;
+ }
+}
+
+static inline void /* One-shot MAC: absorb aad, then m, then one block holding both lengths; finish into out. */
+poly1305_do_32(
+ uint8_t *k, /* key bytes handed to Hacl_Poly1305_32_poly1305_init and _finish */
+ uint32_t aadlen, /* length passed along with aad; also encoded in the final block */
+ uint8_t *aad, /* first input absorbed */
+ uint32_t mlen, /* length passed along with m; also encoded in the final block */
+ uint8_t *m, /* second input absorbed (both callers in this file pass the ciphertext) */
+ uint8_t *out) /* tag destination, written by _finish; callers in this file pass 16-byte buffers */
+{
+ uint64_t ctx[25U] = { 0U }; /* state: words 0..4 are the accumulator, words 5.. hold key-derived limbs (see acc/pre) */
+ uint8_t block[16U] = { 0U }; /* final block: LE64(aadlen) followed by LE64(mlen) */
+ Hacl_Poly1305_32_poly1305_init(ctx, k);
+ poly1305_padded_32(ctx, aadlen, aad); /* NOTE(review): helper defined above; appears to zero-pad a trailing partial block - confirm */
+ poly1305_padded_32(ctx, mlen, m);
+ store64_le(block, (uint64_t)aadlen);
+ store64_le(block + (uint32_t)8U, (uint64_t)mlen);
+ uint64_t *pre = ctx + (uint32_t)5U; /* key-derived limbs start at word 5 */
+ uint64_t *acc = ctx; /* accumulator: five limbs of 26 bits */
+ uint64_t e[5U] = { 0U }; /* the length block, re-expressed as five limbs */
+ uint64_t u0 = load64_le(block);
+ uint64_t lo = u0; /* low 64 bits of the block */
+ uint64_t u = load64_le(block + (uint32_t)8U);
+ uint64_t hi = u; /* high 64 bits of the block */
+ uint64_t f0 = lo;
+ uint64_t f1 = hi;
+ uint64_t f010 = f0 & (uint64_t)0x3ffffffU; /* bits 0..25 */
+ uint64_t f110 = f0 >> (uint32_t)26U & (uint64_t)0x3ffffffU; /* bits 26..51 */
+ uint64_t f20 = f0 >> (uint32_t)52U | (f1 & (uint64_t)0x3fffU) << (uint32_t)12U; /* bits 52..77: 12 from lo, 14 from hi */
+ uint64_t f30 = f1 >> (uint32_t)14U & (uint64_t)0x3ffffffU; /* bits 78..103 */
+ uint64_t f40 = f1 >> (uint32_t)40U; /* bits 104..127 (24 bits) */
+ uint64_t f01 = f010;
+ uint64_t f111 = f110;
+ uint64_t f2 = f20;
+ uint64_t f3 = f30;
+ uint64_t f41 = f40;
+ e[0U] = f01;
+ e[1U] = f111;
+ e[2U] = f2;
+ e[3U] = f3;
+ e[4U] = f41;
+ uint64_t b = (uint64_t)0x1000000U; /* bit 24 of limb 4 = 2^(4*26 + 24) = 2^128, one bit above the 16-byte block */
+ uint64_t mask = b;
+ uint64_t f4 = e[4U];
+ e[4U] = f4 | mask; /* set the 2^128 marker bit */
+ uint64_t *r = pre; /* pre[0..4] */
+ uint64_t *r5 = pre + (uint32_t)5U; /* pre[5..9]; NOTE(review): presumably 5 * r[i], filled in by poly1305_init - confirm */
+ uint64_t r0 = r[0U];
+ uint64_t r1 = r[1U];
+ uint64_t r2 = r[2U];
+ uint64_t r3 = r[3U];
+ uint64_t r4 = r[4U];
+ uint64_t r51 = r5[1U]; /* r5[0] is not read by this routine */
+ uint64_t r52 = r5[2U];
+ uint64_t r53 = r5[3U];
+ uint64_t r54 = r5[4U];
+ uint64_t f10 = e[0U];
+ uint64_t f11 = e[1U];
+ uint64_t f12 = e[2U];
+ uint64_t f13 = e[3U];
+ uint64_t f14 = e[4U];
+ uint64_t a0 = acc[0U];
+ uint64_t a1 = acc[1U];
+ uint64_t a2 = acc[2U];
+ uint64_t a3 = acc[3U];
+ uint64_t a4 = acc[4U];
+ uint64_t a01 = a0 + f10; /* limb-wise acc + block, no carry handling at this point */
+ uint64_t a11 = a1 + f11;
+ uint64_t a21 = a2 + f12;
+ uint64_t a31 = a3 + f13;
+ uint64_t a41 = a4 + f14;
+ uint64_t a02 = r0 * a01; /* start of the 5x5 limb product (acc + block) * r; a0x..a4x are the five output columns */
+ uint64_t a12 = r1 * a01;
+ uint64_t a22 = r2 * a01;
+ uint64_t a32 = r3 * a01;
+ uint64_t a42 = r4 * a01;
+ uint64_t a03 = a02 + r54 * a11; /* a partial product that would land in column 5 or above uses r5x and goes to a low column instead */
+ uint64_t a13 = a12 + r0 * a11;
+ uint64_t a23 = a22 + r1 * a11;
+ uint64_t a33 = a32 + r2 * a11;
+ uint64_t a43 = a42 + r3 * a11;
+ uint64_t a04 = a03 + r53 * a21;
+ uint64_t a14 = a13 + r54 * a21;
+ uint64_t a24 = a23 + r0 * a21;
+ uint64_t a34 = a33 + r1 * a21;
+ uint64_t a44 = a43 + r2 * a21;
+ uint64_t a05 = a04 + r52 * a31;
+ uint64_t a15 = a14 + r53 * a31;
+ uint64_t a25 = a24 + r54 * a31;
+ uint64_t a35 = a34 + r0 * a31;
+ uint64_t a45 = a44 + r1 * a31;
+ uint64_t a06 = a05 + r51 * a41;
+ uint64_t a16 = a15 + r52 * a41;
+ uint64_t a26 = a25 + r53 * a41;
+ uint64_t a36 = a35 + r54 * a41;
+ uint64_t a46 = a45 + r0 * a41;
+ uint64_t t0 = a06; /* t0..t4: the five column sums before carry propagation */
+ uint64_t t1 = a16;
+ uint64_t t2 = a26;
+ uint64_t t3 = a36;
+ uint64_t t4 = a46;
+ uint64_t mask26 = (uint64_t)0x3ffffffU; /* low 26 bits */
+ uint64_t z0 = t0 >> (uint32_t)26U; /* carry out of limb 0 */
+ uint64_t z1 = t3 >> (uint32_t)26U; /* carry out of limb 3 */
+ uint64_t x0 = t0 & mask26;
+ uint64_t x3 = t3 & mask26;
+ uint64_t x1 = t1 + z0;
+ uint64_t x4 = t4 + z1;
+ uint64_t z01 = x1 >> (uint32_t)26U; /* carry out of limb 1 */
+ uint64_t z11 = x4 >> (uint32_t)26U; /* carry out of limb 4, i.e. weight 2^130 */
+ uint64_t t = z11 << (uint32_t)2U; /* 4 * z11 */
+ uint64_t z12 = z11 + t; /* 5 * z11: the top carry re-enters at limb 0 multiplied by 5 (2^130 is treated as 5) */
+ uint64_t x11 = x1 & mask26;
+ uint64_t x41 = x4 & mask26;
+ uint64_t x2 = t2 + z01;
+ uint64_t x01 = x0 + z12;
+ uint64_t z02 = x2 >> (uint32_t)26U; /* carry out of limb 2 */
+ uint64_t z13 = x01 >> (uint32_t)26U; /* second carry out of limb 0 */
+ uint64_t x21 = x2 & mask26;
+ uint64_t x02 = x01 & mask26;
+ uint64_t x31 = x3 + z02;
+ uint64_t x12 = x11 + z13; /* limb 1 absorbs the second limb-0 carry and is not masked again */
+ uint64_t z03 = x31 >> (uint32_t)26U; /* second carry out of limb 3 */
+ uint64_t x32 = x31 & mask26;
+ uint64_t x42 = x41 + z03; /* limb 4 absorbs the second limb-3 carry and is not masked again */
+ uint64_t o0 = x02;
+ uint64_t o1 = x12;
+ uint64_t o2 = x21;
+ uint64_t o3 = x32;
+ uint64_t o4 = x42;
+ acc[0U] = o0; /* store the updated accumulator back into ctx[0..4] */
+ acc[1U] = o1;
+ acc[2U] = o2;
+ acc[3U] = o3;
+ acc[4U] = o4;
+ Hacl_Poly1305_32_poly1305_finish(out, k, ctx); /* produces the tag in out from k and the final state */
+}
+
+void /* Encrypts m into cipher with ChaCha20, then writes a tag over aad and cipher into mac. */
+Hacl_Chacha20Poly1305_32_aead_encrypt(
+ uint8_t *k, /* ChaCha20 key, passed unchanged to both ChaCha20 calls */
+ uint8_t *n, /* nonce, passed unchanged to both ChaCha20 calls */
+ uint32_t aadlen, /* length associated with aad */
+ uint8_t *aad, /* additional data: covered by the tag, never encrypted */
+ uint32_t mlen, /* length of m, and of the ciphertext written to cipher */
+ uint8_t *m, /* plaintext input */
+ uint8_t *cipher, /* ciphertext output */
+ uint8_t *mac) /* tag output, written by poly1305_do_32 */
+{
+ Hacl_Chacha20_chacha20_encrypt(mlen, cipher, m, k, n, (uint32_t)1U); /* payload uses initial counter 1 */
+ uint8_t tmp[64U] = { 0U }; /* 64 zero bytes, encrypted in place below, so tmp ends up holding raw keystream */
+ Hacl_Chacha20_chacha20_encrypt((uint32_t)64U, tmp, tmp, k, n, (uint32_t)0U); /* keystream for counter 0 */
+ uint8_t *key = tmp; /* one-time MAC key = start of that keystream; NOTE(review): presumably only the first 32 bytes are consumed - confirm */
+ poly1305_do_32(key, aadlen, aad, mlen, cipher, mac); /* the tag covers the ciphertext, not the plaintext */
+}
+
+uint32_t /* Verifies mac over aad and cipher; returns 0 and fills m on a match, returns 1 and leaves m unwritten otherwise. */
+Hacl_Chacha20Poly1305_32_aead_decrypt(
+ uint8_t *k, /* ChaCha20 key, passed unchanged to both ChaCha20 calls */
+ uint8_t *n, /* nonce, passed unchanged to both ChaCha20 calls */
+ uint32_t aadlen, /* length associated with aad */
+ uint8_t *aad, /* additional data covered by the tag */
+ uint32_t mlen, /* length of cipher, and of the plaintext written to m */
+ uint8_t *m, /* plaintext output; only written when the tag matches */
+ uint8_t *cipher, /* ciphertext input */
+ uint8_t *mac) /* expected tag; 16 bytes are read */
+{
+ uint8_t computed_mac[16U] = { 0U }; /* tag recomputed locally from aad and cipher */
+ uint8_t tmp[64U] = { 0U }; /* 64 zero bytes, encrypted in place below, so tmp ends up holding raw keystream */
+ Hacl_Chacha20_chacha20_encrypt((uint32_t)64U, tmp, tmp, k, n, (uint32_t)0U); /* keystream for counter 0 */
+ uint8_t *key = tmp; /* one-time MAC key, derived exactly as in aead_encrypt */
+ poly1305_do_32(key, aadlen, aad, mlen, cipher, computed_mac);
+ uint8_t res = (uint8_t)255U; /* stays 255 only if every per-byte mask below is 255 */
+ for (uint32_t i = (uint32_t)0U; i < (uint32_t)16U; i++) { /* always walks all 16 bytes: no early exit on a mismatch */
+ uint8_t uu____0 = FStar_UInt8_eq_mask(computed_mac[i], mac[i]); /* NOTE(review): presumably 0xFF when equal and 0x00 otherwise - confirm */
+ res = uu____0 & res;
+ }
+ uint8_t z = res;
+ if (z == (uint8_t)255U) {
+ Hacl_Chacha20_chacha20_encrypt(mlen, m, cipher, k, n, (uint32_t)1U); /* decrypt: same call as encryption with counter 1, source and destination swapped */
+ return (uint32_t)0U; /* success */
+ }
+ return (uint32_t)1U; /* tag mismatch */
+}
diff --git a/security/nss/lib/freebl/verified/Hacl_Chacha20Poly1305_32.h b/security/nss/lib/freebl/verified/Hacl_Chacha20Poly1305_32.h
new file mode 100644
index 0000000000..615f3f96d7
--- /dev/null
+++ b/security/nss/lib/freebl/verified/Hacl_Chacha20Poly1305_32.h
@@ -0,0 +1,59 @@
+/* MIT License
+ *
+ * Copyright (c) 2016-2020 INRIA, CMU and Microsoft Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "kremlin/internal/types.h"
+#include "kremlin/lowstar_endianness.h"
+#include <string.h>
+#include <stdbool.h>
+
+#ifndef __Hacl_Chacha20Poly1305_32_H
+#define __Hacl_Chacha20Poly1305_32_H
+
+#include "Hacl_Chacha20.h"
+#include "Hacl_Kremlib.h"
+#include "Hacl_Poly1305_32.h"
+
+void /* Encrypts m into cipher with ChaCha20 and writes a tag over aad and cipher into mac. */
+Hacl_Chacha20Poly1305_32_aead_encrypt(
+ uint8_t *k, /* ChaCha20 key */
+ uint8_t *n, /* nonce */
+ uint32_t aadlen, /* length associated with aad */
+ uint8_t *aad, /* additional data: covered by the tag, never encrypted */
+ uint32_t mlen, /* length of m and of the produced ciphertext */
+ uint8_t *m, /* plaintext input */
+ uint8_t *cipher, /* ciphertext output */
+ uint8_t *mac); /* tag output */
+
+uint32_t /* Returns 0 and fills m when mac matches the tag recomputed over aad and cipher; returns 1 and leaves m unwritten otherwise. */
+Hacl_Chacha20Poly1305_32_aead_decrypt(
+ uint8_t *k, /* ChaCha20 key */
+ uint8_t *n, /* nonce */
+ uint32_t aadlen, /* length associated with aad */
+ uint8_t *aad, /* additional data covered by the tag */
+ uint32_t mlen, /* length of cipher and of the recovered plaintext */
+ uint8_t *m, /* plaintext output; only written on success */
+ uint8_t *cipher, /* ciphertext input */
+ uint8_t *mac); /* expected tag; 16 bytes are compared */
+
+#define __Hacl_Chacha20Poly1305_32_H_DEFINED
+#endif
diff --git a/security/nss/lib/freebl/verified/Hacl_Chacha20_Vec128.c b/security/nss/lib/freebl/verified/Hacl_Chacha20_Vec128.c
new file mode 100644
index 0000000000..cf80c431de
--- /dev/null
+++ b/security/nss/lib/freebl/verified/Hacl_Chacha20_Vec128.c
@@ -0,0 +1,744 @@
+/* MIT License
+ *
+ * Copyright (c) 2016-2020 INRIA, CMU and Microsoft Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "Hacl_Chacha20_Vec128.h"
+
+static inline void /* One ChaCha double round, applied in place to the 16 state vectors: four column quarter-rounds, then four diagonal ones. */
+double_round_128(Lib_IntVector_Intrinsics_vec128 *st) /* st: 16 vectors, indexed like the 16 state words */
+{
+ st[0U] = Lib_IntVector_Intrinsics_vec128_add32(st[0U], st[4U]); /* column quarter-round on words (0, 4, 8, 12) */
+ Lib_IntVector_Intrinsics_vec128 std = Lib_IntVector_Intrinsics_vec128_xor(st[12U], st[0U]);
+ st[12U] = Lib_IntVector_Intrinsics_vec128_rotate_left32(std, (uint32_t)16U); /* each quarter-round rotates by 16, 12, 8, 7 in that order */
+ st[8U] = Lib_IntVector_Intrinsics_vec128_add32(st[8U], st[12U]);
+ Lib_IntVector_Intrinsics_vec128 std0 = Lib_IntVector_Intrinsics_vec128_xor(st[4U], st[8U]);
+ st[4U] = Lib_IntVector_Intrinsics_vec128_rotate_left32(std0, (uint32_t)12U);
+ st[0U] = Lib_IntVector_Intrinsics_vec128_add32(st[0U], st[4U]);
+ Lib_IntVector_Intrinsics_vec128 std1 = Lib_IntVector_Intrinsics_vec128_xor(st[12U], st[0U]);
+ st[12U] = Lib_IntVector_Intrinsics_vec128_rotate_left32(std1, (uint32_t)8U);
+ st[8U] = Lib_IntVector_Intrinsics_vec128_add32(st[8U], st[12U]);
+ Lib_IntVector_Intrinsics_vec128 std2 = Lib_IntVector_Intrinsics_vec128_xor(st[4U], st[8U]);
+ st[4U] = Lib_IntVector_Intrinsics_vec128_rotate_left32(std2, (uint32_t)7U);
+ st[1U] = Lib_IntVector_Intrinsics_vec128_add32(st[1U], st[5U]); /* column quarter-round on words (1, 5, 9, 13) */
+ Lib_IntVector_Intrinsics_vec128 std3 = Lib_IntVector_Intrinsics_vec128_xor(st[13U], st[1U]);
+ st[13U] = Lib_IntVector_Intrinsics_vec128_rotate_left32(std3, (uint32_t)16U);
+ st[9U] = Lib_IntVector_Intrinsics_vec128_add32(st[9U], st[13U]);
+ Lib_IntVector_Intrinsics_vec128 std4 = Lib_IntVector_Intrinsics_vec128_xor(st[5U], st[9U]);
+ st[5U] = Lib_IntVector_Intrinsics_vec128_rotate_left32(std4, (uint32_t)12U);
+ st[1U] = Lib_IntVector_Intrinsics_vec128_add32(st[1U], st[5U]);
+ Lib_IntVector_Intrinsics_vec128 std5 = Lib_IntVector_Intrinsics_vec128_xor(st[13U], st[1U]);
+ st[13U] = Lib_IntVector_Intrinsics_vec128_rotate_left32(std5, (uint32_t)8U);
+ st[9U] = Lib_IntVector_Intrinsics_vec128_add32(st[9U], st[13U]);
+ Lib_IntVector_Intrinsics_vec128 std6 = Lib_IntVector_Intrinsics_vec128_xor(st[5U], st[9U]);
+ st[5U] = Lib_IntVector_Intrinsics_vec128_rotate_left32(std6, (uint32_t)7U);
+ st[2U] = Lib_IntVector_Intrinsics_vec128_add32(st[2U], st[6U]); /* column quarter-round on words (2, 6, 10, 14) */
+ Lib_IntVector_Intrinsics_vec128 std7 = Lib_IntVector_Intrinsics_vec128_xor(st[14U], st[2U]);
+ st[14U] = Lib_IntVector_Intrinsics_vec128_rotate_left32(std7, (uint32_t)16U);
+ st[10U] = Lib_IntVector_Intrinsics_vec128_add32(st[10U], st[14U]);
+ Lib_IntVector_Intrinsics_vec128 std8 = Lib_IntVector_Intrinsics_vec128_xor(st[6U], st[10U]);
+ st[6U] = Lib_IntVector_Intrinsics_vec128_rotate_left32(std8, (uint32_t)12U);
+ st[2U] = Lib_IntVector_Intrinsics_vec128_add32(st[2U], st[6U]);
+ Lib_IntVector_Intrinsics_vec128 std9 = Lib_IntVector_Intrinsics_vec128_xor(st[14U], st[2U]);
+ st[14U] = Lib_IntVector_Intrinsics_vec128_rotate_left32(std9, (uint32_t)8U);
+ st[10U] = Lib_IntVector_Intrinsics_vec128_add32(st[10U], st[14U]);
+ Lib_IntVector_Intrinsics_vec128 std10 = Lib_IntVector_Intrinsics_vec128_xor(st[6U], st[10U]);
+ st[6U] = Lib_IntVector_Intrinsics_vec128_rotate_left32(std10, (uint32_t)7U);
+ st[3U] = Lib_IntVector_Intrinsics_vec128_add32(st[3U], st[7U]); /* column quarter-round on words (3, 7, 11, 15) */
+ Lib_IntVector_Intrinsics_vec128 std11 = Lib_IntVector_Intrinsics_vec128_xor(st[15U], st[3U]);
+ st[15U] = Lib_IntVector_Intrinsics_vec128_rotate_left32(std11, (uint32_t)16U);
+ st[11U] = Lib_IntVector_Intrinsics_vec128_add32(st[11U], st[15U]);
+ Lib_IntVector_Intrinsics_vec128 std12 = Lib_IntVector_Intrinsics_vec128_xor(st[7U], st[11U]);
+ st[7U] = Lib_IntVector_Intrinsics_vec128_rotate_left32(std12, (uint32_t)12U);
+ st[3U] = Lib_IntVector_Intrinsics_vec128_add32(st[3U], st[7U]);
+ Lib_IntVector_Intrinsics_vec128 std13 = Lib_IntVector_Intrinsics_vec128_xor(st[15U], st[3U]);
+ st[15U] = Lib_IntVector_Intrinsics_vec128_rotate_left32(std13, (uint32_t)8U);
+ st[11U] = Lib_IntVector_Intrinsics_vec128_add32(st[11U], st[15U]);
+ Lib_IntVector_Intrinsics_vec128 std14 = Lib_IntVector_Intrinsics_vec128_xor(st[7U], st[11U]);
+ st[7U] = Lib_IntVector_Intrinsics_vec128_rotate_left32(std14, (uint32_t)7U);
+ st[0U] = Lib_IntVector_Intrinsics_vec128_add32(st[0U], st[5U]); /* diagonal quarter-round on words (0, 5, 10, 15) */
+ Lib_IntVector_Intrinsics_vec128 std15 = Lib_IntVector_Intrinsics_vec128_xor(st[15U], st[0U]);
+ st[15U] = Lib_IntVector_Intrinsics_vec128_rotate_left32(std15, (uint32_t)16U);
+ st[10U] = Lib_IntVector_Intrinsics_vec128_add32(st[10U], st[15U]);
+ Lib_IntVector_Intrinsics_vec128 std16 = Lib_IntVector_Intrinsics_vec128_xor(st[5U], st[10U]);
+ st[5U] = Lib_IntVector_Intrinsics_vec128_rotate_left32(std16, (uint32_t)12U);
+ st[0U] = Lib_IntVector_Intrinsics_vec128_add32(st[0U], st[5U]);
+ Lib_IntVector_Intrinsics_vec128 std17 = Lib_IntVector_Intrinsics_vec128_xor(st[15U], st[0U]);
+ st[15U] = Lib_IntVector_Intrinsics_vec128_rotate_left32(std17, (uint32_t)8U);
+ st[10U] = Lib_IntVector_Intrinsics_vec128_add32(st[10U], st[15U]);
+ Lib_IntVector_Intrinsics_vec128 std18 = Lib_IntVector_Intrinsics_vec128_xor(st[5U], st[10U]);
+ st[5U] = Lib_IntVector_Intrinsics_vec128_rotate_left32(std18, (uint32_t)7U);
+ st[1U] = Lib_IntVector_Intrinsics_vec128_add32(st[1U], st[6U]); /* diagonal quarter-round on words (1, 6, 11, 12) */
+ Lib_IntVector_Intrinsics_vec128 std19 = Lib_IntVector_Intrinsics_vec128_xor(st[12U], st[1U]);
+ st[12U] = Lib_IntVector_Intrinsics_vec128_rotate_left32(std19, (uint32_t)16U);
+ st[11U] = Lib_IntVector_Intrinsics_vec128_add32(st[11U], st[12U]);
+ Lib_IntVector_Intrinsics_vec128 std20 = Lib_IntVector_Intrinsics_vec128_xor(st[6U], st[11U]);
+ st[6U] = Lib_IntVector_Intrinsics_vec128_rotate_left32(std20, (uint32_t)12U);
+ st[1U] = Lib_IntVector_Intrinsics_vec128_add32(st[1U], st[6U]);
+ Lib_IntVector_Intrinsics_vec128 std21 = Lib_IntVector_Intrinsics_vec128_xor(st[12U], st[1U]);
+ st[12U] = Lib_IntVector_Intrinsics_vec128_rotate_left32(std21, (uint32_t)8U);
+ st[11U] = Lib_IntVector_Intrinsics_vec128_add32(st[11U], st[12U]);
+ Lib_IntVector_Intrinsics_vec128 std22 = Lib_IntVector_Intrinsics_vec128_xor(st[6U], st[11U]);
+ st[6U] = Lib_IntVector_Intrinsics_vec128_rotate_left32(std22, (uint32_t)7U);
+ st[2U] = Lib_IntVector_Intrinsics_vec128_add32(st[2U], st[7U]); /* diagonal quarter-round on words (2, 7, 8, 13) */
+ Lib_IntVector_Intrinsics_vec128 std23 = Lib_IntVector_Intrinsics_vec128_xor(st[13U], st[2U]);
+ st[13U] = Lib_IntVector_Intrinsics_vec128_rotate_left32(std23, (uint32_t)16U);
+ st[8U] = Lib_IntVector_Intrinsics_vec128_add32(st[8U], st[13U]);
+ Lib_IntVector_Intrinsics_vec128 std24 = Lib_IntVector_Intrinsics_vec128_xor(st[7U], st[8U]);
+ st[7U] = Lib_IntVector_Intrinsics_vec128_rotate_left32(std24, (uint32_t)12U);
+ st[2U] = Lib_IntVector_Intrinsics_vec128_add32(st[2U], st[7U]);
+ Lib_IntVector_Intrinsics_vec128 std25 = Lib_IntVector_Intrinsics_vec128_xor(st[13U], st[2U]);
+ st[13U] = Lib_IntVector_Intrinsics_vec128_rotate_left32(std25, (uint32_t)8U);
+ st[8U] = Lib_IntVector_Intrinsics_vec128_add32(st[8U], st[13U]);
+ Lib_IntVector_Intrinsics_vec128 std26 = Lib_IntVector_Intrinsics_vec128_xor(st[7U], st[8U]);
+ st[7U] = Lib_IntVector_Intrinsics_vec128_rotate_left32(std26, (uint32_t)7U);
+ st[3U] = Lib_IntVector_Intrinsics_vec128_add32(st[3U], st[4U]); /* diagonal quarter-round on words (3, 4, 9, 14) */
+ Lib_IntVector_Intrinsics_vec128 std27 = Lib_IntVector_Intrinsics_vec128_xor(st[14U], st[3U]);
+ st[14U] = Lib_IntVector_Intrinsics_vec128_rotate_left32(std27, (uint32_t)16U);
+ st[9U] = Lib_IntVector_Intrinsics_vec128_add32(st[9U], st[14U]);
+ Lib_IntVector_Intrinsics_vec128 std28 = Lib_IntVector_Intrinsics_vec128_xor(st[4U], st[9U]);
+ st[4U] = Lib_IntVector_Intrinsics_vec128_rotate_left32(std28, (uint32_t)12U);
+ st[3U] = Lib_IntVector_Intrinsics_vec128_add32(st[3U], st[4U]);
+ Lib_IntVector_Intrinsics_vec128 std29 = Lib_IntVector_Intrinsics_vec128_xor(st[14U], st[3U]);
+ st[14U] = Lib_IntVector_Intrinsics_vec128_rotate_left32(std29, (uint32_t)8U);
+ st[9U] = Lib_IntVector_Intrinsics_vec128_add32(st[9U], st[14U]);
+ Lib_IntVector_Intrinsics_vec128 std30 = Lib_IntVector_Intrinsics_vec128_xor(st[4U], st[9U]);
+ st[4U] = Lib_IntVector_Intrinsics_vec128_rotate_left32(std30, (uint32_t)7U);
+}
+
+static inline void /* Fills k with keystream state for batch number ctr: copy ctx, offset the counter, run 10 double rounds, add the input back. */
+chacha20_core_128(
+ Lib_IntVector_Intrinsics_vec128 *k, /* output: 16 vectors, fully overwritten */
+ Lib_IntVector_Intrinsics_vec128 *ctx, /* input state: 16 vectors, only read */
+ uint32_t ctr) /* batch index; each batch advances the block counter by 4 */
+{
+ memcpy(k, ctx, (uint32_t)16U * sizeof(ctx[0U])); /* work on a copy so ctx stays intact for the feed-forward below */
+ uint32_t ctr_u32 = (uint32_t)4U * ctr; /* counter offset of this batch */
+ Lib_IntVector_Intrinsics_vec128 cv = Lib_IntVector_Intrinsics_vec128_load32(ctr_u32); /* NOTE(review): presumably the same value in every lane - confirm */
+ k[12U] = Lib_IntVector_Intrinsics_vec128_add32(k[12U], cv); /* word 12 is the counter word (see chacha20_init_128) */
+ double_round_128(k); /* 10 double rounds */
+ double_round_128(k);
+ double_round_128(k);
+ double_round_128(k);
+ double_round_128(k);
+ double_round_128(k);
+ double_round_128(k);
+ double_round_128(k);
+ double_round_128(k);
+ double_round_128(k);
+ for (uint32_t i = (uint32_t)0U; i < (uint32_t)16U; i++) { /* feed-forward: add the original ctx words to the round output */
+ Lib_IntVector_Intrinsics_vec128 *os = k;
+ Lib_IntVector_Intrinsics_vec128 x = Lib_IntVector_Intrinsics_vec128_add32(k[i], ctx[i]);
+ os[i] = x;
+ }
+ k[12U] = Lib_IntVector_Intrinsics_vec128_add32(k[12U], cv); /* ctx[12] lacks the batch offset, so add cv again to match the word the rounds actually started from */
+}
+
+static inline void /* Builds the 16-word initial state from constants, k, ctr and n, and loads each word into one vector of ctx. */
+chacha20_init_128(Lib_IntVector_Intrinsics_vec128 *ctx, uint8_t *k, uint8_t *n, uint32_t ctr) /* reads 32 bytes from k and 12 bytes from n; writes ctx[0..15] */
+{
+ uint32_t ctx1[16U] = { 0U }; /* scalar copy of the state, filled word by word below */
+ uint32_t *uu____0 = ctx1;
+ for (uint32_t i = (uint32_t)0U; i < (uint32_t)4U; i++) { /* words 0..3: constants table (defined elsewhere) */
+ uint32_t *os = uu____0;
+ uint32_t x = Hacl_Impl_Chacha20_Vec_chacha20_constants[i];
+ os[i] = x;
+ }
+ uint32_t *uu____1 = ctx1 + (uint32_t)4U;
+ for (uint32_t i = (uint32_t)0U; i < (uint32_t)8U; i++) { /* words 4..11: key, as eight little-endian 32-bit words */
+ uint32_t *os = uu____1;
+ uint8_t *bj = k + i * (uint32_t)4U;
+ uint32_t u = load32_le(bj);
+ uint32_t r = u;
+ uint32_t x = r;
+ os[i] = x;
+ }
+ ctx1[12U] = ctr; /* word 12: initial block counter */
+ uint32_t *uu____2 = ctx1 + (uint32_t)13U;
+ for (uint32_t i = (uint32_t)0U; i < (uint32_t)3U; i++) { /* words 13..15: nonce, as three little-endian 32-bit words */
+ uint32_t *os = uu____2;
+ uint8_t *bj = n + i * (uint32_t)4U;
+ uint32_t u = load32_le(bj);
+ uint32_t r = u;
+ uint32_t x = r;
+ os[i] = x;
+ }
+ for (uint32_t i = (uint32_t)0U; i < (uint32_t)16U; i++) { /* one vector per state word */
+ Lib_IntVector_Intrinsics_vec128 *os = ctx;
+ uint32_t x = ctx1[i];
+ Lib_IntVector_Intrinsics_vec128 x0 = Lib_IntVector_Intrinsics_vec128_load32(x); /* NOTE(review): presumably replicates x into every 32-bit lane - confirm */
+ os[i] = x0;
+ }
+ Lib_IntVector_Intrinsics_vec128
+ ctr1 =
+ Lib_IntVector_Intrinsics_vec128_load32s((uint32_t)0U, /* NOTE(review): presumably one value per lane, so the lanes get counters ctr+0 .. ctr+3 - confirm */
+ (uint32_t)1U,
+ (uint32_t)2U,
+ (uint32_t)3U);
+ Lib_IntVector_Intrinsics_vec128 c12 = ctx[12U];
+ ctx[12U] = Lib_IntVector_Intrinsics_vec128_add32(c12, ctr1); /* apply the 0, 1, 2, 3 offsets to the counter word */
+}
+
+void
+Hacl_Chacha20_Vec128_chacha20_encrypt_128(
+ uint32_t len,
+ uint8_t *out,
+ uint8_t *text,
+ uint8_t *key,
+ uint8_t *n,
+ uint32_t ctr)
+{
+ Lib_IntVector_Intrinsics_vec128 ctx[16U];
+ for (uint32_t _i = 0U; _i < (uint32_t)16U; ++_i)
+ ctx[_i] = Lib_IntVector_Intrinsics_vec128_zero;
+ chacha20_init_128(ctx, key, n, ctr);
+ uint32_t rem = len % (uint32_t)256U;
+ uint32_t nb = len / (uint32_t)256U;
+ uint32_t rem1 = len % (uint32_t)256U;
+ for (uint32_t i = (uint32_t)0U; i < nb; i++) {
+ uint8_t *uu____0 = out + i * (uint32_t)256U;
+ uint8_t *uu____1 = text + i * (uint32_t)256U;
+ Lib_IntVector_Intrinsics_vec128 k[16U];
+ for (uint32_t _i = 0U; _i < (uint32_t)16U; ++_i)
+ k[_i] = Lib_IntVector_Intrinsics_vec128_zero;
+ chacha20_core_128(k, ctx, i);
+ Lib_IntVector_Intrinsics_vec128 v00 = k[0U];
+ Lib_IntVector_Intrinsics_vec128 v16 = k[1U];
+ Lib_IntVector_Intrinsics_vec128 v20 = k[2U];
+ Lib_IntVector_Intrinsics_vec128 v30 = k[3U];
+ Lib_IntVector_Intrinsics_vec128
+ v0_ = Lib_IntVector_Intrinsics_vec128_interleave_low32(v00, v16);
+ Lib_IntVector_Intrinsics_vec128
+ v1_ = Lib_IntVector_Intrinsics_vec128_interleave_high32(v00, v16);
+ Lib_IntVector_Intrinsics_vec128
+ v2_ = Lib_IntVector_Intrinsics_vec128_interleave_low32(v20, v30);
+ Lib_IntVector_Intrinsics_vec128
+ v3_ = Lib_IntVector_Intrinsics_vec128_interleave_high32(v20, v30);
+ Lib_IntVector_Intrinsics_vec128
+ v0__ = Lib_IntVector_Intrinsics_vec128_interleave_low64(v0_, v2_);
+ Lib_IntVector_Intrinsics_vec128
+ v1__ = Lib_IntVector_Intrinsics_vec128_interleave_high64(v0_, v2_);
+ Lib_IntVector_Intrinsics_vec128
+ v2__ = Lib_IntVector_Intrinsics_vec128_interleave_low64(v1_, v3_);
+ Lib_IntVector_Intrinsics_vec128
+ v3__ = Lib_IntVector_Intrinsics_vec128_interleave_high64(v1_, v3_);
+ Lib_IntVector_Intrinsics_vec128 v0 = v0__;
+ Lib_IntVector_Intrinsics_vec128 v1 = v1__;
+ Lib_IntVector_Intrinsics_vec128 v2 = v2__;
+ Lib_IntVector_Intrinsics_vec128 v3 = v3__;
+ Lib_IntVector_Intrinsics_vec128 v010 = k[4U];
+ Lib_IntVector_Intrinsics_vec128 v110 = k[5U];
+ Lib_IntVector_Intrinsics_vec128 v210 = k[6U];
+ Lib_IntVector_Intrinsics_vec128 v310 = k[7U];
+ Lib_IntVector_Intrinsics_vec128
+ v0_0 = Lib_IntVector_Intrinsics_vec128_interleave_low32(v010, v110);
+ Lib_IntVector_Intrinsics_vec128
+ v1_0 = Lib_IntVector_Intrinsics_vec128_interleave_high32(v010, v110);
+ Lib_IntVector_Intrinsics_vec128
+ v2_0 = Lib_IntVector_Intrinsics_vec128_interleave_low32(v210, v310);
+ Lib_IntVector_Intrinsics_vec128
+ v3_0 = Lib_IntVector_Intrinsics_vec128_interleave_high32(v210, v310);
+ Lib_IntVector_Intrinsics_vec128
+ v0__0 = Lib_IntVector_Intrinsics_vec128_interleave_low64(v0_0, v2_0);
+ Lib_IntVector_Intrinsics_vec128
+ v1__0 = Lib_IntVector_Intrinsics_vec128_interleave_high64(v0_0, v2_0);
+ Lib_IntVector_Intrinsics_vec128
+ v2__0 = Lib_IntVector_Intrinsics_vec128_interleave_low64(v1_0, v3_0);
+ Lib_IntVector_Intrinsics_vec128
+ v3__0 = Lib_IntVector_Intrinsics_vec128_interleave_high64(v1_0, v3_0);
+ Lib_IntVector_Intrinsics_vec128 v4 = v0__0;
+ Lib_IntVector_Intrinsics_vec128 v5 = v1__0;
+ Lib_IntVector_Intrinsics_vec128 v6 = v2__0;
+ Lib_IntVector_Intrinsics_vec128 v7 = v3__0;
+ Lib_IntVector_Intrinsics_vec128 v011 = k[8U];
+ Lib_IntVector_Intrinsics_vec128 v111 = k[9U];
+ Lib_IntVector_Intrinsics_vec128 v211 = k[10U];
+ Lib_IntVector_Intrinsics_vec128 v311 = k[11U];
+ Lib_IntVector_Intrinsics_vec128
+ v0_1 = Lib_IntVector_Intrinsics_vec128_interleave_low32(v011, v111);
+ Lib_IntVector_Intrinsics_vec128
+ v1_1 = Lib_IntVector_Intrinsics_vec128_interleave_high32(v011, v111);
+ Lib_IntVector_Intrinsics_vec128
+ v2_1 = Lib_IntVector_Intrinsics_vec128_interleave_low32(v211, v311);
+ Lib_IntVector_Intrinsics_vec128
+ v3_1 = Lib_IntVector_Intrinsics_vec128_interleave_high32(v211, v311);
+ Lib_IntVector_Intrinsics_vec128
+ v0__1 = Lib_IntVector_Intrinsics_vec128_interleave_low64(v0_1, v2_1);
+ Lib_IntVector_Intrinsics_vec128
+ v1__1 = Lib_IntVector_Intrinsics_vec128_interleave_high64(v0_1, v2_1);
+ Lib_IntVector_Intrinsics_vec128
+ v2__1 = Lib_IntVector_Intrinsics_vec128_interleave_low64(v1_1, v3_1);
+ Lib_IntVector_Intrinsics_vec128
+ v3__1 = Lib_IntVector_Intrinsics_vec128_interleave_high64(v1_1, v3_1);
+ Lib_IntVector_Intrinsics_vec128 v8 = v0__1;
+ Lib_IntVector_Intrinsics_vec128 v9 = v1__1;
+ Lib_IntVector_Intrinsics_vec128 v10 = v2__1;
+ Lib_IntVector_Intrinsics_vec128 v11 = v3__1;
+ Lib_IntVector_Intrinsics_vec128 v01 = k[12U];
+ Lib_IntVector_Intrinsics_vec128 v120 = k[13U];
+ Lib_IntVector_Intrinsics_vec128 v21 = k[14U];
+ Lib_IntVector_Intrinsics_vec128 v31 = k[15U];
+ Lib_IntVector_Intrinsics_vec128
+ v0_2 = Lib_IntVector_Intrinsics_vec128_interleave_low32(v01, v120);
+ Lib_IntVector_Intrinsics_vec128
+ v1_2 = Lib_IntVector_Intrinsics_vec128_interleave_high32(v01, v120);
+ Lib_IntVector_Intrinsics_vec128
+ v2_2 = Lib_IntVector_Intrinsics_vec128_interleave_low32(v21, v31);
+ Lib_IntVector_Intrinsics_vec128
+ v3_2 = Lib_IntVector_Intrinsics_vec128_interleave_high32(v21, v31);
+ Lib_IntVector_Intrinsics_vec128
+ v0__2 = Lib_IntVector_Intrinsics_vec128_interleave_low64(v0_2, v2_2);
+ Lib_IntVector_Intrinsics_vec128
+ v1__2 = Lib_IntVector_Intrinsics_vec128_interleave_high64(v0_2, v2_2);
+ Lib_IntVector_Intrinsics_vec128
+ v2__2 = Lib_IntVector_Intrinsics_vec128_interleave_low64(v1_2, v3_2);
+ Lib_IntVector_Intrinsics_vec128
+ v3__2 = Lib_IntVector_Intrinsics_vec128_interleave_high64(v1_2, v3_2);
+ Lib_IntVector_Intrinsics_vec128 v12 = v0__2;
+ Lib_IntVector_Intrinsics_vec128 v13 = v1__2;
+ Lib_IntVector_Intrinsics_vec128 v14 = v2__2;
+ Lib_IntVector_Intrinsics_vec128 v15 = v3__2;
+ k[0U] = v0;
+ k[1U] = v4;
+ k[2U] = v8;
+ k[3U] = v12;
+ k[4U] = v1;
+ k[5U] = v5;
+ k[6U] = v9;
+ k[7U] = v13;
+ k[8U] = v2;
+ k[9U] = v6;
+ k[10U] = v10;
+ k[11U] = v14;
+ k[12U] = v3;
+ k[13U] = v7;
+ k[14U] = v11;
+ k[15U] = v15;
+ for (uint32_t i0 = (uint32_t)0U; i0 < (uint32_t)16U; i0++) {
+ Lib_IntVector_Intrinsics_vec128
+ x = Lib_IntVector_Intrinsics_vec128_load_le(uu____1 + i0 * (uint32_t)16U);
+ Lib_IntVector_Intrinsics_vec128 y = Lib_IntVector_Intrinsics_vec128_xor(x, k[i0]);
+ Lib_IntVector_Intrinsics_vec128_store_le(uu____0 + i0 * (uint32_t)16U, y);
+ }
+ }
+ if (rem1 > (uint32_t)0U) {
+ uint8_t *uu____2 = out + nb * (uint32_t)256U;
+ uint8_t *uu____3 = text + nb * (uint32_t)256U;
+ uint8_t plain[256U] = { 0U };
+ memcpy(plain, uu____3, rem * sizeof(uu____3[0U]));
+ Lib_IntVector_Intrinsics_vec128 k[16U];
+ for (uint32_t _i = 0U; _i < (uint32_t)16U; ++_i)
+ k[_i] = Lib_IntVector_Intrinsics_vec128_zero;
+ chacha20_core_128(k, ctx, nb);
+ Lib_IntVector_Intrinsics_vec128 v00 = k[0U];
+ Lib_IntVector_Intrinsics_vec128 v16 = k[1U];
+ Lib_IntVector_Intrinsics_vec128 v20 = k[2U];
+ Lib_IntVector_Intrinsics_vec128 v30 = k[3U];
+ Lib_IntVector_Intrinsics_vec128
+ v0_ = Lib_IntVector_Intrinsics_vec128_interleave_low32(v00, v16);
+ Lib_IntVector_Intrinsics_vec128
+ v1_ = Lib_IntVector_Intrinsics_vec128_interleave_high32(v00, v16);
+ Lib_IntVector_Intrinsics_vec128
+ v2_ = Lib_IntVector_Intrinsics_vec128_interleave_low32(v20, v30);
+ Lib_IntVector_Intrinsics_vec128
+ v3_ = Lib_IntVector_Intrinsics_vec128_interleave_high32(v20, v30);
+ Lib_IntVector_Intrinsics_vec128
+ v0__ = Lib_IntVector_Intrinsics_vec128_interleave_low64(v0_, v2_);
+ Lib_IntVector_Intrinsics_vec128
+ v1__ = Lib_IntVector_Intrinsics_vec128_interleave_high64(v0_, v2_);
+ Lib_IntVector_Intrinsics_vec128
+ v2__ = Lib_IntVector_Intrinsics_vec128_interleave_low64(v1_, v3_);
+ Lib_IntVector_Intrinsics_vec128
+ v3__ = Lib_IntVector_Intrinsics_vec128_interleave_high64(v1_, v3_);
+ Lib_IntVector_Intrinsics_vec128 v0 = v0__;
+ Lib_IntVector_Intrinsics_vec128 v1 = v1__;
+ Lib_IntVector_Intrinsics_vec128 v2 = v2__;
+ Lib_IntVector_Intrinsics_vec128 v3 = v3__;
+ Lib_IntVector_Intrinsics_vec128 v010 = k[4U];
+ Lib_IntVector_Intrinsics_vec128 v110 = k[5U];
+ Lib_IntVector_Intrinsics_vec128 v210 = k[6U];
+ Lib_IntVector_Intrinsics_vec128 v310 = k[7U];
+ Lib_IntVector_Intrinsics_vec128
+ v0_0 = Lib_IntVector_Intrinsics_vec128_interleave_low32(v010, v110);
+ Lib_IntVector_Intrinsics_vec128
+ v1_0 = Lib_IntVector_Intrinsics_vec128_interleave_high32(v010, v110);
+ Lib_IntVector_Intrinsics_vec128
+ v2_0 = Lib_IntVector_Intrinsics_vec128_interleave_low32(v210, v310);
+ Lib_IntVector_Intrinsics_vec128
+ v3_0 = Lib_IntVector_Intrinsics_vec128_interleave_high32(v210, v310);
+ Lib_IntVector_Intrinsics_vec128
+ v0__0 = Lib_IntVector_Intrinsics_vec128_interleave_low64(v0_0, v2_0);
+ Lib_IntVector_Intrinsics_vec128
+ v1__0 = Lib_IntVector_Intrinsics_vec128_interleave_high64(v0_0, v2_0);
+ Lib_IntVector_Intrinsics_vec128
+ v2__0 = Lib_IntVector_Intrinsics_vec128_interleave_low64(v1_0, v3_0);
+ Lib_IntVector_Intrinsics_vec128
+ v3__0 = Lib_IntVector_Intrinsics_vec128_interleave_high64(v1_0, v3_0);
+ Lib_IntVector_Intrinsics_vec128 v4 = v0__0;
+ Lib_IntVector_Intrinsics_vec128 v5 = v1__0;
+ Lib_IntVector_Intrinsics_vec128 v6 = v2__0;
+ Lib_IntVector_Intrinsics_vec128 v7 = v3__0;
+ Lib_IntVector_Intrinsics_vec128 v011 = k[8U];
+ Lib_IntVector_Intrinsics_vec128 v111 = k[9U];
+ Lib_IntVector_Intrinsics_vec128 v211 = k[10U];
+ Lib_IntVector_Intrinsics_vec128 v311 = k[11U];
+ Lib_IntVector_Intrinsics_vec128
+ v0_1 = Lib_IntVector_Intrinsics_vec128_interleave_low32(v011, v111);
+ Lib_IntVector_Intrinsics_vec128
+ v1_1 = Lib_IntVector_Intrinsics_vec128_interleave_high32(v011, v111);
+ Lib_IntVector_Intrinsics_vec128
+ v2_1 = Lib_IntVector_Intrinsics_vec128_interleave_low32(v211, v311);
+ Lib_IntVector_Intrinsics_vec128
+ v3_1 = Lib_IntVector_Intrinsics_vec128_interleave_high32(v211, v311);
+ Lib_IntVector_Intrinsics_vec128
+ v0__1 = Lib_IntVector_Intrinsics_vec128_interleave_low64(v0_1, v2_1);
+ Lib_IntVector_Intrinsics_vec128
+ v1__1 = Lib_IntVector_Intrinsics_vec128_interleave_high64(v0_1, v2_1);
+ Lib_IntVector_Intrinsics_vec128
+ v2__1 = Lib_IntVector_Intrinsics_vec128_interleave_low64(v1_1, v3_1);
+ Lib_IntVector_Intrinsics_vec128
+ v3__1 = Lib_IntVector_Intrinsics_vec128_interleave_high64(v1_1, v3_1);
+ Lib_IntVector_Intrinsics_vec128 v8 = v0__1;
+ Lib_IntVector_Intrinsics_vec128 v9 = v1__1;
+ Lib_IntVector_Intrinsics_vec128 v10 = v2__1;
+ Lib_IntVector_Intrinsics_vec128 v11 = v3__1;
+ Lib_IntVector_Intrinsics_vec128 v01 = k[12U];
+ Lib_IntVector_Intrinsics_vec128 v120 = k[13U];
+ Lib_IntVector_Intrinsics_vec128 v21 = k[14U];
+ Lib_IntVector_Intrinsics_vec128 v31 = k[15U];
+ Lib_IntVector_Intrinsics_vec128
+ v0_2 = Lib_IntVector_Intrinsics_vec128_interleave_low32(v01, v120);
+ Lib_IntVector_Intrinsics_vec128
+ v1_2 = Lib_IntVector_Intrinsics_vec128_interleave_high32(v01, v120);
+ Lib_IntVector_Intrinsics_vec128
+ v2_2 = Lib_IntVector_Intrinsics_vec128_interleave_low32(v21, v31);
+ Lib_IntVector_Intrinsics_vec128
+ v3_2 = Lib_IntVector_Intrinsics_vec128_interleave_high32(v21, v31);
+ Lib_IntVector_Intrinsics_vec128
+ v0__2 = Lib_IntVector_Intrinsics_vec128_interleave_low64(v0_2, v2_2);
+ Lib_IntVector_Intrinsics_vec128
+ v1__2 = Lib_IntVector_Intrinsics_vec128_interleave_high64(v0_2, v2_2);
+ Lib_IntVector_Intrinsics_vec128
+ v2__2 = Lib_IntVector_Intrinsics_vec128_interleave_low64(v1_2, v3_2);
+ Lib_IntVector_Intrinsics_vec128
+ v3__2 = Lib_IntVector_Intrinsics_vec128_interleave_high64(v1_2, v3_2);
+ Lib_IntVector_Intrinsics_vec128 v12 = v0__2;
+ Lib_IntVector_Intrinsics_vec128 v13 = v1__2;
+ Lib_IntVector_Intrinsics_vec128 v14 = v2__2;
+ Lib_IntVector_Intrinsics_vec128 v15 = v3__2;
+ k[0U] = v0;
+ k[1U] = v4;
+ k[2U] = v8;
+ k[3U] = v12;
+ k[4U] = v1;
+ k[5U] = v5;
+ k[6U] = v9;
+ k[7U] = v13;
+ k[8U] = v2;
+ k[9U] = v6;
+ k[10U] = v10;
+ k[11U] = v14;
+ k[12U] = v3;
+ k[13U] = v7;
+ k[14U] = v11;
+ k[15U] = v15;
+ for (uint32_t i = (uint32_t)0U; i < (uint32_t)16U; i++) {
+ Lib_IntVector_Intrinsics_vec128
+ x = Lib_IntVector_Intrinsics_vec128_load_le(plain + i * (uint32_t)16U);
+ Lib_IntVector_Intrinsics_vec128 y = Lib_IntVector_Intrinsics_vec128_xor(x, k[i]);
+ Lib_IntVector_Intrinsics_vec128_store_le(plain + i * (uint32_t)16U, y);
+ }
+ memcpy(uu____2, plain, rem * sizeof(plain[0U]));
+ }
+}
+
+/*
+ * XORs one 256-byte block read from src with keystream and stores it to dst.
+ *
+ * The keystream vectors come from chacha20_core_128(ks, ctx, counter). Before
+ * the XOR they are reordered: every run of four consecutive vectors goes
+ * through the interleave_low32/high32 and interleave_low64/high64 steps, and
+ * result j of run g lands at index 4 * j + g. Each 16-byte chunk is loaded
+ * from src before the matching chunk of dst is stored; the tail path of the
+ * caller passes the same buffer for both.
+ */
+static inline void
+chacha20_decrypt_128_xor_block(
+    uint8_t *dst,
+    uint8_t *src,
+    Lib_IntVector_Intrinsics_vec128 *ctx,
+    uint32_t counter)
+{
+    Lib_IntVector_Intrinsics_vec128 ks[16U];
+    for (uint32_t idx = 0U; idx < (uint32_t)16U; ++idx) {
+        ks[idx] = Lib_IntVector_Intrinsics_vec128_zero;
+    }
+    chacha20_core_128(ks, ctx, counter);
+
+    Lib_IntVector_Intrinsics_vec128 reordered[16U];
+    for (uint32_t g = (uint32_t)0U; g < (uint32_t)4U; g++) {
+        Lib_IntVector_Intrinsics_vec128 *row = ks + (uint32_t)4U * g;
+        Lib_IntVector_Intrinsics_vec128
+            lo01 = Lib_IntVector_Intrinsics_vec128_interleave_low32(row[0U], row[1U]);
+        Lib_IntVector_Intrinsics_vec128
+            hi01 = Lib_IntVector_Intrinsics_vec128_interleave_high32(row[0U], row[1U]);
+        Lib_IntVector_Intrinsics_vec128
+            lo23 = Lib_IntVector_Intrinsics_vec128_interleave_low32(row[2U], row[3U]);
+        Lib_IntVector_Intrinsics_vec128
+            hi23 = Lib_IntVector_Intrinsics_vec128_interleave_high32(row[2U], row[3U]);
+        reordered[g] = Lib_IntVector_Intrinsics_vec128_interleave_low64(lo01, lo23);
+        reordered[(uint32_t)4U + g] = Lib_IntVector_Intrinsics_vec128_interleave_high64(lo01, lo23);
+        reordered[(uint32_t)8U + g] = Lib_IntVector_Intrinsics_vec128_interleave_low64(hi01, hi23);
+        reordered[(uint32_t)12U + g] = Lib_IntVector_Intrinsics_vec128_interleave_high64(hi01, hi23);
+    }
+
+    for (uint32_t chunk = (uint32_t)0U; chunk < (uint32_t)16U; chunk++) {
+        uint32_t off = chunk * (uint32_t)16U;
+        Lib_IntVector_Intrinsics_vec128 data = Lib_IntVector_Intrinsics_vec128_load_le(src + off);
+        Lib_IntVector_Intrinsics_vec128
+            mixed = Lib_IntVector_Intrinsics_vec128_xor(data, reordered[chunk]);
+        Lib_IntVector_Intrinsics_vec128_store_le(dst + off, mixed);
+    }
+}
+
+/*
+ * Writes len bytes to out, each being the corresponding byte of cipher XORed
+ * with keystream derived from key, n and ctr.
+ *
+ * The state is set up once with chacha20_init_128. Input is then consumed in
+ * 256-byte blocks, block i using chacha20_core_128 with counter argument i.
+ * A trailing partial block (len % 256 bytes) is copied into a zero-filled
+ * 256-byte scratch buffer, processed there as a full block with counter
+ * argument len / 256, and only its first len % 256 bytes are copied to out.
+ */
+void
+Hacl_Chacha20_Vec128_chacha20_decrypt_128(
+    uint32_t len,
+    uint8_t *out,
+    uint8_t *cipher,
+    uint8_t *key,
+    uint8_t *n,
+    uint32_t ctr)
+{
+    Lib_IntVector_Intrinsics_vec128 ctx[16U];
+    for (uint32_t idx = 0U; idx < (uint32_t)16U; ++idx) {
+        ctx[idx] = Lib_IntVector_Intrinsics_vec128_zero;
+    }
+    chacha20_init_128(ctx, key, n, ctr);
+
+    uint32_t full_blocks = len / (uint32_t)256U;
+    uint32_t tail_len = len % (uint32_t)256U;
+
+    for (uint32_t blk = (uint32_t)0U; blk < full_blocks; blk++) {
+        uint32_t off = blk * (uint32_t)256U;
+        chacha20_decrypt_128_xor_block(out + off, cipher + off, ctx, blk);
+    }
+
+    if (tail_len == (uint32_t)0U) {
+        return;
+    }
+
+    /* Partial last block: pad with zeros, process in place, emit the prefix. */
+    uint8_t *tail_out = out + full_blocks * (uint32_t)256U;
+    uint8_t *tail_in = cipher + full_blocks * (uint32_t)256U;
+    uint8_t scratch[256U] = { 0U };
+    memcpy(scratch, tail_in, tail_len * sizeof(tail_in[0U]));
+    chacha20_decrypt_128_xor_block(scratch, scratch, ctx, full_blocks);
+    memcpy(tail_out, scratch, tail_len * sizeof(scratch[0U]));
+}
diff --git a/security/nss/lib/freebl/verified/Hacl_Chacha20_Vec128.h b/security/nss/lib/freebl/verified/Hacl_Chacha20_Vec128.h
new file mode 100644
index 0000000000..dc59ba1c7a
--- /dev/null
+++ b/security/nss/lib/freebl/verified/Hacl_Chacha20_Vec128.h
@@ -0,0 +1,55 @@
+/* MIT License
+ *
+ * Copyright (c) 2016-2020 INRIA, CMU and Microsoft Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "libintvector.h"
+#include "kremlin/internal/types.h"
+#include "kremlin/lowstar_endianness.h"
+#include <string.h>
+#include <stdbool.h>
+
+#ifndef __Hacl_Chacha20_Vec128_H
+#define __Hacl_Chacha20_Vec128_H
+
+#include "Hacl_Chacha20.h"
+#include "Hacl_Kremlib.h"
+/* Encryption entry point of the 128-bit vector ChaCha20 implementation.
+ *
+ *   len   number of bytes to process
+ *   out   destination buffer; receives the processed bytes
+ *   text  input bytes
+ *   key   key bytes (NOTE(review): presumably 32 bytes; the 128-bit
+ *         initializer is not visible from this header - confirm)
+ *   n     nonce bytes (NOTE(review): presumably 12 bytes - confirm)
+ *   ctr   initial block counter value
+ *
+ * NOTE(review): the implementation appears to XOR the input with keystream
+ * in 256-byte blocks and to route a trailing partial block through a
+ * temporary buffer - confirm against the definition.
+ */
+void
+Hacl_Chacha20_Vec128_chacha20_encrypt_128(
+ uint32_t len,
+ uint8_t *out,
+ uint8_t *text,
+ uint8_t *key,
+ uint8_t *n,
+ uint32_t ctr);
+/* Decryption entry point of the 128-bit vector ChaCha20 implementation.
+ *
+ *   len     number of bytes to process
+ *   out     destination buffer; receives len bytes
+ *   cipher  input bytes; len bytes are read
+ *   key     key bytes (NOTE(review): presumably 32 bytes; the 128-bit
+ *           initializer is not visible from this header - confirm)
+ *   n       nonce bytes (NOTE(review): presumably 12 bytes - confirm)
+ *   ctr     initial block counter value
+ *
+ * The definition XORs the input with keystream in 256-byte blocks; a
+ * trailing partial block is processed in a zero-padded 256-byte temporary
+ * and only the remaining len % 256 bytes are copied to out.
+ */
+void
+Hacl_Chacha20_Vec128_chacha20_decrypt_128(
+ uint32_t len,
+ uint8_t *out,
+ uint8_t *cipher,
+ uint8_t *key,
+ uint8_t *n,
+ uint32_t ctr);
+
+#define __Hacl_Chacha20_Vec128_H_DEFINED
+#endif
diff --git a/security/nss/lib/freebl/verified/Hacl_Chacha20_Vec256.c b/security/nss/lib/freebl/verified/Hacl_Chacha20_Vec256.c
new file mode 100644
index 0000000000..49902c1304
--- /dev/null
+++ b/security/nss/lib/freebl/verified/Hacl_Chacha20_Vec256.c
@@ -0,0 +1,876 @@
+/* MIT License
+ *
+ * Copyright (c) 2016-2020 INRIA, CMU and Microsoft Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "Hacl_Chacha20_Vec256.h"
+
+/*
+ * One quarter round on the state vectors st[a], st[b], st[c], st[d]:
+ * four add32 / xor / rotate_left32 steps with rotation amounts 16, 12, 8
+ * and 7, in that order. Rotation amounts are kept as literals at each call.
+ */
+static inline void
+double_round_256_quarter(
+    Lib_IntVector_Intrinsics_vec256 *st,
+    uint32_t a,
+    uint32_t b,
+    uint32_t c,
+    uint32_t d)
+{
+    Lib_IntVector_Intrinsics_vec256 mix;
+
+    st[a] = Lib_IntVector_Intrinsics_vec256_add32(st[a], st[b]);
+    mix = Lib_IntVector_Intrinsics_vec256_xor(st[d], st[a]);
+    st[d] = Lib_IntVector_Intrinsics_vec256_rotate_left32(mix, (uint32_t)16U);
+
+    st[c] = Lib_IntVector_Intrinsics_vec256_add32(st[c], st[d]);
+    mix = Lib_IntVector_Intrinsics_vec256_xor(st[b], st[c]);
+    st[b] = Lib_IntVector_Intrinsics_vec256_rotate_left32(mix, (uint32_t)12U);
+
+    st[a] = Lib_IntVector_Intrinsics_vec256_add32(st[a], st[b]);
+    mix = Lib_IntVector_Intrinsics_vec256_xor(st[d], st[a]);
+    st[d] = Lib_IntVector_Intrinsics_vec256_rotate_left32(mix, (uint32_t)8U);
+
+    st[c] = Lib_IntVector_Intrinsics_vec256_add32(st[c], st[d]);
+    mix = Lib_IntVector_Intrinsics_vec256_xor(st[b], st[c]);
+    st[b] = Lib_IntVector_Intrinsics_vec256_rotate_left32(mix, (uint32_t)7U);
+}
+
+/*
+ * Updates the 16-vector state st in place with eight quarter rounds: first
+ * on the index sets (0,4,8,12), (1,5,9,13), (2,6,10,14), (3,7,11,15), then
+ * on (0,5,10,15), (1,6,11,12), (2,7,8,13), (3,4,9,14).
+ */
+static inline void
+double_round_256(Lib_IntVector_Intrinsics_vec256 *st)
+{
+    double_round_256_quarter(st, (uint32_t)0U, (uint32_t)4U, (uint32_t)8U, (uint32_t)12U);
+    double_round_256_quarter(st, (uint32_t)1U, (uint32_t)5U, (uint32_t)9U, (uint32_t)13U);
+    double_round_256_quarter(st, (uint32_t)2U, (uint32_t)6U, (uint32_t)10U, (uint32_t)14U);
+    double_round_256_quarter(st, (uint32_t)3U, (uint32_t)7U, (uint32_t)11U, (uint32_t)15U);
+
+    double_round_256_quarter(st, (uint32_t)0U, (uint32_t)5U, (uint32_t)10U, (uint32_t)15U);
+    double_round_256_quarter(st, (uint32_t)1U, (uint32_t)6U, (uint32_t)11U, (uint32_t)12U);
+    double_round_256_quarter(st, (uint32_t)2U, (uint32_t)7U, (uint32_t)8U, (uint32_t)13U);
+    double_round_256_quarter(st, (uint32_t)3U, (uint32_t)4U, (uint32_t)9U, (uint32_t)14U);
+}
+
+/*
+ * Fills k[0..15] from the state ctx for the block group selected by ctr.
+ *
+ * k starts as a copy of ctx with load32(8 * ctr) added to k[12]. After ten
+ * applications of double_round_256, ctx is added back element-wise; the
+ * counter vector is then added to k[12] once more, so that the value added
+ * back at index 12 equals the one the rounds started from. ctx is only read.
+ */
+static inline void
+chacha20_core_256(
+    Lib_IntVector_Intrinsics_vec256 *k,
+    Lib_IntVector_Intrinsics_vec256 *ctx,
+    uint32_t ctr)
+{
+    Lib_IntVector_Intrinsics_vec256
+        ctr_vec = Lib_IntVector_Intrinsics_vec256_load32((uint32_t)8U * ctr);
+
+    memcpy(k, ctx, (uint32_t)16U * sizeof(ctx[0U]));
+    k[12U] = Lib_IntVector_Intrinsics_vec256_add32(k[12U], ctr_vec);
+
+    for (uint32_t round = (uint32_t)0U; round < (uint32_t)10U; round++) {
+        double_round_256(k);
+    }
+
+    for (uint32_t w = (uint32_t)0U; w < (uint32_t)16U; w++) {
+        k[w] = Lib_IntVector_Intrinsics_vec256_add32(k[w], ctx[w]);
+    }
+    k[12U] = Lib_IntVector_Intrinsics_vec256_add32(k[12U], ctr_vec);
+}
+
+/*
+ * Initializes the 16-vector state ctx.
+ *
+ * A scalar 16-word state is assembled first: words 0-3 come from
+ * Hacl_Impl_Chacha20_Vec_chacha20_constants, words 4-11 are read with
+ * load32_le from the 32 bytes at k, word 12 is ctr, and words 13-15 are read
+ * with load32_le from the 12 bytes at n. Each word is then turned into a
+ * vector with vec256_load32, and load32s(0, 1, ..., 7) is added to ctx[12].
+ */
+static inline void
+chacha20_init_256(Lib_IntVector_Intrinsics_vec256 *ctx, uint8_t *k, uint8_t *n, uint32_t ctr)
+{
+    uint32_t words[16U] = { 0U };
+
+    for (uint32_t i = (uint32_t)0U; i < (uint32_t)4U; i++) {
+        words[i] = Hacl_Impl_Chacha20_Vec_chacha20_constants[i];
+    }
+    for (uint32_t i = (uint32_t)0U; i < (uint32_t)8U; i++) {
+        uint8_t *key_word = k + i * (uint32_t)4U;
+        words[(uint32_t)4U + i] = load32_le(key_word);
+    }
+    words[12U] = ctr;
+    for (uint32_t i = (uint32_t)0U; i < (uint32_t)3U; i++) {
+        uint8_t *nonce_word = n + i * (uint32_t)4U;
+        words[(uint32_t)13U + i] = load32_le(nonce_word);
+    }
+
+    for (uint32_t i = (uint32_t)0U; i < (uint32_t)16U; i++) {
+        ctx[i] = Lib_IntVector_Intrinsics_vec256_load32(words[i]);
+    }
+
+    Lib_IntVector_Intrinsics_vec256
+        lane_offsets =
+            Lib_IntVector_Intrinsics_vec256_load32s((uint32_t)0U,
+                                                    (uint32_t)1U,
+                                                    (uint32_t)2U,
+                                                    (uint32_t)3U,
+                                                    (uint32_t)4U,
+                                                    (uint32_t)5U,
+                                                    (uint32_t)6U,
+                                                    (uint32_t)7U);
+    ctx[12U] = Lib_IntVector_Intrinsics_vec256_add32(ctx[12U], lane_offsets);
+}
+
+/*
+ * Run the three-stage interleave network (32-, 64- then 128-bit granularity)
+ * over eight 256-bit vectors, in place.
+ *
+ * NOTE(review): with the usual AVX2 unpack/permute semantics for the
+ * Lib_IntVector_Intrinsics_vec256_interleave_* primitives this amounts to an
+ * 8x8 transpose of 32-bit words; those primitives live in libintvector.h, so
+ * confirm there.
+ *
+ * v: array of exactly eight vectors, read and then overwritten.
+ */
+static inline void
+chacha20_transpose8x8_256(Lib_IntVector_Intrinsics_vec256 *v)
+{
+    /* Stage 1: 32-bit interleave of adjacent vector pairs. */
+    Lib_IntVector_Intrinsics_vec256
+        a0 = Lib_IntVector_Intrinsics_vec256_interleave_low32(v[0U], v[1U]);
+    Lib_IntVector_Intrinsics_vec256
+        a1 = Lib_IntVector_Intrinsics_vec256_interleave_high32(v[0U], v[1U]);
+    Lib_IntVector_Intrinsics_vec256
+        a2 = Lib_IntVector_Intrinsics_vec256_interleave_low32(v[2U], v[3U]);
+    Lib_IntVector_Intrinsics_vec256
+        a3 = Lib_IntVector_Intrinsics_vec256_interleave_high32(v[2U], v[3U]);
+    Lib_IntVector_Intrinsics_vec256
+        a4 = Lib_IntVector_Intrinsics_vec256_interleave_low32(v[4U], v[5U]);
+    Lib_IntVector_Intrinsics_vec256
+        a5 = Lib_IntVector_Intrinsics_vec256_interleave_high32(v[4U], v[5U]);
+    Lib_IntVector_Intrinsics_vec256
+        a6 = Lib_IntVector_Intrinsics_vec256_interleave_low32(v[6U], v[7U]);
+    Lib_IntVector_Intrinsics_vec256
+        a7 = Lib_IntVector_Intrinsics_vec256_interleave_high32(v[6U], v[7U]);
+
+    /* Stage 2: 64-bit interleave of the stage-1 results. */
+    Lib_IntVector_Intrinsics_vec256
+        b0 = Lib_IntVector_Intrinsics_vec256_interleave_low64(a0, a2);
+    Lib_IntVector_Intrinsics_vec256
+        b1 = Lib_IntVector_Intrinsics_vec256_interleave_high64(a0, a2);
+    Lib_IntVector_Intrinsics_vec256
+        b2 = Lib_IntVector_Intrinsics_vec256_interleave_low64(a1, a3);
+    Lib_IntVector_Intrinsics_vec256
+        b3 = Lib_IntVector_Intrinsics_vec256_interleave_high64(a1, a3);
+    Lib_IntVector_Intrinsics_vec256
+        b4 = Lib_IntVector_Intrinsics_vec256_interleave_low64(a4, a6);
+    Lib_IntVector_Intrinsics_vec256
+        b5 = Lib_IntVector_Intrinsics_vec256_interleave_high64(a4, a6);
+    Lib_IntVector_Intrinsics_vec256
+        b6 = Lib_IntVector_Intrinsics_vec256_interleave_low64(a5, a7);
+    Lib_IntVector_Intrinsics_vec256
+        b7 = Lib_IntVector_Intrinsics_vec256_interleave_high64(a5, a7);
+
+    /* Stage 3: 128-bit interleave of the stage-2 results. */
+    Lib_IntVector_Intrinsics_vec256
+        c0 = Lib_IntVector_Intrinsics_vec256_interleave_low128(b0, b4);
+    Lib_IntVector_Intrinsics_vec256
+        c1 = Lib_IntVector_Intrinsics_vec256_interleave_high128(b0, b4);
+    Lib_IntVector_Intrinsics_vec256
+        c2 = Lib_IntVector_Intrinsics_vec256_interleave_low128(b1, b5);
+    Lib_IntVector_Intrinsics_vec256
+        c3 = Lib_IntVector_Intrinsics_vec256_interleave_high128(b1, b5);
+    Lib_IntVector_Intrinsics_vec256
+        c4 = Lib_IntVector_Intrinsics_vec256_interleave_low128(b2, b6);
+    Lib_IntVector_Intrinsics_vec256
+        c5 = Lib_IntVector_Intrinsics_vec256_interleave_high128(b2, b6);
+    Lib_IntVector_Intrinsics_vec256
+        c6 = Lib_IntVector_Intrinsics_vec256_interleave_low128(b3, b7);
+    Lib_IntVector_Intrinsics_vec256
+        c7 = Lib_IntVector_Intrinsics_vec256_interleave_high128(b3, b7);
+
+    /* The "low128" results fill slots 0..3, the "high128" results slots 4..7. */
+    v[0U] = c0;
+    v[1U] = c2;
+    v[2U] = c4;
+    v[3U] = c6;
+    v[4U] = c1;
+    v[5U] = c3;
+    v[6U] = c5;
+    v[7U] = c7;
+}
+
+/*
+ * Produce one 512-byte keystream chunk (16 vectors of 32 bytes) in the order
+ * in which it is XORed against the data.
+ *
+ * k:     output, 16 vectors; zeroed, filled by chacha20_core_256 and reordered.
+ * ctx:   cipher state prepared by chacha20_init_256.
+ * chunk: index of the 512-byte chunk, forwarded to chacha20_core_256.
+ */
+static inline void
+chacha20_keystream_chunk_256(
+    Lib_IntVector_Intrinsics_vec256 *k,
+    Lib_IntVector_Intrinsics_vec256 *ctx,
+    uint32_t chunk)
+{
+    Lib_IntVector_Intrinsics_vec256 lo[8U];
+    Lib_IntVector_Intrinsics_vec256 hi[8U];
+
+    for (uint32_t i = 0U; i < (uint32_t)16U; ++i)
+        k[i] = Lib_IntVector_Intrinsics_vec256_zero;
+    chacha20_core_256(k, ctx, chunk);
+
+    /* Vectors 0..7 and 8..15 go through the interleave network independently. */
+    for (uint32_t i = 0U; i < (uint32_t)8U; ++i) {
+        lo[i] = k[i];
+        hi[i] = k[i + (uint32_t)8U];
+    }
+    chacha20_transpose8x8_256(lo);
+    chacha20_transpose8x8_256(hi);
+
+    /* Merge the halves alternately: even slots from `lo`, odd slots from `hi`. */
+    for (uint32_t i = 0U; i < (uint32_t)8U; ++i) {
+        k[(uint32_t)2U * i] = lo[i];
+        k[(uint32_t)2U * i + (uint32_t)1U] = hi[i];
+    }
+}
+
+/*
+ * XOR `len` bytes of `text` with the keystream and write the result to `out`.
+ *
+ * The input is consumed in 512-byte chunks; a final partial chunk is staged
+ * through a zero-padded stack buffer so that only `len % 512` bytes of `out`
+ * are written for it.
+ *
+ * len:  number of bytes to process.
+ * out:  destination, `len` bytes.
+ * text: source, `len` bytes.
+ * key:  key material handed to chacha20_init_256
+ *       (presumably 32 bytes per RFC 8439 - confirm against chacha20_init_256).
+ * n:    nonce handed to chacha20_init_256
+ *       (presumably 12 bytes per RFC 8439 - confirm against chacha20_init_256).
+ * ctr:  initial counter handed to chacha20_init_256.
+ */
+void
+Hacl_Chacha20_Vec256_chacha20_encrypt_256(
+    uint32_t len,
+    uint8_t *out,
+    uint8_t *text,
+    uint8_t *key,
+    uint8_t *n,
+    uint32_t ctr)
+{
+    Lib_IntVector_Intrinsics_vec256 ctx[16U];
+    for (uint32_t _i = 0U; _i < (uint32_t)16U; ++_i)
+        ctx[_i] = Lib_IntVector_Intrinsics_vec256_zero;
+    chacha20_init_256(ctx, key, n, ctr);
+
+    uint32_t nb = len / (uint32_t)512U;
+    uint32_t rem = len % (uint32_t)512U;
+
+    /* Full 512-byte chunks are processed straight from `text` into `out`. */
+    for (uint32_t i = (uint32_t)0U; i < nb; i++) {
+        uint8_t *dst = out + i * (uint32_t)512U;
+        uint8_t *src = text + i * (uint32_t)512U;
+        Lib_IntVector_Intrinsics_vec256 k[16U];
+        chacha20_keystream_chunk_256(k, ctx, i);
+        for (uint32_t j = (uint32_t)0U; j < (uint32_t)16U; j++) {
+            Lib_IntVector_Intrinsics_vec256
+                x = Lib_IntVector_Intrinsics_vec256_load_le(src + j * (uint32_t)32U);
+            Lib_IntVector_Intrinsics_vec256 y = Lib_IntVector_Intrinsics_vec256_xor(x, k[j]);
+            Lib_IntVector_Intrinsics_vec256_store_le(dst + j * (uint32_t)32U, y);
+        }
+    }
+
+    /* Trailing partial chunk: pad to 512 bytes locally, copy back `rem` bytes. */
+    if (rem > (uint32_t)0U) {
+        uint8_t *dst = out + nb * (uint32_t)512U;
+        uint8_t *src = text + nb * (uint32_t)512U;
+        uint8_t plain[512U] = { 0U };
+        memcpy(plain, src, rem * sizeof(src[0U]));
+        Lib_IntVector_Intrinsics_vec256 k[16U];
+        chacha20_keystream_chunk_256(k, ctx, nb);
+        for (uint32_t j = (uint32_t)0U; j < (uint32_t)16U; j++) {
+            Lib_IntVector_Intrinsics_vec256
+                x = Lib_IntVector_Intrinsics_vec256_load_le(plain + j * (uint32_t)32U);
+            Lib_IntVector_Intrinsics_vec256 y = Lib_IntVector_Intrinsics_vec256_xor(x, k[j]);
+            Lib_IntVector_Intrinsics_vec256_store_le(plain + j * (uint32_t)32U, y);
+        }
+        memcpy(dst, plain, rem * sizeof(plain[0U]));
+    }
+}
+
+/*
+ * XOR `len` bytes of `cipher` with the keystream and write the result to `out`.
+ *
+ * The operation is a plain XOR with the keystream, and this function used to
+ * carry a statement-for-statement copy of the encryption routine; it now
+ * forwards to Hacl_Chacha20_Vec256_chacha20_encrypt_256 so the logic exists
+ * only once.
+ *
+ * len:    number of bytes to process.
+ * out:    destination, `len` bytes.
+ * cipher: source, `len` bytes.
+ * key:    key material, forwarded unchanged.
+ * n:      nonce, forwarded unchanged.
+ * ctr:    initial counter, forwarded unchanged.
+ */
+void
+Hacl_Chacha20_Vec256_chacha20_decrypt_256(
+    uint32_t len,
+    uint8_t *out,
+    uint8_t *cipher,
+    uint8_t *key,
+    uint8_t *n,
+    uint32_t ctr)
+{
+    Hacl_Chacha20_Vec256_chacha20_encrypt_256(len, out, cipher, key, n, ctr);
+}
diff --git a/security/nss/lib/freebl/verified/Hacl_Chacha20_Vec256.h b/security/nss/lib/freebl/verified/Hacl_Chacha20_Vec256.h
new file mode 100644
index 0000000000..1ae9ea3798
--- /dev/null
+++ b/security/nss/lib/freebl/verified/Hacl_Chacha20_Vec256.h
@@ -0,0 +1,55 @@
+/* MIT License
+ *
+ * Copyright (c) 2016-2020 INRIA, CMU and Microsoft Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "libintvector.h"
+#include "kremlin/internal/types.h"
+#include "kremlin/lowstar_endianness.h"
+#include <string.h>
+#include <stdbool.h>
+
+#ifndef __Hacl_Chacha20_Vec256_H
+#define __Hacl_Chacha20_Vec256_H
+
+#include "Hacl_Chacha20.h"
+#include "Hacl_Kremlib.h"
+
+/*
+ * Encrypt `len` bytes from `text` into `out` using the 256-bit vectorized
+ * implementation.
+ *
+ * len:  number of bytes to process.
+ * out:  destination buffer, `len` bytes.
+ * text: source buffer, `len` bytes.
+ * key:  cipher key (presumably 32 bytes per RFC 8439 - confirm in the .c file).
+ * n:    nonce (presumably 12 bytes per RFC 8439 - confirm in the .c file).
+ * ctr:  initial counter value.
+ */
+void
+Hacl_Chacha20_Vec256_chacha20_encrypt_256(
+ uint32_t len,
+ uint8_t *out,
+ uint8_t *text,
+ uint8_t *key,
+ uint8_t *n,
+ uint32_t ctr);
+
+/*
+ * Decrypt `len` bytes from `cipher` into `out` using the 256-bit vectorized
+ * implementation. Parameters mirror the encryption entry point.
+ *
+ * len:    number of bytes to process.
+ * out:    destination buffer, `len` bytes.
+ * cipher: source buffer, `len` bytes.
+ * key:    cipher key (presumably 32 bytes per RFC 8439 - confirm in the .c file).
+ * n:      nonce (presumably 12 bytes per RFC 8439 - confirm in the .c file).
+ * ctr:    initial counter value.
+ */
+void
+Hacl_Chacha20_Vec256_chacha20_decrypt_256(
+ uint32_t len,
+ uint8_t *out,
+ uint8_t *cipher,
+ uint8_t *key,
+ uint8_t *n,
+ uint32_t ctr);
+
+#define __Hacl_Chacha20_Vec256_H_DEFINED
+#endif
diff --git a/security/nss/lib/freebl/verified/Hacl_Curve25519_51.c b/security/nss/lib/freebl/verified/Hacl_Curve25519_51.c
new file mode 100644
index 0000000000..2b76eafe27
--- /dev/null
+++ b/security/nss/lib/freebl/verified/Hacl_Curve25519_51.c
@@ -0,0 +1,904 @@
+/* MIT License
+ *
+ * Copyright (c) 2016-2020 INRIA, CMU and Microsoft Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "Hacl_Curve25519_51.h"
+
+static inline void /* fadd0: limb-wise sum of two 5-limb values, out[i] = f1[i] + f2[i] for i = 0..4. */
+fadd0(uint64_t *out, uint64_t *f1, uint64_t *f2) /* out: 5 limbs written; f1, f2: 5 limbs read each */
+{
+ uint64_t f10 = f1[0U]; /* every input limb is loaded before the first store, so out may be the same array as f1 or f2 */
+ uint64_t f20 = f2[0U];
+ uint64_t f11 = f1[1U];
+ uint64_t f21 = f2[1U];
+ uint64_t f12 = f1[2U];
+ uint64_t f22 = f2[2U];
+ uint64_t f13 = f1[3U];
+ uint64_t f23 = f2[3U];
+ uint64_t f14 = f1[4U];
+ uint64_t f24 = f2[4U];
+ out[0U] = f10 + f20; /* plain uint64_t additions: no carry is moved between limbs and no reduction is done here */
+ out[1U] = f11 + f21;
+ out[2U] = f12 + f22;
+ out[3U] = f13 + f23;
+ out[4U] = f14 + f24;
+}
+
+static inline void /* fsub0: limb-wise difference with a fixed per-limb offset, out[i] = f1[i] + K[i] - f2[i] for i = 0..4. */
+fsub0(uint64_t *out, uint64_t *f1, uint64_t *f2) /* out: 5 limbs written; f1, f2: 5 limbs read each */
+{
+ uint64_t f10 = f1[0U]; /* every input limb is loaded before the first store, so out may be the same array as f1 or f2 */
+ uint64_t f20 = f2[0U];
+ uint64_t f11 = f1[1U];
+ uint64_t f21 = f2[1U];
+ uint64_t f12 = f1[2U];
+ uint64_t f22 = f2[2U];
+ uint64_t f13 = f1[3U];
+ uint64_t f23 = f2[3U];
+ uint64_t f14 = f1[4U];
+ uint64_t f24 = f2[4U];
+ out[0U] = f10 + (uint64_t)0x3fffffffffff68U - f20; /* K[0] = 8 * (2^51 - 19) */
+ out[1U] = f11 + (uint64_t)0x3ffffffffffff8U - f21; /* K[1..4] = 8 * (2^51 - 1); the offsets presumably keep each limb from wrapping below zero -- the bound on f2 is not checked here */
+ out[2U] = f12 + (uint64_t)0x3ffffffffffff8U - f22;
+ out[3U] = f13 + (uint64_t)0x3ffffffffffff8U - f23;
+ out[4U] = f14 + (uint64_t)0x3ffffffffffff8U - f24;
+}
+
+static inline void /* fmul0: product of two 5-limb values (51-bit limb spacing), with high partial products folded back via a factor of 19. */
+fmul0(uint64_t *out, uint64_t *f1, uint64_t *f2) /* out: 5 limbs written; f1, f2: 5 limbs read each */
+{
+ uint64_t f10 = f1[0U]; /* all inputs are loaded before any store, so out may be the same array as f1 or f2 */
+ uint64_t f11 = f1[1U];
+ uint64_t f12 = f1[2U];
+ uint64_t f13 = f1[3U];
+ uint64_t f14 = f1[4U];
+ uint64_t f20 = f2[0U];
+ uint64_t f21 = f2[1U];
+ uint64_t f22 = f2[2U];
+ uint64_t f23 = f2[3U];
+ uint64_t f24 = f2[4U];
+ uint64_t tmp1 = f21 * (uint64_t)19U; /* tmpK = 19 * f2[K]: used for terms f1[i] * f2[K] with i + K >= 5, which land in column i + K - 5 */
+ uint64_t tmp2 = f22 * (uint64_t)19U;
+ uint64_t tmp3 = f23 * (uint64_t)19U;
+ uint64_t tmp4 = f24 * (uint64_t)19U;
+ FStar_UInt128_uint128 o00 = FStar_UInt128_mul_wide(f10, f20); /* oCj: 128-bit running sum for column C after adding the row of f1[j] */
+ FStar_UInt128_uint128 o10 = FStar_UInt128_mul_wide(f10, f21);
+ FStar_UInt128_uint128 o20 = FStar_UInt128_mul_wide(f10, f22);
+ FStar_UInt128_uint128 o30 = FStar_UInt128_mul_wide(f10, f23);
+ FStar_UInt128_uint128 o40 = FStar_UInt128_mul_wide(f10, f24);
+ FStar_UInt128_uint128 o01 = FStar_UInt128_add(o00, FStar_UInt128_mul_wide(f11, tmp4));
+ FStar_UInt128_uint128 o11 = FStar_UInt128_add(o10, FStar_UInt128_mul_wide(f11, f20));
+ FStar_UInt128_uint128 o21 = FStar_UInt128_add(o20, FStar_UInt128_mul_wide(f11, f21));
+ FStar_UInt128_uint128 o31 = FStar_UInt128_add(o30, FStar_UInt128_mul_wide(f11, f22));
+ FStar_UInt128_uint128 o41 = FStar_UInt128_add(o40, FStar_UInt128_mul_wide(f11, f23));
+ FStar_UInt128_uint128 o02 = FStar_UInt128_add(o01, FStar_UInt128_mul_wide(f12, tmp3));
+ FStar_UInt128_uint128 o12 = FStar_UInt128_add(o11, FStar_UInt128_mul_wide(f12, tmp4));
+ FStar_UInt128_uint128 o22 = FStar_UInt128_add(o21, FStar_UInt128_mul_wide(f12, f20));
+ FStar_UInt128_uint128 o32 = FStar_UInt128_add(o31, FStar_UInt128_mul_wide(f12, f21));
+ FStar_UInt128_uint128 o42 = FStar_UInt128_add(o41, FStar_UInt128_mul_wide(f12, f22));
+ FStar_UInt128_uint128 o03 = FStar_UInt128_add(o02, FStar_UInt128_mul_wide(f13, tmp2));
+ FStar_UInt128_uint128 o13 = FStar_UInt128_add(o12, FStar_UInt128_mul_wide(f13, tmp3));
+ FStar_UInt128_uint128 o23 = FStar_UInt128_add(o22, FStar_UInt128_mul_wide(f13, tmp4));
+ FStar_UInt128_uint128 o33 = FStar_UInt128_add(o32, FStar_UInt128_mul_wide(f13, f20));
+ FStar_UInt128_uint128 o43 = FStar_UInt128_add(o42, FStar_UInt128_mul_wide(f13, f21));
+ FStar_UInt128_uint128 o04 = FStar_UInt128_add(o03, FStar_UInt128_mul_wide(f14, tmp1));
+ FStar_UInt128_uint128 o14 = FStar_UInt128_add(o13, FStar_UInt128_mul_wide(f14, tmp2));
+ FStar_UInt128_uint128 o24 = FStar_UInt128_add(o23, FStar_UInt128_mul_wide(f14, tmp3));
+ FStar_UInt128_uint128 o34 = FStar_UInt128_add(o33, FStar_UInt128_mul_wide(f14, tmp4));
+ FStar_UInt128_uint128 o44 = FStar_UInt128_add(o43, FStar_UInt128_mul_wide(f14, f20));
+ FStar_UInt128_uint128 tmp_w0 = o04; /* tmp_w0..tmp_w4: the five completed column sums */
+ FStar_UInt128_uint128 tmp_w1 = o14;
+ FStar_UInt128_uint128 tmp_w2 = o24;
+ FStar_UInt128_uint128 tmp_w3 = o34;
+ FStar_UInt128_uint128 tmp_w4 = o44;
+ FStar_UInt128_uint128
+ l_ = FStar_UInt128_add(tmp_w0, FStar_UInt128_uint64_to_uint128((uint64_t)0U)); /* carry chain starts with a zero carry-in */
+ uint64_t tmp01 = FStar_UInt128_uint128_to_uint64(l_) & (uint64_t)0x7ffffffffffffU; /* keep the low 51 bits as the limb */
+ uint64_t c0 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_, (uint32_t)51U)); /* bits above 51 become the carry into the next column */
+ FStar_UInt128_uint128 l_0 = FStar_UInt128_add(tmp_w1, FStar_UInt128_uint64_to_uint128(c0));
+ uint64_t tmp11 = FStar_UInt128_uint128_to_uint64(l_0) & (uint64_t)0x7ffffffffffffU;
+ uint64_t c1 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_0, (uint32_t)51U));
+ FStar_UInt128_uint128 l_1 = FStar_UInt128_add(tmp_w2, FStar_UInt128_uint64_to_uint128(c1));
+ uint64_t tmp21 = FStar_UInt128_uint128_to_uint64(l_1) & (uint64_t)0x7ffffffffffffU;
+ uint64_t c2 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_1, (uint32_t)51U));
+ FStar_UInt128_uint128 l_2 = FStar_UInt128_add(tmp_w3, FStar_UInt128_uint64_to_uint128(c2));
+ uint64_t tmp31 = FStar_UInt128_uint128_to_uint64(l_2) & (uint64_t)0x7ffffffffffffU;
+ uint64_t c3 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_2, (uint32_t)51U));
+ FStar_UInt128_uint128 l_3 = FStar_UInt128_add(tmp_w4, FStar_UInt128_uint64_to_uint128(c3));
+ uint64_t tmp41 = FStar_UInt128_uint128_to_uint64(l_3) & (uint64_t)0x7ffffffffffffU;
+ uint64_t c4 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_3, (uint32_t)51U));
+ uint64_t l_4 = tmp01 + c4 * (uint64_t)19U; /* the carry out of the top limb re-enters limb 0 multiplied by 19 */
+ uint64_t tmp0_ = l_4 & (uint64_t)0x7ffffffffffffU;
+ uint64_t c5 = l_4 >> (uint32_t)51U;
+ uint64_t o0 = tmp0_;
+ uint64_t o1 = tmp11 + c5; /* last carry is added to limb 1, which is not masked again afterwards */
+ uint64_t o2 = tmp21;
+ uint64_t o3 = tmp31;
+ uint64_t o4 = tmp41;
+ out[0U] = o0;
+ out[1U] = o1;
+ out[2U] = o2;
+ out[3U] = o3;
+ out[4U] = o4;
+}
+
+static inline void /* fmul20: two independent 5-limb products in one call: out[0..4] from f1[0..4] and f2[0..4], out[5..9] from f1[5..9] and f2[5..9]. */
+fmul20(uint64_t *out, uint64_t *f1, uint64_t *f2) /* out: 10 limbs written; f1, f2: 10 limbs read each */
+{
+ uint64_t f10 = f1[0U]; /* all twenty inputs are loaded before any store, so out may be the same array as f1 or f2 */
+ uint64_t f11 = f1[1U];
+ uint64_t f12 = f1[2U];
+ uint64_t f13 = f1[3U];
+ uint64_t f14 = f1[4U];
+ uint64_t f20 = f2[0U];
+ uint64_t f21 = f2[1U];
+ uint64_t f22 = f2[2U];
+ uint64_t f23 = f2[3U];
+ uint64_t f24 = f2[4U];
+ uint64_t f30 = f1[5U]; /* f3x / f4x: the second operand pair, taken from the upper halves of f1 and f2 */
+ uint64_t f31 = f1[6U];
+ uint64_t f32 = f1[7U];
+ uint64_t f33 = f1[8U];
+ uint64_t f34 = f1[9U];
+ uint64_t f40 = f2[5U];
+ uint64_t f41 = f2[6U];
+ uint64_t f42 = f2[7U];
+ uint64_t f43 = f2[8U];
+ uint64_t f44 = f2[9U];
+ uint64_t tmp11 = f21 * (uint64_t)19U; /* tmp1K = 19 * f2[K]: folds first-product terms whose limb indices sum to 5 or more */
+ uint64_t tmp12 = f22 * (uint64_t)19U;
+ uint64_t tmp13 = f23 * (uint64_t)19U;
+ uint64_t tmp14 = f24 * (uint64_t)19U;
+ uint64_t tmp21 = f41 * (uint64_t)19U; /* tmp2K = 19 * f2[5 + K]: same role for the second product */
+ uint64_t tmp22 = f42 * (uint64_t)19U;
+ uint64_t tmp23 = f43 * (uint64_t)19U;
+ uint64_t tmp24 = f44 * (uint64_t)19U;
+ FStar_UInt128_uint128 o00 = FStar_UInt128_mul_wide(f10, f20); /* first product: 128-bit column sums built row by row */
+ FStar_UInt128_uint128 o15 = FStar_UInt128_mul_wide(f10, f21);
+ FStar_UInt128_uint128 o25 = FStar_UInt128_mul_wide(f10, f22);
+ FStar_UInt128_uint128 o30 = FStar_UInt128_mul_wide(f10, f23);
+ FStar_UInt128_uint128 o40 = FStar_UInt128_mul_wide(f10, f24);
+ FStar_UInt128_uint128 o010 = FStar_UInt128_add(o00, FStar_UInt128_mul_wide(f11, tmp14));
+ FStar_UInt128_uint128 o110 = FStar_UInt128_add(o15, FStar_UInt128_mul_wide(f11, f20));
+ FStar_UInt128_uint128 o210 = FStar_UInt128_add(o25, FStar_UInt128_mul_wide(f11, f21));
+ FStar_UInt128_uint128 o310 = FStar_UInt128_add(o30, FStar_UInt128_mul_wide(f11, f22));
+ FStar_UInt128_uint128 o410 = FStar_UInt128_add(o40, FStar_UInt128_mul_wide(f11, f23));
+ FStar_UInt128_uint128 o020 = FStar_UInt128_add(o010, FStar_UInt128_mul_wide(f12, tmp13));
+ FStar_UInt128_uint128 o120 = FStar_UInt128_add(o110, FStar_UInt128_mul_wide(f12, tmp14));
+ FStar_UInt128_uint128 o220 = FStar_UInt128_add(o210, FStar_UInt128_mul_wide(f12, f20));
+ FStar_UInt128_uint128 o320 = FStar_UInt128_add(o310, FStar_UInt128_mul_wide(f12, f21));
+ FStar_UInt128_uint128 o420 = FStar_UInt128_add(o410, FStar_UInt128_mul_wide(f12, f22));
+ FStar_UInt128_uint128 o030 = FStar_UInt128_add(o020, FStar_UInt128_mul_wide(f13, tmp12));
+ FStar_UInt128_uint128 o130 = FStar_UInt128_add(o120, FStar_UInt128_mul_wide(f13, tmp13));
+ FStar_UInt128_uint128 o230 = FStar_UInt128_add(o220, FStar_UInt128_mul_wide(f13, tmp14));
+ FStar_UInt128_uint128 o330 = FStar_UInt128_add(o320, FStar_UInt128_mul_wide(f13, f20));
+ FStar_UInt128_uint128 o430 = FStar_UInt128_add(o420, FStar_UInt128_mul_wide(f13, f21));
+ FStar_UInt128_uint128 o040 = FStar_UInt128_add(o030, FStar_UInt128_mul_wide(f14, tmp11));
+ FStar_UInt128_uint128 o140 = FStar_UInt128_add(o130, FStar_UInt128_mul_wide(f14, tmp12));
+ FStar_UInt128_uint128 o240 = FStar_UInt128_add(o230, FStar_UInt128_mul_wide(f14, tmp13));
+ FStar_UInt128_uint128 o340 = FStar_UInt128_add(o330, FStar_UInt128_mul_wide(f14, tmp14));
+ FStar_UInt128_uint128 o440 = FStar_UInt128_add(o430, FStar_UInt128_mul_wide(f14, f20));
+ FStar_UInt128_uint128 tmp_w10 = o040; /* tmp_w10..tmp_w14: completed column sums of the first product */
+ FStar_UInt128_uint128 tmp_w11 = o140;
+ FStar_UInt128_uint128 tmp_w12 = o240;
+ FStar_UInt128_uint128 tmp_w13 = o340;
+ FStar_UInt128_uint128 tmp_w14 = o440;
+ FStar_UInt128_uint128 o0 = FStar_UInt128_mul_wide(f30, f40); /* second product: same accumulation on the upper-half operands */
+ FStar_UInt128_uint128 o1 = FStar_UInt128_mul_wide(f30, f41);
+ FStar_UInt128_uint128 o2 = FStar_UInt128_mul_wide(f30, f42);
+ FStar_UInt128_uint128 o3 = FStar_UInt128_mul_wide(f30, f43);
+ FStar_UInt128_uint128 o4 = FStar_UInt128_mul_wide(f30, f44);
+ FStar_UInt128_uint128 o01 = FStar_UInt128_add(o0, FStar_UInt128_mul_wide(f31, tmp24));
+ FStar_UInt128_uint128 o111 = FStar_UInt128_add(o1, FStar_UInt128_mul_wide(f31, f40));
+ FStar_UInt128_uint128 o211 = FStar_UInt128_add(o2, FStar_UInt128_mul_wide(f31, f41));
+ FStar_UInt128_uint128 o31 = FStar_UInt128_add(o3, FStar_UInt128_mul_wide(f31, f42));
+ FStar_UInt128_uint128 o41 = FStar_UInt128_add(o4, FStar_UInt128_mul_wide(f31, f43));
+ FStar_UInt128_uint128 o02 = FStar_UInt128_add(o01, FStar_UInt128_mul_wide(f32, tmp23));
+ FStar_UInt128_uint128 o121 = FStar_UInt128_add(o111, FStar_UInt128_mul_wide(f32, tmp24));
+ FStar_UInt128_uint128 o221 = FStar_UInt128_add(o211, FStar_UInt128_mul_wide(f32, f40));
+ FStar_UInt128_uint128 o32 = FStar_UInt128_add(o31, FStar_UInt128_mul_wide(f32, f41));
+ FStar_UInt128_uint128 o42 = FStar_UInt128_add(o41, FStar_UInt128_mul_wide(f32, f42));
+ FStar_UInt128_uint128 o03 = FStar_UInt128_add(o02, FStar_UInt128_mul_wide(f33, tmp22));
+ FStar_UInt128_uint128 o131 = FStar_UInt128_add(o121, FStar_UInt128_mul_wide(f33, tmp23));
+ FStar_UInt128_uint128 o231 = FStar_UInt128_add(o221, FStar_UInt128_mul_wide(f33, tmp24));
+ FStar_UInt128_uint128 o33 = FStar_UInt128_add(o32, FStar_UInt128_mul_wide(f33, f40));
+ FStar_UInt128_uint128 o43 = FStar_UInt128_add(o42, FStar_UInt128_mul_wide(f33, f41));
+ FStar_UInt128_uint128 o04 = FStar_UInt128_add(o03, FStar_UInt128_mul_wide(f34, tmp21));
+ FStar_UInt128_uint128 o141 = FStar_UInt128_add(o131, FStar_UInt128_mul_wide(f34, tmp22));
+ FStar_UInt128_uint128 o241 = FStar_UInt128_add(o231, FStar_UInt128_mul_wide(f34, tmp23));
+ FStar_UInt128_uint128 o34 = FStar_UInt128_add(o33, FStar_UInt128_mul_wide(f34, tmp24));
+ FStar_UInt128_uint128 o44 = FStar_UInt128_add(o43, FStar_UInt128_mul_wide(f34, f40));
+ FStar_UInt128_uint128 tmp_w20 = o04; /* tmp_w20..tmp_w24: completed column sums of the second product */
+ FStar_UInt128_uint128 tmp_w21 = o141;
+ FStar_UInt128_uint128 tmp_w22 = o241;
+ FStar_UInt128_uint128 tmp_w23 = o34;
+ FStar_UInt128_uint128 tmp_w24 = o44;
+ FStar_UInt128_uint128
+ l_ = FStar_UInt128_add(tmp_w10, FStar_UInt128_uint64_to_uint128((uint64_t)0U)); /* carry chain for the first product, zero carry-in */
+ uint64_t tmp00 = FStar_UInt128_uint128_to_uint64(l_) & (uint64_t)0x7ffffffffffffU; /* low 51 bits stay in the limb */
+ uint64_t c00 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_, (uint32_t)51U)); /* bits above 51 carry into the next column */
+ FStar_UInt128_uint128 l_0 = FStar_UInt128_add(tmp_w11, FStar_UInt128_uint64_to_uint128(c00));
+ uint64_t tmp10 = FStar_UInt128_uint128_to_uint64(l_0) & (uint64_t)0x7ffffffffffffU;
+ uint64_t c10 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_0, (uint32_t)51U));
+ FStar_UInt128_uint128 l_1 = FStar_UInt128_add(tmp_w12, FStar_UInt128_uint64_to_uint128(c10));
+ uint64_t tmp20 = FStar_UInt128_uint128_to_uint64(l_1) & (uint64_t)0x7ffffffffffffU;
+ uint64_t c20 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_1, (uint32_t)51U));
+ FStar_UInt128_uint128 l_2 = FStar_UInt128_add(tmp_w13, FStar_UInt128_uint64_to_uint128(c20));
+ uint64_t tmp30 = FStar_UInt128_uint128_to_uint64(l_2) & (uint64_t)0x7ffffffffffffU;
+ uint64_t c30 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_2, (uint32_t)51U));
+ FStar_UInt128_uint128 l_3 = FStar_UInt128_add(tmp_w14, FStar_UInt128_uint64_to_uint128(c30));
+ uint64_t tmp40 = FStar_UInt128_uint128_to_uint64(l_3) & (uint64_t)0x7ffffffffffffU;
+ uint64_t c40 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_3, (uint32_t)51U));
+ uint64_t l_4 = tmp00 + c40 * (uint64_t)19U; /* top carry re-enters limb 0 multiplied by 19 */
+ uint64_t tmp0_ = l_4 & (uint64_t)0x7ffffffffffffU;
+ uint64_t c50 = l_4 >> (uint32_t)51U;
+ uint64_t o100 = tmp0_;
+ uint64_t o112 = tmp10 + c50; /* final carry lands in limb 1, which is not masked again */
+ uint64_t o122 = tmp20;
+ uint64_t o132 = tmp30;
+ uint64_t o142 = tmp40;
+ FStar_UInt128_uint128
+ l_5 = FStar_UInt128_add(tmp_w20, FStar_UInt128_uint64_to_uint128((uint64_t)0U)); /* identical carry chain for the second product */
+ uint64_t tmp0 = FStar_UInt128_uint128_to_uint64(l_5) & (uint64_t)0x7ffffffffffffU;
+ uint64_t c0 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_5, (uint32_t)51U));
+ FStar_UInt128_uint128 l_6 = FStar_UInt128_add(tmp_w21, FStar_UInt128_uint64_to_uint128(c0));
+ uint64_t tmp1 = FStar_UInt128_uint128_to_uint64(l_6) & (uint64_t)0x7ffffffffffffU;
+ uint64_t c1 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_6, (uint32_t)51U));
+ FStar_UInt128_uint128 l_7 = FStar_UInt128_add(tmp_w22, FStar_UInt128_uint64_to_uint128(c1));
+ uint64_t tmp2 = FStar_UInt128_uint128_to_uint64(l_7) & (uint64_t)0x7ffffffffffffU;
+ uint64_t c2 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_7, (uint32_t)51U));
+ FStar_UInt128_uint128 l_8 = FStar_UInt128_add(tmp_w23, FStar_UInt128_uint64_to_uint128(c2));
+ uint64_t tmp3 = FStar_UInt128_uint128_to_uint64(l_8) & (uint64_t)0x7ffffffffffffU;
+ uint64_t c3 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_8, (uint32_t)51U));
+ FStar_UInt128_uint128 l_9 = FStar_UInt128_add(tmp_w24, FStar_UInt128_uint64_to_uint128(c3));
+ uint64_t tmp4 = FStar_UInt128_uint128_to_uint64(l_9) & (uint64_t)0x7ffffffffffffU;
+ uint64_t c4 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_9, (uint32_t)51U));
+ uint64_t l_10 = tmp0 + c4 * (uint64_t)19U;
+ uint64_t tmp0_0 = l_10 & (uint64_t)0x7ffffffffffffU;
+ uint64_t c5 = l_10 >> (uint32_t)51U;
+ uint64_t o200 = tmp0_0;
+ uint64_t o212 = tmp1 + c5;
+ uint64_t o222 = tmp2;
+ uint64_t o232 = tmp3;
+ uint64_t o242 = tmp4;
+ uint64_t o10 = o100; /* plain renames of the ten result limbs before they are stored */
+ uint64_t o11 = o112;
+ uint64_t o12 = o122;
+ uint64_t o13 = o132;
+ uint64_t o14 = o142;
+ uint64_t o20 = o200;
+ uint64_t o21 = o212;
+ uint64_t o22 = o222;
+ uint64_t o23 = o232;
+ uint64_t o24 = o242;
+ out[0U] = o10; /* first product -> out[0..4] */
+ out[1U] = o11;
+ out[2U] = o12;
+ out[3U] = o13;
+ out[4U] = o14;
+ out[5U] = o20; /* second product -> out[5..9] */
+ out[6U] = o21;
+ out[7U] = o22;
+ out[8U] = o23;
+ out[9U] = o24;
+}
+
+static inline void /* fmul1: multiplies a 5-limb value by a single 64-bit scalar and runs the same 51-bit carry chain as the full multiply. */
+fmul1(uint64_t *out, uint64_t *f1, uint64_t f2) /* out: 5 limbs written; f1: 5 limbs read; f2: scalar multiplier passed by value */
+{
+ uint64_t f10 = f1[0U]; /* inputs are loaded before any store, so out may be the same array as f1 */
+ uint64_t f11 = f1[1U];
+ uint64_t f12 = f1[2U];
+ uint64_t f13 = f1[3U];
+ uint64_t f14 = f1[4U];
+ FStar_UInt128_uint128 tmp_w0 = FStar_UInt128_mul_wide(f2, f10); /* tmp_wK = f2 * f1[K] as a 128-bit product */
+ FStar_UInt128_uint128 tmp_w1 = FStar_UInt128_mul_wide(f2, f11);
+ FStar_UInt128_uint128 tmp_w2 = FStar_UInt128_mul_wide(f2, f12);
+ FStar_UInt128_uint128 tmp_w3 = FStar_UInt128_mul_wide(f2, f13);
+ FStar_UInt128_uint128 tmp_w4 = FStar_UInt128_mul_wide(f2, f14);
+ FStar_UInt128_uint128
+ l_ = FStar_UInt128_add(tmp_w0, FStar_UInt128_uint64_to_uint128((uint64_t)0U)); /* carry chain starts with a zero carry-in */
+ uint64_t tmp0 = FStar_UInt128_uint128_to_uint64(l_) & (uint64_t)0x7ffffffffffffU; /* low 51 bits stay in the limb */
+ uint64_t c0 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_, (uint32_t)51U)); /* bits above 51 carry into the next limb */
+ FStar_UInt128_uint128 l_0 = FStar_UInt128_add(tmp_w1, FStar_UInt128_uint64_to_uint128(c0));
+ uint64_t tmp1 = FStar_UInt128_uint128_to_uint64(l_0) & (uint64_t)0x7ffffffffffffU;
+ uint64_t c1 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_0, (uint32_t)51U));
+ FStar_UInt128_uint128 l_1 = FStar_UInt128_add(tmp_w2, FStar_UInt128_uint64_to_uint128(c1));
+ uint64_t tmp2 = FStar_UInt128_uint128_to_uint64(l_1) & (uint64_t)0x7ffffffffffffU;
+ uint64_t c2 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_1, (uint32_t)51U));
+ FStar_UInt128_uint128 l_2 = FStar_UInt128_add(tmp_w3, FStar_UInt128_uint64_to_uint128(c2));
+ uint64_t tmp3 = FStar_UInt128_uint128_to_uint64(l_2) & (uint64_t)0x7ffffffffffffU;
+ uint64_t c3 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_2, (uint32_t)51U));
+ FStar_UInt128_uint128 l_3 = FStar_UInt128_add(tmp_w4, FStar_UInt128_uint64_to_uint128(c3));
+ uint64_t tmp4 = FStar_UInt128_uint128_to_uint64(l_3) & (uint64_t)0x7ffffffffffffU;
+ uint64_t c4 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_3, (uint32_t)51U));
+ uint64_t l_4 = tmp0 + c4 * (uint64_t)19U; /* top carry re-enters limb 0 multiplied by 19 */
+ uint64_t tmp0_ = l_4 & (uint64_t)0x7ffffffffffffU;
+ uint64_t c5 = l_4 >> (uint32_t)51U;
+ uint64_t o0 = tmp0_;
+ uint64_t o1 = tmp1 + c5; /* final carry lands in limb 1, which is not masked again */
+ uint64_t o2 = tmp2;
+ uint64_t o3 = tmp3;
+ uint64_t o4 = tmp4;
+ out[0U] = o0;
+ out[1U] = o1;
+ out[2U] = o2;
+ out[3U] = o3;
+ out[4U] = o4;
+}
+
+static inline void /* fsqr0: squares a 5-limb value using pre-doubled / pre-scaled limbs so each cross term is multiplied once. */
+fsqr0(uint64_t *out, uint64_t *f) /* out: 5 limbs written; f: 5 limbs read */
+{
+ uint64_t f0 = f[0U]; /* inputs are loaded before any store, so out may be the same array as f */
+ uint64_t f1 = f[1U];
+ uint64_t f2 = f[2U];
+ uint64_t f3 = f[3U];
+ uint64_t f4 = f[4U];
+ uint64_t d0 = (uint64_t)2U * f0; /* d0 = 2*f0 and d1 = 2*f1: doubled limbs for the symmetric cross terms */
+ uint64_t d1 = (uint64_t)2U * f1;
+ uint64_t d2 = (uint64_t)38U * f2; /* 38 = 2 * 19: doubling combined with the factor-19 fold */
+ uint64_t d3 = (uint64_t)19U * f3;
+ uint64_t d419 = (uint64_t)19U * f4;
+ uint64_t d4 = (uint64_t)2U * d419; /* d4 = 38 * f4 */
+ FStar_UInt128_uint128
+ s0 = /* s0 = f0*f0 + 38*f4*f1 + 38*f2*f3 */
+ FStar_UInt128_add(FStar_UInt128_add(FStar_UInt128_mul_wide(f0, f0),
+ FStar_UInt128_mul_wide(d4, f1)),
+ FStar_UInt128_mul_wide(d2, f3));
+ FStar_UInt128_uint128
+ s1 = /* s1 = 2*f0*f1 + 38*f4*f2 + 19*f3*f3 */
+ FStar_UInt128_add(FStar_UInt128_add(FStar_UInt128_mul_wide(d0, f1),
+ FStar_UInt128_mul_wide(d4, f2)),
+ FStar_UInt128_mul_wide(d3, f3));
+ FStar_UInt128_uint128
+ s2 = /* s2 = 2*f0*f2 + f1*f1 + 38*f4*f3 */
+ FStar_UInt128_add(FStar_UInt128_add(FStar_UInt128_mul_wide(d0, f2),
+ FStar_UInt128_mul_wide(f1, f1)),
+ FStar_UInt128_mul_wide(d4, f3));
+ FStar_UInt128_uint128
+ s3 = /* s3 = 2*f0*f3 + 2*f1*f2 + 19*f4*f4 */
+ FStar_UInt128_add(FStar_UInt128_add(FStar_UInt128_mul_wide(d0, f3),
+ FStar_UInt128_mul_wide(d1, f2)),
+ FStar_UInt128_mul_wide(f4, d419));
+ FStar_UInt128_uint128
+ s4 = /* s4 = 2*f0*f4 + 2*f1*f3 + f2*f2 */
+ FStar_UInt128_add(FStar_UInt128_add(FStar_UInt128_mul_wide(d0, f4),
+ FStar_UInt128_mul_wide(d1, f3)),
+ FStar_UInt128_mul_wide(f2, f2));
+ FStar_UInt128_uint128 o00 = s0; /* o00..o40: the five column sums fed to the carry chain */
+ FStar_UInt128_uint128 o10 = s1;
+ FStar_UInt128_uint128 o20 = s2;
+ FStar_UInt128_uint128 o30 = s3;
+ FStar_UInt128_uint128 o40 = s4;
+ FStar_UInt128_uint128
+ l_ = FStar_UInt128_add(o00, FStar_UInt128_uint64_to_uint128((uint64_t)0U)); /* carry chain starts with a zero carry-in */
+ uint64_t tmp0 = FStar_UInt128_uint128_to_uint64(l_) & (uint64_t)0x7ffffffffffffU; /* low 51 bits stay in the limb */
+ uint64_t c0 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_, (uint32_t)51U)); /* bits above 51 carry into the next column */
+ FStar_UInt128_uint128 l_0 = FStar_UInt128_add(o10, FStar_UInt128_uint64_to_uint128(c0));
+ uint64_t tmp1 = FStar_UInt128_uint128_to_uint64(l_0) & (uint64_t)0x7ffffffffffffU;
+ uint64_t c1 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_0, (uint32_t)51U));
+ FStar_UInt128_uint128 l_1 = FStar_UInt128_add(o20, FStar_UInt128_uint64_to_uint128(c1));
+ uint64_t tmp2 = FStar_UInt128_uint128_to_uint64(l_1) & (uint64_t)0x7ffffffffffffU;
+ uint64_t c2 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_1, (uint32_t)51U));
+ FStar_UInt128_uint128 l_2 = FStar_UInt128_add(o30, FStar_UInt128_uint64_to_uint128(c2));
+ uint64_t tmp3 = FStar_UInt128_uint128_to_uint64(l_2) & (uint64_t)0x7ffffffffffffU;
+ uint64_t c3 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_2, (uint32_t)51U));
+ FStar_UInt128_uint128 l_3 = FStar_UInt128_add(o40, FStar_UInt128_uint64_to_uint128(c3));
+ uint64_t tmp4 = FStar_UInt128_uint128_to_uint64(l_3) & (uint64_t)0x7ffffffffffffU;
+ uint64_t c4 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_3, (uint32_t)51U));
+ uint64_t l_4 = tmp0 + c4 * (uint64_t)19U; /* top carry re-enters limb 0 multiplied by 19 */
+ uint64_t tmp0_ = l_4 & (uint64_t)0x7ffffffffffffU;
+ uint64_t c5 = l_4 >> (uint32_t)51U;
+ uint64_t o0 = tmp0_;
+ uint64_t o1 = tmp1 + c5; /* final carry lands in limb 1, which is not masked again */
+ uint64_t o2 = tmp2;
+ uint64_t o3 = tmp3;
+ uint64_t o4 = tmp4;
+ out[0U] = o0;
+ out[1U] = o1;
+ out[2U] = o2;
+ out[3U] = o3;
+ out[4U] = o4;
+}
+
+static inline void /* fsqr20: two independent 5-limb squarings in one call: out[0..4] from f[0..4], out[5..9] from f[5..9]. */
+fsqr20(uint64_t *out, uint64_t *f) /* out: 10 limbs written; f: 10 limbs read */
+{
+ uint64_t f10 = f[0U]; /* all ten inputs are loaded before any store, so out may be the same array as f */
+ uint64_t f11 = f[1U];
+ uint64_t f12 = f[2U];
+ uint64_t f13 = f[3U];
+ uint64_t f14 = f[4U];
+ uint64_t f20 = f[5U]; /* f2x: the second value, taken from the upper half of f */
+ uint64_t f21 = f[6U];
+ uint64_t f22 = f[7U];
+ uint64_t f23 = f[8U];
+ uint64_t f24 = f[9U];
+ uint64_t d00 = (uint64_t)2U * f10; /* first squaring: doubled limbs for the symmetric cross terms */
+ uint64_t d10 = (uint64_t)2U * f11;
+ uint64_t d20 = (uint64_t)38U * f12; /* 38 = 2 * 19: doubling combined with the factor-19 fold */
+ uint64_t d30 = (uint64_t)19U * f13;
+ uint64_t d4190 = (uint64_t)19U * f14;
+ uint64_t d40 = (uint64_t)2U * d4190; /* d40 = 38 * f14 */
+ FStar_UInt128_uint128
+ s00 = /* column 0: f10*f10 + 38*f14*f11 + 38*f12*f13 */
+ FStar_UInt128_add(FStar_UInt128_add(FStar_UInt128_mul_wide(f10, f10),
+ FStar_UInt128_mul_wide(d40, f11)),
+ FStar_UInt128_mul_wide(d20, f13));
+ FStar_UInt128_uint128
+ s10 = /* column 1: 2*f10*f11 + 38*f14*f12 + 19*f13*f13 */
+ FStar_UInt128_add(FStar_UInt128_add(FStar_UInt128_mul_wide(d00, f11),
+ FStar_UInt128_mul_wide(d40, f12)),
+ FStar_UInt128_mul_wide(d30, f13));
+ FStar_UInt128_uint128
+ s20 = /* column 2: 2*f10*f12 + f11*f11 + 38*f14*f13 */
+ FStar_UInt128_add(FStar_UInt128_add(FStar_UInt128_mul_wide(d00, f12),
+ FStar_UInt128_mul_wide(f11, f11)),
+ FStar_UInt128_mul_wide(d40, f13));
+ FStar_UInt128_uint128
+ s30 = /* column 3: 2*f10*f13 + 2*f11*f12 + 19*f14*f14 */
+ FStar_UInt128_add(FStar_UInt128_add(FStar_UInt128_mul_wide(d00, f13),
+ FStar_UInt128_mul_wide(d10, f12)),
+ FStar_UInt128_mul_wide(f14, d4190));
+ FStar_UInt128_uint128
+ s40 = /* column 4: 2*f10*f14 + 2*f11*f13 + f12*f12 */
+ FStar_UInt128_add(FStar_UInt128_add(FStar_UInt128_mul_wide(d00, f14),
+ FStar_UInt128_mul_wide(d10, f13)),
+ FStar_UInt128_mul_wide(f12, f12));
+ FStar_UInt128_uint128 o100 = s00; /* o100..o140: column sums of the first squaring */
+ FStar_UInt128_uint128 o110 = s10;
+ FStar_UInt128_uint128 o120 = s20;
+ FStar_UInt128_uint128 o130 = s30;
+ FStar_UInt128_uint128 o140 = s40;
+ uint64_t d0 = (uint64_t)2U * f20; /* second squaring: same scheme on f[5..9] */
+ uint64_t d1 = (uint64_t)2U * f21;
+ uint64_t d2 = (uint64_t)38U * f22;
+ uint64_t d3 = (uint64_t)19U * f23;
+ uint64_t d419 = (uint64_t)19U * f24;
+ uint64_t d4 = (uint64_t)2U * d419; /* d4 = 38 * f24 */
+ FStar_UInt128_uint128
+ s0 = /* column 0: f20*f20 + 38*f24*f21 + 38*f22*f23 */
+ FStar_UInt128_add(FStar_UInt128_add(FStar_UInt128_mul_wide(f20, f20),
+ FStar_UInt128_mul_wide(d4, f21)),
+ FStar_UInt128_mul_wide(d2, f23));
+ FStar_UInt128_uint128
+ s1 = /* column 1: 2*f20*f21 + 38*f24*f22 + 19*f23*f23 */
+ FStar_UInt128_add(FStar_UInt128_add(FStar_UInt128_mul_wide(d0, f21),
+ FStar_UInt128_mul_wide(d4, f22)),
+ FStar_UInt128_mul_wide(d3, f23));
+ FStar_UInt128_uint128
+ s2 = /* column 2: 2*f20*f22 + f21*f21 + 38*f24*f23 */
+ FStar_UInt128_add(FStar_UInt128_add(FStar_UInt128_mul_wide(d0, f22),
+ FStar_UInt128_mul_wide(f21, f21)),
+ FStar_UInt128_mul_wide(d4, f23));
+ FStar_UInt128_uint128
+ s3 = /* column 3: 2*f20*f23 + 2*f21*f22 + 19*f24*f24 */
+ FStar_UInt128_add(FStar_UInt128_add(FStar_UInt128_mul_wide(d0, f23),
+ FStar_UInt128_mul_wide(d1, f22)),
+ FStar_UInt128_mul_wide(f24, d419));
+ FStar_UInt128_uint128
+ s4 = /* column 4: 2*f20*f24 + 2*f21*f23 + f22*f22 */
+ FStar_UInt128_add(FStar_UInt128_add(FStar_UInt128_mul_wide(d0, f24),
+ FStar_UInt128_mul_wide(d1, f23)),
+ FStar_UInt128_mul_wide(f22, f22));
+ FStar_UInt128_uint128 o200 = s0; /* o200..o240: column sums of the second squaring */
+ FStar_UInt128_uint128 o210 = s1;
+ FStar_UInt128_uint128 o220 = s2;
+ FStar_UInt128_uint128 o230 = s3;
+ FStar_UInt128_uint128 o240 = s4;
+ FStar_UInt128_uint128
+ l_ = FStar_UInt128_add(o100, FStar_UInt128_uint64_to_uint128((uint64_t)0U)); /* carry chain for the first squaring, zero carry-in */
+ uint64_t tmp00 = FStar_UInt128_uint128_to_uint64(l_) & (uint64_t)0x7ffffffffffffU; /* low 51 bits stay in the limb */
+ uint64_t c00 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_, (uint32_t)51U)); /* bits above 51 carry into the next column */
+ FStar_UInt128_uint128 l_0 = FStar_UInt128_add(o110, FStar_UInt128_uint64_to_uint128(c00));
+ uint64_t tmp10 = FStar_UInt128_uint128_to_uint64(l_0) & (uint64_t)0x7ffffffffffffU;
+ uint64_t c10 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_0, (uint32_t)51U));
+ FStar_UInt128_uint128 l_1 = FStar_UInt128_add(o120, FStar_UInt128_uint64_to_uint128(c10));
+ uint64_t tmp20 = FStar_UInt128_uint128_to_uint64(l_1) & (uint64_t)0x7ffffffffffffU;
+ uint64_t c20 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_1, (uint32_t)51U));
+ FStar_UInt128_uint128 l_2 = FStar_UInt128_add(o130, FStar_UInt128_uint64_to_uint128(c20));
+ uint64_t tmp30 = FStar_UInt128_uint128_to_uint64(l_2) & (uint64_t)0x7ffffffffffffU;
+ uint64_t c30 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_2, (uint32_t)51U));
+ FStar_UInt128_uint128 l_3 = FStar_UInt128_add(o140, FStar_UInt128_uint64_to_uint128(c30));
+ uint64_t tmp40 = FStar_UInt128_uint128_to_uint64(l_3) & (uint64_t)0x7ffffffffffffU;
+ uint64_t c40 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_3, (uint32_t)51U));
+ uint64_t l_4 = tmp00 + c40 * (uint64_t)19U; /* top carry re-enters limb 0 multiplied by 19 */
+ uint64_t tmp0_ = l_4 & (uint64_t)0x7ffffffffffffU;
+ uint64_t c50 = l_4 >> (uint32_t)51U;
+ uint64_t o101 = tmp0_;
+ uint64_t o111 = tmp10 + c50; /* final carry lands in limb 1, which is not masked again */
+ uint64_t o121 = tmp20;
+ uint64_t o131 = tmp30;
+ uint64_t o141 = tmp40;
+ FStar_UInt128_uint128
+ l_5 = FStar_UInt128_add(o200, FStar_UInt128_uint64_to_uint128((uint64_t)0U)); /* identical carry chain for the second squaring */
+ uint64_t tmp0 = FStar_UInt128_uint128_to_uint64(l_5) & (uint64_t)0x7ffffffffffffU;
+ uint64_t c0 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_5, (uint32_t)51U));
+ FStar_UInt128_uint128 l_6 = FStar_UInt128_add(o210, FStar_UInt128_uint64_to_uint128(c0));
+ uint64_t tmp1 = FStar_UInt128_uint128_to_uint64(l_6) & (uint64_t)0x7ffffffffffffU;
+ uint64_t c1 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_6, (uint32_t)51U));
+ FStar_UInt128_uint128 l_7 = FStar_UInt128_add(o220, FStar_UInt128_uint64_to_uint128(c1));
+ uint64_t tmp2 = FStar_UInt128_uint128_to_uint64(l_7) & (uint64_t)0x7ffffffffffffU;
+ uint64_t c2 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_7, (uint32_t)51U));
+ FStar_UInt128_uint128 l_8 = FStar_UInt128_add(o230, FStar_UInt128_uint64_to_uint128(c2));
+ uint64_t tmp3 = FStar_UInt128_uint128_to_uint64(l_8) & (uint64_t)0x7ffffffffffffU;
+ uint64_t c3 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_8, (uint32_t)51U));
+ FStar_UInt128_uint128 l_9 = FStar_UInt128_add(o240, FStar_UInt128_uint64_to_uint128(c3));
+ uint64_t tmp4 = FStar_UInt128_uint128_to_uint64(l_9) & (uint64_t)0x7ffffffffffffU;
+ uint64_t c4 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_9, (uint32_t)51U));
+ uint64_t l_10 = tmp0 + c4 * (uint64_t)19U;
+ uint64_t tmp0_0 = l_10 & (uint64_t)0x7ffffffffffffU;
+ uint64_t c5 = l_10 >> (uint32_t)51U;
+ uint64_t o201 = tmp0_0;
+ uint64_t o211 = tmp1 + c5;
+ uint64_t o221 = tmp2;
+ uint64_t o231 = tmp3;
+ uint64_t o241 = tmp4;
+ uint64_t o10 = o101; /* plain renames of the ten result limbs before they are stored */
+ uint64_t o11 = o111;
+ uint64_t o12 = o121;
+ uint64_t o13 = o131;
+ uint64_t o14 = o141;
+ uint64_t o20 = o201;
+ uint64_t o21 = o211;
+ uint64_t o22 = o221;
+ uint64_t o23 = o231;
+ uint64_t o24 = o241;
+ out[0U] = o10; /* first square -> out[0..4] */
+ out[1U] = o11;
+ out[2U] = o12;
+ out[3U] = o13;
+ out[4U] = o14;
+ out[5U] = o20; /* second square -> out[5..9] */
+ out[6U] = o21;
+ out[7U] = o22;
+ out[8U] = o23;
+ out[9U] = o24;
+}
+
+static void /* store_felem: carries a 5-limb value, conditionally subtracts the constants (2^51-19, 2^51-1 x4), and packs the limbs into four 64-bit words. */
+store_felem(uint64_t *u64s, uint64_t *f) /* u64s: 4 words written; f: 5 limbs read (f itself is not modified) */
+{
+ uint64_t f0 = f[0U];
+ uint64_t f1 = f[1U];
+ uint64_t f2 = f[2U];
+ uint64_t f3 = f[3U];
+ uint64_t f4 = f[4U];
+ uint64_t l_ = f0 + (uint64_t)0U; /* carry pass over the limbs, starting with a zero carry-in */
+ uint64_t tmp0 = l_ & (uint64_t)0x7ffffffffffffU; /* low 51 bits stay in the limb */
+ uint64_t c0 = l_ >> (uint32_t)51U; /* bits above 51 move to the next limb */
+ uint64_t l_0 = f1 + c0;
+ uint64_t tmp1 = l_0 & (uint64_t)0x7ffffffffffffU;
+ uint64_t c1 = l_0 >> (uint32_t)51U;
+ uint64_t l_1 = f2 + c1;
+ uint64_t tmp2 = l_1 & (uint64_t)0x7ffffffffffffU;
+ uint64_t c2 = l_1 >> (uint32_t)51U;
+ uint64_t l_2 = f3 + c2;
+ uint64_t tmp3 = l_2 & (uint64_t)0x7ffffffffffffU;
+ uint64_t c3 = l_2 >> (uint32_t)51U;
+ uint64_t l_3 = f4 + c3;
+ uint64_t tmp4 = l_3 & (uint64_t)0x7ffffffffffffU;
+ uint64_t c4 = l_3 >> (uint32_t)51U;
+ uint64_t l_4 = tmp0 + c4 * (uint64_t)19U; /* carry out of the top limb re-enters limb 0 multiplied by 19 */
+ uint64_t tmp0_ = l_4 & (uint64_t)0x7ffffffffffffU;
+ uint64_t c5 = l_4 >> (uint32_t)51U;
+ uint64_t f01 = tmp0_;
+ uint64_t f11 = tmp1 + c5;
+ uint64_t f21 = tmp2;
+ uint64_t f31 = tmp3;
+ uint64_t f41 = tmp4;
+ uint64_t m0 = FStar_UInt64_gte_mask(f01, (uint64_t)0x7ffffffffffedU); /* presumably all-ones when f01 >= 2^51 - 19, else zero -- helper is defined elsewhere, confirm */
+ uint64_t m1 = FStar_UInt64_eq_mask(f11, (uint64_t)0x7ffffffffffffU); /* presumably all-ones when the limb equals 2^51 - 1, else zero -- confirm */
+ uint64_t m2 = FStar_UInt64_eq_mask(f21, (uint64_t)0x7ffffffffffffU);
+ uint64_t m3 = FStar_UInt64_eq_mask(f31, (uint64_t)0x7ffffffffffffU);
+ uint64_t m4 = FStar_UInt64_eq_mask(f41, (uint64_t)0x7ffffffffffffU);
+ uint64_t mask = (((m0 & m1) & m2) & m3) & m4; /* set only when all five limb conditions hold together */
+ uint64_t f0_ = f01 - (mask & (uint64_t)0x7ffffffffffedU); /* masked subtraction: no branch on the limb values */
+ uint64_t f1_ = f11 - (mask & (uint64_t)0x7ffffffffffffU);
+ uint64_t f2_ = f21 - (mask & (uint64_t)0x7ffffffffffffU);
+ uint64_t f3_ = f31 - (mask & (uint64_t)0x7ffffffffffffU);
+ uint64_t f4_ = f41 - (mask & (uint64_t)0x7ffffffffffffU);
+ uint64_t f02 = f0_;
+ uint64_t f12 = f1_;
+ uint64_t f22 = f2_;
+ uint64_t f32 = f3_;
+ uint64_t f42 = f4_;
+ uint64_t o00 = f02 | f12 << (uint32_t)51U; /* packing: limb i is placed at bit offset 51*i of the four-word output */
+ uint64_t o10 = f12 >> (uint32_t)13U | f22 << (uint32_t)38U; /* 13 + 51 = 64: remainder of limb 1, then limb 2 */
+ uint64_t o20 = f22 >> (uint32_t)26U | f32 << (uint32_t)25U; /* remainder of limb 2, then limb 3 */
+ uint64_t o30 = f32 >> (uint32_t)39U | f42 << (uint32_t)12U; /* remainder of limb 3, then limb 4 */
+ uint64_t o0 = o00;
+ uint64_t o1 = o10;
+ uint64_t o2 = o20;
+ uint64_t o3 = o30;
+ u64s[0U] = o0;
+ u64s[1U] = o1;
+ u64s[2U] = o2;
+ u64s[3U] = o3;
+}
+
+static inline void /* cswap20: conditionally exchanges the first 10 words of p1 and p2 using a mask instead of a branch. */
+cswap20(uint64_t bit, uint64_t *p1, uint64_t *p2) /* bit: 1 swaps, 0 leaves both arrays unchanged (other values are not handled specially); p1, p2: 10 words each, updated in place */
+{
+ uint64_t mask = (uint64_t)0U - bit; /* 0 - 1 wraps to all-ones; 0 - 0 stays zero */
+ for (uint32_t i = (uint32_t)0U; i < (uint32_t)10U; i++) {
+ uint64_t dummy = mask & (p1[i] ^ p2[i]); /* the XOR difference when swapping, zero otherwise */
+ p1[i] = p1[i] ^ dummy; /* XOR with the difference turns p1[i] into the old p2[i] */
+ p2[i] = p2[i] ^ dummy; /* and p2[i] into the old p1[i]; dummy was computed before either store */
+ }
+}
+
+static const uint8_t g25519[32U] = { (uint8_t)9U }; /* 32-byte constant: byte 0 is 9, the other 31 bytes are zero-initialised; presumably the encoded base point -- its users are not visible here, confirm */
+
+static void /* point_add_and_double: one combined step on the pair (nq, nq_p1) held in p01_tmp1; the results overwrite both in place. */
+point_add_and_double(uint64_t *q, uint64_t *p01_tmp1, FStar_UInt128_uint128 *tmp2) /* q: first 5 limbs read as x1; p01_tmp1: 40 words = nq[10] | nq_p1[10] | scratch[20]; tmp2: not used in this body */
+{
+ uint64_t *nq = p01_tmp1; /* nq = (x2, z2) at words 0..9 */
+ uint64_t *nq_p1 = p01_tmp1 + (uint32_t)10U; /* nq_p1 = (x3, z3) at words 10..19 */
+ uint64_t *tmp1 = p01_tmp1 + (uint32_t)20U; /* scratch area at words 20..39 */
+ uint64_t *x1 = q;
+ uint64_t *x2 = nq;
+ uint64_t *z2 = nq + (uint32_t)5U;
+ uint64_t *z3 = nq_p1 + (uint32_t)5U;
+ uint64_t *a = tmp1;
+ uint64_t *b = tmp1 + (uint32_t)5U;
+ uint64_t *ab = tmp1; /* ab views a and b as one 10-limb pair */
+ uint64_t *dc = tmp1 + (uint32_t)10U; /* dc views d and c as one 10-limb pair */
+ fadd0(a, x2, z2); /* a = x2 + z2 */
+ fsub0(b, x2, z2); /* b = x2 - z2 (with fsub0's fixed offset) */
+ uint64_t *x3 = nq_p1;
+ uint64_t *z31 = nq_p1 + (uint32_t)5U; /* same address as z3 */
+ uint64_t *d0 = dc;
+ uint64_t *c0 = dc + (uint32_t)5U;
+ fadd0(c0, x3, z31); /* c0 = x3 + z3 */
+ fsub0(d0, x3, z31); /* d0 = x3 - z3 */
+ fmul20(dc, dc, ab); /* paired multiply in place: d0 = d0 * a, c0 = c0 * b */
+ fadd0(x3, d0, c0); /* x3 = d0 + c0 */
+ fsub0(z31, d0, c0); /* z3 = d0 - c0 */
+ uint64_t *a1 = tmp1; /* the scratch words are re-labelled for the doubling part */
+ uint64_t *b1 = tmp1 + (uint32_t)5U;
+ uint64_t *d = tmp1 + (uint32_t)10U;
+ uint64_t *c = tmp1 + (uint32_t)15U;
+ uint64_t *ab1 = tmp1;
+ uint64_t *dc1 = tmp1 + (uint32_t)10U;
+ fsqr20(dc1, ab1); /* d = a^2, c = b^2 */
+ fsqr20(nq_p1, nq_p1); /* x3 = x3^2, z3 = z3^2 */
+ a1[0U] = c[0U]; /* a1 = copy of c (b^2) before c is overwritten below */
+ a1[1U] = c[1U];
+ a1[2U] = c[2U];
+ a1[3U] = c[3U];
+ a1[4U] = c[4U];
+ fsub0(c, d, c); /* c = d - c, i.e. a^2 - b^2 */
+ fmul1(b1, c, (uint64_t)121665U); /* b1 = 121665 * c */
+ fadd0(b1, b1, d); /* b1 = b1 + d */
+ fmul20(nq, dc1, ab1); /* x2 = d * a1, z2 = c * b1 */
+ fmul0(z3, z3, x1); /* z3 = z3 * x1 */
+}
+
+static void /* point_double: replaces nq = (x2, z2) in place using 20 words of caller-provided scratch. */
+point_double(uint64_t *nq, uint64_t *tmp1, FStar_UInt128_uint128 *tmp2) /* nq: 10 limbs (x2 | z2), overwritten; tmp1: 20 scratch words; tmp2: not used in this body */
+{
+ uint64_t *x2 = nq;
+ uint64_t *z2 = nq + (uint32_t)5U;
+ uint64_t *a = tmp1;
+ uint64_t *b = tmp1 + (uint32_t)5U;
+ uint64_t *d = tmp1 + (uint32_t)10U;
+ uint64_t *c = tmp1 + (uint32_t)15U;
+ uint64_t *ab = tmp1; /* ab views a and b as one 10-limb pair */
+ uint64_t *dc = tmp1 + (uint32_t)10U; /* dc views d and c as one 10-limb pair */
+ fadd0(a, x2, z2); /* a = x2 + z2 */
+ fsub0(b, x2, z2); /* b = x2 - z2 (with fsub0's fixed offset) */
+ fsqr20(dc, ab); /* d = a^2, c = b^2 */
+ a[0U] = c[0U]; /* a = copy of c (b^2) before c is overwritten below */
+ a[1U] = c[1U];
+ a[2U] = c[2U];
+ a[3U] = c[3U];
+ a[4U] = c[4U];
+ fsub0(c, d, c); /* c = d - c, i.e. a^2 - b^2 */
+ fmul1(b, c, (uint64_t)121665U); /* b = 121665 * c */
+ fadd0(b, b, d); /* b = b + d */
+ fmul20(nq, dc, ab); /* x2 = d * a, z2 = c * b */
+}
+/*
+ * Ladder loop driving point_add_and_double / point_double over the bits of
+ * `key`, starting from the point in `init`; the resulting 10 limbs are
+ * copied to `out`.
+ *
+ * out:  receives 10 limbs (the first 10 limbs of the working buffer).
+ * key:  byte string; the loop reads bits 253 down to 3 (bit n is taken from
+ *       key[n / 8] >> (n % 8)). No other bits of key are read here.
+ * init: 10 limbs, copied into the second working point and also passed as
+ *       `q` to every point_add_and_double call. The caller in this file
+ *       passes the same buffer for out and init; out is only written by the
+ *       final memcpy.
+ *
+ * Working buffer p01_tmp1_swap (41 limbs): [0,10) first point, initialised to
+ * x = 1, z = 0; [10,20) second point, initialised from init; [20,40)
+ * scratch for the point routines; [40] the previous swap bit.
+ *
+ * NOTE(review): the unconditional first swap/step and the three trailing
+ * point_double calls take the place of the key bits that are never read
+ * (bit 254 treated as 1, bits 2..0 treated as 0), which is consistent with
+ * a clamped X25519 scalar -- confirm intent.
+ */
+static void
+montgomery_ladder(uint64_t *out, uint8_t *key, uint64_t *init)
+{
+ FStar_UInt128_uint128 tmp2[10U];
+ for (uint32_t _i = 0U; _i < (uint32_t)10U; ++_i)
+ tmp2[_i] = FStar_UInt128_uint64_to_uint128((uint64_t)0U);
+ uint64_t p01_tmp1_swap[41U] = { 0U };
+ uint64_t *p0 = p01_tmp1_swap;
+ uint64_t *p01 = p01_tmp1_swap;
+ uint64_t *p03 = p01;
+ uint64_t *p11 = p01 + (uint32_t)10U;
+ memcpy(p11, init, (uint32_t)10U * sizeof(init[0U])); /* second point = init */
+ uint64_t *x0 = p03;
+ uint64_t *z0 = p03 + (uint32_t)5U;
+ x0[0U] = (uint64_t)1U; /* first point: x = 1, z = 0 */
+ x0[1U] = (uint64_t)0U;
+ x0[2U] = (uint64_t)0U;
+ x0[3U] = (uint64_t)0U;
+ x0[4U] = (uint64_t)0U;
+ z0[0U] = (uint64_t)0U;
+ z0[1U] = (uint64_t)0U;
+ z0[2U] = (uint64_t)0U;
+ z0[3U] = (uint64_t)0U;
+ z0[4U] = (uint64_t)0U;
+ uint64_t *p01_tmp1 = p01_tmp1_swap;
+ uint64_t *p01_tmp11 = p01_tmp1_swap;
+ uint64_t *nq1 = p01_tmp1_swap;
+ uint64_t *nq_p11 = p01_tmp1_swap + (uint32_t)10U;
+ uint64_t *swap = p01_tmp1_swap + (uint32_t)40U;
+ cswap20((uint64_t)1U, nq1, nq_p11); /* first step always swaps */
+ point_add_and_double(init, p01_tmp11, tmp2);
+ swap[0U] = (uint64_t)1U; /* remember that the points are currently swapped */
+ for (uint32_t i = (uint32_t)0U; i < (uint32_t)251U; i++) {
+ uint64_t *p01_tmp12 = p01_tmp1_swap;
+ uint64_t *swap1 = p01_tmp1_swap + (uint32_t)40U;
+ uint64_t *nq2 = p01_tmp12;
+ uint64_t *nq_p12 = p01_tmp12 + (uint32_t)10U;
+ uint64_t
+ bit =
+ (uint64_t)(key[((uint32_t)253U - i) / (uint32_t)8U] >> ((uint32_t)253U - i) % (uint32_t)8U & (uint8_t)1U);
+ uint64_t sw = swap1[0U] ^ bit; /* swap only when this bit differs from the previous one */
+ cswap20(sw, nq2, nq_p12);
+ point_add_and_double(init, p01_tmp12, tmp2);
+ swap1[0U] = bit;
+ }
+ uint64_t sw = swap[0U];
+ cswap20(sw, nq1, nq_p11); /* undo the swap left pending by the last iteration */
+ uint64_t *nq10 = p01_tmp1;
+ uint64_t *tmp1 = p01_tmp1 + (uint32_t)20U;
+ point_double(nq10, tmp1, tmp2);
+ point_double(nq10, tmp1, tmp2);
+ point_double(nq10, tmp1, tmp2);
+ memcpy(out, p0, (uint32_t)10U * sizeof(p0[0U]));
+}
+
+/*
+ * Applies fsqr0 n times in a row: the first call reads `inp` and writes `o`,
+ * each of the remaining n - 1 calls squares `o` in place.
+ *
+ * o:   destination element.
+ * inp: source element (the calls in finv sometimes pass the same buffer as o).
+ * tmp: not referenced in this function.
+ * n:   number of squarings; every call visible in finv passes n >= 1.
+ *      n - 1 is computed in uint32_t, so n == 0 would wrap around.
+ */
+static void
+fsquare_times(uint64_t *o, uint64_t *inp, FStar_UInt128_uint128 *tmp, uint32_t n)
+{
+    uint32_t remaining = n - (uint32_t)1U;
+    fsqr0(o, inp);
+    while (remaining != (uint32_t)0U) {
+        fsqr0(o, o);
+        remaining--;
+    }
+}
+/*
+ * Fixed chain of fsquare_times / fmul0 calls that writes a power of `i`
+ * into `o`.
+ *
+ * o:   5-limb destination.
+ * i:   5-limb input element.
+ * tmp: forwarded to fsquare_times, which does not reference it.
+ *
+ * Scratch t1 (20 limbs): a = [0,5), b = [5,10), c = [10,15), t00 = [15,20).
+ *
+ * NOTE(review): the exponent annotations below assume fsquare_times(x, y, _, n)
+ * sets x = y^(2^n) and fmul0(x, y, z) sets x = y * z in the field (fsqr0 and
+ * fmul0 are defined outside this excerpt). Under that assumption the final
+ * exponent is 2^255 - 21, i.e. p - 2 for p = 2^255 - 19, which is the
+ * inverse by Fermat's little theorem -- confirm against the field
+ * definitions.
+ */
+static void
+finv(uint64_t *o, uint64_t *i, FStar_UInt128_uint128 *tmp)
+{
+ uint64_t t1[20U] = { 0U };
+ uint64_t *a = t1;
+ uint64_t *b = t1 + (uint32_t)5U;
+ uint64_t *c = t1 + (uint32_t)10U;
+ uint64_t *t00 = t1 + (uint32_t)15U;
+ FStar_UInt128_uint128 *tmp1 = tmp;
+ fsquare_times(a, i, tmp1, (uint32_t)1U); /* a = i^2 */
+ fsquare_times(t00, a, tmp1, (uint32_t)2U); /* t00 = i^8 */
+ fmul0(b, t00, i); /* b = i^9 */
+ fmul0(a, b, a); /* a = i^11 */
+ fsquare_times(t00, a, tmp1, (uint32_t)1U); /* t00 = i^22 */
+ fmul0(b, t00, b); /* b = i^(2^5 - 1) */
+ fsquare_times(t00, b, tmp1, (uint32_t)5U);
+ fmul0(b, t00, b); /* b = i^(2^10 - 1) */
+ fsquare_times(t00, b, tmp1, (uint32_t)10U);
+ fmul0(c, t00, b); /* c = i^(2^20 - 1) */
+ fsquare_times(t00, c, tmp1, (uint32_t)20U);
+ fmul0(t00, t00, c); /* t00 = i^(2^40 - 1) */
+ fsquare_times(t00, t00, tmp1, (uint32_t)10U);
+ fmul0(b, t00, b); /* b = i^(2^50 - 1) */
+ fsquare_times(t00, b, tmp1, (uint32_t)50U);
+ fmul0(c, t00, b); /* c = i^(2^100 - 1) */
+ fsquare_times(t00, c, tmp1, (uint32_t)100U);
+ fmul0(t00, t00, c); /* t00 = i^(2^200 - 1) */
+ fsquare_times(t00, t00, tmp1, (uint32_t)50U);
+ fmul0(t00, t00, b); /* t00 = i^(2^250 - 1) */
+ fsquare_times(t00, t00, tmp1, (uint32_t)5U); /* t00 = i^(2^255 - 32) */
+ uint64_t *a0 = t1;
+ uint64_t *t0 = t1 + (uint32_t)15U;
+ fmul0(o, t0, a0); /* o = t00 * i^11 = i^(2^255 - 21) */
+}
+
+/*
+ * Serialises the point `i` into the 32 bytes at `o`.
+ *
+ * i: 10 limbs, read as x = i[0..4] and z = i[5..9].
+ * o: receives four little-endian 64-bit words (32 bytes).
+ *
+ * Steps: finv on z, fmul0 of that result with x, store_felem to pack the
+ * 5-limb product into four 64-bit words, then store64_le for each word.
+ */
+static void
+encode_point(uint8_t *o, uint64_t *i)
+{
+    uint64_t affine[5U] = { 0U };
+    uint64_t words[4U] = { 0U };
+    FStar_UInt128_uint128 wide[10U];
+    for (uint32_t k = 0U; k < (uint32_t)10U; ++k) {
+        wide[k] = FStar_UInt128_uint64_to_uint128((uint64_t)0U);
+    }
+    finv(affine, i + (uint32_t)5U, wide);
+    fmul0(affine, affine, i);
+    store_felem(words, affine);
+    uint32_t w = (uint32_t)0U;
+    while (w < (uint32_t)4U) {
+        store64_le(o + w * (uint32_t)8U, words[w]);
+        w++;
+    }
+}
+
+/*
+ * Scalar multiplication entry point: runs montgomery_ladder on the point
+ * decoded from `pub` with the key bytes `priv`, then encode_point writes the
+ * 32-byte result to `out`.
+ *
+ * out:  32 bytes written.
+ * priv: key bytes, handed to montgomery_ladder unchanged.
+ * pub:  32 bytes read as four little-endian 64-bit words; bit 255 is
+ *       cleared before use.
+ *
+ * The 255 remaining bits are split into five 51-bit limbs for x, and z is
+ * set to 1.
+ */
+void
+Hacl_Curve25519_51_scalarmult(uint8_t *out, uint8_t *priv, uint8_t *pub)
+{
+    uint64_t point[10U] = { 0U };
+    uint64_t words[4U] = { 0U };
+    uint32_t w = (uint32_t)0U;
+    while (w < (uint32_t)4U) {
+        uint8_t *src = pub + w * (uint32_t)8U;
+        words[w] = load64_le(src);
+        w++;
+    }
+    /* Discard the top bit of the 256-bit input. */
+    words[3U] = words[3U] & (uint64_t)0x7fffffffffffffffU;
+
+    const uint64_t w0 = words[0U];
+    const uint64_t w1 = words[1U];
+    const uint64_t w2 = words[2U];
+    const uint64_t w3 = words[3U];
+    uint64_t *x = point;
+    uint64_t *z = point + (uint32_t)5U;
+
+    /* z = 1 */
+    z[0U] = (uint64_t)1U;
+    for (uint32_t k = (uint32_t)1U; k < (uint32_t)5U; k++) {
+        z[k] = (uint64_t)0U;
+    }
+
+    /* Re-slice 4 x 64 bits into 5 x 51 bits. */
+    x[0U] = w0 & (uint64_t)0x7ffffffffffffU;
+    x[1U] = (w0 >> (uint32_t)51U) | ((w1 & (uint64_t)0x3fffffffffU) << (uint32_t)13U);
+    x[2U] = (w1 >> (uint32_t)38U) | ((w2 & (uint64_t)0x1ffffffU) << (uint32_t)26U);
+    x[3U] = (w2 >> (uint32_t)25U) | ((w3 & (uint64_t)0xfffU) << (uint32_t)39U);
+    x[4U] = w3 >> (uint32_t)12U;
+
+    montgomery_ladder(point, priv, point);
+    encode_point(out, point);
+}
+
+/*
+ * Derives the 32-byte value `pub` from `priv` by calling
+ * Hacl_Curve25519_51_scalarmult with a local copy of the constant g25519
+ * as the point input.
+ *
+ * pub:  32 bytes written.
+ * priv: key bytes, forwarded unchanged.
+ */
+void
+Hacl_Curve25519_51_secret_to_public(uint8_t *pub, uint8_t *priv)
+{
+    uint8_t basepoint[32U];
+    /* scalarmult takes a non-const pointer, so work on a stack copy. */
+    memcpy(basepoint, g25519, sizeof(basepoint));
+    Hacl_Curve25519_51_scalarmult(pub, priv, basepoint);
+}
+
+/*
+ * Runs Hacl_Curve25519_51_scalarmult(out, priv, pub) and reports whether the
+ * 32-byte result is usable.
+ *
+ * Returns false only when the AND of FStar_UInt8_eq_mask(out[k], 0) over all
+ * 32 bytes equals 0xFF, and true otherwise. `out` is written in both cases.
+ *
+ * NOTE(review): FStar_UInt8_eq_mask is declared but not defined in this
+ * excerpt; presumably it yields 0xFF for equal bytes and 0 otherwise, which
+ * makes the false case "every output byte is zero" -- confirm.
+ */
+bool
+Hacl_Curve25519_51_ecdh(uint8_t *out, uint8_t *priv, uint8_t *pub)
+{
+    Hacl_Curve25519_51_scalarmult(out, priv, pub);
+    uint8_t all_zero = (uint8_t)255U;
+    /* Accumulate over every byte; no early exit. */
+    for (uint32_t k = (uint32_t)0U; k < (uint32_t)32U; k++) {
+        all_zero &= FStar_UInt8_eq_mask(out[k], (uint8_t)0U);
+    }
+    return all_zero != (uint8_t)255U;
+}
diff --git a/security/nss/lib/freebl/verified/Hacl_Curve25519_51.h b/security/nss/lib/freebl/verified/Hacl_Curve25519_51.h
new file mode 100644
index 0000000000..05050739cf
--- /dev/null
+++ b/security/nss/lib/freebl/verified/Hacl_Curve25519_51.h
@@ -0,0 +1,41 @@
+/* MIT License
+ *
+ * Copyright (c) 2016-2020 INRIA, CMU and Microsoft Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "kremlin/internal/types.h"
+#include "kremlin/lowstar_endianness.h"
+#include <string.h>
+#include <stdbool.h>
+
+#ifndef __Hacl_Curve25519_51_H
+#define __Hacl_Curve25519_51_H
+
+#include "Hacl_Kremlib.h"
+/*
+ * Scalar multiplication. Reads 32 bytes from `pub` (the top bit of the last
+ * byte is ignored), uses the key bytes in `priv`, and writes 32 bytes to
+ * `out`.
+ */
+void Hacl_Curve25519_51_scalarmult(uint8_t *out, uint8_t *priv, uint8_t *pub);
+/*
+ * Calls Hacl_Curve25519_51_scalarmult with a fixed 32-byte point input
+ * (first byte 9, rest zero) and writes the 32-byte result to `pub`.
+ */
+void Hacl_Curve25519_51_secret_to_public(uint8_t *pub, uint8_t *priv);
+/*
+ * Calls Hacl_Curve25519_51_scalarmult(out, priv, pub), then checks the 32
+ * output bytes against zero. NOTE(review): presumably returns false exactly
+ * when every output byte is zero and true otherwise -- confirm against the
+ * definition of FStar_UInt8_eq_mask.
+ */
+bool Hacl_Curve25519_51_ecdh(uint8_t *out, uint8_t *priv, uint8_t *pub);
+
+#define __Hacl_Curve25519_51_H_DEFINED
+#endif
diff --git a/security/nss/lib/freebl/verified/Hacl_Kremlib.h b/security/nss/lib/freebl/verified/Hacl_Kremlib.h
new file mode 100644
index 0000000000..a2116220ff
--- /dev/null
+++ b/security/nss/lib/freebl/verified/Hacl_Kremlib.h
@@ -0,0 +1,51 @@
+/* MIT License
+ *
+ * Copyright (c) 2016-2020 INRIA, CMU and Microsoft Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "kremlin/internal/types.h"
+#include "kremlin/lowstar_endianness.h"
+#include <string.h>
+#include <stdbool.h>
+
+#ifndef __Hacl_Kremlib_H
+#define __Hacl_Kremlib_H
+/*
+ * Forward declarations only: none of the helpers below has a body in this
+ * header. NOTE(review): the definitions presumably come from the kremlin
+ * support headers -- confirm where each one is provided.
+ */
+static inline uint8_t FStar_UInt8_eq_mask(uint8_t a, uint8_t b);
+
+static inline uint64_t FStar_UInt64_eq_mask(uint64_t a, uint64_t b);
+
+static inline uint64_t FStar_UInt64_gte_mask(uint64_t a, uint64_t b);
+
+static inline FStar_UInt128_uint128
+FStar_UInt128_add(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b);
+
+static inline FStar_UInt128_uint128
+FStar_UInt128_shift_right(FStar_UInt128_uint128 a, uint32_t s);
+
+static inline FStar_UInt128_uint128 FStar_UInt128_uint64_to_uint128(uint64_t a);
+
+static inline uint64_t FStar_UInt128_uint128_to_uint64(FStar_UInt128_uint128 a);
+
+static inline FStar_UInt128_uint128 FStar_UInt128_mul_wide(uint64_t x, uint64_t y);
+
+#define __Hacl_Kremlib_H_DEFINED
+#endif
diff --git a/security/nss/lib/freebl/verified/Hacl_Poly1305_128.c b/security/nss/lib/freebl/verified/Hacl_Poly1305_128.c
new file mode 100644
index 0000000000..7fbd7fc4b7
--- /dev/null
+++ b/security/nss/lib/freebl/verified/Hacl_Poly1305_128.c
@@ -0,0 +1,1624 @@
+/* MIT License
+ *
+ * Copyright (c) 2016-2020 INRIA, CMU and Microsoft Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "Hacl_Poly1305_128.h"
+/*
+ * Loads 32 bytes from `b`, converts them to five-limb vector form and adds
+ * them into the accumulator.
+ *
+ * acc: five vec128 limbs, read and overwritten.
+ * b:   32 input bytes, read as two 16-byte little-endian vectors.
+ *
+ * The two vectors are regrouped with interleave_low64 / interleave_high64
+ * into `lo` and `hi`, then sliced into limbs of 26, 26, 26 (12 bits from lo
+ * plus 14 from hi), 26 and 24 bits. The constant 0x1000000 is OR-ed into the
+ * last limb, which sets bit 4 * 26 + 24 = 128 of the value held in each lane.
+ *
+ * NOTE(review): presumably each 64-bit lane carries one 16-byte block and
+ * the extra bit 128 is the Poly1305 block padding bit -- confirm against the
+ * Lib_IntVector_Intrinsics definitions.
+ */
+void
+Hacl_Impl_Poly1305_Field32xN_128_load_acc2(Lib_IntVector_Intrinsics_vec128 *acc, uint8_t *b)
+{
+ Lib_IntVector_Intrinsics_vec128 e[5U];
+ for (uint32_t _i = 0U; _i < (uint32_t)5U; ++_i)
+ e[_i] = Lib_IntVector_Intrinsics_vec128_zero;
+ Lib_IntVector_Intrinsics_vec128 b1 = Lib_IntVector_Intrinsics_vec128_load_le(b);
+ Lib_IntVector_Intrinsics_vec128
+ b2 = Lib_IntVector_Intrinsics_vec128_load_le(b + (uint32_t)16U);
+ Lib_IntVector_Intrinsics_vec128 lo = Lib_IntVector_Intrinsics_vec128_interleave_low64(b1, b2);
+ Lib_IntVector_Intrinsics_vec128 hi = Lib_IntVector_Intrinsics_vec128_interleave_high64(b1, b2);
+ Lib_IntVector_Intrinsics_vec128
+ f00 =
+ Lib_IntVector_Intrinsics_vec128_and(lo,
+ Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU));
+ Lib_IntVector_Intrinsics_vec128
+ f10 =
+ Lib_IntVector_Intrinsics_vec128_and(Lib_IntVector_Intrinsics_vec128_shift_right64(lo,
+ (uint32_t)26U),
+ Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU));
+ Lib_IntVector_Intrinsics_vec128
+ f20 =
+ Lib_IntVector_Intrinsics_vec128_or(Lib_IntVector_Intrinsics_vec128_shift_right64(lo,
+ (uint32_t)52U),
+ Lib_IntVector_Intrinsics_vec128_shift_left64(Lib_IntVector_Intrinsics_vec128_and(hi,
+ Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3fffU)),
+ (uint32_t)12U));
+ Lib_IntVector_Intrinsics_vec128
+ f30 =
+ Lib_IntVector_Intrinsics_vec128_and(Lib_IntVector_Intrinsics_vec128_shift_right64(hi,
+ (uint32_t)14U),
+ Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU));
+ Lib_IntVector_Intrinsics_vec128
+ f40 = Lib_IntVector_Intrinsics_vec128_shift_right64(hi, (uint32_t)40U);
+ Lib_IntVector_Intrinsics_vec128 f02 = f00;
+ Lib_IntVector_Intrinsics_vec128 f12 = f10;
+ Lib_IntVector_Intrinsics_vec128 f22 = f20;
+ Lib_IntVector_Intrinsics_vec128 f32 = f30;
+ Lib_IntVector_Intrinsics_vec128 f42 = f40;
+ e[0U] = f02;
+ e[1U] = f12;
+ e[2U] = f22;
+ e[3U] = f32;
+ e[4U] = f42;
+ uint64_t b10 = (uint64_t)0x1000000U; /* bit 24 of limb 4 = bit 128 overall */
+ Lib_IntVector_Intrinsics_vec128 mask = Lib_IntVector_Intrinsics_vec128_load64(b10);
+ Lib_IntVector_Intrinsics_vec128 f43 = e[4U];
+ e[4U] = Lib_IntVector_Intrinsics_vec128_or(f43, mask);
+ Lib_IntVector_Intrinsics_vec128 acc0 = acc[0U];
+ Lib_IntVector_Intrinsics_vec128 acc1 = acc[1U];
+ Lib_IntVector_Intrinsics_vec128 acc2 = acc[2U];
+ Lib_IntVector_Intrinsics_vec128 acc3 = acc[3U];
+ Lib_IntVector_Intrinsics_vec128 acc4 = acc[4U];
+ Lib_IntVector_Intrinsics_vec128 e0 = e[0U];
+ Lib_IntVector_Intrinsics_vec128 e1 = e[1U];
+ Lib_IntVector_Intrinsics_vec128 e2 = e[2U];
+ Lib_IntVector_Intrinsics_vec128 e3 = e[3U];
+ Lib_IntVector_Intrinsics_vec128 e4 = e[4U]; /* next: insert64(acc_i, 0, 1), presumably zeroing lane 1 of each accumulator limb */
+ Lib_IntVector_Intrinsics_vec128
+ f0 = Lib_IntVector_Intrinsics_vec128_insert64(acc0, (uint64_t)0U, (uint32_t)1U);
+ Lib_IntVector_Intrinsics_vec128
+ f1 = Lib_IntVector_Intrinsics_vec128_insert64(acc1, (uint64_t)0U, (uint32_t)1U);
+ Lib_IntVector_Intrinsics_vec128
+ f2 = Lib_IntVector_Intrinsics_vec128_insert64(acc2, (uint64_t)0U, (uint32_t)1U);
+ Lib_IntVector_Intrinsics_vec128
+ f3 = Lib_IntVector_Intrinsics_vec128_insert64(acc3, (uint64_t)0U, (uint32_t)1U);
+ Lib_IntVector_Intrinsics_vec128
+ f4 = Lib_IntVector_Intrinsics_vec128_insert64(acc4, (uint64_t)0U, (uint32_t)1U);
+ Lib_IntVector_Intrinsics_vec128 f01 = Lib_IntVector_Intrinsics_vec128_add64(f0, e0);
+ Lib_IntVector_Intrinsics_vec128 f11 = Lib_IntVector_Intrinsics_vec128_add64(f1, e1);
+ Lib_IntVector_Intrinsics_vec128 f21 = Lib_IntVector_Intrinsics_vec128_add64(f2, e2);
+ Lib_IntVector_Intrinsics_vec128 f31 = Lib_IntVector_Intrinsics_vec128_add64(f3, e3);
+ Lib_IntVector_Intrinsics_vec128 f41 = Lib_IntVector_Intrinsics_vec128_add64(f4, e4);
+ Lib_IntVector_Intrinsics_vec128 acc01 = f01;
+ Lib_IntVector_Intrinsics_vec128 acc11 = f11;
+ Lib_IntVector_Intrinsics_vec128 acc21 = f21;
+ Lib_IntVector_Intrinsics_vec128 acc31 = f31;
+ Lib_IntVector_Intrinsics_vec128 acc41 = f41;
+ acc[0U] = acc01;
+ acc[1U] = acc11;
+ acc[2U] = acc21;
+ acc[3U] = acc31;
+ acc[4U] = acc41;
+}
+/*
+ * Multiplies the five-limb vector accumulator `out` by per-lane factors
+ * built from `p`, folds the two 64-bit lanes together, and stores the
+ * carried result back into `out`.
+ *
+ * out: five vec128 limbs, read and overwritten.
+ * p:   precomputed table; limbs p[0..4] are read as `r` and limbs
+ *      p[10..14] as `r2`.
+ *
+ * Stages, in order:
+ *   1. r2[i] and r[i] are combined with interleave_low64 into one vector
+ *      per limb, and 5x multiples of limbs 1..4 are formed.
+ *   2. A 5x5 limb product with `out` is accumulated; the 5x multiples are
+ *      used where a product wraps past the top limb.
+ *   3. One carry pass with the 26-bit mask; the carry out of limb 4 is
+ *      multiplied by 5 (z11 + (z11 << 2)) and added to limb 0.
+ *   4. Each limb is added to interleave_high64 of itself.
+ *   5. A final sequential carry pass; the carry out of limb 4 times 5 is
+ *      added to limb 0.
+ *
+ * NOTE(review): the factor 5 is presumably the reduction modulo 2^130 - 5
+ * (five 26-bit limbs), and stage 4 presumably sums the two lanes into
+ * lane 0 -- confirm against the Lib_IntVector_Intrinsics definitions.
+ */
+void
+Hacl_Impl_Poly1305_Field32xN_128_fmul_r2_normalize(
+ Lib_IntVector_Intrinsics_vec128 *out,
+ Lib_IntVector_Intrinsics_vec128 *p)
+{
+ Lib_IntVector_Intrinsics_vec128 *r = p;
+ Lib_IntVector_Intrinsics_vec128 *r2 = p + (uint32_t)10U;
+ Lib_IntVector_Intrinsics_vec128 a0 = out[0U];
+ Lib_IntVector_Intrinsics_vec128 a1 = out[1U];
+ Lib_IntVector_Intrinsics_vec128 a2 = out[2U];
+ Lib_IntVector_Intrinsics_vec128 a3 = out[3U];
+ Lib_IntVector_Intrinsics_vec128 a4 = out[4U];
+ Lib_IntVector_Intrinsics_vec128 r10 = r[0U];
+ Lib_IntVector_Intrinsics_vec128 r11 = r[1U];
+ Lib_IntVector_Intrinsics_vec128 r12 = r[2U];
+ Lib_IntVector_Intrinsics_vec128 r13 = r[3U];
+ Lib_IntVector_Intrinsics_vec128 r14 = r[4U];
+ Lib_IntVector_Intrinsics_vec128 r20 = r2[0U];
+ Lib_IntVector_Intrinsics_vec128 r21 = r2[1U];
+ Lib_IntVector_Intrinsics_vec128 r22 = r2[2U];
+ Lib_IntVector_Intrinsics_vec128 r23 = r2[3U];
+ Lib_IntVector_Intrinsics_vec128 r24 = r2[4U]; /* stage 1: combine the low lanes of r2[i] and r[i] */
+ Lib_IntVector_Intrinsics_vec128
+ r201 = Lib_IntVector_Intrinsics_vec128_interleave_low64(r20, r10);
+ Lib_IntVector_Intrinsics_vec128
+ r211 = Lib_IntVector_Intrinsics_vec128_interleave_low64(r21, r11);
+ Lib_IntVector_Intrinsics_vec128
+ r221 = Lib_IntVector_Intrinsics_vec128_interleave_low64(r22, r12);
+ Lib_IntVector_Intrinsics_vec128
+ r231 = Lib_IntVector_Intrinsics_vec128_interleave_low64(r23, r13);
+ Lib_IntVector_Intrinsics_vec128
+ r241 = Lib_IntVector_Intrinsics_vec128_interleave_low64(r24, r14);
+ Lib_IntVector_Intrinsics_vec128
+ r251 = Lib_IntVector_Intrinsics_vec128_smul64(r211, (uint64_t)5U);
+ Lib_IntVector_Intrinsics_vec128
+ r252 = Lib_IntVector_Intrinsics_vec128_smul64(r221, (uint64_t)5U);
+ Lib_IntVector_Intrinsics_vec128
+ r253 = Lib_IntVector_Intrinsics_vec128_smul64(r231, (uint64_t)5U);
+ Lib_IntVector_Intrinsics_vec128
+ r254 = Lib_IntVector_Intrinsics_vec128_smul64(r241, (uint64_t)5U); /* stage 2 follows: limb-by-limb product */
+ Lib_IntVector_Intrinsics_vec128 a01 = Lib_IntVector_Intrinsics_vec128_mul64(r201, a0);
+ Lib_IntVector_Intrinsics_vec128 a11 = Lib_IntVector_Intrinsics_vec128_mul64(r211, a0);
+ Lib_IntVector_Intrinsics_vec128 a21 = Lib_IntVector_Intrinsics_vec128_mul64(r221, a0);
+ Lib_IntVector_Intrinsics_vec128 a31 = Lib_IntVector_Intrinsics_vec128_mul64(r231, a0);
+ Lib_IntVector_Intrinsics_vec128 a41 = Lib_IntVector_Intrinsics_vec128_mul64(r241, a0);
+ Lib_IntVector_Intrinsics_vec128
+ a02 =
+ Lib_IntVector_Intrinsics_vec128_add64(a01,
+ Lib_IntVector_Intrinsics_vec128_mul64(r254, a1));
+ Lib_IntVector_Intrinsics_vec128
+ a12 =
+ Lib_IntVector_Intrinsics_vec128_add64(a11,
+ Lib_IntVector_Intrinsics_vec128_mul64(r201, a1));
+ Lib_IntVector_Intrinsics_vec128
+ a22 =
+ Lib_IntVector_Intrinsics_vec128_add64(a21,
+ Lib_IntVector_Intrinsics_vec128_mul64(r211, a1));
+ Lib_IntVector_Intrinsics_vec128
+ a32 =
+ Lib_IntVector_Intrinsics_vec128_add64(a31,
+ Lib_IntVector_Intrinsics_vec128_mul64(r221, a1));
+ Lib_IntVector_Intrinsics_vec128
+ a42 =
+ Lib_IntVector_Intrinsics_vec128_add64(a41,
+ Lib_IntVector_Intrinsics_vec128_mul64(r231, a1));
+ Lib_IntVector_Intrinsics_vec128
+ a03 =
+ Lib_IntVector_Intrinsics_vec128_add64(a02,
+ Lib_IntVector_Intrinsics_vec128_mul64(r253, a2));
+ Lib_IntVector_Intrinsics_vec128
+ a13 =
+ Lib_IntVector_Intrinsics_vec128_add64(a12,
+ Lib_IntVector_Intrinsics_vec128_mul64(r254, a2));
+ Lib_IntVector_Intrinsics_vec128
+ a23 =
+ Lib_IntVector_Intrinsics_vec128_add64(a22,
+ Lib_IntVector_Intrinsics_vec128_mul64(r201, a2));
+ Lib_IntVector_Intrinsics_vec128
+ a33 =
+ Lib_IntVector_Intrinsics_vec128_add64(a32,
+ Lib_IntVector_Intrinsics_vec128_mul64(r211, a2));
+ Lib_IntVector_Intrinsics_vec128
+ a43 =
+ Lib_IntVector_Intrinsics_vec128_add64(a42,
+ Lib_IntVector_Intrinsics_vec128_mul64(r221, a2));
+ Lib_IntVector_Intrinsics_vec128
+ a04 =
+ Lib_IntVector_Intrinsics_vec128_add64(a03,
+ Lib_IntVector_Intrinsics_vec128_mul64(r252, a3));
+ Lib_IntVector_Intrinsics_vec128
+ a14 =
+ Lib_IntVector_Intrinsics_vec128_add64(a13,
+ Lib_IntVector_Intrinsics_vec128_mul64(r253, a3));
+ Lib_IntVector_Intrinsics_vec128
+ a24 =
+ Lib_IntVector_Intrinsics_vec128_add64(a23,
+ Lib_IntVector_Intrinsics_vec128_mul64(r254, a3));
+ Lib_IntVector_Intrinsics_vec128
+ a34 =
+ Lib_IntVector_Intrinsics_vec128_add64(a33,
+ Lib_IntVector_Intrinsics_vec128_mul64(r201, a3));
+ Lib_IntVector_Intrinsics_vec128
+ a44 =
+ Lib_IntVector_Intrinsics_vec128_add64(a43,
+ Lib_IntVector_Intrinsics_vec128_mul64(r211, a3));
+ Lib_IntVector_Intrinsics_vec128
+ a05 =
+ Lib_IntVector_Intrinsics_vec128_add64(a04,
+ Lib_IntVector_Intrinsics_vec128_mul64(r251, a4));
+ Lib_IntVector_Intrinsics_vec128
+ a15 =
+ Lib_IntVector_Intrinsics_vec128_add64(a14,
+ Lib_IntVector_Intrinsics_vec128_mul64(r252, a4));
+ Lib_IntVector_Intrinsics_vec128
+ a25 =
+ Lib_IntVector_Intrinsics_vec128_add64(a24,
+ Lib_IntVector_Intrinsics_vec128_mul64(r253, a4));
+ Lib_IntVector_Intrinsics_vec128
+ a35 =
+ Lib_IntVector_Intrinsics_vec128_add64(a34,
+ Lib_IntVector_Intrinsics_vec128_mul64(r254, a4));
+ Lib_IntVector_Intrinsics_vec128
+ a45 =
+ Lib_IntVector_Intrinsics_vec128_add64(a44,
+ Lib_IntVector_Intrinsics_vec128_mul64(r201, a4));
+ Lib_IntVector_Intrinsics_vec128 t0 = a05;
+ Lib_IntVector_Intrinsics_vec128 t1 = a15;
+ Lib_IntVector_Intrinsics_vec128 t2 = a25;
+ Lib_IntVector_Intrinsics_vec128 t3 = a35;
+ Lib_IntVector_Intrinsics_vec128 t4 = a45; /* stage 3 follows: carry pass with the 26-bit mask */
+ Lib_IntVector_Intrinsics_vec128
+ mask26 = Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU);
+ Lib_IntVector_Intrinsics_vec128
+ z0 = Lib_IntVector_Intrinsics_vec128_shift_right64(t0, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec128
+ z1 = Lib_IntVector_Intrinsics_vec128_shift_right64(t3, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec128 x0 = Lib_IntVector_Intrinsics_vec128_and(t0, mask26);
+ Lib_IntVector_Intrinsics_vec128 x3 = Lib_IntVector_Intrinsics_vec128_and(t3, mask26);
+ Lib_IntVector_Intrinsics_vec128 x1 = Lib_IntVector_Intrinsics_vec128_add64(t1, z0);
+ Lib_IntVector_Intrinsics_vec128 x4 = Lib_IntVector_Intrinsics_vec128_add64(t4, z1);
+ Lib_IntVector_Intrinsics_vec128
+ z01 = Lib_IntVector_Intrinsics_vec128_shift_right64(x1, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec128
+ z11 = Lib_IntVector_Intrinsics_vec128_shift_right64(x4, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec128
+ t = Lib_IntVector_Intrinsics_vec128_shift_left64(z11, (uint32_t)2U);
+ Lib_IntVector_Intrinsics_vec128 z12 = Lib_IntVector_Intrinsics_vec128_add64(z11, t); /* z12 = 5 * z11 */
+ Lib_IntVector_Intrinsics_vec128 x11 = Lib_IntVector_Intrinsics_vec128_and(x1, mask26);
+ Lib_IntVector_Intrinsics_vec128 x41 = Lib_IntVector_Intrinsics_vec128_and(x4, mask26);
+ Lib_IntVector_Intrinsics_vec128 x2 = Lib_IntVector_Intrinsics_vec128_add64(t2, z01);
+ Lib_IntVector_Intrinsics_vec128 x01 = Lib_IntVector_Intrinsics_vec128_add64(x0, z12);
+ Lib_IntVector_Intrinsics_vec128
+ z02 = Lib_IntVector_Intrinsics_vec128_shift_right64(x2, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec128
+ z13 = Lib_IntVector_Intrinsics_vec128_shift_right64(x01, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec128 x21 = Lib_IntVector_Intrinsics_vec128_and(x2, mask26);
+ Lib_IntVector_Intrinsics_vec128 x02 = Lib_IntVector_Intrinsics_vec128_and(x01, mask26);
+ Lib_IntVector_Intrinsics_vec128 x31 = Lib_IntVector_Intrinsics_vec128_add64(x3, z02);
+ Lib_IntVector_Intrinsics_vec128 x12 = Lib_IntVector_Intrinsics_vec128_add64(x11, z13);
+ Lib_IntVector_Intrinsics_vec128
+ z03 = Lib_IntVector_Intrinsics_vec128_shift_right64(x31, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec128 x32 = Lib_IntVector_Intrinsics_vec128_and(x31, mask26);
+ Lib_IntVector_Intrinsics_vec128 x42 = Lib_IntVector_Intrinsics_vec128_add64(x41, z03);
+ Lib_IntVector_Intrinsics_vec128 o0 = x02;
+ Lib_IntVector_Intrinsics_vec128 o10 = x12;
+ Lib_IntVector_Intrinsics_vec128 o20 = x21;
+ Lib_IntVector_Intrinsics_vec128 o30 = x32;
+ Lib_IntVector_Intrinsics_vec128 o40 = x42; /* stage 4 follows: add each limb to interleave_high64 of itself */
+ Lib_IntVector_Intrinsics_vec128
+ o01 =
+ Lib_IntVector_Intrinsics_vec128_add64(o0,
+ Lib_IntVector_Intrinsics_vec128_interleave_high64(o0, o0));
+ Lib_IntVector_Intrinsics_vec128
+ o11 =
+ Lib_IntVector_Intrinsics_vec128_add64(o10,
+ Lib_IntVector_Intrinsics_vec128_interleave_high64(o10, o10));
+ Lib_IntVector_Intrinsics_vec128
+ o21 =
+ Lib_IntVector_Intrinsics_vec128_add64(o20,
+ Lib_IntVector_Intrinsics_vec128_interleave_high64(o20, o20));
+ Lib_IntVector_Intrinsics_vec128
+ o31 =
+ Lib_IntVector_Intrinsics_vec128_add64(o30,
+ Lib_IntVector_Intrinsics_vec128_interleave_high64(o30, o30));
+ Lib_IntVector_Intrinsics_vec128
+ o41 =
+ Lib_IntVector_Intrinsics_vec128_add64(o40,
+ Lib_IntVector_Intrinsics_vec128_interleave_high64(o40, o40)); /* stage 5 follows: sequential carry pass */
+ Lib_IntVector_Intrinsics_vec128
+ l = Lib_IntVector_Intrinsics_vec128_add64(o01, Lib_IntVector_Intrinsics_vec128_zero);
+ Lib_IntVector_Intrinsics_vec128
+ tmp0 =
+ Lib_IntVector_Intrinsics_vec128_and(l,
+ Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU));
+ Lib_IntVector_Intrinsics_vec128
+ c0 = Lib_IntVector_Intrinsics_vec128_shift_right64(l, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec128 l0 = Lib_IntVector_Intrinsics_vec128_add64(o11, c0);
+ Lib_IntVector_Intrinsics_vec128
+ tmp1 =
+ Lib_IntVector_Intrinsics_vec128_and(l0,
+ Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU));
+ Lib_IntVector_Intrinsics_vec128
+ c1 = Lib_IntVector_Intrinsics_vec128_shift_right64(l0, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec128 l1 = Lib_IntVector_Intrinsics_vec128_add64(o21, c1);
+ Lib_IntVector_Intrinsics_vec128
+ tmp2 =
+ Lib_IntVector_Intrinsics_vec128_and(l1,
+ Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU));
+ Lib_IntVector_Intrinsics_vec128
+ c2 = Lib_IntVector_Intrinsics_vec128_shift_right64(l1, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec128 l2 = Lib_IntVector_Intrinsics_vec128_add64(o31, c2);
+ Lib_IntVector_Intrinsics_vec128
+ tmp3 =
+ Lib_IntVector_Intrinsics_vec128_and(l2,
+ Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU));
+ Lib_IntVector_Intrinsics_vec128
+ c3 = Lib_IntVector_Intrinsics_vec128_shift_right64(l2, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec128 l3 = Lib_IntVector_Intrinsics_vec128_add64(o41, c3);
+ Lib_IntVector_Intrinsics_vec128
+ tmp4 =
+ Lib_IntVector_Intrinsics_vec128_and(l3,
+ Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU));
+ Lib_IntVector_Intrinsics_vec128
+ c4 = Lib_IntVector_Intrinsics_vec128_shift_right64(l3, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec128
+ o00 =
+ Lib_IntVector_Intrinsics_vec128_add64(tmp0,
+ Lib_IntVector_Intrinsics_vec128_smul64(c4, (uint64_t)5U));
+ Lib_IntVector_Intrinsics_vec128 o1 = tmp1;
+ Lib_IntVector_Intrinsics_vec128 o2 = tmp2;
+ Lib_IntVector_Intrinsics_vec128 o3 = tmp3;
+ Lib_IntVector_Intrinsics_vec128 o4 = tmp4;
+ out[0U] = o00;
+ out[1U] = o1;
+ out[2U] = o2;
+ out[3U] = o3;
+ out[4U] = o4;
+}
+/*
+ * Exported (non-static, non-const) variable initialised to 16.
+ * NOTE(review): presumably the Poly1305 block length in bytes -- confirm.
+ */
+uint32_t Hacl_Poly1305_128_blocklen = (uint32_t)16U;
+
+void
+Hacl_Poly1305_128_poly1305_init(Lib_IntVector_Intrinsics_vec128 *ctx, uint8_t *key)
+{
+ Lib_IntVector_Intrinsics_vec128 *acc = ctx;
+ Lib_IntVector_Intrinsics_vec128 *pre = ctx + (uint32_t)5U;
+ uint8_t *kr = key;
+ acc[0U] = Lib_IntVector_Intrinsics_vec128_zero;
+ acc[1U] = Lib_IntVector_Intrinsics_vec128_zero;
+ acc[2U] = Lib_IntVector_Intrinsics_vec128_zero;
+ acc[3U] = Lib_IntVector_Intrinsics_vec128_zero;
+ acc[4U] = Lib_IntVector_Intrinsics_vec128_zero;
+ uint64_t u0 = load64_le(kr);
+ uint64_t lo = u0;
+ uint64_t u = load64_le(kr + (uint32_t)8U);
+ uint64_t hi = u;
+ uint64_t mask0 = (uint64_t)0x0ffffffc0fffffffU;
+ uint64_t mask1 = (uint64_t)0x0ffffffc0ffffffcU;
+ uint64_t lo1 = lo & mask0;
+ uint64_t hi1 = hi & mask1;
+ Lib_IntVector_Intrinsics_vec128 *r = pre;
+ Lib_IntVector_Intrinsics_vec128 *r5 = pre + (uint32_t)5U;
+ Lib_IntVector_Intrinsics_vec128 *rn = pre + (uint32_t)10U;
+ Lib_IntVector_Intrinsics_vec128 *rn_5 = pre + (uint32_t)15U;
+ Lib_IntVector_Intrinsics_vec128 r_vec0 = Lib_IntVector_Intrinsics_vec128_load64(lo1);
+ Lib_IntVector_Intrinsics_vec128 r_vec1 = Lib_IntVector_Intrinsics_vec128_load64(hi1);
+ Lib_IntVector_Intrinsics_vec128
+ f00 =
+ Lib_IntVector_Intrinsics_vec128_and(r_vec0,
+ Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU));
+ Lib_IntVector_Intrinsics_vec128
+ f15 =
+ Lib_IntVector_Intrinsics_vec128_and(Lib_IntVector_Intrinsics_vec128_shift_right64(r_vec0,
+ (uint32_t)26U),
+ Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU));
+ Lib_IntVector_Intrinsics_vec128
+ f20 =
+ Lib_IntVector_Intrinsics_vec128_or(Lib_IntVector_Intrinsics_vec128_shift_right64(r_vec0,
+ (uint32_t)52U),
+ Lib_IntVector_Intrinsics_vec128_shift_left64(Lib_IntVector_Intrinsics_vec128_and(r_vec1,
+ Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3fffU)),
+ (uint32_t)12U));
+ Lib_IntVector_Intrinsics_vec128
+ f30 =
+ Lib_IntVector_Intrinsics_vec128_and(Lib_IntVector_Intrinsics_vec128_shift_right64(r_vec1,
+ (uint32_t)14U),
+ Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU));
+ Lib_IntVector_Intrinsics_vec128
+ f40 = Lib_IntVector_Intrinsics_vec128_shift_right64(r_vec1, (uint32_t)40U);
+ Lib_IntVector_Intrinsics_vec128 f0 = f00;
+ Lib_IntVector_Intrinsics_vec128 f1 = f15;
+ Lib_IntVector_Intrinsics_vec128 f2 = f20;
+ Lib_IntVector_Intrinsics_vec128 f3 = f30;
+ Lib_IntVector_Intrinsics_vec128 f4 = f40;
+ r[0U] = f0;
+ r[1U] = f1;
+ r[2U] = f2;
+ r[3U] = f3;
+ r[4U] = f4;
+ Lib_IntVector_Intrinsics_vec128 f200 = r[0U];
+ Lib_IntVector_Intrinsics_vec128 f210 = r[1U];
+ Lib_IntVector_Intrinsics_vec128 f220 = r[2U];
+ Lib_IntVector_Intrinsics_vec128 f230 = r[3U];
+ Lib_IntVector_Intrinsics_vec128 f240 = r[4U];
+ r5[0U] = Lib_IntVector_Intrinsics_vec128_smul64(f200, (uint64_t)5U);
+ r5[1U] = Lib_IntVector_Intrinsics_vec128_smul64(f210, (uint64_t)5U);
+ r5[2U] = Lib_IntVector_Intrinsics_vec128_smul64(f220, (uint64_t)5U);
+ r5[3U] = Lib_IntVector_Intrinsics_vec128_smul64(f230, (uint64_t)5U);
+ r5[4U] = Lib_IntVector_Intrinsics_vec128_smul64(f240, (uint64_t)5U);
+ Lib_IntVector_Intrinsics_vec128 r0 = r[0U];
+ Lib_IntVector_Intrinsics_vec128 r1 = r[1U];
+ Lib_IntVector_Intrinsics_vec128 r2 = r[2U];
+ Lib_IntVector_Intrinsics_vec128 r3 = r[3U];
+ Lib_IntVector_Intrinsics_vec128 r4 = r[4U];
+ Lib_IntVector_Intrinsics_vec128 r51 = r5[1U];
+ Lib_IntVector_Intrinsics_vec128 r52 = r5[2U];
+ Lib_IntVector_Intrinsics_vec128 r53 = r5[3U];
+ Lib_IntVector_Intrinsics_vec128 r54 = r5[4U];
+ Lib_IntVector_Intrinsics_vec128 f10 = r[0U];
+ Lib_IntVector_Intrinsics_vec128 f11 = r[1U];
+ Lib_IntVector_Intrinsics_vec128 f12 = r[2U];
+ Lib_IntVector_Intrinsics_vec128 f13 = r[3U];
+ Lib_IntVector_Intrinsics_vec128 f14 = r[4U];
+ Lib_IntVector_Intrinsics_vec128 a0 = Lib_IntVector_Intrinsics_vec128_mul64(r0, f10);
+ Lib_IntVector_Intrinsics_vec128 a1 = Lib_IntVector_Intrinsics_vec128_mul64(r1, f10);
+ Lib_IntVector_Intrinsics_vec128 a2 = Lib_IntVector_Intrinsics_vec128_mul64(r2, f10);
+ Lib_IntVector_Intrinsics_vec128 a3 = Lib_IntVector_Intrinsics_vec128_mul64(r3, f10);
+ Lib_IntVector_Intrinsics_vec128 a4 = Lib_IntVector_Intrinsics_vec128_mul64(r4, f10);
+ Lib_IntVector_Intrinsics_vec128
+ a01 =
+ Lib_IntVector_Intrinsics_vec128_add64(a0,
+ Lib_IntVector_Intrinsics_vec128_mul64(r54, f11));
+ Lib_IntVector_Intrinsics_vec128
+ a11 = Lib_IntVector_Intrinsics_vec128_add64(a1, Lib_IntVector_Intrinsics_vec128_mul64(r0, f11));
+ Lib_IntVector_Intrinsics_vec128
+ a21 = Lib_IntVector_Intrinsics_vec128_add64(a2, Lib_IntVector_Intrinsics_vec128_mul64(r1, f11));
+ Lib_IntVector_Intrinsics_vec128
+ a31 = Lib_IntVector_Intrinsics_vec128_add64(a3, Lib_IntVector_Intrinsics_vec128_mul64(r2, f11));
+ Lib_IntVector_Intrinsics_vec128
+ a41 = Lib_IntVector_Intrinsics_vec128_add64(a4, Lib_IntVector_Intrinsics_vec128_mul64(r3, f11));
+ Lib_IntVector_Intrinsics_vec128
+ a02 =
+ Lib_IntVector_Intrinsics_vec128_add64(a01,
+ Lib_IntVector_Intrinsics_vec128_mul64(r53, f12));
+ Lib_IntVector_Intrinsics_vec128
+ a12 =
+ Lib_IntVector_Intrinsics_vec128_add64(a11,
+ Lib_IntVector_Intrinsics_vec128_mul64(r54, f12));
+ Lib_IntVector_Intrinsics_vec128
+ a22 =
+ Lib_IntVector_Intrinsics_vec128_add64(a21,
+ Lib_IntVector_Intrinsics_vec128_mul64(r0, f12));
+ Lib_IntVector_Intrinsics_vec128
+ a32 =
+ Lib_IntVector_Intrinsics_vec128_add64(a31,
+ Lib_IntVector_Intrinsics_vec128_mul64(r1, f12));
+ Lib_IntVector_Intrinsics_vec128
+ a42 =
+ Lib_IntVector_Intrinsics_vec128_add64(a41,
+ Lib_IntVector_Intrinsics_vec128_mul64(r2, f12));
+ Lib_IntVector_Intrinsics_vec128
+ a03 =
+ Lib_IntVector_Intrinsics_vec128_add64(a02,
+ Lib_IntVector_Intrinsics_vec128_mul64(r52, f13));
+ Lib_IntVector_Intrinsics_vec128
+ a13 =
+ Lib_IntVector_Intrinsics_vec128_add64(a12,
+ Lib_IntVector_Intrinsics_vec128_mul64(r53, f13));
+ Lib_IntVector_Intrinsics_vec128
+ a23 =
+ Lib_IntVector_Intrinsics_vec128_add64(a22,
+ Lib_IntVector_Intrinsics_vec128_mul64(r54, f13));
+ Lib_IntVector_Intrinsics_vec128
+ a33 =
+ Lib_IntVector_Intrinsics_vec128_add64(a32,
+ Lib_IntVector_Intrinsics_vec128_mul64(r0, f13));
+ Lib_IntVector_Intrinsics_vec128
+ a43 =
+ Lib_IntVector_Intrinsics_vec128_add64(a42,
+ Lib_IntVector_Intrinsics_vec128_mul64(r1, f13));
+ Lib_IntVector_Intrinsics_vec128
+ a04 =
+ Lib_IntVector_Intrinsics_vec128_add64(a03,
+ Lib_IntVector_Intrinsics_vec128_mul64(r51, f14));
+ Lib_IntVector_Intrinsics_vec128
+ a14 =
+ Lib_IntVector_Intrinsics_vec128_add64(a13,
+ Lib_IntVector_Intrinsics_vec128_mul64(r52, f14));
+ Lib_IntVector_Intrinsics_vec128
+ a24 =
+ Lib_IntVector_Intrinsics_vec128_add64(a23,
+ Lib_IntVector_Intrinsics_vec128_mul64(r53, f14));
+ Lib_IntVector_Intrinsics_vec128
+ a34 =
+ Lib_IntVector_Intrinsics_vec128_add64(a33,
+ Lib_IntVector_Intrinsics_vec128_mul64(r54, f14));
+ Lib_IntVector_Intrinsics_vec128
+ a44 =
+ Lib_IntVector_Intrinsics_vec128_add64(a43,
+ Lib_IntVector_Intrinsics_vec128_mul64(r0, f14));
+ Lib_IntVector_Intrinsics_vec128 t0 = a04;
+ Lib_IntVector_Intrinsics_vec128 t1 = a14;
+ Lib_IntVector_Intrinsics_vec128 t2 = a24;
+ Lib_IntVector_Intrinsics_vec128 t3 = a34;
+ Lib_IntVector_Intrinsics_vec128 t4 = a44;
+ Lib_IntVector_Intrinsics_vec128
+ mask26 = Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU);
+ Lib_IntVector_Intrinsics_vec128
+ z0 = Lib_IntVector_Intrinsics_vec128_shift_right64(t0, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec128
+ z1 = Lib_IntVector_Intrinsics_vec128_shift_right64(t3, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec128 x0 = Lib_IntVector_Intrinsics_vec128_and(t0, mask26);
+ Lib_IntVector_Intrinsics_vec128 x3 = Lib_IntVector_Intrinsics_vec128_and(t3, mask26);
+ Lib_IntVector_Intrinsics_vec128 x1 = Lib_IntVector_Intrinsics_vec128_add64(t1, z0);
+ Lib_IntVector_Intrinsics_vec128 x4 = Lib_IntVector_Intrinsics_vec128_add64(t4, z1);
+ Lib_IntVector_Intrinsics_vec128
+ z01 = Lib_IntVector_Intrinsics_vec128_shift_right64(x1, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec128
+ z11 = Lib_IntVector_Intrinsics_vec128_shift_right64(x4, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec128
+ t = Lib_IntVector_Intrinsics_vec128_shift_left64(z11, (uint32_t)2U);
+ Lib_IntVector_Intrinsics_vec128 z12 = Lib_IntVector_Intrinsics_vec128_add64(z11, t);
+ Lib_IntVector_Intrinsics_vec128 x11 = Lib_IntVector_Intrinsics_vec128_and(x1, mask26);
+ Lib_IntVector_Intrinsics_vec128 x41 = Lib_IntVector_Intrinsics_vec128_and(x4, mask26);
+ Lib_IntVector_Intrinsics_vec128 x2 = Lib_IntVector_Intrinsics_vec128_add64(t2, z01);
+ Lib_IntVector_Intrinsics_vec128 x01 = Lib_IntVector_Intrinsics_vec128_add64(x0, z12);
+ Lib_IntVector_Intrinsics_vec128
+ z02 = Lib_IntVector_Intrinsics_vec128_shift_right64(x2, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec128
+ z13 = Lib_IntVector_Intrinsics_vec128_shift_right64(x01, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec128 x21 = Lib_IntVector_Intrinsics_vec128_and(x2, mask26);
+ Lib_IntVector_Intrinsics_vec128 x02 = Lib_IntVector_Intrinsics_vec128_and(x01, mask26);
+ Lib_IntVector_Intrinsics_vec128 x31 = Lib_IntVector_Intrinsics_vec128_add64(x3, z02);
+ Lib_IntVector_Intrinsics_vec128 x12 = Lib_IntVector_Intrinsics_vec128_add64(x11, z13);
+ Lib_IntVector_Intrinsics_vec128
+ z03 = Lib_IntVector_Intrinsics_vec128_shift_right64(x31, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec128 x32 = Lib_IntVector_Intrinsics_vec128_and(x31, mask26);
+ Lib_IntVector_Intrinsics_vec128 x42 = Lib_IntVector_Intrinsics_vec128_add64(x41, z03);
+ Lib_IntVector_Intrinsics_vec128 o0 = x02;
+ Lib_IntVector_Intrinsics_vec128 o1 = x12;
+ Lib_IntVector_Intrinsics_vec128 o2 = x21;
+ Lib_IntVector_Intrinsics_vec128 o3 = x32;
+ Lib_IntVector_Intrinsics_vec128 o4 = x42;
+ rn[0U] = o0;
+ rn[1U] = o1;
+ rn[2U] = o2;
+ rn[3U] = o3;
+ rn[4U] = o4;
+ Lib_IntVector_Intrinsics_vec128 f201 = rn[0U];
+ Lib_IntVector_Intrinsics_vec128 f21 = rn[1U];
+ Lib_IntVector_Intrinsics_vec128 f22 = rn[2U];
+ Lib_IntVector_Intrinsics_vec128 f23 = rn[3U];
+ Lib_IntVector_Intrinsics_vec128 f24 = rn[4U];
+ rn_5[0U] = Lib_IntVector_Intrinsics_vec128_smul64(f201, (uint64_t)5U);
+ rn_5[1U] = Lib_IntVector_Intrinsics_vec128_smul64(f21, (uint64_t)5U);
+ rn_5[2U] = Lib_IntVector_Intrinsics_vec128_smul64(f22, (uint64_t)5U);
+ rn_5[3U] = Lib_IntVector_Intrinsics_vec128_smul64(f23, (uint64_t)5U);
+ rn_5[4U] = Lib_IntVector_Intrinsics_vec128_smul64(f24, (uint64_t)5U);
+}
+
+void /* Folds one message block from text into the accumulator stored at the start of ctx. */
+Hacl_Poly1305_128_poly1305_update1(Lib_IntVector_Intrinsics_vec128 *ctx, uint8_t *text) /* ctx[0..4] is read and rewritten; ctx[5..9] and ctx[11..14] are only read */
+{ /* text is read through two load64_le calls, at offsets 0 and 8 */
+ Lib_IntVector_Intrinsics_vec128 *pre = ctx + (uint32_t)5U; /* precomputed multiplier data follows the 5 accumulator limbs */
+ Lib_IntVector_Intrinsics_vec128 *acc = ctx; /* accumulator: 5 limbs at the start of ctx */
+ Lib_IntVector_Intrinsics_vec128 e[5U]; /* limbs of the incoming block */
+ for (uint32_t _i = 0U; _i < (uint32_t)5U; ++_i)
+ e[_i] = Lib_IntVector_Intrinsics_vec128_zero; /* zero-fill; all five entries are overwritten below */
+ uint64_t u0 = load64_le(text); /* first 64-bit word of the block */
+ uint64_t lo = u0;
+ uint64_t u = load64_le(text + (uint32_t)8U); /* second 64-bit word of the block */
+ uint64_t hi = u;
+ Lib_IntVector_Intrinsics_vec128 f0 = Lib_IntVector_Intrinsics_vec128_load64(lo); /* NOTE(review): load64 presumably places the value in every 64-bit lane - confirm in the intrinsics header */
+ Lib_IntVector_Intrinsics_vec128 f1 = Lib_IntVector_Intrinsics_vec128_load64(hi);
+ Lib_IntVector_Intrinsics_vec128
+ f010 = /* limb 0: bits 0..25 of lo */
+ Lib_IntVector_Intrinsics_vec128_and(f0,
+ Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU));
+ Lib_IntVector_Intrinsics_vec128
+ f110 = /* limb 1: bits 26..51 of lo */
+ Lib_IntVector_Intrinsics_vec128_and(Lib_IntVector_Intrinsics_vec128_shift_right64(f0,
+ (uint32_t)26U),
+ Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU));
+ Lib_IntVector_Intrinsics_vec128
+ f20 = /* limb 2: top 12 bits of lo joined with the low 14 bits of hi */
+ Lib_IntVector_Intrinsics_vec128_or(Lib_IntVector_Intrinsics_vec128_shift_right64(f0,
+ (uint32_t)52U),
+ Lib_IntVector_Intrinsics_vec128_shift_left64(Lib_IntVector_Intrinsics_vec128_and(f1,
+ Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3fffU)),
+ (uint32_t)12U));
+ Lib_IntVector_Intrinsics_vec128
+ f30 = /* limb 3: bits 14..39 of hi */
+ Lib_IntVector_Intrinsics_vec128_and(Lib_IntVector_Intrinsics_vec128_shift_right64(f1,
+ (uint32_t)14U),
+ Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU));
+ Lib_IntVector_Intrinsics_vec128
+ f40 = Lib_IntVector_Intrinsics_vec128_shift_right64(f1, (uint32_t)40U); /* limb 4: bits 40..63 of hi (24 bits) */
+ Lib_IntVector_Intrinsics_vec128 f01 = f010;
+ Lib_IntVector_Intrinsics_vec128 f111 = f110;
+ Lib_IntVector_Intrinsics_vec128 f2 = f20;
+ Lib_IntVector_Intrinsics_vec128 f3 = f30;
+ Lib_IntVector_Intrinsics_vec128 f41 = f40;
+ e[0U] = f01;
+ e[1U] = f111;
+ e[2U] = f2;
+ e[3U] = f3;
+ e[4U] = f41;
+ uint64_t b = (uint64_t)0x1000000U; /* 1 << 24; in limb 4 (weight 2^104) this is bit 128 of the block value */
+ Lib_IntVector_Intrinsics_vec128 mask = Lib_IntVector_Intrinsics_vec128_load64(b);
+ Lib_IntVector_Intrinsics_vec128 f4 = e[4U];
+ e[4U] = Lib_IntVector_Intrinsics_vec128_or(f4, mask); /* set that bit on the block */
+ Lib_IntVector_Intrinsics_vec128 *r = pre; /* multiplier limbs r[0..4] */
+ Lib_IntVector_Intrinsics_vec128 *r5 = pre + (uint32_t)5U; /* NOTE(review): presumably 5 * r[i], as the wrap-around terms below suggest - confirm where ctx is initialised */
+ Lib_IntVector_Intrinsics_vec128 r0 = r[0U];
+ Lib_IntVector_Intrinsics_vec128 r1 = r[1U];
+ Lib_IntVector_Intrinsics_vec128 r2 = r[2U];
+ Lib_IntVector_Intrinsics_vec128 r3 = r[3U];
+ Lib_IntVector_Intrinsics_vec128 r4 = r[4U];
+ Lib_IntVector_Intrinsics_vec128 r51 = r5[1U]; /* r5[0] is never read in this function */
+ Lib_IntVector_Intrinsics_vec128 r52 = r5[2U];
+ Lib_IntVector_Intrinsics_vec128 r53 = r5[3U];
+ Lib_IntVector_Intrinsics_vec128 r54 = r5[4U];
+ Lib_IntVector_Intrinsics_vec128 f10 = e[0U];
+ Lib_IntVector_Intrinsics_vec128 f11 = e[1U];
+ Lib_IntVector_Intrinsics_vec128 f12 = e[2U];
+ Lib_IntVector_Intrinsics_vec128 f13 = e[3U];
+ Lib_IntVector_Intrinsics_vec128 f14 = e[4U];
+ Lib_IntVector_Intrinsics_vec128 a0 = acc[0U];
+ Lib_IntVector_Intrinsics_vec128 a1 = acc[1U];
+ Lib_IntVector_Intrinsics_vec128 a2 = acc[2U];
+ Lib_IntVector_Intrinsics_vec128 a3 = acc[3U];
+ Lib_IntVector_Intrinsics_vec128 a4 = acc[4U];
+ Lib_IntVector_Intrinsics_vec128 a01 = Lib_IntVector_Intrinsics_vec128_add64(a0, f10); /* a01..a41: accumulator limb + block limb */
+ Lib_IntVector_Intrinsics_vec128 a11 = Lib_IntVector_Intrinsics_vec128_add64(a1, f11);
+ Lib_IntVector_Intrinsics_vec128 a21 = Lib_IntVector_Intrinsics_vec128_add64(a2, f12);
+ Lib_IntVector_Intrinsics_vec128 a31 = Lib_IntVector_Intrinsics_vec128_add64(a3, f13);
+ Lib_IntVector_Intrinsics_vec128 a41 = Lib_IntVector_Intrinsics_vec128_add64(a4, f14);
+ Lib_IntVector_Intrinsics_vec128 a02 = Lib_IntVector_Intrinsics_vec128_mul64(r0, a01); /* 5x5 limb product, built one input limb at a time: terms in a01 */
+ Lib_IntVector_Intrinsics_vec128 a12 = Lib_IntVector_Intrinsics_vec128_mul64(r1, a01);
+ Lib_IntVector_Intrinsics_vec128 a22 = Lib_IntVector_Intrinsics_vec128_mul64(r2, a01);
+ Lib_IntVector_Intrinsics_vec128 a32 = Lib_IntVector_Intrinsics_vec128_mul64(r3, a01);
+ Lib_IntVector_Intrinsics_vec128 a42 = Lib_IntVector_Intrinsics_vec128_mul64(r4, a01);
+ Lib_IntVector_Intrinsics_vec128
+ a03 = /* terms in a11; the r54 product is the one that wraps past limb 4 back to limb 0 */
+ Lib_IntVector_Intrinsics_vec128_add64(a02,
+ Lib_IntVector_Intrinsics_vec128_mul64(r54, a11));
+ Lib_IntVector_Intrinsics_vec128
+ a13 =
+ Lib_IntVector_Intrinsics_vec128_add64(a12,
+ Lib_IntVector_Intrinsics_vec128_mul64(r0, a11));
+ Lib_IntVector_Intrinsics_vec128
+ a23 =
+ Lib_IntVector_Intrinsics_vec128_add64(a22,
+ Lib_IntVector_Intrinsics_vec128_mul64(r1, a11));
+ Lib_IntVector_Intrinsics_vec128
+ a33 =
+ Lib_IntVector_Intrinsics_vec128_add64(a32,
+ Lib_IntVector_Intrinsics_vec128_mul64(r2, a11));
+ Lib_IntVector_Intrinsics_vec128
+ a43 =
+ Lib_IntVector_Intrinsics_vec128_add64(a42,
+ Lib_IntVector_Intrinsics_vec128_mul64(r3, a11));
+ Lib_IntVector_Intrinsics_vec128
+ a04 = /* terms in a21 */
+ Lib_IntVector_Intrinsics_vec128_add64(a03,
+ Lib_IntVector_Intrinsics_vec128_mul64(r53, a21));
+ Lib_IntVector_Intrinsics_vec128
+ a14 =
+ Lib_IntVector_Intrinsics_vec128_add64(a13,
+ Lib_IntVector_Intrinsics_vec128_mul64(r54, a21));
+ Lib_IntVector_Intrinsics_vec128
+ a24 =
+ Lib_IntVector_Intrinsics_vec128_add64(a23,
+ Lib_IntVector_Intrinsics_vec128_mul64(r0, a21));
+ Lib_IntVector_Intrinsics_vec128
+ a34 =
+ Lib_IntVector_Intrinsics_vec128_add64(a33,
+ Lib_IntVector_Intrinsics_vec128_mul64(r1, a21));
+ Lib_IntVector_Intrinsics_vec128
+ a44 =
+ Lib_IntVector_Intrinsics_vec128_add64(a43,
+ Lib_IntVector_Intrinsics_vec128_mul64(r2, a21));
+ Lib_IntVector_Intrinsics_vec128
+ a05 = /* terms in a31 */
+ Lib_IntVector_Intrinsics_vec128_add64(a04,
+ Lib_IntVector_Intrinsics_vec128_mul64(r52, a31));
+ Lib_IntVector_Intrinsics_vec128
+ a15 =
+ Lib_IntVector_Intrinsics_vec128_add64(a14,
+ Lib_IntVector_Intrinsics_vec128_mul64(r53, a31));
+ Lib_IntVector_Intrinsics_vec128
+ a25 =
+ Lib_IntVector_Intrinsics_vec128_add64(a24,
+ Lib_IntVector_Intrinsics_vec128_mul64(r54, a31));
+ Lib_IntVector_Intrinsics_vec128
+ a35 =
+ Lib_IntVector_Intrinsics_vec128_add64(a34,
+ Lib_IntVector_Intrinsics_vec128_mul64(r0, a31));
+ Lib_IntVector_Intrinsics_vec128
+ a45 =
+ Lib_IntVector_Intrinsics_vec128_add64(a44,
+ Lib_IntVector_Intrinsics_vec128_mul64(r1, a31));
+ Lib_IntVector_Intrinsics_vec128
+ a06 = /* terms in a41 */
+ Lib_IntVector_Intrinsics_vec128_add64(a05,
+ Lib_IntVector_Intrinsics_vec128_mul64(r51, a41));
+ Lib_IntVector_Intrinsics_vec128
+ a16 =
+ Lib_IntVector_Intrinsics_vec128_add64(a15,
+ Lib_IntVector_Intrinsics_vec128_mul64(r52, a41));
+ Lib_IntVector_Intrinsics_vec128
+ a26 =
+ Lib_IntVector_Intrinsics_vec128_add64(a25,
+ Lib_IntVector_Intrinsics_vec128_mul64(r53, a41));
+ Lib_IntVector_Intrinsics_vec128
+ a36 =
+ Lib_IntVector_Intrinsics_vec128_add64(a35,
+ Lib_IntVector_Intrinsics_vec128_mul64(r54, a41));
+ Lib_IntVector_Intrinsics_vec128
+ a46 =
+ Lib_IntVector_Intrinsics_vec128_add64(a45,
+ Lib_IntVector_Intrinsics_vec128_mul64(r0, a41));
+ Lib_IntVector_Intrinsics_vec128 t0 = a06; /* t0..t4: product limbs before carry propagation */
+ Lib_IntVector_Intrinsics_vec128 t1 = a16;
+ Lib_IntVector_Intrinsics_vec128 t2 = a26;
+ Lib_IntVector_Intrinsics_vec128 t3 = a36;
+ Lib_IntVector_Intrinsics_vec128 t4 = a46;
+ Lib_IntVector_Intrinsics_vec128
+ mask26 = Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU); /* keeps the low 26 bits of a limb */
+ Lib_IntVector_Intrinsics_vec128
+ z0 = Lib_IntVector_Intrinsics_vec128_shift_right64(t0, (uint32_t)26U); /* carry out of limb 0 */
+ Lib_IntVector_Intrinsics_vec128
+ z1 = Lib_IntVector_Intrinsics_vec128_shift_right64(t3, (uint32_t)26U); /* carry out of limb 3 */
+ Lib_IntVector_Intrinsics_vec128 x0 = Lib_IntVector_Intrinsics_vec128_and(t0, mask26);
+ Lib_IntVector_Intrinsics_vec128 x3 = Lib_IntVector_Intrinsics_vec128_and(t3, mask26);
+ Lib_IntVector_Intrinsics_vec128 x1 = Lib_IntVector_Intrinsics_vec128_add64(t1, z0); /* limb 1 + carry from limb 0 */
+ Lib_IntVector_Intrinsics_vec128 x4 = Lib_IntVector_Intrinsics_vec128_add64(t4, z1); /* limb 4 + carry from limb 3 */
+ Lib_IntVector_Intrinsics_vec128
+ z01 = Lib_IntVector_Intrinsics_vec128_shift_right64(x1, (uint32_t)26U); /* carry out of limb 1 */
+ Lib_IntVector_Intrinsics_vec128
+ z11 = Lib_IntVector_Intrinsics_vec128_shift_right64(x4, (uint32_t)26U); /* carry out of limb 4 */
+ Lib_IntVector_Intrinsics_vec128
+ t = Lib_IntVector_Intrinsics_vec128_shift_left64(z11, (uint32_t)2U); /* 4 * z11 */
+ Lib_IntVector_Intrinsics_vec128 z12 = Lib_IntVector_Intrinsics_vec128_add64(z11, t); /* z12 = 5 * z11 */
+ Lib_IntVector_Intrinsics_vec128 x11 = Lib_IntVector_Intrinsics_vec128_and(x1, mask26);
+ Lib_IntVector_Intrinsics_vec128 x41 = Lib_IntVector_Intrinsics_vec128_and(x4, mask26);
+ Lib_IntVector_Intrinsics_vec128 x2 = Lib_IntVector_Intrinsics_vec128_add64(t2, z01); /* limb 2 + carry from limb 1 */
+ Lib_IntVector_Intrinsics_vec128 x01 = Lib_IntVector_Intrinsics_vec128_add64(x0, z12); /* limb-4 carry, times 5, wraps into limb 0 (consistent with arithmetic modulo 2^130 - 5) */
+ Lib_IntVector_Intrinsics_vec128
+ z02 = Lib_IntVector_Intrinsics_vec128_shift_right64(x2, (uint32_t)26U); /* carry out of limb 2 */
+ Lib_IntVector_Intrinsics_vec128
+ z13 = Lib_IntVector_Intrinsics_vec128_shift_right64(x01, (uint32_t)26U); /* carry out of the updated limb 0 */
+ Lib_IntVector_Intrinsics_vec128 x21 = Lib_IntVector_Intrinsics_vec128_and(x2, mask26);
+ Lib_IntVector_Intrinsics_vec128 x02 = Lib_IntVector_Intrinsics_vec128_and(x01, mask26);
+ Lib_IntVector_Intrinsics_vec128 x31 = Lib_IntVector_Intrinsics_vec128_add64(x3, z02); /* limb 3 + carry from limb 2 */
+ Lib_IntVector_Intrinsics_vec128 x12 = Lib_IntVector_Intrinsics_vec128_add64(x11, z13); /* limb 1 + second carry from limb 0 (not masked again) */
+ Lib_IntVector_Intrinsics_vec128
+ z03 = Lib_IntVector_Intrinsics_vec128_shift_right64(x31, (uint32_t)26U); /* carry out of the updated limb 3 */
+ Lib_IntVector_Intrinsics_vec128 x32 = Lib_IntVector_Intrinsics_vec128_and(x31, mask26);
+ Lib_IntVector_Intrinsics_vec128 x42 = Lib_IntVector_Intrinsics_vec128_add64(x41, z03); /* limb 4 + second carry from limb 3 (not masked again) */
+ Lib_IntVector_Intrinsics_vec128 o0 = x02;
+ Lib_IntVector_Intrinsics_vec128 o1 = x12;
+ Lib_IntVector_Intrinsics_vec128 o2 = x21;
+ Lib_IntVector_Intrinsics_vec128 o3 = x32;
+ Lib_IntVector_Intrinsics_vec128 o4 = x42;
+ acc[0U] = o0; /* write the carried product back as the new accumulator */
+ acc[1U] = o1;
+ acc[2U] = o2;
+ acc[3U] = o3;
+ acc[4U] = o4;
+}
+
+void
+Hacl_Poly1305_128_poly1305_update(
+ Lib_IntVector_Intrinsics_vec128 *ctx,
+ uint32_t len,
+ uint8_t *text)
+{
+ Lib_IntVector_Intrinsics_vec128 *pre = ctx + (uint32_t)5U;
+ Lib_IntVector_Intrinsics_vec128 *acc = ctx;
+ uint32_t sz_block = (uint32_t)32U;
+ uint32_t len0 = len / sz_block * sz_block;
+ uint8_t *t0 = text;
+ if (len0 > (uint32_t)0U) {
+ uint32_t bs = (uint32_t)32U;
+ uint8_t *text0 = t0;
+ Hacl_Impl_Poly1305_Field32xN_128_load_acc2(acc, text0);
+ uint32_t len1 = len0 - bs;
+ uint8_t *text1 = t0 + bs;
+ uint32_t nb = len1 / bs;
+ for (uint32_t i = (uint32_t)0U; i < nb; i++) {
+ uint8_t *block = text1 + i * bs;
+ Lib_IntVector_Intrinsics_vec128 e[5U];
+ for (uint32_t _i = 0U; _i < (uint32_t)5U; ++_i)
+ e[_i] = Lib_IntVector_Intrinsics_vec128_zero;
+ Lib_IntVector_Intrinsics_vec128 b1 = Lib_IntVector_Intrinsics_vec128_load_le(block);
+ Lib_IntVector_Intrinsics_vec128
+ b2 = Lib_IntVector_Intrinsics_vec128_load_le(block + (uint32_t)16U);
+ Lib_IntVector_Intrinsics_vec128 lo = Lib_IntVector_Intrinsics_vec128_interleave_low64(b1, b2);
+ Lib_IntVector_Intrinsics_vec128
+ hi = Lib_IntVector_Intrinsics_vec128_interleave_high64(b1, b2);
+ Lib_IntVector_Intrinsics_vec128
+ f00 =
+ Lib_IntVector_Intrinsics_vec128_and(lo,
+ Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU));
+ Lib_IntVector_Intrinsics_vec128
+ f15 =
+ Lib_IntVector_Intrinsics_vec128_and(Lib_IntVector_Intrinsics_vec128_shift_right64(lo,
+ (uint32_t)26U),
+ Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU));
+ Lib_IntVector_Intrinsics_vec128
+ f25 =
+ Lib_IntVector_Intrinsics_vec128_or(Lib_IntVector_Intrinsics_vec128_shift_right64(lo,
+ (uint32_t)52U),
+ Lib_IntVector_Intrinsics_vec128_shift_left64(Lib_IntVector_Intrinsics_vec128_and(hi,
+ Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3fffU)),
+ (uint32_t)12U));
+ Lib_IntVector_Intrinsics_vec128
+ f30 =
+ Lib_IntVector_Intrinsics_vec128_and(Lib_IntVector_Intrinsics_vec128_shift_right64(hi,
+ (uint32_t)14U),
+ Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU));
+ Lib_IntVector_Intrinsics_vec128
+ f40 = Lib_IntVector_Intrinsics_vec128_shift_right64(hi, (uint32_t)40U);
+ Lib_IntVector_Intrinsics_vec128 f0 = f00;
+ Lib_IntVector_Intrinsics_vec128 f1 = f15;
+ Lib_IntVector_Intrinsics_vec128 f2 = f25;
+ Lib_IntVector_Intrinsics_vec128 f3 = f30;
+ Lib_IntVector_Intrinsics_vec128 f41 = f40;
+ e[0U] = f0;
+ e[1U] = f1;
+ e[2U] = f2;
+ e[3U] = f3;
+ e[4U] = f41;
+ uint64_t b = (uint64_t)0x1000000U;
+ Lib_IntVector_Intrinsics_vec128 mask = Lib_IntVector_Intrinsics_vec128_load64(b);
+ Lib_IntVector_Intrinsics_vec128 f4 = e[4U];
+ e[4U] = Lib_IntVector_Intrinsics_vec128_or(f4, mask);
+ Lib_IntVector_Intrinsics_vec128 *rn = pre + (uint32_t)10U;
+ Lib_IntVector_Intrinsics_vec128 *rn5 = pre + (uint32_t)15U;
+ Lib_IntVector_Intrinsics_vec128 r0 = rn[0U];
+ Lib_IntVector_Intrinsics_vec128 r1 = rn[1U];
+ Lib_IntVector_Intrinsics_vec128 r2 = rn[2U];
+ Lib_IntVector_Intrinsics_vec128 r3 = rn[3U];
+ Lib_IntVector_Intrinsics_vec128 r4 = rn[4U];
+ Lib_IntVector_Intrinsics_vec128 r51 = rn5[1U];
+ Lib_IntVector_Intrinsics_vec128 r52 = rn5[2U];
+ Lib_IntVector_Intrinsics_vec128 r53 = rn5[3U];
+ Lib_IntVector_Intrinsics_vec128 r54 = rn5[4U];
+ Lib_IntVector_Intrinsics_vec128 f10 = acc[0U];
+ Lib_IntVector_Intrinsics_vec128 f110 = acc[1U];
+ Lib_IntVector_Intrinsics_vec128 f120 = acc[2U];
+ Lib_IntVector_Intrinsics_vec128 f130 = acc[3U];
+ Lib_IntVector_Intrinsics_vec128 f140 = acc[4U];
+ Lib_IntVector_Intrinsics_vec128 a0 = Lib_IntVector_Intrinsics_vec128_mul64(r0, f10);
+ Lib_IntVector_Intrinsics_vec128 a1 = Lib_IntVector_Intrinsics_vec128_mul64(r1, f10);
+ Lib_IntVector_Intrinsics_vec128 a2 = Lib_IntVector_Intrinsics_vec128_mul64(r2, f10);
+ Lib_IntVector_Intrinsics_vec128 a3 = Lib_IntVector_Intrinsics_vec128_mul64(r3, f10);
+ Lib_IntVector_Intrinsics_vec128 a4 = Lib_IntVector_Intrinsics_vec128_mul64(r4, f10);
+ Lib_IntVector_Intrinsics_vec128
+ a01 =
+ Lib_IntVector_Intrinsics_vec128_add64(a0,
+ Lib_IntVector_Intrinsics_vec128_mul64(r54, f110));
+ Lib_IntVector_Intrinsics_vec128
+ a11 =
+ Lib_IntVector_Intrinsics_vec128_add64(a1,
+ Lib_IntVector_Intrinsics_vec128_mul64(r0, f110));
+ Lib_IntVector_Intrinsics_vec128
+ a21 =
+ Lib_IntVector_Intrinsics_vec128_add64(a2,
+ Lib_IntVector_Intrinsics_vec128_mul64(r1, f110));
+ Lib_IntVector_Intrinsics_vec128
+ a31 =
+ Lib_IntVector_Intrinsics_vec128_add64(a3,
+ Lib_IntVector_Intrinsics_vec128_mul64(r2, f110));
+ Lib_IntVector_Intrinsics_vec128
+ a41 =
+ Lib_IntVector_Intrinsics_vec128_add64(a4,
+ Lib_IntVector_Intrinsics_vec128_mul64(r3, f110));
+ Lib_IntVector_Intrinsics_vec128
+ a02 =
+ Lib_IntVector_Intrinsics_vec128_add64(a01,
+ Lib_IntVector_Intrinsics_vec128_mul64(r53, f120));
+ Lib_IntVector_Intrinsics_vec128
+ a12 =
+ Lib_IntVector_Intrinsics_vec128_add64(a11,
+ Lib_IntVector_Intrinsics_vec128_mul64(r54, f120));
+ Lib_IntVector_Intrinsics_vec128
+ a22 =
+ Lib_IntVector_Intrinsics_vec128_add64(a21,
+ Lib_IntVector_Intrinsics_vec128_mul64(r0, f120));
+ Lib_IntVector_Intrinsics_vec128
+ a32 =
+ Lib_IntVector_Intrinsics_vec128_add64(a31,
+ Lib_IntVector_Intrinsics_vec128_mul64(r1, f120));
+ Lib_IntVector_Intrinsics_vec128
+ a42 =
+ Lib_IntVector_Intrinsics_vec128_add64(a41,
+ Lib_IntVector_Intrinsics_vec128_mul64(r2, f120));
+ Lib_IntVector_Intrinsics_vec128
+ a03 =
+ Lib_IntVector_Intrinsics_vec128_add64(a02,
+ Lib_IntVector_Intrinsics_vec128_mul64(r52, f130));
+ Lib_IntVector_Intrinsics_vec128
+ a13 =
+ Lib_IntVector_Intrinsics_vec128_add64(a12,
+ Lib_IntVector_Intrinsics_vec128_mul64(r53, f130));
+ Lib_IntVector_Intrinsics_vec128
+ a23 =
+ Lib_IntVector_Intrinsics_vec128_add64(a22,
+ Lib_IntVector_Intrinsics_vec128_mul64(r54, f130));
+ Lib_IntVector_Intrinsics_vec128
+ a33 =
+ Lib_IntVector_Intrinsics_vec128_add64(a32,
+ Lib_IntVector_Intrinsics_vec128_mul64(r0, f130));
+ Lib_IntVector_Intrinsics_vec128
+ a43 =
+ Lib_IntVector_Intrinsics_vec128_add64(a42,
+ Lib_IntVector_Intrinsics_vec128_mul64(r1, f130));
+ Lib_IntVector_Intrinsics_vec128
+ a04 =
+ Lib_IntVector_Intrinsics_vec128_add64(a03,
+ Lib_IntVector_Intrinsics_vec128_mul64(r51, f140));
+ Lib_IntVector_Intrinsics_vec128
+ a14 =
+ Lib_IntVector_Intrinsics_vec128_add64(a13,
+ Lib_IntVector_Intrinsics_vec128_mul64(r52, f140));
+ Lib_IntVector_Intrinsics_vec128
+ a24 =
+ Lib_IntVector_Intrinsics_vec128_add64(a23,
+ Lib_IntVector_Intrinsics_vec128_mul64(r53, f140));
+ Lib_IntVector_Intrinsics_vec128
+ a34 =
+ Lib_IntVector_Intrinsics_vec128_add64(a33,
+ Lib_IntVector_Intrinsics_vec128_mul64(r54, f140));
+ Lib_IntVector_Intrinsics_vec128
+ a44 =
+ Lib_IntVector_Intrinsics_vec128_add64(a43,
+ Lib_IntVector_Intrinsics_vec128_mul64(r0, f140));
+ Lib_IntVector_Intrinsics_vec128 t01 = a04;
+ Lib_IntVector_Intrinsics_vec128 t1 = a14;
+ Lib_IntVector_Intrinsics_vec128 t2 = a24;
+ Lib_IntVector_Intrinsics_vec128 t3 = a34;
+ Lib_IntVector_Intrinsics_vec128 t4 = a44;
+ Lib_IntVector_Intrinsics_vec128
+ mask26 = Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU);
+ Lib_IntVector_Intrinsics_vec128
+ z0 = Lib_IntVector_Intrinsics_vec128_shift_right64(t01, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec128
+ z1 = Lib_IntVector_Intrinsics_vec128_shift_right64(t3, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec128 x0 = Lib_IntVector_Intrinsics_vec128_and(t01, mask26);
+ Lib_IntVector_Intrinsics_vec128 x3 = Lib_IntVector_Intrinsics_vec128_and(t3, mask26);
+ Lib_IntVector_Intrinsics_vec128 x1 = Lib_IntVector_Intrinsics_vec128_add64(t1, z0);
+ Lib_IntVector_Intrinsics_vec128 x4 = Lib_IntVector_Intrinsics_vec128_add64(t4, z1);
+ Lib_IntVector_Intrinsics_vec128
+ z01 = Lib_IntVector_Intrinsics_vec128_shift_right64(x1, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec128
+ z11 = Lib_IntVector_Intrinsics_vec128_shift_right64(x4, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec128
+ t = Lib_IntVector_Intrinsics_vec128_shift_left64(z11, (uint32_t)2U);
+ Lib_IntVector_Intrinsics_vec128 z12 = Lib_IntVector_Intrinsics_vec128_add64(z11, t);
+ Lib_IntVector_Intrinsics_vec128 x11 = Lib_IntVector_Intrinsics_vec128_and(x1, mask26);
+ Lib_IntVector_Intrinsics_vec128 x41 = Lib_IntVector_Intrinsics_vec128_and(x4, mask26);
+ Lib_IntVector_Intrinsics_vec128 x2 = Lib_IntVector_Intrinsics_vec128_add64(t2, z01);
+ Lib_IntVector_Intrinsics_vec128 x01 = Lib_IntVector_Intrinsics_vec128_add64(x0, z12);
+ Lib_IntVector_Intrinsics_vec128
+ z02 = Lib_IntVector_Intrinsics_vec128_shift_right64(x2, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec128
+ z13 = Lib_IntVector_Intrinsics_vec128_shift_right64(x01, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec128 x21 = Lib_IntVector_Intrinsics_vec128_and(x2, mask26);
+ Lib_IntVector_Intrinsics_vec128 x02 = Lib_IntVector_Intrinsics_vec128_and(x01, mask26);
+ Lib_IntVector_Intrinsics_vec128 x31 = Lib_IntVector_Intrinsics_vec128_add64(x3, z02);
+ Lib_IntVector_Intrinsics_vec128 x12 = Lib_IntVector_Intrinsics_vec128_add64(x11, z13);
+ Lib_IntVector_Intrinsics_vec128
+ z03 = Lib_IntVector_Intrinsics_vec128_shift_right64(x31, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec128 x32 = Lib_IntVector_Intrinsics_vec128_and(x31, mask26);
+ Lib_IntVector_Intrinsics_vec128 x42 = Lib_IntVector_Intrinsics_vec128_add64(x41, z03);
+ Lib_IntVector_Intrinsics_vec128 o00 = x02;
+ Lib_IntVector_Intrinsics_vec128 o10 = x12;
+ Lib_IntVector_Intrinsics_vec128 o20 = x21;
+ Lib_IntVector_Intrinsics_vec128 o30 = x32;
+ Lib_IntVector_Intrinsics_vec128 o40 = x42;
+ acc[0U] = o00;
+ acc[1U] = o10;
+ acc[2U] = o20;
+ acc[3U] = o30;
+ acc[4U] = o40;
+ Lib_IntVector_Intrinsics_vec128 f100 = acc[0U];
+ Lib_IntVector_Intrinsics_vec128 f11 = acc[1U];
+ Lib_IntVector_Intrinsics_vec128 f12 = acc[2U];
+ Lib_IntVector_Intrinsics_vec128 f13 = acc[3U];
+ Lib_IntVector_Intrinsics_vec128 f14 = acc[4U];
+ Lib_IntVector_Intrinsics_vec128 f20 = e[0U];
+ Lib_IntVector_Intrinsics_vec128 f21 = e[1U];
+ Lib_IntVector_Intrinsics_vec128 f22 = e[2U];
+ Lib_IntVector_Intrinsics_vec128 f23 = e[3U];
+ Lib_IntVector_Intrinsics_vec128 f24 = e[4U];
+ Lib_IntVector_Intrinsics_vec128 o0 = Lib_IntVector_Intrinsics_vec128_add64(f100, f20);
+ Lib_IntVector_Intrinsics_vec128 o1 = Lib_IntVector_Intrinsics_vec128_add64(f11, f21);
+ Lib_IntVector_Intrinsics_vec128 o2 = Lib_IntVector_Intrinsics_vec128_add64(f12, f22);
+ Lib_IntVector_Intrinsics_vec128 o3 = Lib_IntVector_Intrinsics_vec128_add64(f13, f23);
+ Lib_IntVector_Intrinsics_vec128 o4 = Lib_IntVector_Intrinsics_vec128_add64(f14, f24);
+ acc[0U] = o0;
+ acc[1U] = o1;
+ acc[2U] = o2;
+ acc[3U] = o3;
+ acc[4U] = o4;
+ }
+ Hacl_Impl_Poly1305_Field32xN_128_fmul_r2_normalize(acc, pre);
+ }
+ uint32_t len1 = len - len0;
+ uint8_t *t1 = text + len0;
+ uint32_t nb = len1 / (uint32_t)16U;
+ uint32_t rem = len1 % (uint32_t)16U;
+ for (uint32_t i = (uint32_t)0U; i < nb; i++) {
+ uint8_t *block = t1 + i * (uint32_t)16U;
+ Lib_IntVector_Intrinsics_vec128 e[5U];
+ for (uint32_t _i = 0U; _i < (uint32_t)5U; ++_i)
+ e[_i] = Lib_IntVector_Intrinsics_vec128_zero;
+ uint64_t u0 = load64_le(block);
+ uint64_t lo = u0;
+ uint64_t u = load64_le(block + (uint32_t)8U);
+ uint64_t hi = u;
+ Lib_IntVector_Intrinsics_vec128 f0 = Lib_IntVector_Intrinsics_vec128_load64(lo);
+ Lib_IntVector_Intrinsics_vec128 f1 = Lib_IntVector_Intrinsics_vec128_load64(hi);
+ Lib_IntVector_Intrinsics_vec128
+ f010 =
+ Lib_IntVector_Intrinsics_vec128_and(f0,
+ Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU));
+ Lib_IntVector_Intrinsics_vec128
+ f110 =
+ Lib_IntVector_Intrinsics_vec128_and(Lib_IntVector_Intrinsics_vec128_shift_right64(f0,
+ (uint32_t)26U),
+ Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU));
+ Lib_IntVector_Intrinsics_vec128
+ f20 =
+ Lib_IntVector_Intrinsics_vec128_or(Lib_IntVector_Intrinsics_vec128_shift_right64(f0,
+ (uint32_t)52U),
+ Lib_IntVector_Intrinsics_vec128_shift_left64(Lib_IntVector_Intrinsics_vec128_and(f1,
+ Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3fffU)),
+ (uint32_t)12U));
+ Lib_IntVector_Intrinsics_vec128
+ f30 =
+ Lib_IntVector_Intrinsics_vec128_and(Lib_IntVector_Intrinsics_vec128_shift_right64(f1,
+ (uint32_t)14U),
+ Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU));
+ Lib_IntVector_Intrinsics_vec128
+ f40 = Lib_IntVector_Intrinsics_vec128_shift_right64(f1, (uint32_t)40U);
+ Lib_IntVector_Intrinsics_vec128 f01 = f010;
+ Lib_IntVector_Intrinsics_vec128 f111 = f110;
+ Lib_IntVector_Intrinsics_vec128 f2 = f20;
+ Lib_IntVector_Intrinsics_vec128 f3 = f30;
+ Lib_IntVector_Intrinsics_vec128 f41 = f40;
+ e[0U] = f01;
+ e[1U] = f111;
+ e[2U] = f2;
+ e[3U] = f3;
+ e[4U] = f41;
+ uint64_t b = (uint64_t)0x1000000U;
+ Lib_IntVector_Intrinsics_vec128 mask = Lib_IntVector_Intrinsics_vec128_load64(b);
+ Lib_IntVector_Intrinsics_vec128 f4 = e[4U];
+ e[4U] = Lib_IntVector_Intrinsics_vec128_or(f4, mask);
+ Lib_IntVector_Intrinsics_vec128 *r = pre;
+ Lib_IntVector_Intrinsics_vec128 *r5 = pre + (uint32_t)5U;
+ Lib_IntVector_Intrinsics_vec128 r0 = r[0U];
+ Lib_IntVector_Intrinsics_vec128 r1 = r[1U];
+ Lib_IntVector_Intrinsics_vec128 r2 = r[2U];
+ Lib_IntVector_Intrinsics_vec128 r3 = r[3U];
+ Lib_IntVector_Intrinsics_vec128 r4 = r[4U];
+ Lib_IntVector_Intrinsics_vec128 r51 = r5[1U];
+ Lib_IntVector_Intrinsics_vec128 r52 = r5[2U];
+ Lib_IntVector_Intrinsics_vec128 r53 = r5[3U];
+ Lib_IntVector_Intrinsics_vec128 r54 = r5[4U];
+ Lib_IntVector_Intrinsics_vec128 f10 = e[0U];
+ Lib_IntVector_Intrinsics_vec128 f11 = e[1U];
+ Lib_IntVector_Intrinsics_vec128 f12 = e[2U];
+ Lib_IntVector_Intrinsics_vec128 f13 = e[3U];
+ Lib_IntVector_Intrinsics_vec128 f14 = e[4U];
+ Lib_IntVector_Intrinsics_vec128 a0 = acc[0U];
+ Lib_IntVector_Intrinsics_vec128 a1 = acc[1U];
+ Lib_IntVector_Intrinsics_vec128 a2 = acc[2U];
+ Lib_IntVector_Intrinsics_vec128 a3 = acc[3U];
+ Lib_IntVector_Intrinsics_vec128 a4 = acc[4U];
+ Lib_IntVector_Intrinsics_vec128 a01 = Lib_IntVector_Intrinsics_vec128_add64(a0, f10);
+ Lib_IntVector_Intrinsics_vec128 a11 = Lib_IntVector_Intrinsics_vec128_add64(a1, f11);
+ Lib_IntVector_Intrinsics_vec128 a21 = Lib_IntVector_Intrinsics_vec128_add64(a2, f12);
+ Lib_IntVector_Intrinsics_vec128 a31 = Lib_IntVector_Intrinsics_vec128_add64(a3, f13);
+ Lib_IntVector_Intrinsics_vec128 a41 = Lib_IntVector_Intrinsics_vec128_add64(a4, f14);
+ Lib_IntVector_Intrinsics_vec128 a02 = Lib_IntVector_Intrinsics_vec128_mul64(r0, a01);
+ Lib_IntVector_Intrinsics_vec128 a12 = Lib_IntVector_Intrinsics_vec128_mul64(r1, a01);
+ Lib_IntVector_Intrinsics_vec128 a22 = Lib_IntVector_Intrinsics_vec128_mul64(r2, a01);
+ Lib_IntVector_Intrinsics_vec128 a32 = Lib_IntVector_Intrinsics_vec128_mul64(r3, a01);
+ Lib_IntVector_Intrinsics_vec128 a42 = Lib_IntVector_Intrinsics_vec128_mul64(r4, a01);
+ Lib_IntVector_Intrinsics_vec128
+ a03 =
+ Lib_IntVector_Intrinsics_vec128_add64(a02,
+ Lib_IntVector_Intrinsics_vec128_mul64(r54, a11));
+ Lib_IntVector_Intrinsics_vec128
+ a13 =
+ Lib_IntVector_Intrinsics_vec128_add64(a12,
+ Lib_IntVector_Intrinsics_vec128_mul64(r0, a11));
+ Lib_IntVector_Intrinsics_vec128
+ a23 =
+ Lib_IntVector_Intrinsics_vec128_add64(a22,
+ Lib_IntVector_Intrinsics_vec128_mul64(r1, a11));
+ Lib_IntVector_Intrinsics_vec128
+ a33 =
+ Lib_IntVector_Intrinsics_vec128_add64(a32,
+ Lib_IntVector_Intrinsics_vec128_mul64(r2, a11));
+ Lib_IntVector_Intrinsics_vec128
+ a43 =
+ Lib_IntVector_Intrinsics_vec128_add64(a42,
+ Lib_IntVector_Intrinsics_vec128_mul64(r3, a11));
+ Lib_IntVector_Intrinsics_vec128
+ a04 =
+ Lib_IntVector_Intrinsics_vec128_add64(a03,
+ Lib_IntVector_Intrinsics_vec128_mul64(r53, a21));
+ Lib_IntVector_Intrinsics_vec128
+ a14 =
+ Lib_IntVector_Intrinsics_vec128_add64(a13,
+ Lib_IntVector_Intrinsics_vec128_mul64(r54, a21));
+ Lib_IntVector_Intrinsics_vec128
+ a24 =
+ Lib_IntVector_Intrinsics_vec128_add64(a23,
+ Lib_IntVector_Intrinsics_vec128_mul64(r0, a21));
+ Lib_IntVector_Intrinsics_vec128
+ a34 =
+ Lib_IntVector_Intrinsics_vec128_add64(a33,
+ Lib_IntVector_Intrinsics_vec128_mul64(r1, a21));
+ Lib_IntVector_Intrinsics_vec128
+ a44 =
+ Lib_IntVector_Intrinsics_vec128_add64(a43,
+ Lib_IntVector_Intrinsics_vec128_mul64(r2, a21));
+ Lib_IntVector_Intrinsics_vec128
+ a05 =
+ Lib_IntVector_Intrinsics_vec128_add64(a04,
+ Lib_IntVector_Intrinsics_vec128_mul64(r52, a31));
+ Lib_IntVector_Intrinsics_vec128
+ a15 =
+ Lib_IntVector_Intrinsics_vec128_add64(a14,
+ Lib_IntVector_Intrinsics_vec128_mul64(r53, a31));
+ Lib_IntVector_Intrinsics_vec128
+ a25 =
+ Lib_IntVector_Intrinsics_vec128_add64(a24,
+ Lib_IntVector_Intrinsics_vec128_mul64(r54, a31));
+ Lib_IntVector_Intrinsics_vec128
+ a35 =
+ Lib_IntVector_Intrinsics_vec128_add64(a34,
+ Lib_IntVector_Intrinsics_vec128_mul64(r0, a31));
+ Lib_IntVector_Intrinsics_vec128
+ a45 =
+ Lib_IntVector_Intrinsics_vec128_add64(a44,
+ Lib_IntVector_Intrinsics_vec128_mul64(r1, a31));
+ Lib_IntVector_Intrinsics_vec128
+ a06 =
+ Lib_IntVector_Intrinsics_vec128_add64(a05,
+ Lib_IntVector_Intrinsics_vec128_mul64(r51, a41));
+ Lib_IntVector_Intrinsics_vec128
+ a16 =
+ Lib_IntVector_Intrinsics_vec128_add64(a15,
+ Lib_IntVector_Intrinsics_vec128_mul64(r52, a41));
+ Lib_IntVector_Intrinsics_vec128
+ a26 =
+ Lib_IntVector_Intrinsics_vec128_add64(a25,
+ Lib_IntVector_Intrinsics_vec128_mul64(r53, a41));
+ Lib_IntVector_Intrinsics_vec128
+ a36 =
+ Lib_IntVector_Intrinsics_vec128_add64(a35,
+ Lib_IntVector_Intrinsics_vec128_mul64(r54, a41));
+ Lib_IntVector_Intrinsics_vec128
+ a46 =
+ Lib_IntVector_Intrinsics_vec128_add64(a45,
+ Lib_IntVector_Intrinsics_vec128_mul64(r0, a41));
+ Lib_IntVector_Intrinsics_vec128 t01 = a06;
+ Lib_IntVector_Intrinsics_vec128 t11 = a16;
+ Lib_IntVector_Intrinsics_vec128 t2 = a26;
+ Lib_IntVector_Intrinsics_vec128 t3 = a36;
+ Lib_IntVector_Intrinsics_vec128 t4 = a46;
+ Lib_IntVector_Intrinsics_vec128
+ mask26 = Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU);
+ Lib_IntVector_Intrinsics_vec128
+ z0 = Lib_IntVector_Intrinsics_vec128_shift_right64(t01, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec128
+ z1 = Lib_IntVector_Intrinsics_vec128_shift_right64(t3, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec128 x0 = Lib_IntVector_Intrinsics_vec128_and(t01, mask26);
+ Lib_IntVector_Intrinsics_vec128 x3 = Lib_IntVector_Intrinsics_vec128_and(t3, mask26);
+ Lib_IntVector_Intrinsics_vec128 x1 = Lib_IntVector_Intrinsics_vec128_add64(t11, z0);
+ Lib_IntVector_Intrinsics_vec128 x4 = Lib_IntVector_Intrinsics_vec128_add64(t4, z1);
+ Lib_IntVector_Intrinsics_vec128
+ z01 = Lib_IntVector_Intrinsics_vec128_shift_right64(x1, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec128
+ z11 = Lib_IntVector_Intrinsics_vec128_shift_right64(x4, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec128
+ t = Lib_IntVector_Intrinsics_vec128_shift_left64(z11, (uint32_t)2U);
+ Lib_IntVector_Intrinsics_vec128 z12 = Lib_IntVector_Intrinsics_vec128_add64(z11, t);
+ Lib_IntVector_Intrinsics_vec128 x11 = Lib_IntVector_Intrinsics_vec128_and(x1, mask26);
+ Lib_IntVector_Intrinsics_vec128 x41 = Lib_IntVector_Intrinsics_vec128_and(x4, mask26);
+ Lib_IntVector_Intrinsics_vec128 x2 = Lib_IntVector_Intrinsics_vec128_add64(t2, z01);
+ Lib_IntVector_Intrinsics_vec128 x01 = Lib_IntVector_Intrinsics_vec128_add64(x0, z12);
+ Lib_IntVector_Intrinsics_vec128
+ z02 = Lib_IntVector_Intrinsics_vec128_shift_right64(x2, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec128
+ z13 = Lib_IntVector_Intrinsics_vec128_shift_right64(x01, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec128 x21 = Lib_IntVector_Intrinsics_vec128_and(x2, mask26);
+ Lib_IntVector_Intrinsics_vec128 x02 = Lib_IntVector_Intrinsics_vec128_and(x01, mask26);
+ Lib_IntVector_Intrinsics_vec128 x31 = Lib_IntVector_Intrinsics_vec128_add64(x3, z02);
+ Lib_IntVector_Intrinsics_vec128 x12 = Lib_IntVector_Intrinsics_vec128_add64(x11, z13);
+ Lib_IntVector_Intrinsics_vec128
+ z03 = Lib_IntVector_Intrinsics_vec128_shift_right64(x31, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec128 x32 = Lib_IntVector_Intrinsics_vec128_and(x31, mask26);
+ Lib_IntVector_Intrinsics_vec128 x42 = Lib_IntVector_Intrinsics_vec128_add64(x41, z03);
+ Lib_IntVector_Intrinsics_vec128 o0 = x02;
+ Lib_IntVector_Intrinsics_vec128 o1 = x12;
+ Lib_IntVector_Intrinsics_vec128 o2 = x21;
+ Lib_IntVector_Intrinsics_vec128 o3 = x32;
+ Lib_IntVector_Intrinsics_vec128 o4 = x42;
+ acc[0U] = o0;
+ acc[1U] = o1;
+ acc[2U] = o2;
+ acc[3U] = o3;
+ acc[4U] = o4;
+ }
+ if (rem > (uint32_t)0U) {
+ uint8_t *last = t1 + nb * (uint32_t)16U;
+ Lib_IntVector_Intrinsics_vec128 e[5U];
+ for (uint32_t _i = 0U; _i < (uint32_t)5U; ++_i)
+ e[_i] = Lib_IntVector_Intrinsics_vec128_zero;
+ uint8_t tmp[16U] = { 0U };
+ memcpy(tmp, last, rem * sizeof(last[0U]));
+ uint64_t u0 = load64_le(tmp);
+ uint64_t lo = u0;
+ uint64_t u = load64_le(tmp + (uint32_t)8U);
+ uint64_t hi = u;
+ Lib_IntVector_Intrinsics_vec128 f0 = Lib_IntVector_Intrinsics_vec128_load64(lo);
+ Lib_IntVector_Intrinsics_vec128 f1 = Lib_IntVector_Intrinsics_vec128_load64(hi);
+ Lib_IntVector_Intrinsics_vec128
+ f010 =
+ Lib_IntVector_Intrinsics_vec128_and(f0,
+ Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU));
+ Lib_IntVector_Intrinsics_vec128
+ f110 =
+ Lib_IntVector_Intrinsics_vec128_and(Lib_IntVector_Intrinsics_vec128_shift_right64(f0,
+ (uint32_t)26U),
+ Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU));
+ Lib_IntVector_Intrinsics_vec128
+ f20 =
+ Lib_IntVector_Intrinsics_vec128_or(Lib_IntVector_Intrinsics_vec128_shift_right64(f0,
+ (uint32_t)52U),
+ Lib_IntVector_Intrinsics_vec128_shift_left64(Lib_IntVector_Intrinsics_vec128_and(f1,
+ Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3fffU)),
+ (uint32_t)12U));
+ Lib_IntVector_Intrinsics_vec128
+ f30 =
+ Lib_IntVector_Intrinsics_vec128_and(Lib_IntVector_Intrinsics_vec128_shift_right64(f1,
+ (uint32_t)14U),
+ Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU));
+ Lib_IntVector_Intrinsics_vec128
+ f40 = Lib_IntVector_Intrinsics_vec128_shift_right64(f1, (uint32_t)40U);
+ Lib_IntVector_Intrinsics_vec128 f01 = f010;
+ Lib_IntVector_Intrinsics_vec128 f111 = f110;
+ Lib_IntVector_Intrinsics_vec128 f2 = f20;
+ Lib_IntVector_Intrinsics_vec128 f3 = f30;
+ Lib_IntVector_Intrinsics_vec128 f4 = f40;
+ e[0U] = f01;
+ e[1U] = f111;
+ e[2U] = f2;
+ e[3U] = f3;
+ e[4U] = f4;
+ uint64_t b = (uint64_t)1U << rem * (uint32_t)8U % (uint32_t)26U;
+ Lib_IntVector_Intrinsics_vec128 mask = Lib_IntVector_Intrinsics_vec128_load64(b);
+ Lib_IntVector_Intrinsics_vec128 fi = e[rem * (uint32_t)8U / (uint32_t)26U];
+ e[rem * (uint32_t)8U / (uint32_t)26U] = Lib_IntVector_Intrinsics_vec128_or(fi, mask);
+ Lib_IntVector_Intrinsics_vec128 *r = pre;
+ Lib_IntVector_Intrinsics_vec128 *r5 = pre + (uint32_t)5U;
+ Lib_IntVector_Intrinsics_vec128 r0 = r[0U];
+ Lib_IntVector_Intrinsics_vec128 r1 = r[1U];
+ Lib_IntVector_Intrinsics_vec128 r2 = r[2U];
+ Lib_IntVector_Intrinsics_vec128 r3 = r[3U];
+ Lib_IntVector_Intrinsics_vec128 r4 = r[4U];
+ Lib_IntVector_Intrinsics_vec128 r51 = r5[1U];
+ Lib_IntVector_Intrinsics_vec128 r52 = r5[2U];
+ Lib_IntVector_Intrinsics_vec128 r53 = r5[3U];
+ Lib_IntVector_Intrinsics_vec128 r54 = r5[4U];
+ Lib_IntVector_Intrinsics_vec128 f10 = e[0U];
+ Lib_IntVector_Intrinsics_vec128 f11 = e[1U];
+ Lib_IntVector_Intrinsics_vec128 f12 = e[2U];
+ Lib_IntVector_Intrinsics_vec128 f13 = e[3U];
+ Lib_IntVector_Intrinsics_vec128 f14 = e[4U];
+ Lib_IntVector_Intrinsics_vec128 a0 = acc[0U];
+ Lib_IntVector_Intrinsics_vec128 a1 = acc[1U];
+ Lib_IntVector_Intrinsics_vec128 a2 = acc[2U];
+ Lib_IntVector_Intrinsics_vec128 a3 = acc[3U];
+ Lib_IntVector_Intrinsics_vec128 a4 = acc[4U];
+ Lib_IntVector_Intrinsics_vec128 a01 = Lib_IntVector_Intrinsics_vec128_add64(a0, f10);
+ Lib_IntVector_Intrinsics_vec128 a11 = Lib_IntVector_Intrinsics_vec128_add64(a1, f11);
+ Lib_IntVector_Intrinsics_vec128 a21 = Lib_IntVector_Intrinsics_vec128_add64(a2, f12);
+ Lib_IntVector_Intrinsics_vec128 a31 = Lib_IntVector_Intrinsics_vec128_add64(a3, f13);
+ Lib_IntVector_Intrinsics_vec128 a41 = Lib_IntVector_Intrinsics_vec128_add64(a4, f14);
+ Lib_IntVector_Intrinsics_vec128 a02 = Lib_IntVector_Intrinsics_vec128_mul64(r0, a01);
+ Lib_IntVector_Intrinsics_vec128 a12 = Lib_IntVector_Intrinsics_vec128_mul64(r1, a01);
+ Lib_IntVector_Intrinsics_vec128 a22 = Lib_IntVector_Intrinsics_vec128_mul64(r2, a01);
+ Lib_IntVector_Intrinsics_vec128 a32 = Lib_IntVector_Intrinsics_vec128_mul64(r3, a01);
+ Lib_IntVector_Intrinsics_vec128 a42 = Lib_IntVector_Intrinsics_vec128_mul64(r4, a01);
+ Lib_IntVector_Intrinsics_vec128
+ a03 =
+ Lib_IntVector_Intrinsics_vec128_add64(a02,
+ Lib_IntVector_Intrinsics_vec128_mul64(r54, a11));
+ Lib_IntVector_Intrinsics_vec128
+ a13 =
+ Lib_IntVector_Intrinsics_vec128_add64(a12,
+ Lib_IntVector_Intrinsics_vec128_mul64(r0, a11));
+ Lib_IntVector_Intrinsics_vec128
+ a23 =
+ Lib_IntVector_Intrinsics_vec128_add64(a22,
+ Lib_IntVector_Intrinsics_vec128_mul64(r1, a11));
+ Lib_IntVector_Intrinsics_vec128
+ a33 =
+ Lib_IntVector_Intrinsics_vec128_add64(a32,
+ Lib_IntVector_Intrinsics_vec128_mul64(r2, a11));
+ Lib_IntVector_Intrinsics_vec128
+ a43 =
+ Lib_IntVector_Intrinsics_vec128_add64(a42,
+ Lib_IntVector_Intrinsics_vec128_mul64(r3, a11));
+ Lib_IntVector_Intrinsics_vec128
+ a04 =
+ Lib_IntVector_Intrinsics_vec128_add64(a03,
+ Lib_IntVector_Intrinsics_vec128_mul64(r53, a21));
+ Lib_IntVector_Intrinsics_vec128
+ a14 =
+ Lib_IntVector_Intrinsics_vec128_add64(a13,
+ Lib_IntVector_Intrinsics_vec128_mul64(r54, a21));
+ Lib_IntVector_Intrinsics_vec128
+ a24 =
+ Lib_IntVector_Intrinsics_vec128_add64(a23,
+ Lib_IntVector_Intrinsics_vec128_mul64(r0, a21));
+ Lib_IntVector_Intrinsics_vec128
+ a34 =
+ Lib_IntVector_Intrinsics_vec128_add64(a33,
+ Lib_IntVector_Intrinsics_vec128_mul64(r1, a21));
+ Lib_IntVector_Intrinsics_vec128
+ a44 =
+ Lib_IntVector_Intrinsics_vec128_add64(a43,
+ Lib_IntVector_Intrinsics_vec128_mul64(r2, a21));
+ Lib_IntVector_Intrinsics_vec128
+ a05 =
+ Lib_IntVector_Intrinsics_vec128_add64(a04,
+ Lib_IntVector_Intrinsics_vec128_mul64(r52, a31));
+ Lib_IntVector_Intrinsics_vec128
+ a15 =
+ Lib_IntVector_Intrinsics_vec128_add64(a14,
+ Lib_IntVector_Intrinsics_vec128_mul64(r53, a31));
+ Lib_IntVector_Intrinsics_vec128
+ a25 =
+ Lib_IntVector_Intrinsics_vec128_add64(a24,
+ Lib_IntVector_Intrinsics_vec128_mul64(r54, a31));
+ Lib_IntVector_Intrinsics_vec128
+ a35 =
+ Lib_IntVector_Intrinsics_vec128_add64(a34,
+ Lib_IntVector_Intrinsics_vec128_mul64(r0, a31));
+ Lib_IntVector_Intrinsics_vec128
+ a45 =
+ Lib_IntVector_Intrinsics_vec128_add64(a44,
+ Lib_IntVector_Intrinsics_vec128_mul64(r1, a31));
+ Lib_IntVector_Intrinsics_vec128
+ a06 =
+ Lib_IntVector_Intrinsics_vec128_add64(a05,
+ Lib_IntVector_Intrinsics_vec128_mul64(r51, a41));
+ Lib_IntVector_Intrinsics_vec128
+ a16 =
+ Lib_IntVector_Intrinsics_vec128_add64(a15,
+ Lib_IntVector_Intrinsics_vec128_mul64(r52, a41));
+ Lib_IntVector_Intrinsics_vec128
+ a26 =
+ Lib_IntVector_Intrinsics_vec128_add64(a25,
+ Lib_IntVector_Intrinsics_vec128_mul64(r53, a41));
+ Lib_IntVector_Intrinsics_vec128
+ a36 =
+ Lib_IntVector_Intrinsics_vec128_add64(a35,
+ Lib_IntVector_Intrinsics_vec128_mul64(r54, a41));
+ Lib_IntVector_Intrinsics_vec128
+ a46 =
+ Lib_IntVector_Intrinsics_vec128_add64(a45,
+ Lib_IntVector_Intrinsics_vec128_mul64(r0, a41));
+ Lib_IntVector_Intrinsics_vec128 t01 = a06;
+ Lib_IntVector_Intrinsics_vec128 t11 = a16;
+ Lib_IntVector_Intrinsics_vec128 t2 = a26;
+ Lib_IntVector_Intrinsics_vec128 t3 = a36;
+ Lib_IntVector_Intrinsics_vec128 t4 = a46;
+ Lib_IntVector_Intrinsics_vec128
+ mask26 = Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU);
+ Lib_IntVector_Intrinsics_vec128
+ z0 = Lib_IntVector_Intrinsics_vec128_shift_right64(t01, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec128
+ z1 = Lib_IntVector_Intrinsics_vec128_shift_right64(t3, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec128 x0 = Lib_IntVector_Intrinsics_vec128_and(t01, mask26);
+ Lib_IntVector_Intrinsics_vec128 x3 = Lib_IntVector_Intrinsics_vec128_and(t3, mask26);
+ Lib_IntVector_Intrinsics_vec128 x1 = Lib_IntVector_Intrinsics_vec128_add64(t11, z0);
+ Lib_IntVector_Intrinsics_vec128 x4 = Lib_IntVector_Intrinsics_vec128_add64(t4, z1);
+ Lib_IntVector_Intrinsics_vec128
+ z01 = Lib_IntVector_Intrinsics_vec128_shift_right64(x1, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec128
+ z11 = Lib_IntVector_Intrinsics_vec128_shift_right64(x4, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec128
+ t = Lib_IntVector_Intrinsics_vec128_shift_left64(z11, (uint32_t)2U);
+ Lib_IntVector_Intrinsics_vec128 z12 = Lib_IntVector_Intrinsics_vec128_add64(z11, t);
+ Lib_IntVector_Intrinsics_vec128 x11 = Lib_IntVector_Intrinsics_vec128_and(x1, mask26);
+ Lib_IntVector_Intrinsics_vec128 x41 = Lib_IntVector_Intrinsics_vec128_and(x4, mask26);
+ Lib_IntVector_Intrinsics_vec128 x2 = Lib_IntVector_Intrinsics_vec128_add64(t2, z01);
+ Lib_IntVector_Intrinsics_vec128 x01 = Lib_IntVector_Intrinsics_vec128_add64(x0, z12);
+ Lib_IntVector_Intrinsics_vec128
+ z02 = Lib_IntVector_Intrinsics_vec128_shift_right64(x2, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec128
+ z13 = Lib_IntVector_Intrinsics_vec128_shift_right64(x01, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec128 x21 = Lib_IntVector_Intrinsics_vec128_and(x2, mask26);
+ Lib_IntVector_Intrinsics_vec128 x02 = Lib_IntVector_Intrinsics_vec128_and(x01, mask26);
+ Lib_IntVector_Intrinsics_vec128 x31 = Lib_IntVector_Intrinsics_vec128_add64(x3, z02);
+ Lib_IntVector_Intrinsics_vec128 x12 = Lib_IntVector_Intrinsics_vec128_add64(x11, z13);
+ Lib_IntVector_Intrinsics_vec128
+ z03 = Lib_IntVector_Intrinsics_vec128_shift_right64(x31, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec128 x32 = Lib_IntVector_Intrinsics_vec128_and(x31, mask26);
+ Lib_IntVector_Intrinsics_vec128 x42 = Lib_IntVector_Intrinsics_vec128_add64(x41, z03);
+ Lib_IntVector_Intrinsics_vec128 o0 = x02;
+ Lib_IntVector_Intrinsics_vec128 o1 = x12;
+ Lib_IntVector_Intrinsics_vec128 o2 = x21;
+ Lib_IntVector_Intrinsics_vec128 o3 = x32;
+ Lib_IntVector_Intrinsics_vec128 o4 = x42;
+ acc[0U] = o0;
+ acc[1U] = o1;
+ acc[2U] = o2;
+ acc[3U] = o3;
+ acc[4U] = o4;
+ return;
+ }
+}
+
+void
+Hacl_Poly1305_128_poly1305_finish(
+ uint8_t *tag, /* out: 16 bytes are stored here as two little-endian 64-bit words */
+ uint8_t *key, /* in: only bytes 16..31 (key + 16) are read by this function */
+ Lib_IntVector_Intrinsics_vec128 *ctx) /* in/out: ctx[0..4] are read as the accumulator limbs and overwritten */
+{ /* Finalize: carry-reduce the accumulator, conditionally subtract 2^130 - 5, add the key words at key + 16 modulo 2^128, store the tag. */
+ Lib_IntVector_Intrinsics_vec128 *acc = ctx;
+ uint8_t *ks = key + (uint32_t)16U; /* second 16 bytes of the key buffer */
+ Lib_IntVector_Intrinsics_vec128 f0 = acc[0U];
+ Lib_IntVector_Intrinsics_vec128 f13 = acc[1U];
+ Lib_IntVector_Intrinsics_vec128 f23 = acc[2U];
+ Lib_IntVector_Intrinsics_vec128 f33 = acc[3U];
+ Lib_IntVector_Intrinsics_vec128 f40 = acc[4U];
+ Lib_IntVector_Intrinsics_vec128
+ l0 = Lib_IntVector_Intrinsics_vec128_add64(f0, Lib_IntVector_Intrinsics_vec128_zero); /* first carry pass: limb 0 with zero carry-in */
+ Lib_IntVector_Intrinsics_vec128
+ tmp00 =
+ Lib_IntVector_Intrinsics_vec128_and(l0,
+ Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU)); /* keep the low 26 bits of the limb */
+ Lib_IntVector_Intrinsics_vec128
+ c00 = Lib_IntVector_Intrinsics_vec128_shift_right64(l0, (uint32_t)26U); /* bits above 26 become the carry into limb 1 */
+ Lib_IntVector_Intrinsics_vec128 l1 = Lib_IntVector_Intrinsics_vec128_add64(f13, c00);
+ Lib_IntVector_Intrinsics_vec128
+ tmp10 =
+ Lib_IntVector_Intrinsics_vec128_and(l1,
+ Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU));
+ Lib_IntVector_Intrinsics_vec128
+ c10 = Lib_IntVector_Intrinsics_vec128_shift_right64(l1, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec128 l2 = Lib_IntVector_Intrinsics_vec128_add64(f23, c10);
+ Lib_IntVector_Intrinsics_vec128
+ tmp20 =
+ Lib_IntVector_Intrinsics_vec128_and(l2,
+ Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU));
+ Lib_IntVector_Intrinsics_vec128
+ c20 = Lib_IntVector_Intrinsics_vec128_shift_right64(l2, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec128 l3 = Lib_IntVector_Intrinsics_vec128_add64(f33, c20);
+ Lib_IntVector_Intrinsics_vec128
+ tmp30 =
+ Lib_IntVector_Intrinsics_vec128_and(l3,
+ Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU));
+ Lib_IntVector_Intrinsics_vec128
+ c30 = Lib_IntVector_Intrinsics_vec128_shift_right64(l3, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec128 l4 = Lib_IntVector_Intrinsics_vec128_add64(f40, c30);
+ Lib_IntVector_Intrinsics_vec128
+ tmp40 =
+ Lib_IntVector_Intrinsics_vec128_and(l4,
+ Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU));
+ Lib_IntVector_Intrinsics_vec128
+ c40 = Lib_IntVector_Intrinsics_vec128_shift_right64(l4, (uint32_t)26U); /* carry out of limb 4, i.e. weight 2^(5*26) = 2^130 */
+ Lib_IntVector_Intrinsics_vec128
+ f010 =
+ Lib_IntVector_Intrinsics_vec128_add64(tmp00,
+ Lib_IntVector_Intrinsics_vec128_smul64(c40, (uint64_t)5U)); /* fold the top carry back into limb 0, multiplied by 5 */
+ Lib_IntVector_Intrinsics_vec128 f110 = tmp10;
+ Lib_IntVector_Intrinsics_vec128 f210 = tmp20;
+ Lib_IntVector_Intrinsics_vec128 f310 = tmp30;
+ Lib_IntVector_Intrinsics_vec128 f410 = tmp40;
+ Lib_IntVector_Intrinsics_vec128
+ l = Lib_IntVector_Intrinsics_vec128_add64(f010, Lib_IntVector_Intrinsics_vec128_zero); /* second carry pass, same shape as the first */
+ Lib_IntVector_Intrinsics_vec128
+ tmp0 =
+ Lib_IntVector_Intrinsics_vec128_and(l,
+ Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU));
+ Lib_IntVector_Intrinsics_vec128
+ c0 = Lib_IntVector_Intrinsics_vec128_shift_right64(l, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec128 l5 = Lib_IntVector_Intrinsics_vec128_add64(f110, c0);
+ Lib_IntVector_Intrinsics_vec128
+ tmp1 =
+ Lib_IntVector_Intrinsics_vec128_and(l5,
+ Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU));
+ Lib_IntVector_Intrinsics_vec128
+ c1 = Lib_IntVector_Intrinsics_vec128_shift_right64(l5, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec128 l6 = Lib_IntVector_Intrinsics_vec128_add64(f210, c1);
+ Lib_IntVector_Intrinsics_vec128
+ tmp2 =
+ Lib_IntVector_Intrinsics_vec128_and(l6,
+ Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU));
+ Lib_IntVector_Intrinsics_vec128
+ c2 = Lib_IntVector_Intrinsics_vec128_shift_right64(l6, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec128 l7 = Lib_IntVector_Intrinsics_vec128_add64(f310, c2);
+ Lib_IntVector_Intrinsics_vec128
+ tmp3 =
+ Lib_IntVector_Intrinsics_vec128_and(l7,
+ Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU));
+ Lib_IntVector_Intrinsics_vec128
+ c3 = Lib_IntVector_Intrinsics_vec128_shift_right64(l7, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec128 l8 = Lib_IntVector_Intrinsics_vec128_add64(f410, c3);
+ Lib_IntVector_Intrinsics_vec128
+ tmp4 =
+ Lib_IntVector_Intrinsics_vec128_and(l8,
+ Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU));
+ Lib_IntVector_Intrinsics_vec128
+ c4 = Lib_IntVector_Intrinsics_vec128_shift_right64(l8, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec128
+ f02 =
+ Lib_IntVector_Intrinsics_vec128_add64(tmp0,
+ Lib_IntVector_Intrinsics_vec128_smul64(c4, (uint64_t)5U)); /* again fold the top carry into limb 0 times 5 */
+ Lib_IntVector_Intrinsics_vec128 f12 = tmp1;
+ Lib_IntVector_Intrinsics_vec128 f22 = tmp2;
+ Lib_IntVector_Intrinsics_vec128 f32 = tmp3;
+ Lib_IntVector_Intrinsics_vec128 f42 = tmp4;
+ Lib_IntVector_Intrinsics_vec128
+ mh = Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU); /* 2^26 - 1: limbs 1..4 of 2^130 - 5 */
+ Lib_IntVector_Intrinsics_vec128
+ ml = Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3fffffbU); /* 2^26 - 5: limb 0 of 2^130 - 5 */
+ Lib_IntVector_Intrinsics_vec128 mask = Lib_IntVector_Intrinsics_vec128_eq64(f42, mh); /* NOTE(review): eq64/gt64 presumably yield all-ones lanes when true - confirm in libintvector.h */
+ Lib_IntVector_Intrinsics_vec128
+ mask1 =
+ Lib_IntVector_Intrinsics_vec128_and(mask,
+ Lib_IntVector_Intrinsics_vec128_eq64(f32, mh));
+ Lib_IntVector_Intrinsics_vec128
+ mask2 =
+ Lib_IntVector_Intrinsics_vec128_and(mask1,
+ Lib_IntVector_Intrinsics_vec128_eq64(f22, mh));
+ Lib_IntVector_Intrinsics_vec128
+ mask3 =
+ Lib_IntVector_Intrinsics_vec128_and(mask2,
+ Lib_IntVector_Intrinsics_vec128_eq64(f12, mh));
+ Lib_IntVector_Intrinsics_vec128
+ mask4 =
+ Lib_IntVector_Intrinsics_vec128_and(mask3,
+ Lib_IntVector_Intrinsics_vec128_lognot(Lib_IntVector_Intrinsics_vec128_gt64(ml, f02))); /* limbs 1..4 equal mh and not (ml > f02): value is at least 2^130 - 5 */
+ Lib_IntVector_Intrinsics_vec128 ph = Lib_IntVector_Intrinsics_vec128_and(mask4, mh); /* masked modulus limbs: zero unless mask4 is set */
+ Lib_IntVector_Intrinsics_vec128 pl = Lib_IntVector_Intrinsics_vec128_and(mask4, ml);
+ Lib_IntVector_Intrinsics_vec128 o0 = Lib_IntVector_Intrinsics_vec128_sub64(f02, pl); /* branch-free conditional subtraction, limb by limb */
+ Lib_IntVector_Intrinsics_vec128 o1 = Lib_IntVector_Intrinsics_vec128_sub64(f12, ph);
+ Lib_IntVector_Intrinsics_vec128 o2 = Lib_IntVector_Intrinsics_vec128_sub64(f22, ph);
+ Lib_IntVector_Intrinsics_vec128 o3 = Lib_IntVector_Intrinsics_vec128_sub64(f32, ph);
+ Lib_IntVector_Intrinsics_vec128 o4 = Lib_IntVector_Intrinsics_vec128_sub64(f42, ph);
+ Lib_IntVector_Intrinsics_vec128 f011 = o0;
+ Lib_IntVector_Intrinsics_vec128 f111 = o1;
+ Lib_IntVector_Intrinsics_vec128 f211 = o2;
+ Lib_IntVector_Intrinsics_vec128 f311 = o3;
+ Lib_IntVector_Intrinsics_vec128 f411 = o4;
+ acc[0U] = f011; /* the reduced limbs are written back into the caller's context */
+ acc[1U] = f111;
+ acc[2U] = f211;
+ acc[3U] = f311;
+ acc[4U] = f411;
+ Lib_IntVector_Intrinsics_vec128 f00 = acc[0U];
+ Lib_IntVector_Intrinsics_vec128 f1 = acc[1U];
+ Lib_IntVector_Intrinsics_vec128 f2 = acc[2U];
+ Lib_IntVector_Intrinsics_vec128 f3 = acc[3U];
+ Lib_IntVector_Intrinsics_vec128 f4 = acc[4U];
+ uint64_t f01 = Lib_IntVector_Intrinsics_vec128_extract64(f00, (uint32_t)0U); /* only element 0 of each limb vector feeds the tag */
+ uint64_t f112 = Lib_IntVector_Intrinsics_vec128_extract64(f1, (uint32_t)0U);
+ uint64_t f212 = Lib_IntVector_Intrinsics_vec128_extract64(f2, (uint32_t)0U);
+ uint64_t f312 = Lib_IntVector_Intrinsics_vec128_extract64(f3, (uint32_t)0U);
+ uint64_t f41 = Lib_IntVector_Intrinsics_vec128_extract64(f4, (uint32_t)0U);
+ uint64_t lo = (f01 | f112 << (uint32_t)26U) | f212 << (uint32_t)52U; /* pack limbs at bit offsets 0, 26, 52 into the low word */
+ uint64_t hi = (f212 >> (uint32_t)12U | f312 << (uint32_t)14U) | f41 << (uint32_t)40U; /* rest of limb 2, then limbs 3 and 4, into the high word */
+ uint64_t f10 = lo;
+ uint64_t f11 = hi;
+ uint64_t u0 = load64_le(ks); /* key bytes 16..23 */
+ uint64_t lo0 = u0;
+ uint64_t u = load64_le(ks + (uint32_t)8U); /* key bytes 24..31 */
+ uint64_t hi0 = u;
+ uint64_t f20 = lo0;
+ uint64_t f21 = hi0;
+ uint64_t r0 = f10 + f20; /* 128-bit addition done as two 64-bit words; wraps modulo 2^64 */
+ uint64_t r1 = f11 + f21;
+ uint64_t c = (r0 ^ ((r0 ^ f20) | ((r0 - f20) ^ f20))) >> (uint32_t)63U; /* branch-free carry: 1 exactly when r0 < f20, i.e. the low-word add wrapped */
+ uint64_t r11 = r1 + c; /* any carry out of the high word is dropped */
+ uint64_t f30 = r0;
+ uint64_t f31 = r11;
+ store64_le(tag, f30);
+ store64_le(tag + (uint32_t)8U, f31);
+}
+
+void
+Hacl_Poly1305_128_poly1305_mac(uint8_t *tag, uint32_t len, uint8_t *text, uint8_t *key)
+{ /* One-shot MAC: build a zeroed 25-vector state, then run init, update(len, text) and finish in order. */
+ Lib_IntVector_Intrinsics_vec128 state[25U];
+ for (uint32_t slot = (uint32_t)25U; slot > (uint32_t)0U; slot--)
+ state[slot - (uint32_t)1U] = Lib_IntVector_Intrinsics_vec128_zero; /* every element is cleared before init sees it */
+ Hacl_Poly1305_128_poly1305_init(state, key);
+ Hacl_Poly1305_128_poly1305_update(state, len, text);
+ Hacl_Poly1305_128_poly1305_finish(tag, key, state); /* finish reads key + 16 and stores 16 bytes at tag */
+}
diff --git a/security/nss/lib/freebl/verified/Hacl_Poly1305_128.h b/security/nss/lib/freebl/verified/Hacl_Poly1305_128.h
new file mode 100644
index 0000000000..8e7cdc74dd
--- /dev/null
+++ b/security/nss/lib/freebl/verified/Hacl_Poly1305_128.h
@@ -0,0 +1,66 @@
+/* MIT License
+ *
+ * Copyright (c) 2016-2020 INRIA, CMU and Microsoft Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "libintvector.h"
+#include "kremlin/internal/types.h"
+#include "kremlin/lowstar_endianness.h"
+#include <string.h>
+#include <stdbool.h>
+
+#ifndef __Hacl_Poly1305_128_H
+#define __Hacl_Poly1305_128_H
+
+#include "Hacl_Kremlib.h"
+
+void
+Hacl_Impl_Poly1305_Field32xN_128_load_acc2(Lib_IntVector_Intrinsics_vec128 *acc, uint8_t *b);
+
+void
+Hacl_Impl_Poly1305_Field32xN_128_fmul_r2_normalize(
+ Lib_IntVector_Intrinsics_vec128 *out,
+ Lib_IntVector_Intrinsics_vec128 *p);
+
+extern uint32_t Hacl_Poly1305_128_blocklen;
+/* Context handle: a pointer to an array of vec128; the one-shot mac entry point allocates 25 elements for it. */
+typedef Lib_IntVector_Intrinsics_vec128 *Hacl_Poly1305_128_poly1305_ctx;
+
+void Hacl_Poly1305_128_poly1305_init(Lib_IntVector_Intrinsics_vec128 *ctx, uint8_t *key);
+
+void Hacl_Poly1305_128_poly1305_update1(Lib_IntVector_Intrinsics_vec128 *ctx, uint8_t *text);
+/* NOTE(review): len is presumably the byte length of text - confirm against the definition. */
+void
+Hacl_Poly1305_128_poly1305_update(
+ Lib_IntVector_Intrinsics_vec128 *ctx,
+ uint32_t len,
+ uint8_t *text);
+/* Reduces the accumulator in ctx[0..4], adds the key words read at key + 16, and stores 16 bytes at tag. */
+void
+Hacl_Poly1305_128_poly1305_finish(
+ uint8_t *tag,
+ uint8_t *key,
+ Lib_IntVector_Intrinsics_vec128 *ctx);
+/* One-shot helper: zeroed local context, then init(ctx, key), update(ctx, len, text), finish(tag, key, ctx). */
+void Hacl_Poly1305_128_poly1305_mac(uint8_t *tag, uint32_t len, uint8_t *text, uint8_t *key);
+
+#define __Hacl_Poly1305_128_H_DEFINED /* extra marker macro, distinct from the guard macro defined at the top */
+#endif
diff --git a/security/nss/lib/freebl/verified/Hacl_Poly1305_256.c b/security/nss/lib/freebl/verified/Hacl_Poly1305_256.c
new file mode 100644
index 0000000000..6fddf86af7
--- /dev/null
+++ b/security/nss/lib/freebl/verified/Hacl_Poly1305_256.c
@@ -0,0 +1,2095 @@
+/* MIT License
+ *
+ * Copyright (c) 2016-2020 INRIA, CMU and Microsoft Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "Hacl_Poly1305_256.h"
+
+void
+Hacl_Impl_Poly1305_Field32xN_256_load_acc4(Lib_IntVector_Intrinsics_vec256 *acc, uint8_t *b)
+{ /* Splits the data loaded from b and b + 32 into five 26-bit limb vectors, sets bit 24 of the top limb, and adds element 0 of acc[0..4] into the result, which replaces acc[0..4]. */
+ Lib_IntVector_Intrinsics_vec256 e[5U];
+ for (uint32_t _i = 0U; _i < (uint32_t)5U; ++_i)
+ e[_i] = Lib_IntVector_Intrinsics_vec256_zero;
+ Lib_IntVector_Intrinsics_vec256 lo = Lib_IntVector_Intrinsics_vec256_load_le(b); /* NOTE(review): presumably reads 32 bytes per load, 64 in total - confirm */
+ Lib_IntVector_Intrinsics_vec256
+ hi = Lib_IntVector_Intrinsics_vec256_load_le(b + (uint32_t)32U);
+ Lib_IntVector_Intrinsics_vec256
+ mask26 = Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU); /* low 26 bits set */
+ Lib_IntVector_Intrinsics_vec256 m0 = Lib_IntVector_Intrinsics_vec256_interleave_low128(lo, hi); /* NOTE(review): the interleaves presumably regroup words so each 64-bit element tracks one 16-byte block - confirm in libintvector.h */
+ Lib_IntVector_Intrinsics_vec256
+ m1 = Lib_IntVector_Intrinsics_vec256_interleave_high128(lo, hi);
+ Lib_IntVector_Intrinsics_vec256
+ m2 = Lib_IntVector_Intrinsics_vec256_shift_right(m0, (uint32_t)48U);
+ Lib_IntVector_Intrinsics_vec256
+ m3 = Lib_IntVector_Intrinsics_vec256_shift_right(m1, (uint32_t)48U);
+ Lib_IntVector_Intrinsics_vec256 m4 = Lib_IntVector_Intrinsics_vec256_interleave_high64(m0, m1);
+ Lib_IntVector_Intrinsics_vec256 t0 = Lib_IntVector_Intrinsics_vec256_interleave_low64(m0, m1);
+ Lib_IntVector_Intrinsics_vec256 t3 = Lib_IntVector_Intrinsics_vec256_interleave_low64(m2, m3);
+ Lib_IntVector_Intrinsics_vec256
+ t2 = Lib_IntVector_Intrinsics_vec256_shift_right64(t3, (uint32_t)4U);
+ Lib_IntVector_Intrinsics_vec256 o20 = Lib_IntVector_Intrinsics_vec256_and(t2, mask26); /* limb 2 = (t3 >> 4) masked to 26 bits */
+ Lib_IntVector_Intrinsics_vec256
+ t1 = Lib_IntVector_Intrinsics_vec256_shift_right64(t0, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec256 o10 = Lib_IntVector_Intrinsics_vec256_and(t1, mask26); /* limb 1 = (t0 >> 26) masked to 26 bits */
+ Lib_IntVector_Intrinsics_vec256 o5 = Lib_IntVector_Intrinsics_vec256_and(t0, mask26); /* limb 0 = low 26 bits of t0 */
+ Lib_IntVector_Intrinsics_vec256
+ t31 = Lib_IntVector_Intrinsics_vec256_shift_right64(t3, (uint32_t)30U);
+ Lib_IntVector_Intrinsics_vec256 o30 = Lib_IntVector_Intrinsics_vec256_and(t31, mask26); /* limb 3 = (t3 >> 30) masked to 26 bits */
+ Lib_IntVector_Intrinsics_vec256
+ o40 = Lib_IntVector_Intrinsics_vec256_shift_right64(m4, (uint32_t)40U); /* limb 4 = m4 >> 40, left unmasked */
+ Lib_IntVector_Intrinsics_vec256 o0 = o5;
+ Lib_IntVector_Intrinsics_vec256 o1 = o10;
+ Lib_IntVector_Intrinsics_vec256 o2 = o20;
+ Lib_IntVector_Intrinsics_vec256 o3 = o30;
+ Lib_IntVector_Intrinsics_vec256 o4 = o40;
+ e[0U] = o0;
+ e[1U] = o1;
+ e[2U] = o2;
+ e[3U] = o3;
+ e[4U] = o4;
+ uint64_t b1 = (uint64_t)0x1000000U; /* 2^24; in limb 4 this is bit 4*26 + 24 = 128 of the limb-encoded value */
+ Lib_IntVector_Intrinsics_vec256 mask = Lib_IntVector_Intrinsics_vec256_load64(b1);
+ Lib_IntVector_Intrinsics_vec256 f40 = e[4U];
+ e[4U] = Lib_IntVector_Intrinsics_vec256_or(f40, mask); /* set that bit in the top limb */
+ Lib_IntVector_Intrinsics_vec256 acc0 = acc[0U];
+ Lib_IntVector_Intrinsics_vec256 acc1 = acc[1U];
+ Lib_IntVector_Intrinsics_vec256 acc2 = acc[2U];
+ Lib_IntVector_Intrinsics_vec256 acc3 = acc[3U];
+ Lib_IntVector_Intrinsics_vec256 acc4 = acc[4U];
+ Lib_IntVector_Intrinsics_vec256 e0 = e[0U];
+ Lib_IntVector_Intrinsics_vec256 e1 = e[1U];
+ Lib_IntVector_Intrinsics_vec256 e2 = e[2U];
+ Lib_IntVector_Intrinsics_vec256 e3 = e[3U];
+ Lib_IntVector_Intrinsics_vec256 e4 = e[4U];
+ Lib_IntVector_Intrinsics_vec256 r0 = Lib_IntVector_Intrinsics_vec256_zero;
+ Lib_IntVector_Intrinsics_vec256 r1 = Lib_IntVector_Intrinsics_vec256_zero;
+ Lib_IntVector_Intrinsics_vec256 r2 = Lib_IntVector_Intrinsics_vec256_zero;
+ Lib_IntVector_Intrinsics_vec256 r3 = Lib_IntVector_Intrinsics_vec256_zero;
+ Lib_IntVector_Intrinsics_vec256 r4 = Lib_IntVector_Intrinsics_vec256_zero;
+ Lib_IntVector_Intrinsics_vec256
+ r01 =
+ Lib_IntVector_Intrinsics_vec256_insert64(r0,
+ Lib_IntVector_Intrinsics_vec256_extract64(acc0, (uint32_t)0U),
+ (uint32_t)0U); /* zero vector with only element 0 of the old accumulator limb copied in */
+ Lib_IntVector_Intrinsics_vec256
+ r11 =
+ Lib_IntVector_Intrinsics_vec256_insert64(r1,
+ Lib_IntVector_Intrinsics_vec256_extract64(acc1, (uint32_t)0U),
+ (uint32_t)0U);
+ Lib_IntVector_Intrinsics_vec256
+ r21 =
+ Lib_IntVector_Intrinsics_vec256_insert64(r2,
+ Lib_IntVector_Intrinsics_vec256_extract64(acc2, (uint32_t)0U),
+ (uint32_t)0U);
+ Lib_IntVector_Intrinsics_vec256
+ r31 =
+ Lib_IntVector_Intrinsics_vec256_insert64(r3,
+ Lib_IntVector_Intrinsics_vec256_extract64(acc3, (uint32_t)0U),
+ (uint32_t)0U);
+ Lib_IntVector_Intrinsics_vec256
+ r41 =
+ Lib_IntVector_Intrinsics_vec256_insert64(r4,
+ Lib_IntVector_Intrinsics_vec256_extract64(acc4, (uint32_t)0U),
+ (uint32_t)0U);
+ Lib_IntVector_Intrinsics_vec256 f0 = Lib_IntVector_Intrinsics_vec256_add64(r01, e0); /* old element 0 plus the new limbs; other elements carry the new limbs only */
+ Lib_IntVector_Intrinsics_vec256 f1 = Lib_IntVector_Intrinsics_vec256_add64(r11, e1);
+ Lib_IntVector_Intrinsics_vec256 f2 = Lib_IntVector_Intrinsics_vec256_add64(r21, e2);
+ Lib_IntVector_Intrinsics_vec256 f3 = Lib_IntVector_Intrinsics_vec256_add64(r31, e3);
+ Lib_IntVector_Intrinsics_vec256 f4 = Lib_IntVector_Intrinsics_vec256_add64(r41, e4);
+ Lib_IntVector_Intrinsics_vec256 acc01 = f0;
+ Lib_IntVector_Intrinsics_vec256 acc11 = f1;
+ Lib_IntVector_Intrinsics_vec256 acc21 = f2;
+ Lib_IntVector_Intrinsics_vec256 acc31 = f3;
+ Lib_IntVector_Intrinsics_vec256 acc41 = f4;
+ acc[0U] = acc01; /* results overwrite the caller's accumulator limbs */
+ acc[1U] = acc11;
+ acc[2U] = acc21;
+ acc[3U] = acc31;
+ acc[4U] = acc41;
+}
+
+void
+Hacl_Impl_Poly1305_Field32xN_256_fmul_r4_normalize(
+ Lib_IntVector_Intrinsics_vec256 *out,
+ Lib_IntVector_Intrinsics_vec256 *p)
+{
+ Lib_IntVector_Intrinsics_vec256 *r = p;
+ Lib_IntVector_Intrinsics_vec256 *r_5 = p + (uint32_t)5U;
+ Lib_IntVector_Intrinsics_vec256 *r4 = p + (uint32_t)10U;
+ Lib_IntVector_Intrinsics_vec256 a0 = out[0U];
+ Lib_IntVector_Intrinsics_vec256 a1 = out[1U];
+ Lib_IntVector_Intrinsics_vec256 a2 = out[2U];
+ Lib_IntVector_Intrinsics_vec256 a3 = out[3U];
+ Lib_IntVector_Intrinsics_vec256 a4 = out[4U];
+ Lib_IntVector_Intrinsics_vec256 r10 = r[0U];
+ Lib_IntVector_Intrinsics_vec256 r11 = r[1U];
+ Lib_IntVector_Intrinsics_vec256 r12 = r[2U];
+ Lib_IntVector_Intrinsics_vec256 r13 = r[3U];
+ Lib_IntVector_Intrinsics_vec256 r14 = r[4U];
+ Lib_IntVector_Intrinsics_vec256 r151 = r_5[1U];
+ Lib_IntVector_Intrinsics_vec256 r152 = r_5[2U];
+ Lib_IntVector_Intrinsics_vec256 r153 = r_5[3U];
+ Lib_IntVector_Intrinsics_vec256 r154 = r_5[4U];
+ Lib_IntVector_Intrinsics_vec256 r40 = r4[0U];
+ Lib_IntVector_Intrinsics_vec256 r41 = r4[1U];
+ Lib_IntVector_Intrinsics_vec256 r42 = r4[2U];
+ Lib_IntVector_Intrinsics_vec256 r43 = r4[3U];
+ Lib_IntVector_Intrinsics_vec256 r44 = r4[4U];
+ Lib_IntVector_Intrinsics_vec256 a010 = Lib_IntVector_Intrinsics_vec256_mul64(r10, r10);
+ Lib_IntVector_Intrinsics_vec256 a110 = Lib_IntVector_Intrinsics_vec256_mul64(r11, r10);
+ Lib_IntVector_Intrinsics_vec256 a210 = Lib_IntVector_Intrinsics_vec256_mul64(r12, r10);
+ Lib_IntVector_Intrinsics_vec256 a310 = Lib_IntVector_Intrinsics_vec256_mul64(r13, r10);
+ Lib_IntVector_Intrinsics_vec256 a410 = Lib_IntVector_Intrinsics_vec256_mul64(r14, r10);
+ Lib_IntVector_Intrinsics_vec256
+ a020 =
+ Lib_IntVector_Intrinsics_vec256_add64(a010,
+ Lib_IntVector_Intrinsics_vec256_mul64(r154, r11));
+ Lib_IntVector_Intrinsics_vec256
+ a120 =
+ Lib_IntVector_Intrinsics_vec256_add64(a110,
+ Lib_IntVector_Intrinsics_vec256_mul64(r10, r11));
+ Lib_IntVector_Intrinsics_vec256
+ a220 =
+ Lib_IntVector_Intrinsics_vec256_add64(a210,
+ Lib_IntVector_Intrinsics_vec256_mul64(r11, r11));
+ Lib_IntVector_Intrinsics_vec256
+ a320 =
+ Lib_IntVector_Intrinsics_vec256_add64(a310,
+ Lib_IntVector_Intrinsics_vec256_mul64(r12, r11));
+ Lib_IntVector_Intrinsics_vec256
+ a420 =
+ Lib_IntVector_Intrinsics_vec256_add64(a410,
+ Lib_IntVector_Intrinsics_vec256_mul64(r13, r11));
+ Lib_IntVector_Intrinsics_vec256
+ a030 =
+ Lib_IntVector_Intrinsics_vec256_add64(a020,
+ Lib_IntVector_Intrinsics_vec256_mul64(r153, r12));
+ Lib_IntVector_Intrinsics_vec256
+ a130 =
+ Lib_IntVector_Intrinsics_vec256_add64(a120,
+ Lib_IntVector_Intrinsics_vec256_mul64(r154, r12));
+ Lib_IntVector_Intrinsics_vec256
+ a230 =
+ Lib_IntVector_Intrinsics_vec256_add64(a220,
+ Lib_IntVector_Intrinsics_vec256_mul64(r10, r12));
+ Lib_IntVector_Intrinsics_vec256
+ a330 =
+ Lib_IntVector_Intrinsics_vec256_add64(a320,
+ Lib_IntVector_Intrinsics_vec256_mul64(r11, r12));
+ Lib_IntVector_Intrinsics_vec256
+ a430 =
+ Lib_IntVector_Intrinsics_vec256_add64(a420,
+ Lib_IntVector_Intrinsics_vec256_mul64(r12, r12));
+ Lib_IntVector_Intrinsics_vec256
+ a040 =
+ Lib_IntVector_Intrinsics_vec256_add64(a030,
+ Lib_IntVector_Intrinsics_vec256_mul64(r152, r13));
+ Lib_IntVector_Intrinsics_vec256
+ a140 =
+ Lib_IntVector_Intrinsics_vec256_add64(a130,
+ Lib_IntVector_Intrinsics_vec256_mul64(r153, r13));
+ Lib_IntVector_Intrinsics_vec256
+ a240 =
+ Lib_IntVector_Intrinsics_vec256_add64(a230,
+ Lib_IntVector_Intrinsics_vec256_mul64(r154, r13));
+ Lib_IntVector_Intrinsics_vec256
+ a340 =
+ Lib_IntVector_Intrinsics_vec256_add64(a330,
+ Lib_IntVector_Intrinsics_vec256_mul64(r10, r13));
+ Lib_IntVector_Intrinsics_vec256
+ a440 =
+ Lib_IntVector_Intrinsics_vec256_add64(a430,
+ Lib_IntVector_Intrinsics_vec256_mul64(r11, r13));
+ Lib_IntVector_Intrinsics_vec256
+ a050 =
+ Lib_IntVector_Intrinsics_vec256_add64(a040,
+ Lib_IntVector_Intrinsics_vec256_mul64(r151, r14));
+ Lib_IntVector_Intrinsics_vec256
+ a150 =
+ Lib_IntVector_Intrinsics_vec256_add64(a140,
+ Lib_IntVector_Intrinsics_vec256_mul64(r152, r14));
+ Lib_IntVector_Intrinsics_vec256
+ a250 =
+ Lib_IntVector_Intrinsics_vec256_add64(a240,
+ Lib_IntVector_Intrinsics_vec256_mul64(r153, r14));
+ Lib_IntVector_Intrinsics_vec256
+ a350 =
+ Lib_IntVector_Intrinsics_vec256_add64(a340,
+ Lib_IntVector_Intrinsics_vec256_mul64(r154, r14));
+ Lib_IntVector_Intrinsics_vec256
+ a450 =
+ Lib_IntVector_Intrinsics_vec256_add64(a440,
+ Lib_IntVector_Intrinsics_vec256_mul64(r10, r14));
+ Lib_IntVector_Intrinsics_vec256 t00 = a050;
+ Lib_IntVector_Intrinsics_vec256 t10 = a150;
+ Lib_IntVector_Intrinsics_vec256 t20 = a250;
+ Lib_IntVector_Intrinsics_vec256 t30 = a350;
+ Lib_IntVector_Intrinsics_vec256 t40 = a450;
+ Lib_IntVector_Intrinsics_vec256
+ mask260 = Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU);
+ Lib_IntVector_Intrinsics_vec256
+ z00 = Lib_IntVector_Intrinsics_vec256_shift_right64(t00, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec256
+ z10 = Lib_IntVector_Intrinsics_vec256_shift_right64(t30, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec256 x00 = Lib_IntVector_Intrinsics_vec256_and(t00, mask260);
+ Lib_IntVector_Intrinsics_vec256 x30 = Lib_IntVector_Intrinsics_vec256_and(t30, mask260);
+ Lib_IntVector_Intrinsics_vec256 x10 = Lib_IntVector_Intrinsics_vec256_add64(t10, z00);
+ Lib_IntVector_Intrinsics_vec256 x40 = Lib_IntVector_Intrinsics_vec256_add64(t40, z10);
+ Lib_IntVector_Intrinsics_vec256
+ z010 = Lib_IntVector_Intrinsics_vec256_shift_right64(x10, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec256
+ z110 = Lib_IntVector_Intrinsics_vec256_shift_right64(x40, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec256
+ t5 = Lib_IntVector_Intrinsics_vec256_shift_left64(z110, (uint32_t)2U);
+ Lib_IntVector_Intrinsics_vec256 z12 = Lib_IntVector_Intrinsics_vec256_add64(z110, t5);
+ Lib_IntVector_Intrinsics_vec256 x110 = Lib_IntVector_Intrinsics_vec256_and(x10, mask260);
+ Lib_IntVector_Intrinsics_vec256 x410 = Lib_IntVector_Intrinsics_vec256_and(x40, mask260);
+ Lib_IntVector_Intrinsics_vec256 x20 = Lib_IntVector_Intrinsics_vec256_add64(t20, z010);
+ Lib_IntVector_Intrinsics_vec256 x010 = Lib_IntVector_Intrinsics_vec256_add64(x00, z12);
+ Lib_IntVector_Intrinsics_vec256
+ z020 = Lib_IntVector_Intrinsics_vec256_shift_right64(x20, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec256
+ z130 = Lib_IntVector_Intrinsics_vec256_shift_right64(x010, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec256 x210 = Lib_IntVector_Intrinsics_vec256_and(x20, mask260);
+ Lib_IntVector_Intrinsics_vec256 x020 = Lib_IntVector_Intrinsics_vec256_and(x010, mask260);
+ Lib_IntVector_Intrinsics_vec256 x310 = Lib_IntVector_Intrinsics_vec256_add64(x30, z020);
+ Lib_IntVector_Intrinsics_vec256 x120 = Lib_IntVector_Intrinsics_vec256_add64(x110, z130);
+ Lib_IntVector_Intrinsics_vec256
+ z030 = Lib_IntVector_Intrinsics_vec256_shift_right64(x310, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec256 x320 = Lib_IntVector_Intrinsics_vec256_and(x310, mask260);
+ Lib_IntVector_Intrinsics_vec256 x420 = Lib_IntVector_Intrinsics_vec256_add64(x410, z030);
+ Lib_IntVector_Intrinsics_vec256 r20 = x020;
+ Lib_IntVector_Intrinsics_vec256 r21 = x120;
+ Lib_IntVector_Intrinsics_vec256 r22 = x210;
+ Lib_IntVector_Intrinsics_vec256 r23 = x320;
+ Lib_IntVector_Intrinsics_vec256 r24 = x420;
+ Lib_IntVector_Intrinsics_vec256 a011 = Lib_IntVector_Intrinsics_vec256_mul64(r10, r20);
+ Lib_IntVector_Intrinsics_vec256 a111 = Lib_IntVector_Intrinsics_vec256_mul64(r11, r20);
+ Lib_IntVector_Intrinsics_vec256 a211 = Lib_IntVector_Intrinsics_vec256_mul64(r12, r20);
+ Lib_IntVector_Intrinsics_vec256 a311 = Lib_IntVector_Intrinsics_vec256_mul64(r13, r20);
+ Lib_IntVector_Intrinsics_vec256 a411 = Lib_IntVector_Intrinsics_vec256_mul64(r14, r20);
+ Lib_IntVector_Intrinsics_vec256
+ a021 =
+ Lib_IntVector_Intrinsics_vec256_add64(a011,
+ Lib_IntVector_Intrinsics_vec256_mul64(r154, r21));
+ Lib_IntVector_Intrinsics_vec256
+ a121 =
+ Lib_IntVector_Intrinsics_vec256_add64(a111,
+ Lib_IntVector_Intrinsics_vec256_mul64(r10, r21));
+ Lib_IntVector_Intrinsics_vec256
+ a221 =
+ Lib_IntVector_Intrinsics_vec256_add64(a211,
+ Lib_IntVector_Intrinsics_vec256_mul64(r11, r21));
+ Lib_IntVector_Intrinsics_vec256
+ a321 =
+ Lib_IntVector_Intrinsics_vec256_add64(a311,
+ Lib_IntVector_Intrinsics_vec256_mul64(r12, r21));
+ Lib_IntVector_Intrinsics_vec256
+ a421 =
+ Lib_IntVector_Intrinsics_vec256_add64(a411,
+ Lib_IntVector_Intrinsics_vec256_mul64(r13, r21));
+ Lib_IntVector_Intrinsics_vec256
+ a031 =
+ Lib_IntVector_Intrinsics_vec256_add64(a021,
+ Lib_IntVector_Intrinsics_vec256_mul64(r153, r22));
+ Lib_IntVector_Intrinsics_vec256
+ a131 =
+ Lib_IntVector_Intrinsics_vec256_add64(a121,
+ Lib_IntVector_Intrinsics_vec256_mul64(r154, r22));
+ Lib_IntVector_Intrinsics_vec256
+ a231 =
+ Lib_IntVector_Intrinsics_vec256_add64(a221,
+ Lib_IntVector_Intrinsics_vec256_mul64(r10, r22));
+ Lib_IntVector_Intrinsics_vec256
+ a331 =
+ Lib_IntVector_Intrinsics_vec256_add64(a321,
+ Lib_IntVector_Intrinsics_vec256_mul64(r11, r22));
+ Lib_IntVector_Intrinsics_vec256
+ a431 =
+ Lib_IntVector_Intrinsics_vec256_add64(a421,
+ Lib_IntVector_Intrinsics_vec256_mul64(r12, r22));
+ Lib_IntVector_Intrinsics_vec256
+ a041 =
+ Lib_IntVector_Intrinsics_vec256_add64(a031,
+ Lib_IntVector_Intrinsics_vec256_mul64(r152, r23));
+ Lib_IntVector_Intrinsics_vec256
+ a141 =
+ Lib_IntVector_Intrinsics_vec256_add64(a131,
+ Lib_IntVector_Intrinsics_vec256_mul64(r153, r23));
+ Lib_IntVector_Intrinsics_vec256
+ a241 =
+ Lib_IntVector_Intrinsics_vec256_add64(a231,
+ Lib_IntVector_Intrinsics_vec256_mul64(r154, r23));
+ Lib_IntVector_Intrinsics_vec256
+ a341 =
+ Lib_IntVector_Intrinsics_vec256_add64(a331,
+ Lib_IntVector_Intrinsics_vec256_mul64(r10, r23));
+ Lib_IntVector_Intrinsics_vec256
+ a441 =
+ Lib_IntVector_Intrinsics_vec256_add64(a431,
+ Lib_IntVector_Intrinsics_vec256_mul64(r11, r23));
+ Lib_IntVector_Intrinsics_vec256
+ a051 =
+ Lib_IntVector_Intrinsics_vec256_add64(a041,
+ Lib_IntVector_Intrinsics_vec256_mul64(r151, r24));
+ Lib_IntVector_Intrinsics_vec256
+ a151 =
+ Lib_IntVector_Intrinsics_vec256_add64(a141,
+ Lib_IntVector_Intrinsics_vec256_mul64(r152, r24));
+ Lib_IntVector_Intrinsics_vec256
+ a251 =
+ Lib_IntVector_Intrinsics_vec256_add64(a241,
+ Lib_IntVector_Intrinsics_vec256_mul64(r153, r24));
+ Lib_IntVector_Intrinsics_vec256
+ a351 =
+ Lib_IntVector_Intrinsics_vec256_add64(a341,
+ Lib_IntVector_Intrinsics_vec256_mul64(r154, r24));
+ Lib_IntVector_Intrinsics_vec256
+ a451 =
+ Lib_IntVector_Intrinsics_vec256_add64(a441,
+ Lib_IntVector_Intrinsics_vec256_mul64(r10, r24));
+ Lib_IntVector_Intrinsics_vec256 t01 = a051;
+ Lib_IntVector_Intrinsics_vec256 t11 = a151;
+ Lib_IntVector_Intrinsics_vec256 t21 = a251;
+ Lib_IntVector_Intrinsics_vec256 t31 = a351;
+ Lib_IntVector_Intrinsics_vec256 t41 = a451;
+ Lib_IntVector_Intrinsics_vec256
+ mask261 = Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU);
+ Lib_IntVector_Intrinsics_vec256
+ z04 = Lib_IntVector_Intrinsics_vec256_shift_right64(t01, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec256
+ z14 = Lib_IntVector_Intrinsics_vec256_shift_right64(t31, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec256 x03 = Lib_IntVector_Intrinsics_vec256_and(t01, mask261);
+ Lib_IntVector_Intrinsics_vec256 x33 = Lib_IntVector_Intrinsics_vec256_and(t31, mask261);
+ Lib_IntVector_Intrinsics_vec256 x13 = Lib_IntVector_Intrinsics_vec256_add64(t11, z04);
+ Lib_IntVector_Intrinsics_vec256 x43 = Lib_IntVector_Intrinsics_vec256_add64(t41, z14);
+ Lib_IntVector_Intrinsics_vec256
+ z011 = Lib_IntVector_Intrinsics_vec256_shift_right64(x13, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec256
+ z111 = Lib_IntVector_Intrinsics_vec256_shift_right64(x43, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec256
+ t6 = Lib_IntVector_Intrinsics_vec256_shift_left64(z111, (uint32_t)2U);
+ Lib_IntVector_Intrinsics_vec256 z120 = Lib_IntVector_Intrinsics_vec256_add64(z111, t6);
+ Lib_IntVector_Intrinsics_vec256 x111 = Lib_IntVector_Intrinsics_vec256_and(x13, mask261);
+ Lib_IntVector_Intrinsics_vec256 x411 = Lib_IntVector_Intrinsics_vec256_and(x43, mask261);
+ Lib_IntVector_Intrinsics_vec256 x22 = Lib_IntVector_Intrinsics_vec256_add64(t21, z011);
+ Lib_IntVector_Intrinsics_vec256 x011 = Lib_IntVector_Intrinsics_vec256_add64(x03, z120);
+ Lib_IntVector_Intrinsics_vec256
+ z021 = Lib_IntVector_Intrinsics_vec256_shift_right64(x22, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec256
+ z131 = Lib_IntVector_Intrinsics_vec256_shift_right64(x011, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec256 x211 = Lib_IntVector_Intrinsics_vec256_and(x22, mask261);
+ Lib_IntVector_Intrinsics_vec256 x021 = Lib_IntVector_Intrinsics_vec256_and(x011, mask261);
+ Lib_IntVector_Intrinsics_vec256 x311 = Lib_IntVector_Intrinsics_vec256_add64(x33, z021);
+ Lib_IntVector_Intrinsics_vec256 x121 = Lib_IntVector_Intrinsics_vec256_add64(x111, z131);
+ Lib_IntVector_Intrinsics_vec256
+ z031 = Lib_IntVector_Intrinsics_vec256_shift_right64(x311, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec256 x321 = Lib_IntVector_Intrinsics_vec256_and(x311, mask261);
+ Lib_IntVector_Intrinsics_vec256 x421 = Lib_IntVector_Intrinsics_vec256_add64(x411, z031);
+ Lib_IntVector_Intrinsics_vec256 r30 = x021;
+ Lib_IntVector_Intrinsics_vec256 r31 = x121;
+ Lib_IntVector_Intrinsics_vec256 r32 = x211;
+ Lib_IntVector_Intrinsics_vec256 r33 = x321;
+ Lib_IntVector_Intrinsics_vec256 r34 = x421;
+ Lib_IntVector_Intrinsics_vec256
+ v12120 = Lib_IntVector_Intrinsics_vec256_interleave_low64(r20, r10);
+ Lib_IntVector_Intrinsics_vec256
+ v34340 = Lib_IntVector_Intrinsics_vec256_interleave_low64(r40, r30);
+ Lib_IntVector_Intrinsics_vec256
+ r12340 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v34340, v12120);
+ Lib_IntVector_Intrinsics_vec256
+ v12121 = Lib_IntVector_Intrinsics_vec256_interleave_low64(r21, r11);
+ Lib_IntVector_Intrinsics_vec256
+ v34341 = Lib_IntVector_Intrinsics_vec256_interleave_low64(r41, r31);
+ Lib_IntVector_Intrinsics_vec256
+ r12341 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v34341, v12121);
+ Lib_IntVector_Intrinsics_vec256
+ v12122 = Lib_IntVector_Intrinsics_vec256_interleave_low64(r22, r12);
+ Lib_IntVector_Intrinsics_vec256
+ v34342 = Lib_IntVector_Intrinsics_vec256_interleave_low64(r42, r32);
+ Lib_IntVector_Intrinsics_vec256
+ r12342 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v34342, v12122);
+ Lib_IntVector_Intrinsics_vec256
+ v12123 = Lib_IntVector_Intrinsics_vec256_interleave_low64(r23, r13);
+ Lib_IntVector_Intrinsics_vec256
+ v34343 = Lib_IntVector_Intrinsics_vec256_interleave_low64(r43, r33);
+ Lib_IntVector_Intrinsics_vec256
+ r12343 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v34343, v12123);
+ Lib_IntVector_Intrinsics_vec256
+ v12124 = Lib_IntVector_Intrinsics_vec256_interleave_low64(r24, r14);
+ Lib_IntVector_Intrinsics_vec256
+ v34344 = Lib_IntVector_Intrinsics_vec256_interleave_low64(r44, r34);
+ Lib_IntVector_Intrinsics_vec256
+ r12344 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v34344, v12124);
+ Lib_IntVector_Intrinsics_vec256
+ r123451 = Lib_IntVector_Intrinsics_vec256_smul64(r12341, (uint64_t)5U);
+ Lib_IntVector_Intrinsics_vec256
+ r123452 = Lib_IntVector_Intrinsics_vec256_smul64(r12342, (uint64_t)5U);
+ Lib_IntVector_Intrinsics_vec256
+ r123453 = Lib_IntVector_Intrinsics_vec256_smul64(r12343, (uint64_t)5U);
+ Lib_IntVector_Intrinsics_vec256
+ r123454 = Lib_IntVector_Intrinsics_vec256_smul64(r12344, (uint64_t)5U);
+ Lib_IntVector_Intrinsics_vec256 a01 = Lib_IntVector_Intrinsics_vec256_mul64(r12340, a0);
+ Lib_IntVector_Intrinsics_vec256 a11 = Lib_IntVector_Intrinsics_vec256_mul64(r12341, a0);
+ Lib_IntVector_Intrinsics_vec256 a21 = Lib_IntVector_Intrinsics_vec256_mul64(r12342, a0);
+ Lib_IntVector_Intrinsics_vec256 a31 = Lib_IntVector_Intrinsics_vec256_mul64(r12343, a0);
+ Lib_IntVector_Intrinsics_vec256 a41 = Lib_IntVector_Intrinsics_vec256_mul64(r12344, a0);
+ Lib_IntVector_Intrinsics_vec256
+ a02 =
+ Lib_IntVector_Intrinsics_vec256_add64(a01,
+ Lib_IntVector_Intrinsics_vec256_mul64(r123454, a1));
+ Lib_IntVector_Intrinsics_vec256
+ a12 =
+ Lib_IntVector_Intrinsics_vec256_add64(a11,
+ Lib_IntVector_Intrinsics_vec256_mul64(r12340, a1));
+ Lib_IntVector_Intrinsics_vec256
+ a22 =
+ Lib_IntVector_Intrinsics_vec256_add64(a21,
+ Lib_IntVector_Intrinsics_vec256_mul64(r12341, a1));
+ Lib_IntVector_Intrinsics_vec256
+ a32 =
+ Lib_IntVector_Intrinsics_vec256_add64(a31,
+ Lib_IntVector_Intrinsics_vec256_mul64(r12342, a1));
+ Lib_IntVector_Intrinsics_vec256
+ a42 =
+ Lib_IntVector_Intrinsics_vec256_add64(a41,
+ Lib_IntVector_Intrinsics_vec256_mul64(r12343, a1));
+ Lib_IntVector_Intrinsics_vec256
+ a03 =
+ Lib_IntVector_Intrinsics_vec256_add64(a02,
+ Lib_IntVector_Intrinsics_vec256_mul64(r123453, a2));
+ Lib_IntVector_Intrinsics_vec256
+ a13 =
+ Lib_IntVector_Intrinsics_vec256_add64(a12,
+ Lib_IntVector_Intrinsics_vec256_mul64(r123454, a2));
+ Lib_IntVector_Intrinsics_vec256
+ a23 =
+ Lib_IntVector_Intrinsics_vec256_add64(a22,
+ Lib_IntVector_Intrinsics_vec256_mul64(r12340, a2));
+ Lib_IntVector_Intrinsics_vec256
+ a33 =
+ Lib_IntVector_Intrinsics_vec256_add64(a32,
+ Lib_IntVector_Intrinsics_vec256_mul64(r12341, a2));
+ Lib_IntVector_Intrinsics_vec256
+ a43 =
+ Lib_IntVector_Intrinsics_vec256_add64(a42,
+ Lib_IntVector_Intrinsics_vec256_mul64(r12342, a2));
+ Lib_IntVector_Intrinsics_vec256
+ a04 =
+ Lib_IntVector_Intrinsics_vec256_add64(a03,
+ Lib_IntVector_Intrinsics_vec256_mul64(r123452, a3));
+ Lib_IntVector_Intrinsics_vec256
+ a14 =
+ Lib_IntVector_Intrinsics_vec256_add64(a13,
+ Lib_IntVector_Intrinsics_vec256_mul64(r123453, a3));
+ Lib_IntVector_Intrinsics_vec256
+ a24 =
+ Lib_IntVector_Intrinsics_vec256_add64(a23,
+ Lib_IntVector_Intrinsics_vec256_mul64(r123454, a3));
+ Lib_IntVector_Intrinsics_vec256
+ a34 =
+ Lib_IntVector_Intrinsics_vec256_add64(a33,
+ Lib_IntVector_Intrinsics_vec256_mul64(r12340, a3));
+ Lib_IntVector_Intrinsics_vec256
+ a44 =
+ Lib_IntVector_Intrinsics_vec256_add64(a43,
+ Lib_IntVector_Intrinsics_vec256_mul64(r12341, a3));
+ Lib_IntVector_Intrinsics_vec256
+ a05 =
+ Lib_IntVector_Intrinsics_vec256_add64(a04,
+ Lib_IntVector_Intrinsics_vec256_mul64(r123451, a4));
+ Lib_IntVector_Intrinsics_vec256
+ a15 =
+ Lib_IntVector_Intrinsics_vec256_add64(a14,
+ Lib_IntVector_Intrinsics_vec256_mul64(r123452, a4));
+ Lib_IntVector_Intrinsics_vec256
+ a25 =
+ Lib_IntVector_Intrinsics_vec256_add64(a24,
+ Lib_IntVector_Intrinsics_vec256_mul64(r123453, a4));
+ Lib_IntVector_Intrinsics_vec256
+ a35 =
+ Lib_IntVector_Intrinsics_vec256_add64(a34,
+ Lib_IntVector_Intrinsics_vec256_mul64(r123454, a4));
+ Lib_IntVector_Intrinsics_vec256
+ a45 =
+ Lib_IntVector_Intrinsics_vec256_add64(a44,
+ Lib_IntVector_Intrinsics_vec256_mul64(r12340, a4));
+ Lib_IntVector_Intrinsics_vec256 t0 = a05;
+ Lib_IntVector_Intrinsics_vec256 t1 = a15;
+ Lib_IntVector_Intrinsics_vec256 t2 = a25;
+ Lib_IntVector_Intrinsics_vec256 t3 = a35;
+ Lib_IntVector_Intrinsics_vec256 t4 = a45;
+ Lib_IntVector_Intrinsics_vec256
+ mask26 = Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU);
+ Lib_IntVector_Intrinsics_vec256
+ z0 = Lib_IntVector_Intrinsics_vec256_shift_right64(t0, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec256
+ z1 = Lib_IntVector_Intrinsics_vec256_shift_right64(t3, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec256 x0 = Lib_IntVector_Intrinsics_vec256_and(t0, mask26);
+ Lib_IntVector_Intrinsics_vec256 x3 = Lib_IntVector_Intrinsics_vec256_and(t3, mask26);
+ Lib_IntVector_Intrinsics_vec256 x1 = Lib_IntVector_Intrinsics_vec256_add64(t1, z0);
+ Lib_IntVector_Intrinsics_vec256 x4 = Lib_IntVector_Intrinsics_vec256_add64(t4, z1);
+ Lib_IntVector_Intrinsics_vec256
+ z01 = Lib_IntVector_Intrinsics_vec256_shift_right64(x1, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec256
+ z11 = Lib_IntVector_Intrinsics_vec256_shift_right64(x4, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec256
+ t = Lib_IntVector_Intrinsics_vec256_shift_left64(z11, (uint32_t)2U);
+ Lib_IntVector_Intrinsics_vec256 z121 = Lib_IntVector_Intrinsics_vec256_add64(z11, t);
+ Lib_IntVector_Intrinsics_vec256 x11 = Lib_IntVector_Intrinsics_vec256_and(x1, mask26);
+ Lib_IntVector_Intrinsics_vec256 x41 = Lib_IntVector_Intrinsics_vec256_and(x4, mask26);
+ Lib_IntVector_Intrinsics_vec256 x2 = Lib_IntVector_Intrinsics_vec256_add64(t2, z01);
+ Lib_IntVector_Intrinsics_vec256 x01 = Lib_IntVector_Intrinsics_vec256_add64(x0, z121);
+ Lib_IntVector_Intrinsics_vec256
+ z02 = Lib_IntVector_Intrinsics_vec256_shift_right64(x2, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec256
+ z13 = Lib_IntVector_Intrinsics_vec256_shift_right64(x01, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec256 x21 = Lib_IntVector_Intrinsics_vec256_and(x2, mask26);
+ Lib_IntVector_Intrinsics_vec256 x02 = Lib_IntVector_Intrinsics_vec256_and(x01, mask26);
+ Lib_IntVector_Intrinsics_vec256 x31 = Lib_IntVector_Intrinsics_vec256_add64(x3, z02);
+ Lib_IntVector_Intrinsics_vec256 x12 = Lib_IntVector_Intrinsics_vec256_add64(x11, z13);
+ Lib_IntVector_Intrinsics_vec256
+ z03 = Lib_IntVector_Intrinsics_vec256_shift_right64(x31, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec256 x32 = Lib_IntVector_Intrinsics_vec256_and(x31, mask26);
+ Lib_IntVector_Intrinsics_vec256 x42 = Lib_IntVector_Intrinsics_vec256_add64(x41, z03);
+ Lib_IntVector_Intrinsics_vec256 o0 = x02;
+ Lib_IntVector_Intrinsics_vec256 o10 = x12;
+ Lib_IntVector_Intrinsics_vec256 o20 = x21;
+ Lib_IntVector_Intrinsics_vec256 o30 = x32;
+ Lib_IntVector_Intrinsics_vec256 o40 = x42;
+ Lib_IntVector_Intrinsics_vec256
+ v00 = Lib_IntVector_Intrinsics_vec256_interleave_high128(o0, o0);
+ Lib_IntVector_Intrinsics_vec256 v10 = Lib_IntVector_Intrinsics_vec256_add64(o0, v00);
+ Lib_IntVector_Intrinsics_vec256
+ v10h = Lib_IntVector_Intrinsics_vec256_interleave_high64(v10, v10);
+ Lib_IntVector_Intrinsics_vec256 v20 = Lib_IntVector_Intrinsics_vec256_add64(v10, v10h);
+ Lib_IntVector_Intrinsics_vec256
+ v01 = Lib_IntVector_Intrinsics_vec256_interleave_high128(o10, o10);
+ Lib_IntVector_Intrinsics_vec256 v11 = Lib_IntVector_Intrinsics_vec256_add64(o10, v01);
+ Lib_IntVector_Intrinsics_vec256
+ v11h = Lib_IntVector_Intrinsics_vec256_interleave_high64(v11, v11);
+ Lib_IntVector_Intrinsics_vec256 v21 = Lib_IntVector_Intrinsics_vec256_add64(v11, v11h);
+ Lib_IntVector_Intrinsics_vec256
+ v02 = Lib_IntVector_Intrinsics_vec256_interleave_high128(o20, o20);
+ Lib_IntVector_Intrinsics_vec256 v12 = Lib_IntVector_Intrinsics_vec256_add64(o20, v02);
+ Lib_IntVector_Intrinsics_vec256
+ v12h = Lib_IntVector_Intrinsics_vec256_interleave_high64(v12, v12);
+ Lib_IntVector_Intrinsics_vec256 v22 = Lib_IntVector_Intrinsics_vec256_add64(v12, v12h);
+ Lib_IntVector_Intrinsics_vec256
+ v03 = Lib_IntVector_Intrinsics_vec256_interleave_high128(o30, o30);
+ Lib_IntVector_Intrinsics_vec256 v13 = Lib_IntVector_Intrinsics_vec256_add64(o30, v03);
+ Lib_IntVector_Intrinsics_vec256
+ v13h = Lib_IntVector_Intrinsics_vec256_interleave_high64(v13, v13);
+ Lib_IntVector_Intrinsics_vec256 v23 = Lib_IntVector_Intrinsics_vec256_add64(v13, v13h);
+ Lib_IntVector_Intrinsics_vec256
+ v04 = Lib_IntVector_Intrinsics_vec256_interleave_high128(o40, o40);
+ Lib_IntVector_Intrinsics_vec256 v14 = Lib_IntVector_Intrinsics_vec256_add64(o40, v04);
+ Lib_IntVector_Intrinsics_vec256
+ v14h = Lib_IntVector_Intrinsics_vec256_interleave_high64(v14, v14);
+ Lib_IntVector_Intrinsics_vec256 v24 = Lib_IntVector_Intrinsics_vec256_add64(v14, v14h);
+ Lib_IntVector_Intrinsics_vec256
+ l = Lib_IntVector_Intrinsics_vec256_add64(v20, Lib_IntVector_Intrinsics_vec256_zero);
+ Lib_IntVector_Intrinsics_vec256
+ tmp0 =
+ Lib_IntVector_Intrinsics_vec256_and(l,
+ Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU));
+ Lib_IntVector_Intrinsics_vec256
+ c0 = Lib_IntVector_Intrinsics_vec256_shift_right64(l, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec256 l0 = Lib_IntVector_Intrinsics_vec256_add64(v21, c0);
+ Lib_IntVector_Intrinsics_vec256
+ tmp1 =
+ Lib_IntVector_Intrinsics_vec256_and(l0,
+ Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU));
+ Lib_IntVector_Intrinsics_vec256
+ c1 = Lib_IntVector_Intrinsics_vec256_shift_right64(l0, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec256 l1 = Lib_IntVector_Intrinsics_vec256_add64(v22, c1);
+ Lib_IntVector_Intrinsics_vec256
+ tmp2 =
+ Lib_IntVector_Intrinsics_vec256_and(l1,
+ Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU));
+ Lib_IntVector_Intrinsics_vec256
+ c2 = Lib_IntVector_Intrinsics_vec256_shift_right64(l1, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec256 l2 = Lib_IntVector_Intrinsics_vec256_add64(v23, c2);
+ Lib_IntVector_Intrinsics_vec256
+ tmp3 =
+ Lib_IntVector_Intrinsics_vec256_and(l2,
+ Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU));
+ Lib_IntVector_Intrinsics_vec256
+ c3 = Lib_IntVector_Intrinsics_vec256_shift_right64(l2, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec256 l3 = Lib_IntVector_Intrinsics_vec256_add64(v24, c3);
+ Lib_IntVector_Intrinsics_vec256
+ tmp4 =
+ Lib_IntVector_Intrinsics_vec256_and(l3,
+ Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU));
+ Lib_IntVector_Intrinsics_vec256
+ c4 = Lib_IntVector_Intrinsics_vec256_shift_right64(l3, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec256
+ o00 =
+ Lib_IntVector_Intrinsics_vec256_add64(tmp0,
+ Lib_IntVector_Intrinsics_vec256_smul64(c4, (uint64_t)5U));
+ Lib_IntVector_Intrinsics_vec256 o1 = tmp1;
+ Lib_IntVector_Intrinsics_vec256 o2 = tmp2;
+ Lib_IntVector_Intrinsics_vec256 o3 = tmp3;
+ Lib_IntVector_Intrinsics_vec256 o4 = tmp4;
+ out[0U] = o00;
+ out[1U] = o1;
+ out[2U] = o2;
+ out[3U] = o3;
+ out[4U] = o4;
+}
+
+uint32_t Hacl_Poly1305_256_blocklen = (uint32_t)16U; /* File-scope, externally visible, non-const variable initialized to 16. NOTE(review): presumably the Poly1305 block length in bytes -- confirm against the header and callers. */
+
+void
+Hacl_Poly1305_256_poly1305_init(Lib_IntVector_Intrinsics_vec256 *ctx, uint8_t *key)
+{
+ Lib_IntVector_Intrinsics_vec256 *acc = ctx;
+ Lib_IntVector_Intrinsics_vec256 *pre = ctx + (uint32_t)5U;
+ uint8_t *kr = key;
+ acc[0U] = Lib_IntVector_Intrinsics_vec256_zero;
+ acc[1U] = Lib_IntVector_Intrinsics_vec256_zero;
+ acc[2U] = Lib_IntVector_Intrinsics_vec256_zero;
+ acc[3U] = Lib_IntVector_Intrinsics_vec256_zero;
+ acc[4U] = Lib_IntVector_Intrinsics_vec256_zero;
+ uint64_t u0 = load64_le(kr);
+ uint64_t lo = u0;
+ uint64_t u = load64_le(kr + (uint32_t)8U);
+ uint64_t hi = u;
+ uint64_t mask0 = (uint64_t)0x0ffffffc0fffffffU;
+ uint64_t mask1 = (uint64_t)0x0ffffffc0ffffffcU;
+ uint64_t lo1 = lo & mask0;
+ uint64_t hi1 = hi & mask1;
+ Lib_IntVector_Intrinsics_vec256 *r = pre;
+ Lib_IntVector_Intrinsics_vec256 *r5 = pre + (uint32_t)5U;
+ Lib_IntVector_Intrinsics_vec256 *rn = pre + (uint32_t)10U;
+ Lib_IntVector_Intrinsics_vec256 *rn_5 = pre + (uint32_t)15U;
+ Lib_IntVector_Intrinsics_vec256 r_vec0 = Lib_IntVector_Intrinsics_vec256_load64(lo1);
+ Lib_IntVector_Intrinsics_vec256 r_vec1 = Lib_IntVector_Intrinsics_vec256_load64(hi1);
+ Lib_IntVector_Intrinsics_vec256
+ f00 =
+ Lib_IntVector_Intrinsics_vec256_and(r_vec0,
+ Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU));
+ Lib_IntVector_Intrinsics_vec256
+ f15 =
+ Lib_IntVector_Intrinsics_vec256_and(Lib_IntVector_Intrinsics_vec256_shift_right64(r_vec0,
+ (uint32_t)26U),
+ Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU));
+ Lib_IntVector_Intrinsics_vec256
+ f20 =
+ Lib_IntVector_Intrinsics_vec256_or(Lib_IntVector_Intrinsics_vec256_shift_right64(r_vec0,
+ (uint32_t)52U),
+ Lib_IntVector_Intrinsics_vec256_shift_left64(Lib_IntVector_Intrinsics_vec256_and(r_vec1,
+ Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3fffU)),
+ (uint32_t)12U));
+ Lib_IntVector_Intrinsics_vec256
+ f30 =
+ Lib_IntVector_Intrinsics_vec256_and(Lib_IntVector_Intrinsics_vec256_shift_right64(r_vec1,
+ (uint32_t)14U),
+ Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU));
+ Lib_IntVector_Intrinsics_vec256
+ f40 = Lib_IntVector_Intrinsics_vec256_shift_right64(r_vec1, (uint32_t)40U);
+ Lib_IntVector_Intrinsics_vec256 f0 = f00;
+ Lib_IntVector_Intrinsics_vec256 f1 = f15;
+ Lib_IntVector_Intrinsics_vec256 f2 = f20;
+ Lib_IntVector_Intrinsics_vec256 f3 = f30;
+ Lib_IntVector_Intrinsics_vec256 f4 = f40;
+ r[0U] = f0;
+ r[1U] = f1;
+ r[2U] = f2;
+ r[3U] = f3;
+ r[4U] = f4;
+ Lib_IntVector_Intrinsics_vec256 f200 = r[0U];
+ Lib_IntVector_Intrinsics_vec256 f210 = r[1U];
+ Lib_IntVector_Intrinsics_vec256 f220 = r[2U];
+ Lib_IntVector_Intrinsics_vec256 f230 = r[3U];
+ Lib_IntVector_Intrinsics_vec256 f240 = r[4U];
+ r5[0U] = Lib_IntVector_Intrinsics_vec256_smul64(f200, (uint64_t)5U);
+ r5[1U] = Lib_IntVector_Intrinsics_vec256_smul64(f210, (uint64_t)5U);
+ r5[2U] = Lib_IntVector_Intrinsics_vec256_smul64(f220, (uint64_t)5U);
+ r5[3U] = Lib_IntVector_Intrinsics_vec256_smul64(f230, (uint64_t)5U);
+ r5[4U] = Lib_IntVector_Intrinsics_vec256_smul64(f240, (uint64_t)5U);
+ Lib_IntVector_Intrinsics_vec256 r0 = r[0U];
+ Lib_IntVector_Intrinsics_vec256 r10 = r[1U];
+ Lib_IntVector_Intrinsics_vec256 r20 = r[2U];
+ Lib_IntVector_Intrinsics_vec256 r30 = r[3U];
+ Lib_IntVector_Intrinsics_vec256 r40 = r[4U];
+ Lib_IntVector_Intrinsics_vec256 r510 = r5[1U];
+ Lib_IntVector_Intrinsics_vec256 r520 = r5[2U];
+ Lib_IntVector_Intrinsics_vec256 r530 = r5[3U];
+ Lib_IntVector_Intrinsics_vec256 r540 = r5[4U];
+ Lib_IntVector_Intrinsics_vec256 f100 = r[0U];
+ Lib_IntVector_Intrinsics_vec256 f110 = r[1U];
+ Lib_IntVector_Intrinsics_vec256 f120 = r[2U];
+ Lib_IntVector_Intrinsics_vec256 f130 = r[3U];
+ Lib_IntVector_Intrinsics_vec256 f140 = r[4U];
+ Lib_IntVector_Intrinsics_vec256 a00 = Lib_IntVector_Intrinsics_vec256_mul64(r0, f100);
+ Lib_IntVector_Intrinsics_vec256 a10 = Lib_IntVector_Intrinsics_vec256_mul64(r10, f100);
+ Lib_IntVector_Intrinsics_vec256 a20 = Lib_IntVector_Intrinsics_vec256_mul64(r20, f100);
+ Lib_IntVector_Intrinsics_vec256 a30 = Lib_IntVector_Intrinsics_vec256_mul64(r30, f100);
+ Lib_IntVector_Intrinsics_vec256 a40 = Lib_IntVector_Intrinsics_vec256_mul64(r40, f100);
+ Lib_IntVector_Intrinsics_vec256
+ a010 =
+ Lib_IntVector_Intrinsics_vec256_add64(a00,
+ Lib_IntVector_Intrinsics_vec256_mul64(r540, f110));
+ Lib_IntVector_Intrinsics_vec256
+ a110 =
+ Lib_IntVector_Intrinsics_vec256_add64(a10,
+ Lib_IntVector_Intrinsics_vec256_mul64(r0, f110));
+ Lib_IntVector_Intrinsics_vec256
+ a210 =
+ Lib_IntVector_Intrinsics_vec256_add64(a20,
+ Lib_IntVector_Intrinsics_vec256_mul64(r10, f110));
+ Lib_IntVector_Intrinsics_vec256
+ a310 =
+ Lib_IntVector_Intrinsics_vec256_add64(a30,
+ Lib_IntVector_Intrinsics_vec256_mul64(r20, f110));
+ Lib_IntVector_Intrinsics_vec256
+ a410 =
+ Lib_IntVector_Intrinsics_vec256_add64(a40,
+ Lib_IntVector_Intrinsics_vec256_mul64(r30, f110));
+ Lib_IntVector_Intrinsics_vec256
+ a020 =
+ Lib_IntVector_Intrinsics_vec256_add64(a010,
+ Lib_IntVector_Intrinsics_vec256_mul64(r530, f120));
+ Lib_IntVector_Intrinsics_vec256
+ a120 =
+ Lib_IntVector_Intrinsics_vec256_add64(a110,
+ Lib_IntVector_Intrinsics_vec256_mul64(r540, f120));
+ Lib_IntVector_Intrinsics_vec256
+ a220 =
+ Lib_IntVector_Intrinsics_vec256_add64(a210,
+ Lib_IntVector_Intrinsics_vec256_mul64(r0, f120));
+ Lib_IntVector_Intrinsics_vec256
+ a320 =
+ Lib_IntVector_Intrinsics_vec256_add64(a310,
+ Lib_IntVector_Intrinsics_vec256_mul64(r10, f120));
+ Lib_IntVector_Intrinsics_vec256
+ a420 =
+ Lib_IntVector_Intrinsics_vec256_add64(a410,
+ Lib_IntVector_Intrinsics_vec256_mul64(r20, f120));
+ Lib_IntVector_Intrinsics_vec256
+ a030 =
+ Lib_IntVector_Intrinsics_vec256_add64(a020,
+ Lib_IntVector_Intrinsics_vec256_mul64(r520, f130));
+ Lib_IntVector_Intrinsics_vec256
+ a130 =
+ Lib_IntVector_Intrinsics_vec256_add64(a120,
+ Lib_IntVector_Intrinsics_vec256_mul64(r530, f130));
+ Lib_IntVector_Intrinsics_vec256
+ a230 =
+ Lib_IntVector_Intrinsics_vec256_add64(a220,
+ Lib_IntVector_Intrinsics_vec256_mul64(r540, f130));
+ Lib_IntVector_Intrinsics_vec256
+ a330 =
+ Lib_IntVector_Intrinsics_vec256_add64(a320,
+ Lib_IntVector_Intrinsics_vec256_mul64(r0, f130));
+ Lib_IntVector_Intrinsics_vec256
+ a430 =
+ Lib_IntVector_Intrinsics_vec256_add64(a420,
+ Lib_IntVector_Intrinsics_vec256_mul64(r10, f130));
+ Lib_IntVector_Intrinsics_vec256
+ a040 =
+ Lib_IntVector_Intrinsics_vec256_add64(a030,
+ Lib_IntVector_Intrinsics_vec256_mul64(r510, f140));
+ Lib_IntVector_Intrinsics_vec256
+ a140 =
+ Lib_IntVector_Intrinsics_vec256_add64(a130,
+ Lib_IntVector_Intrinsics_vec256_mul64(r520, f140));
+ Lib_IntVector_Intrinsics_vec256
+ a240 =
+ Lib_IntVector_Intrinsics_vec256_add64(a230,
+ Lib_IntVector_Intrinsics_vec256_mul64(r530, f140));
+ Lib_IntVector_Intrinsics_vec256
+ a340 =
+ Lib_IntVector_Intrinsics_vec256_add64(a330,
+ Lib_IntVector_Intrinsics_vec256_mul64(r540, f140));
+ Lib_IntVector_Intrinsics_vec256
+ a440 =
+ Lib_IntVector_Intrinsics_vec256_add64(a430,
+ Lib_IntVector_Intrinsics_vec256_mul64(r0, f140));
+ Lib_IntVector_Intrinsics_vec256 t00 = a040;
+ Lib_IntVector_Intrinsics_vec256 t10 = a140;
+ Lib_IntVector_Intrinsics_vec256 t20 = a240;
+ Lib_IntVector_Intrinsics_vec256 t30 = a340;
+ Lib_IntVector_Intrinsics_vec256 t40 = a440;
+ Lib_IntVector_Intrinsics_vec256
+ mask260 = Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU);
+ Lib_IntVector_Intrinsics_vec256
+ z00 = Lib_IntVector_Intrinsics_vec256_shift_right64(t00, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec256
+ z10 = Lib_IntVector_Intrinsics_vec256_shift_right64(t30, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec256 x00 = Lib_IntVector_Intrinsics_vec256_and(t00, mask260);
+ Lib_IntVector_Intrinsics_vec256 x30 = Lib_IntVector_Intrinsics_vec256_and(t30, mask260);
+ Lib_IntVector_Intrinsics_vec256 x10 = Lib_IntVector_Intrinsics_vec256_add64(t10, z00);
+ Lib_IntVector_Intrinsics_vec256 x40 = Lib_IntVector_Intrinsics_vec256_add64(t40, z10);
+ Lib_IntVector_Intrinsics_vec256
+ z010 = Lib_IntVector_Intrinsics_vec256_shift_right64(x10, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec256
+ z110 = Lib_IntVector_Intrinsics_vec256_shift_right64(x40, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec256
+ t5 = Lib_IntVector_Intrinsics_vec256_shift_left64(z110, (uint32_t)2U);
+ Lib_IntVector_Intrinsics_vec256 z12 = Lib_IntVector_Intrinsics_vec256_add64(z110, t5);
+ Lib_IntVector_Intrinsics_vec256 x110 = Lib_IntVector_Intrinsics_vec256_and(x10, mask260);
+ Lib_IntVector_Intrinsics_vec256 x410 = Lib_IntVector_Intrinsics_vec256_and(x40, mask260);
+ Lib_IntVector_Intrinsics_vec256 x20 = Lib_IntVector_Intrinsics_vec256_add64(t20, z010);
+ Lib_IntVector_Intrinsics_vec256 x010 = Lib_IntVector_Intrinsics_vec256_add64(x00, z12);
+ Lib_IntVector_Intrinsics_vec256
+ z020 = Lib_IntVector_Intrinsics_vec256_shift_right64(x20, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec256
+ z130 = Lib_IntVector_Intrinsics_vec256_shift_right64(x010, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec256 x210 = Lib_IntVector_Intrinsics_vec256_and(x20, mask260);
+ Lib_IntVector_Intrinsics_vec256 x020 = Lib_IntVector_Intrinsics_vec256_and(x010, mask260);
+ Lib_IntVector_Intrinsics_vec256 x310 = Lib_IntVector_Intrinsics_vec256_add64(x30, z020);
+ Lib_IntVector_Intrinsics_vec256 x120 = Lib_IntVector_Intrinsics_vec256_add64(x110, z130);
+ Lib_IntVector_Intrinsics_vec256
+ z030 = Lib_IntVector_Intrinsics_vec256_shift_right64(x310, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec256 x320 = Lib_IntVector_Intrinsics_vec256_and(x310, mask260);
+ Lib_IntVector_Intrinsics_vec256 x420 = Lib_IntVector_Intrinsics_vec256_add64(x410, z030);
+ Lib_IntVector_Intrinsics_vec256 o00 = x020;
+ Lib_IntVector_Intrinsics_vec256 o10 = x120;
+ Lib_IntVector_Intrinsics_vec256 o20 = x210;
+ Lib_IntVector_Intrinsics_vec256 o30 = x320;
+ Lib_IntVector_Intrinsics_vec256 o40 = x420;
+ rn[0U] = o00;
+ rn[1U] = o10;
+ rn[2U] = o20;
+ rn[3U] = o30;
+ rn[4U] = o40;
+ Lib_IntVector_Intrinsics_vec256 f201 = rn[0U];
+ Lib_IntVector_Intrinsics_vec256 f211 = rn[1U];
+ Lib_IntVector_Intrinsics_vec256 f221 = rn[2U];
+ Lib_IntVector_Intrinsics_vec256 f231 = rn[3U];
+ Lib_IntVector_Intrinsics_vec256 f241 = rn[4U];
+ rn_5[0U] = Lib_IntVector_Intrinsics_vec256_smul64(f201, (uint64_t)5U);
+ rn_5[1U] = Lib_IntVector_Intrinsics_vec256_smul64(f211, (uint64_t)5U);
+ rn_5[2U] = Lib_IntVector_Intrinsics_vec256_smul64(f221, (uint64_t)5U);
+ rn_5[3U] = Lib_IntVector_Intrinsics_vec256_smul64(f231, (uint64_t)5U);
+ rn_5[4U] = Lib_IntVector_Intrinsics_vec256_smul64(f241, (uint64_t)5U);
+ Lib_IntVector_Intrinsics_vec256 r00 = rn[0U];
+ Lib_IntVector_Intrinsics_vec256 r1 = rn[1U];
+ Lib_IntVector_Intrinsics_vec256 r2 = rn[2U];
+ Lib_IntVector_Intrinsics_vec256 r3 = rn[3U];
+ Lib_IntVector_Intrinsics_vec256 r4 = rn[4U];
+ Lib_IntVector_Intrinsics_vec256 r51 = rn_5[1U];
+ Lib_IntVector_Intrinsics_vec256 r52 = rn_5[2U];
+ Lib_IntVector_Intrinsics_vec256 r53 = rn_5[3U];
+ Lib_IntVector_Intrinsics_vec256 r54 = rn_5[4U];
+ Lib_IntVector_Intrinsics_vec256 f10 = rn[0U];
+ Lib_IntVector_Intrinsics_vec256 f11 = rn[1U];
+ Lib_IntVector_Intrinsics_vec256 f12 = rn[2U];
+ Lib_IntVector_Intrinsics_vec256 f13 = rn[3U];
+ Lib_IntVector_Intrinsics_vec256 f14 = rn[4U];
+ Lib_IntVector_Intrinsics_vec256 a0 = Lib_IntVector_Intrinsics_vec256_mul64(r00, f10);
+ Lib_IntVector_Intrinsics_vec256 a1 = Lib_IntVector_Intrinsics_vec256_mul64(r1, f10);
+ Lib_IntVector_Intrinsics_vec256 a2 = Lib_IntVector_Intrinsics_vec256_mul64(r2, f10);
+ Lib_IntVector_Intrinsics_vec256 a3 = Lib_IntVector_Intrinsics_vec256_mul64(r3, f10);
+ Lib_IntVector_Intrinsics_vec256 a4 = Lib_IntVector_Intrinsics_vec256_mul64(r4, f10);
+ Lib_IntVector_Intrinsics_vec256
+ a01 =
+ Lib_IntVector_Intrinsics_vec256_add64(a0,
+ Lib_IntVector_Intrinsics_vec256_mul64(r54, f11));
+ Lib_IntVector_Intrinsics_vec256
+ a11 =
+ Lib_IntVector_Intrinsics_vec256_add64(a1,
+ Lib_IntVector_Intrinsics_vec256_mul64(r00, f11));
+ Lib_IntVector_Intrinsics_vec256
+ a21 = Lib_IntVector_Intrinsics_vec256_add64(a2, Lib_IntVector_Intrinsics_vec256_mul64(r1, f11));
+ Lib_IntVector_Intrinsics_vec256
+ a31 = Lib_IntVector_Intrinsics_vec256_add64(a3, Lib_IntVector_Intrinsics_vec256_mul64(r2, f11));
+ Lib_IntVector_Intrinsics_vec256
+ a41 = Lib_IntVector_Intrinsics_vec256_add64(a4, Lib_IntVector_Intrinsics_vec256_mul64(r3, f11));
+ Lib_IntVector_Intrinsics_vec256
+ a02 =
+ Lib_IntVector_Intrinsics_vec256_add64(a01,
+ Lib_IntVector_Intrinsics_vec256_mul64(r53, f12));
+ Lib_IntVector_Intrinsics_vec256
+ a12 =
+ Lib_IntVector_Intrinsics_vec256_add64(a11,
+ Lib_IntVector_Intrinsics_vec256_mul64(r54, f12));
+ Lib_IntVector_Intrinsics_vec256
+ a22 =
+ Lib_IntVector_Intrinsics_vec256_add64(a21,
+ Lib_IntVector_Intrinsics_vec256_mul64(r00, f12));
+ Lib_IntVector_Intrinsics_vec256
+ a32 =
+ Lib_IntVector_Intrinsics_vec256_add64(a31,
+ Lib_IntVector_Intrinsics_vec256_mul64(r1, f12));
+ Lib_IntVector_Intrinsics_vec256
+ a42 =
+ Lib_IntVector_Intrinsics_vec256_add64(a41,
+ Lib_IntVector_Intrinsics_vec256_mul64(r2, f12));
+ Lib_IntVector_Intrinsics_vec256
+ a03 =
+ Lib_IntVector_Intrinsics_vec256_add64(a02,
+ Lib_IntVector_Intrinsics_vec256_mul64(r52, f13));
+ Lib_IntVector_Intrinsics_vec256
+ a13 =
+ Lib_IntVector_Intrinsics_vec256_add64(a12,
+ Lib_IntVector_Intrinsics_vec256_mul64(r53, f13));
+ Lib_IntVector_Intrinsics_vec256
+ a23 =
+ Lib_IntVector_Intrinsics_vec256_add64(a22,
+ Lib_IntVector_Intrinsics_vec256_mul64(r54, f13));
+ Lib_IntVector_Intrinsics_vec256
+ a33 =
+ Lib_IntVector_Intrinsics_vec256_add64(a32,
+ Lib_IntVector_Intrinsics_vec256_mul64(r00, f13));
+ Lib_IntVector_Intrinsics_vec256
+ a43 =
+ Lib_IntVector_Intrinsics_vec256_add64(a42,
+ Lib_IntVector_Intrinsics_vec256_mul64(r1, f13));
+ Lib_IntVector_Intrinsics_vec256
+ a04 =
+ Lib_IntVector_Intrinsics_vec256_add64(a03,
+ Lib_IntVector_Intrinsics_vec256_mul64(r51, f14));
+ Lib_IntVector_Intrinsics_vec256
+ a14 =
+ Lib_IntVector_Intrinsics_vec256_add64(a13,
+ Lib_IntVector_Intrinsics_vec256_mul64(r52, f14));
+ Lib_IntVector_Intrinsics_vec256
+ a24 =
+ Lib_IntVector_Intrinsics_vec256_add64(a23,
+ Lib_IntVector_Intrinsics_vec256_mul64(r53, f14));
+ Lib_IntVector_Intrinsics_vec256
+ a34 =
+ Lib_IntVector_Intrinsics_vec256_add64(a33,
+ Lib_IntVector_Intrinsics_vec256_mul64(r54, f14));
+ Lib_IntVector_Intrinsics_vec256
+ a44 =
+ Lib_IntVector_Intrinsics_vec256_add64(a43,
+ Lib_IntVector_Intrinsics_vec256_mul64(r00, f14));
+ Lib_IntVector_Intrinsics_vec256 t0 = a04;
+ Lib_IntVector_Intrinsics_vec256 t1 = a14;
+ Lib_IntVector_Intrinsics_vec256 t2 = a24;
+ Lib_IntVector_Intrinsics_vec256 t3 = a34;
+ Lib_IntVector_Intrinsics_vec256 t4 = a44;
+ Lib_IntVector_Intrinsics_vec256
+ mask26 = Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU);
+ Lib_IntVector_Intrinsics_vec256
+ z0 = Lib_IntVector_Intrinsics_vec256_shift_right64(t0, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec256
+ z1 = Lib_IntVector_Intrinsics_vec256_shift_right64(t3, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec256 x0 = Lib_IntVector_Intrinsics_vec256_and(t0, mask26);
+ Lib_IntVector_Intrinsics_vec256 x3 = Lib_IntVector_Intrinsics_vec256_and(t3, mask26);
+ Lib_IntVector_Intrinsics_vec256 x1 = Lib_IntVector_Intrinsics_vec256_add64(t1, z0);
+ Lib_IntVector_Intrinsics_vec256 x4 = Lib_IntVector_Intrinsics_vec256_add64(t4, z1);
+ Lib_IntVector_Intrinsics_vec256
+ z01 = Lib_IntVector_Intrinsics_vec256_shift_right64(x1, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec256
+ z11 = Lib_IntVector_Intrinsics_vec256_shift_right64(x4, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec256
+ t = Lib_IntVector_Intrinsics_vec256_shift_left64(z11, (uint32_t)2U);
+ Lib_IntVector_Intrinsics_vec256 z120 = Lib_IntVector_Intrinsics_vec256_add64(z11, t);
+ Lib_IntVector_Intrinsics_vec256 x11 = Lib_IntVector_Intrinsics_vec256_and(x1, mask26);
+ Lib_IntVector_Intrinsics_vec256 x41 = Lib_IntVector_Intrinsics_vec256_and(x4, mask26);
+ Lib_IntVector_Intrinsics_vec256 x2 = Lib_IntVector_Intrinsics_vec256_add64(t2, z01);
+ Lib_IntVector_Intrinsics_vec256 x01 = Lib_IntVector_Intrinsics_vec256_add64(x0, z120);
+ Lib_IntVector_Intrinsics_vec256
+ z02 = Lib_IntVector_Intrinsics_vec256_shift_right64(x2, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec256
+ z13 = Lib_IntVector_Intrinsics_vec256_shift_right64(x01, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec256 x21 = Lib_IntVector_Intrinsics_vec256_and(x2, mask26);
+ Lib_IntVector_Intrinsics_vec256 x02 = Lib_IntVector_Intrinsics_vec256_and(x01, mask26);
+ Lib_IntVector_Intrinsics_vec256 x31 = Lib_IntVector_Intrinsics_vec256_add64(x3, z02);
+ Lib_IntVector_Intrinsics_vec256 x12 = Lib_IntVector_Intrinsics_vec256_add64(x11, z13);
+ Lib_IntVector_Intrinsics_vec256
+ z03 = Lib_IntVector_Intrinsics_vec256_shift_right64(x31, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec256 x32 = Lib_IntVector_Intrinsics_vec256_and(x31, mask26);
+ Lib_IntVector_Intrinsics_vec256 x42 = Lib_IntVector_Intrinsics_vec256_add64(x41, z03);
+ Lib_IntVector_Intrinsics_vec256 o0 = x02;
+ Lib_IntVector_Intrinsics_vec256 o1 = x12;
+ Lib_IntVector_Intrinsics_vec256 o2 = x21;
+ Lib_IntVector_Intrinsics_vec256 o3 = x32;
+ Lib_IntVector_Intrinsics_vec256 o4 = x42;
+ rn[0U] = o0;
+ rn[1U] = o1;
+ rn[2U] = o2;
+ rn[3U] = o3;
+ rn[4U] = o4;
+ Lib_IntVector_Intrinsics_vec256 f202 = rn[0U];
+ Lib_IntVector_Intrinsics_vec256 f21 = rn[1U];
+ Lib_IntVector_Intrinsics_vec256 f22 = rn[2U];
+ Lib_IntVector_Intrinsics_vec256 f23 = rn[3U];
+ Lib_IntVector_Intrinsics_vec256 f24 = rn[4U];
+ rn_5[0U] = Lib_IntVector_Intrinsics_vec256_smul64(f202, (uint64_t)5U);
+ rn_5[1U] = Lib_IntVector_Intrinsics_vec256_smul64(f21, (uint64_t)5U);
+ rn_5[2U] = Lib_IntVector_Intrinsics_vec256_smul64(f22, (uint64_t)5U);
+ rn_5[3U] = Lib_IntVector_Intrinsics_vec256_smul64(f23, (uint64_t)5U);
+ rn_5[4U] = Lib_IntVector_Intrinsics_vec256_smul64(f24, (uint64_t)5U);
+}
+
+void /*
+ * Absorbs one 16-byte block into the accumulator.
+ *
+ * Reads text[0..15] as two little-endian 64-bit words, splits them into five
+ * 26-bit limbs, ORs 2^24 into the top limb (bit 128 of the block value), adds
+ * the limbs to the accumulator in ctx[0..4], multiplies the sum by the five
+ * limbs read from ctx[5..9] (ctx[11..14] supply the factors for the terms that
+ * wrap past limb 4), runs a carry pass, and stores the result in ctx[0..4].
+ *
+ * ctx:  accumulator in ctx[0..4]; precomputed values are read from ctx[5..9]
+ *       and ctx[11..14]. NOTE(review): presumably the key r and 5*r in radix
+ *       2^26 as prepared by the init routine -- confirm against the caller.
+ * text: at least 16 readable bytes.
+ */
+Hacl_Poly1305_256_poly1305_update1(Lib_IntVector_Intrinsics_vec256 *ctx, uint8_t *text)
+{
+ Lib_IntVector_Intrinsics_vec256 *pre = ctx + (uint32_t)5U; /* precomputed values start at ctx[5] */
+ Lib_IntVector_Intrinsics_vec256 *acc = ctx; /* accumulator limbs are ctx[0..4] */
+ Lib_IntVector_Intrinsics_vec256 e[5U]; /* the input block as five limbs */
+ for (uint32_t _i = 0U; _i < (uint32_t)5U; ++_i)
+ e[_i] = Lib_IntVector_Intrinsics_vec256_zero;
+ uint64_t u0 = load64_le(text); /* bytes 0..7 */
+ uint64_t lo = u0;
+ uint64_t u = load64_le(text + (uint32_t)8U); /* bytes 8..15 */
+ uint64_t hi = u;
+ Lib_IntVector_Intrinsics_vec256 f0 = Lib_IntVector_Intrinsics_vec256_load64(lo); /* NOTE(review): load64 presumably broadcasts to every 64-bit lane -- confirm */
+ Lib_IntVector_Intrinsics_vec256 f1 = Lib_IntVector_Intrinsics_vec256_load64(hi);
+ Lib_IntVector_Intrinsics_vec256 /* limb 0: block bits 0..25 */
+ f010 =
+ Lib_IntVector_Intrinsics_vec256_and(f0,
+ Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU));
+ Lib_IntVector_Intrinsics_vec256 /* limb 1: block bits 26..51 */
+ f110 =
+ Lib_IntVector_Intrinsics_vec256_and(Lib_IntVector_Intrinsics_vec256_shift_right64(f0,
+ (uint32_t)26U),
+ Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU));
+ Lib_IntVector_Intrinsics_vec256 /* limb 2: block bits 52..77 (top 12 bits of lo, low 14 bits of hi) */
+ f20 =
+ Lib_IntVector_Intrinsics_vec256_or(Lib_IntVector_Intrinsics_vec256_shift_right64(f0,
+ (uint32_t)52U),
+ Lib_IntVector_Intrinsics_vec256_shift_left64(Lib_IntVector_Intrinsics_vec256_and(f1,
+ Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3fffU)),
+ (uint32_t)12U));
+ Lib_IntVector_Intrinsics_vec256 /* limb 3: block bits 78..103 */
+ f30 =
+ Lib_IntVector_Intrinsics_vec256_and(Lib_IntVector_Intrinsics_vec256_shift_right64(f1,
+ (uint32_t)14U),
+ Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU));
+ Lib_IntVector_Intrinsics_vec256 /* limb 4: block bits 104..127 (24 bits) */
+ f40 = Lib_IntVector_Intrinsics_vec256_shift_right64(f1, (uint32_t)40U);
+ Lib_IntVector_Intrinsics_vec256 f01 = f010;
+ Lib_IntVector_Intrinsics_vec256 f111 = f110;
+ Lib_IntVector_Intrinsics_vec256 f2 = f20;
+ Lib_IntVector_Intrinsics_vec256 f3 = f30;
+ Lib_IntVector_Intrinsics_vec256 f41 = f40;
+ e[0U] = f01;
+ e[1U] = f111;
+ e[2U] = f2;
+ e[3U] = f3;
+ e[4U] = f41;
+ uint64_t b = (uint64_t)0x1000000U; /* 2^24 within limb 4, i.e. 2^128 of the whole block value */
+ Lib_IntVector_Intrinsics_vec256 mask = Lib_IntVector_Intrinsics_vec256_load64(b);
+ Lib_IntVector_Intrinsics_vec256 f4 = e[4U];
+ e[4U] = Lib_IntVector_Intrinsics_vec256_or(f4, mask); /* limb 4 held only 24 bits, so bit 24 was clear */
+ Lib_IntVector_Intrinsics_vec256 *r = pre; /* ctx[5..9] */
+ Lib_IntVector_Intrinsics_vec256 *r5 = pre + (uint32_t)5U; /* ctx[10..14]; only indices 1..4 are read below */
+ Lib_IntVector_Intrinsics_vec256 r0 = r[0U];
+ Lib_IntVector_Intrinsics_vec256 r1 = r[1U];
+ Lib_IntVector_Intrinsics_vec256 r2 = r[2U];
+ Lib_IntVector_Intrinsics_vec256 r3 = r[3U];
+ Lib_IntVector_Intrinsics_vec256 r4 = r[4U];
+ Lib_IntVector_Intrinsics_vec256 r51 = r5[1U];
+ Lib_IntVector_Intrinsics_vec256 r52 = r5[2U];
+ Lib_IntVector_Intrinsics_vec256 r53 = r5[3U];
+ Lib_IntVector_Intrinsics_vec256 r54 = r5[4U];
+ Lib_IntVector_Intrinsics_vec256 f10 = e[0U];
+ Lib_IntVector_Intrinsics_vec256 f11 = e[1U];
+ Lib_IntVector_Intrinsics_vec256 f12 = e[2U];
+ Lib_IntVector_Intrinsics_vec256 f13 = e[3U];
+ Lib_IntVector_Intrinsics_vec256 f14 = e[4U];
+ Lib_IntVector_Intrinsics_vec256 a0 = acc[0U];
+ Lib_IntVector_Intrinsics_vec256 a1 = acc[1U];
+ Lib_IntVector_Intrinsics_vec256 a2 = acc[2U];
+ Lib_IntVector_Intrinsics_vec256 a3 = acc[3U];
+ Lib_IntVector_Intrinsics_vec256 a4 = acc[4U];
+ Lib_IntVector_Intrinsics_vec256 a01 = Lib_IntVector_Intrinsics_vec256_add64(a0, f10); /* a01..a41 = accumulator + block, limb by limb */
+ Lib_IntVector_Intrinsics_vec256 a11 = Lib_IntVector_Intrinsics_vec256_add64(a1, f11);
+ Lib_IntVector_Intrinsics_vec256 a21 = Lib_IntVector_Intrinsics_vec256_add64(a2, f12);
+ Lib_IntVector_Intrinsics_vec256 a31 = Lib_IntVector_Intrinsics_vec256_add64(a3, f13);
+ Lib_IntVector_Intrinsics_vec256 a41 = Lib_IntVector_Intrinsics_vec256_add64(a4, f14);
+ Lib_IntVector_Intrinsics_vec256 a02 = Lib_IntVector_Intrinsics_vec256_mul64(r0, a01); /* a02..a42: products of limb 0 of the sum with r0..r4 */
+ Lib_IntVector_Intrinsics_vec256 a12 = Lib_IntVector_Intrinsics_vec256_mul64(r1, a01);
+ Lib_IntVector_Intrinsics_vec256 a22 = Lib_IntVector_Intrinsics_vec256_mul64(r2, a01);
+ Lib_IntVector_Intrinsics_vec256 a32 = Lib_IntVector_Intrinsics_vec256_mul64(r3, a01);
+ Lib_IntVector_Intrinsics_vec256 a42 = Lib_IntVector_Intrinsics_vec256_mul64(r4, a01);
+ Lib_IntVector_Intrinsics_vec256 /* a03..a43: add products of limb 1 of the sum; the term landing in limb 0 uses r54 */
+ a03 =
+ Lib_IntVector_Intrinsics_vec256_add64(a02,
+ Lib_IntVector_Intrinsics_vec256_mul64(r54, a11));
+ Lib_IntVector_Intrinsics_vec256
+ a13 =
+ Lib_IntVector_Intrinsics_vec256_add64(a12,
+ Lib_IntVector_Intrinsics_vec256_mul64(r0, a11));
+ Lib_IntVector_Intrinsics_vec256
+ a23 =
+ Lib_IntVector_Intrinsics_vec256_add64(a22,
+ Lib_IntVector_Intrinsics_vec256_mul64(r1, a11));
+ Lib_IntVector_Intrinsics_vec256
+ a33 =
+ Lib_IntVector_Intrinsics_vec256_add64(a32,
+ Lib_IntVector_Intrinsics_vec256_mul64(r2, a11));
+ Lib_IntVector_Intrinsics_vec256
+ a43 =
+ Lib_IntVector_Intrinsics_vec256_add64(a42,
+ Lib_IntVector_Intrinsics_vec256_mul64(r3, a11));
+ Lib_IntVector_Intrinsics_vec256 /* a04..a44: add products of limb 2 of the sum; limbs 0 and 1 use r53, r54 */
+ a04 =
+ Lib_IntVector_Intrinsics_vec256_add64(a03,
+ Lib_IntVector_Intrinsics_vec256_mul64(r53, a21));
+ Lib_IntVector_Intrinsics_vec256
+ a14 =
+ Lib_IntVector_Intrinsics_vec256_add64(a13,
+ Lib_IntVector_Intrinsics_vec256_mul64(r54, a21));
+ Lib_IntVector_Intrinsics_vec256
+ a24 =
+ Lib_IntVector_Intrinsics_vec256_add64(a23,
+ Lib_IntVector_Intrinsics_vec256_mul64(r0, a21));
+ Lib_IntVector_Intrinsics_vec256
+ a34 =
+ Lib_IntVector_Intrinsics_vec256_add64(a33,
+ Lib_IntVector_Intrinsics_vec256_mul64(r1, a21));
+ Lib_IntVector_Intrinsics_vec256
+ a44 =
+ Lib_IntVector_Intrinsics_vec256_add64(a43,
+ Lib_IntVector_Intrinsics_vec256_mul64(r2, a21));
+ Lib_IntVector_Intrinsics_vec256 /* a05..a45: add products of limb 3 of the sum; limbs 0..2 use r52, r53, r54 */
+ a05 =
+ Lib_IntVector_Intrinsics_vec256_add64(a04,
+ Lib_IntVector_Intrinsics_vec256_mul64(r52, a31));
+ Lib_IntVector_Intrinsics_vec256
+ a15 =
+ Lib_IntVector_Intrinsics_vec256_add64(a14,
+ Lib_IntVector_Intrinsics_vec256_mul64(r53, a31));
+ Lib_IntVector_Intrinsics_vec256
+ a25 =
+ Lib_IntVector_Intrinsics_vec256_add64(a24,
+ Lib_IntVector_Intrinsics_vec256_mul64(r54, a31));
+ Lib_IntVector_Intrinsics_vec256
+ a35 =
+ Lib_IntVector_Intrinsics_vec256_add64(a34,
+ Lib_IntVector_Intrinsics_vec256_mul64(r0, a31));
+ Lib_IntVector_Intrinsics_vec256
+ a45 =
+ Lib_IntVector_Intrinsics_vec256_add64(a44,
+ Lib_IntVector_Intrinsics_vec256_mul64(r1, a31));
+ Lib_IntVector_Intrinsics_vec256 /* a06..a46: add products of limb 4 of the sum; limbs 0..3 use r51..r54 */
+ a06 =
+ Lib_IntVector_Intrinsics_vec256_add64(a05,
+ Lib_IntVector_Intrinsics_vec256_mul64(r51, a41));
+ Lib_IntVector_Intrinsics_vec256
+ a16 =
+ Lib_IntVector_Intrinsics_vec256_add64(a15,
+ Lib_IntVector_Intrinsics_vec256_mul64(r52, a41));
+ Lib_IntVector_Intrinsics_vec256
+ a26 =
+ Lib_IntVector_Intrinsics_vec256_add64(a25,
+ Lib_IntVector_Intrinsics_vec256_mul64(r53, a41));
+ Lib_IntVector_Intrinsics_vec256
+ a36 =
+ Lib_IntVector_Intrinsics_vec256_add64(a35,
+ Lib_IntVector_Intrinsics_vec256_mul64(r54, a41));
+ Lib_IntVector_Intrinsics_vec256
+ a46 =
+ Lib_IntVector_Intrinsics_vec256_add64(a45,
+ Lib_IntVector_Intrinsics_vec256_mul64(r0, a41));
+ Lib_IntVector_Intrinsics_vec256 t0 = a06; /* t0..t4: unreduced product limbs */
+ Lib_IntVector_Intrinsics_vec256 t1 = a16;
+ Lib_IntVector_Intrinsics_vec256 t2 = a26;
+ Lib_IntVector_Intrinsics_vec256 t3 = a36;
+ Lib_IntVector_Intrinsics_vec256 t4 = a46;
+ Lib_IntVector_Intrinsics_vec256 /* carry pass: keep the low 26 bits of a limb, push the rest into the next limb */
+ mask26 = Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU);
+ Lib_IntVector_Intrinsics_vec256 /* two carry chains run side by side, starting at limb 0 and limb 3 */
+ z0 = Lib_IntVector_Intrinsics_vec256_shift_right64(t0, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec256
+ z1 = Lib_IntVector_Intrinsics_vec256_shift_right64(t3, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec256 x0 = Lib_IntVector_Intrinsics_vec256_and(t0, mask26);
+ Lib_IntVector_Intrinsics_vec256 x3 = Lib_IntVector_Intrinsics_vec256_and(t3, mask26);
+ Lib_IntVector_Intrinsics_vec256 x1 = Lib_IntVector_Intrinsics_vec256_add64(t1, z0);
+ Lib_IntVector_Intrinsics_vec256 x4 = Lib_IntVector_Intrinsics_vec256_add64(t4, z1);
+ Lib_IntVector_Intrinsics_vec256
+ z01 = Lib_IntVector_Intrinsics_vec256_shift_right64(x1, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec256
+ z11 = Lib_IntVector_Intrinsics_vec256_shift_right64(x4, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec256
+ t = Lib_IntVector_Intrinsics_vec256_shift_left64(z11, (uint32_t)2U);
+ Lib_IntVector_Intrinsics_vec256 z12 = Lib_IntVector_Intrinsics_vec256_add64(z11, t); /* z12 = z11 + 4*z11 = 5*z11: carry out of limb 4, folded into limb 0 below */
+ Lib_IntVector_Intrinsics_vec256 x11 = Lib_IntVector_Intrinsics_vec256_and(x1, mask26);
+ Lib_IntVector_Intrinsics_vec256 x41 = Lib_IntVector_Intrinsics_vec256_and(x4, mask26);
+ Lib_IntVector_Intrinsics_vec256 x2 = Lib_IntVector_Intrinsics_vec256_add64(t2, z01);
+ Lib_IntVector_Intrinsics_vec256 x01 = Lib_IntVector_Intrinsics_vec256_add64(x0, z12);
+ Lib_IntVector_Intrinsics_vec256
+ z02 = Lib_IntVector_Intrinsics_vec256_shift_right64(x2, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec256
+ z13 = Lib_IntVector_Intrinsics_vec256_shift_right64(x01, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec256 x21 = Lib_IntVector_Intrinsics_vec256_and(x2, mask26);
+ Lib_IntVector_Intrinsics_vec256 x02 = Lib_IntVector_Intrinsics_vec256_and(x01, mask26);
+ Lib_IntVector_Intrinsics_vec256 x31 = Lib_IntVector_Intrinsics_vec256_add64(x3, z02);
+ Lib_IntVector_Intrinsics_vec256 x12 = Lib_IntVector_Intrinsics_vec256_add64(x11, z13); /* limb 1 takes a final carry and is not masked again */
+ Lib_IntVector_Intrinsics_vec256
+ z03 = Lib_IntVector_Intrinsics_vec256_shift_right64(x31, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec256 x32 = Lib_IntVector_Intrinsics_vec256_and(x31, mask26);
+ Lib_IntVector_Intrinsics_vec256 x42 = Lib_IntVector_Intrinsics_vec256_add64(x41, z03); /* limb 4 takes a final carry and is not masked again */
+ Lib_IntVector_Intrinsics_vec256 o0 = x02;
+ Lib_IntVector_Intrinsics_vec256 o1 = x12;
+ Lib_IntVector_Intrinsics_vec256 o2 = x21;
+ Lib_IntVector_Intrinsics_vec256 o3 = x32;
+ Lib_IntVector_Intrinsics_vec256 o4 = x42;
+ acc[0U] = o0; /* write the new accumulator back into ctx[0..4] */
+ acc[1U] = o1;
+ acc[2U] = o2;
+ acc[3U] = o3;
+ acc[4U] = o4;
+}
+
+void
+Hacl_Poly1305_256_poly1305_update(
+ Lib_IntVector_Intrinsics_vec256 *ctx,
+ uint32_t len,
+ uint8_t *text)
+{
+ Lib_IntVector_Intrinsics_vec256 *pre = ctx + (uint32_t)5U;
+ Lib_IntVector_Intrinsics_vec256 *acc = ctx;
+ uint32_t sz_block = (uint32_t)64U;
+ uint32_t len0 = len / sz_block * sz_block;
+ uint8_t *t0 = text;
+ if (len0 > (uint32_t)0U) {
+ uint32_t bs = (uint32_t)64U;
+ uint8_t *text0 = t0;
+ Hacl_Impl_Poly1305_Field32xN_256_load_acc4(acc, text0);
+ uint32_t len1 = len0 - bs;
+ uint8_t *text1 = t0 + bs;
+ uint32_t nb = len1 / bs;
+ for (uint32_t i = (uint32_t)0U; i < nb; i++) {
+ uint8_t *block = text1 + i * bs;
+ Lib_IntVector_Intrinsics_vec256 e[5U];
+ for (uint32_t _i = 0U; _i < (uint32_t)5U; ++_i)
+ e[_i] = Lib_IntVector_Intrinsics_vec256_zero;
+ Lib_IntVector_Intrinsics_vec256 lo = Lib_IntVector_Intrinsics_vec256_load_le(block);
+ Lib_IntVector_Intrinsics_vec256
+ hi = Lib_IntVector_Intrinsics_vec256_load_le(block + (uint32_t)32U);
+ Lib_IntVector_Intrinsics_vec256
+ mask260 = Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU);
+ Lib_IntVector_Intrinsics_vec256
+ m0 = Lib_IntVector_Intrinsics_vec256_interleave_low128(lo, hi);
+ Lib_IntVector_Intrinsics_vec256
+ m1 = Lib_IntVector_Intrinsics_vec256_interleave_high128(lo, hi);
+ Lib_IntVector_Intrinsics_vec256
+ m2 = Lib_IntVector_Intrinsics_vec256_shift_right(m0, (uint32_t)48U);
+ Lib_IntVector_Intrinsics_vec256
+ m3 = Lib_IntVector_Intrinsics_vec256_shift_right(m1, (uint32_t)48U);
+ Lib_IntVector_Intrinsics_vec256
+ m4 = Lib_IntVector_Intrinsics_vec256_interleave_high64(m0, m1);
+ Lib_IntVector_Intrinsics_vec256
+ t010 = Lib_IntVector_Intrinsics_vec256_interleave_low64(m0, m1);
+ Lib_IntVector_Intrinsics_vec256
+ t30 = Lib_IntVector_Intrinsics_vec256_interleave_low64(m2, m3);
+ Lib_IntVector_Intrinsics_vec256
+ t20 = Lib_IntVector_Intrinsics_vec256_shift_right64(t30, (uint32_t)4U);
+ Lib_IntVector_Intrinsics_vec256 o20 = Lib_IntVector_Intrinsics_vec256_and(t20, mask260);
+ Lib_IntVector_Intrinsics_vec256
+ t10 = Lib_IntVector_Intrinsics_vec256_shift_right64(t010, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec256 o10 = Lib_IntVector_Intrinsics_vec256_and(t10, mask260);
+ Lib_IntVector_Intrinsics_vec256 o5 = Lib_IntVector_Intrinsics_vec256_and(t010, mask260);
+ Lib_IntVector_Intrinsics_vec256
+ t31 = Lib_IntVector_Intrinsics_vec256_shift_right64(t30, (uint32_t)30U);
+ Lib_IntVector_Intrinsics_vec256 o30 = Lib_IntVector_Intrinsics_vec256_and(t31, mask260);
+ Lib_IntVector_Intrinsics_vec256
+ o40 = Lib_IntVector_Intrinsics_vec256_shift_right64(m4, (uint32_t)40U);
+ Lib_IntVector_Intrinsics_vec256 o00 = o5;
+ Lib_IntVector_Intrinsics_vec256 o11 = o10;
+ Lib_IntVector_Intrinsics_vec256 o21 = o20;
+ Lib_IntVector_Intrinsics_vec256 o31 = o30;
+ Lib_IntVector_Intrinsics_vec256 o41 = o40;
+ e[0U] = o00;
+ e[1U] = o11;
+ e[2U] = o21;
+ e[3U] = o31;
+ e[4U] = o41;
+ uint64_t b = (uint64_t)0x1000000U;
+ Lib_IntVector_Intrinsics_vec256 mask = Lib_IntVector_Intrinsics_vec256_load64(b);
+ Lib_IntVector_Intrinsics_vec256 f4 = e[4U];
+ e[4U] = Lib_IntVector_Intrinsics_vec256_or(f4, mask);
+ Lib_IntVector_Intrinsics_vec256 *rn = pre + (uint32_t)10U;
+ Lib_IntVector_Intrinsics_vec256 *rn5 = pre + (uint32_t)15U;
+ Lib_IntVector_Intrinsics_vec256 r0 = rn[0U];
+ Lib_IntVector_Intrinsics_vec256 r1 = rn[1U];
+ Lib_IntVector_Intrinsics_vec256 r2 = rn[2U];
+ Lib_IntVector_Intrinsics_vec256 r3 = rn[3U];
+ Lib_IntVector_Intrinsics_vec256 r4 = rn[4U];
+ Lib_IntVector_Intrinsics_vec256 r51 = rn5[1U];
+ Lib_IntVector_Intrinsics_vec256 r52 = rn5[2U];
+ Lib_IntVector_Intrinsics_vec256 r53 = rn5[3U];
+ Lib_IntVector_Intrinsics_vec256 r54 = rn5[4U];
+ Lib_IntVector_Intrinsics_vec256 f10 = acc[0U];
+ Lib_IntVector_Intrinsics_vec256 f110 = acc[1U];
+ Lib_IntVector_Intrinsics_vec256 f120 = acc[2U];
+ Lib_IntVector_Intrinsics_vec256 f130 = acc[3U];
+ Lib_IntVector_Intrinsics_vec256 f140 = acc[4U];
+ Lib_IntVector_Intrinsics_vec256 a0 = Lib_IntVector_Intrinsics_vec256_mul64(r0, f10);
+ Lib_IntVector_Intrinsics_vec256 a1 = Lib_IntVector_Intrinsics_vec256_mul64(r1, f10);
+ Lib_IntVector_Intrinsics_vec256 a2 = Lib_IntVector_Intrinsics_vec256_mul64(r2, f10);
+ Lib_IntVector_Intrinsics_vec256 a3 = Lib_IntVector_Intrinsics_vec256_mul64(r3, f10);
+ Lib_IntVector_Intrinsics_vec256 a4 = Lib_IntVector_Intrinsics_vec256_mul64(r4, f10);
+ Lib_IntVector_Intrinsics_vec256
+ a01 =
+ Lib_IntVector_Intrinsics_vec256_add64(a0,
+ Lib_IntVector_Intrinsics_vec256_mul64(r54, f110));
+ Lib_IntVector_Intrinsics_vec256
+ a11 =
+ Lib_IntVector_Intrinsics_vec256_add64(a1,
+ Lib_IntVector_Intrinsics_vec256_mul64(r0, f110));
+ Lib_IntVector_Intrinsics_vec256
+ a21 =
+ Lib_IntVector_Intrinsics_vec256_add64(a2,
+ Lib_IntVector_Intrinsics_vec256_mul64(r1, f110));
+ Lib_IntVector_Intrinsics_vec256
+ a31 =
+ Lib_IntVector_Intrinsics_vec256_add64(a3,
+ Lib_IntVector_Intrinsics_vec256_mul64(r2, f110));
+ Lib_IntVector_Intrinsics_vec256
+ a41 =
+ Lib_IntVector_Intrinsics_vec256_add64(a4,
+ Lib_IntVector_Intrinsics_vec256_mul64(r3, f110));
+ Lib_IntVector_Intrinsics_vec256
+ a02 =
+ Lib_IntVector_Intrinsics_vec256_add64(a01,
+ Lib_IntVector_Intrinsics_vec256_mul64(r53, f120));
+ Lib_IntVector_Intrinsics_vec256
+ a12 =
+ Lib_IntVector_Intrinsics_vec256_add64(a11,
+ Lib_IntVector_Intrinsics_vec256_mul64(r54, f120));
+ Lib_IntVector_Intrinsics_vec256
+ a22 =
+ Lib_IntVector_Intrinsics_vec256_add64(a21,
+ Lib_IntVector_Intrinsics_vec256_mul64(r0, f120));
+ Lib_IntVector_Intrinsics_vec256
+ a32 =
+ Lib_IntVector_Intrinsics_vec256_add64(a31,
+ Lib_IntVector_Intrinsics_vec256_mul64(r1, f120));
+ Lib_IntVector_Intrinsics_vec256
+ a42 =
+ Lib_IntVector_Intrinsics_vec256_add64(a41,
+ Lib_IntVector_Intrinsics_vec256_mul64(r2, f120));
+ Lib_IntVector_Intrinsics_vec256
+ a03 =
+ Lib_IntVector_Intrinsics_vec256_add64(a02,
+ Lib_IntVector_Intrinsics_vec256_mul64(r52, f130));
+ Lib_IntVector_Intrinsics_vec256
+ a13 =
+ Lib_IntVector_Intrinsics_vec256_add64(a12,
+ Lib_IntVector_Intrinsics_vec256_mul64(r53, f130));
+ Lib_IntVector_Intrinsics_vec256
+ a23 =
+ Lib_IntVector_Intrinsics_vec256_add64(a22,
+ Lib_IntVector_Intrinsics_vec256_mul64(r54, f130));
+ Lib_IntVector_Intrinsics_vec256
+ a33 =
+ Lib_IntVector_Intrinsics_vec256_add64(a32,
+ Lib_IntVector_Intrinsics_vec256_mul64(r0, f130));
+ Lib_IntVector_Intrinsics_vec256
+ a43 =
+ Lib_IntVector_Intrinsics_vec256_add64(a42,
+ Lib_IntVector_Intrinsics_vec256_mul64(r1, f130));
+ Lib_IntVector_Intrinsics_vec256
+ a04 =
+ Lib_IntVector_Intrinsics_vec256_add64(a03,
+ Lib_IntVector_Intrinsics_vec256_mul64(r51, f140));
+ Lib_IntVector_Intrinsics_vec256
+ a14 =
+ Lib_IntVector_Intrinsics_vec256_add64(a13,
+ Lib_IntVector_Intrinsics_vec256_mul64(r52, f140));
+ Lib_IntVector_Intrinsics_vec256
+ a24 =
+ Lib_IntVector_Intrinsics_vec256_add64(a23,
+ Lib_IntVector_Intrinsics_vec256_mul64(r53, f140));
+ Lib_IntVector_Intrinsics_vec256
+ a34 =
+ Lib_IntVector_Intrinsics_vec256_add64(a33,
+ Lib_IntVector_Intrinsics_vec256_mul64(r54, f140));
+ Lib_IntVector_Intrinsics_vec256
+ a44 =
+ Lib_IntVector_Intrinsics_vec256_add64(a43,
+ Lib_IntVector_Intrinsics_vec256_mul64(r0, f140));
+ Lib_IntVector_Intrinsics_vec256 t01 = a04;
+ Lib_IntVector_Intrinsics_vec256 t1 = a14;
+ Lib_IntVector_Intrinsics_vec256 t2 = a24;
+ Lib_IntVector_Intrinsics_vec256 t3 = a34;
+ Lib_IntVector_Intrinsics_vec256 t4 = a44;
+ Lib_IntVector_Intrinsics_vec256
+ mask26 = Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU);
+ Lib_IntVector_Intrinsics_vec256
+ z0 = Lib_IntVector_Intrinsics_vec256_shift_right64(t01, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec256
+ z1 = Lib_IntVector_Intrinsics_vec256_shift_right64(t3, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec256 x0 = Lib_IntVector_Intrinsics_vec256_and(t01, mask26);
+ Lib_IntVector_Intrinsics_vec256 x3 = Lib_IntVector_Intrinsics_vec256_and(t3, mask26);
+ Lib_IntVector_Intrinsics_vec256 x1 = Lib_IntVector_Intrinsics_vec256_add64(t1, z0);
+ Lib_IntVector_Intrinsics_vec256 x4 = Lib_IntVector_Intrinsics_vec256_add64(t4, z1);
+ Lib_IntVector_Intrinsics_vec256
+ z01 = Lib_IntVector_Intrinsics_vec256_shift_right64(x1, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec256
+ z11 = Lib_IntVector_Intrinsics_vec256_shift_right64(x4, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec256
+ t = Lib_IntVector_Intrinsics_vec256_shift_left64(z11, (uint32_t)2U);
+ Lib_IntVector_Intrinsics_vec256 z12 = Lib_IntVector_Intrinsics_vec256_add64(z11, t);
+ Lib_IntVector_Intrinsics_vec256 x11 = Lib_IntVector_Intrinsics_vec256_and(x1, mask26);
+ Lib_IntVector_Intrinsics_vec256 x41 = Lib_IntVector_Intrinsics_vec256_and(x4, mask26);
+ Lib_IntVector_Intrinsics_vec256 x2 = Lib_IntVector_Intrinsics_vec256_add64(t2, z01);
+ Lib_IntVector_Intrinsics_vec256 x01 = Lib_IntVector_Intrinsics_vec256_add64(x0, z12);
+ Lib_IntVector_Intrinsics_vec256
+ z02 = Lib_IntVector_Intrinsics_vec256_shift_right64(x2, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec256
+ z13 = Lib_IntVector_Intrinsics_vec256_shift_right64(x01, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec256 x21 = Lib_IntVector_Intrinsics_vec256_and(x2, mask26);
+ Lib_IntVector_Intrinsics_vec256 x02 = Lib_IntVector_Intrinsics_vec256_and(x01, mask26);
+ Lib_IntVector_Intrinsics_vec256 x31 = Lib_IntVector_Intrinsics_vec256_add64(x3, z02);
+ Lib_IntVector_Intrinsics_vec256 x12 = Lib_IntVector_Intrinsics_vec256_add64(x11, z13);
+ Lib_IntVector_Intrinsics_vec256
+ z03 = Lib_IntVector_Intrinsics_vec256_shift_right64(x31, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec256 x32 = Lib_IntVector_Intrinsics_vec256_and(x31, mask26);
+ Lib_IntVector_Intrinsics_vec256 x42 = Lib_IntVector_Intrinsics_vec256_add64(x41, z03);
+ Lib_IntVector_Intrinsics_vec256 o01 = x02;
+ Lib_IntVector_Intrinsics_vec256 o12 = x12;
+ Lib_IntVector_Intrinsics_vec256 o22 = x21;
+ Lib_IntVector_Intrinsics_vec256 o32 = x32;
+ Lib_IntVector_Intrinsics_vec256 o42 = x42;
+ acc[0U] = o01;
+ acc[1U] = o12;
+ acc[2U] = o22;
+ acc[3U] = o32;
+ acc[4U] = o42;
+ Lib_IntVector_Intrinsics_vec256 f100 = acc[0U];
+ Lib_IntVector_Intrinsics_vec256 f11 = acc[1U];
+ Lib_IntVector_Intrinsics_vec256 f12 = acc[2U];
+ Lib_IntVector_Intrinsics_vec256 f13 = acc[3U];
+ Lib_IntVector_Intrinsics_vec256 f14 = acc[4U];
+ Lib_IntVector_Intrinsics_vec256 f20 = e[0U];
+ Lib_IntVector_Intrinsics_vec256 f21 = e[1U];
+ Lib_IntVector_Intrinsics_vec256 f22 = e[2U];
+ Lib_IntVector_Intrinsics_vec256 f23 = e[3U];
+ Lib_IntVector_Intrinsics_vec256 f24 = e[4U];
+ Lib_IntVector_Intrinsics_vec256 o0 = Lib_IntVector_Intrinsics_vec256_add64(f100, f20);
+ Lib_IntVector_Intrinsics_vec256 o1 = Lib_IntVector_Intrinsics_vec256_add64(f11, f21);
+ Lib_IntVector_Intrinsics_vec256 o2 = Lib_IntVector_Intrinsics_vec256_add64(f12, f22);
+ Lib_IntVector_Intrinsics_vec256 o3 = Lib_IntVector_Intrinsics_vec256_add64(f13, f23);
+ Lib_IntVector_Intrinsics_vec256 o4 = Lib_IntVector_Intrinsics_vec256_add64(f14, f24);
+ acc[0U] = o0;
+ acc[1U] = o1;
+ acc[2U] = o2;
+ acc[3U] = o3;
+ acc[4U] = o4;
+ }
+ Hacl_Impl_Poly1305_Field32xN_256_fmul_r4_normalize(acc, pre);
+ }
+ uint32_t len1 = len - len0;
+ uint8_t *t1 = text + len0;
+ uint32_t nb = len1 / (uint32_t)16U;
+ uint32_t rem = len1 % (uint32_t)16U;
+ for (uint32_t i = (uint32_t)0U; i < nb; i++) {
+ uint8_t *block = t1 + i * (uint32_t)16U;
+ Lib_IntVector_Intrinsics_vec256 e[5U];
+ for (uint32_t _i = 0U; _i < (uint32_t)5U; ++_i)
+ e[_i] = Lib_IntVector_Intrinsics_vec256_zero;
+ uint64_t u0 = load64_le(block);
+ uint64_t lo = u0;
+ uint64_t u = load64_le(block + (uint32_t)8U);
+ uint64_t hi = u;
+ Lib_IntVector_Intrinsics_vec256 f0 = Lib_IntVector_Intrinsics_vec256_load64(lo);
+ Lib_IntVector_Intrinsics_vec256 f1 = Lib_IntVector_Intrinsics_vec256_load64(hi);
+ Lib_IntVector_Intrinsics_vec256
+ f010 =
+ Lib_IntVector_Intrinsics_vec256_and(f0,
+ Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU));
+ Lib_IntVector_Intrinsics_vec256
+ f110 =
+ Lib_IntVector_Intrinsics_vec256_and(Lib_IntVector_Intrinsics_vec256_shift_right64(f0,
+ (uint32_t)26U),
+ Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU));
+ Lib_IntVector_Intrinsics_vec256
+ f20 =
+ Lib_IntVector_Intrinsics_vec256_or(Lib_IntVector_Intrinsics_vec256_shift_right64(f0,
+ (uint32_t)52U),
+ Lib_IntVector_Intrinsics_vec256_shift_left64(Lib_IntVector_Intrinsics_vec256_and(f1,
+ Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3fffU)),
+ (uint32_t)12U));
+ Lib_IntVector_Intrinsics_vec256
+ f30 =
+ Lib_IntVector_Intrinsics_vec256_and(Lib_IntVector_Intrinsics_vec256_shift_right64(f1,
+ (uint32_t)14U),
+ Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU));
+ Lib_IntVector_Intrinsics_vec256
+ f40 = Lib_IntVector_Intrinsics_vec256_shift_right64(f1, (uint32_t)40U);
+ Lib_IntVector_Intrinsics_vec256 f01 = f010;
+ Lib_IntVector_Intrinsics_vec256 f111 = f110;
+ Lib_IntVector_Intrinsics_vec256 f2 = f20;
+ Lib_IntVector_Intrinsics_vec256 f3 = f30;
+ Lib_IntVector_Intrinsics_vec256 f41 = f40;
+ e[0U] = f01;
+ e[1U] = f111;
+ e[2U] = f2;
+ e[3U] = f3;
+ e[4U] = f41;
+ uint64_t b = (uint64_t)0x1000000U;
+ Lib_IntVector_Intrinsics_vec256 mask = Lib_IntVector_Intrinsics_vec256_load64(b);
+ Lib_IntVector_Intrinsics_vec256 f4 = e[4U];
+ e[4U] = Lib_IntVector_Intrinsics_vec256_or(f4, mask);
+ Lib_IntVector_Intrinsics_vec256 *r = pre;
+ Lib_IntVector_Intrinsics_vec256 *r5 = pre + (uint32_t)5U;
+ Lib_IntVector_Intrinsics_vec256 r0 = r[0U];
+ Lib_IntVector_Intrinsics_vec256 r1 = r[1U];
+ Lib_IntVector_Intrinsics_vec256 r2 = r[2U];
+ Lib_IntVector_Intrinsics_vec256 r3 = r[3U];
+ Lib_IntVector_Intrinsics_vec256 r4 = r[4U];
+ Lib_IntVector_Intrinsics_vec256 r51 = r5[1U];
+ Lib_IntVector_Intrinsics_vec256 r52 = r5[2U];
+ Lib_IntVector_Intrinsics_vec256 r53 = r5[3U];
+ Lib_IntVector_Intrinsics_vec256 r54 = r5[4U];
+ Lib_IntVector_Intrinsics_vec256 f10 = e[0U];
+ Lib_IntVector_Intrinsics_vec256 f11 = e[1U];
+ Lib_IntVector_Intrinsics_vec256 f12 = e[2U];
+ Lib_IntVector_Intrinsics_vec256 f13 = e[3U];
+ Lib_IntVector_Intrinsics_vec256 f14 = e[4U];
+ Lib_IntVector_Intrinsics_vec256 a0 = acc[0U];
+ Lib_IntVector_Intrinsics_vec256 a1 = acc[1U];
+ Lib_IntVector_Intrinsics_vec256 a2 = acc[2U];
+ Lib_IntVector_Intrinsics_vec256 a3 = acc[3U];
+ Lib_IntVector_Intrinsics_vec256 a4 = acc[4U];
+ Lib_IntVector_Intrinsics_vec256 a01 = Lib_IntVector_Intrinsics_vec256_add64(a0, f10);
+ Lib_IntVector_Intrinsics_vec256 a11 = Lib_IntVector_Intrinsics_vec256_add64(a1, f11);
+ Lib_IntVector_Intrinsics_vec256 a21 = Lib_IntVector_Intrinsics_vec256_add64(a2, f12);
+ Lib_IntVector_Intrinsics_vec256 a31 = Lib_IntVector_Intrinsics_vec256_add64(a3, f13);
+ Lib_IntVector_Intrinsics_vec256 a41 = Lib_IntVector_Intrinsics_vec256_add64(a4, f14);
+ Lib_IntVector_Intrinsics_vec256 a02 = Lib_IntVector_Intrinsics_vec256_mul64(r0, a01);
+ Lib_IntVector_Intrinsics_vec256 a12 = Lib_IntVector_Intrinsics_vec256_mul64(r1, a01);
+ Lib_IntVector_Intrinsics_vec256 a22 = Lib_IntVector_Intrinsics_vec256_mul64(r2, a01);
+ Lib_IntVector_Intrinsics_vec256 a32 = Lib_IntVector_Intrinsics_vec256_mul64(r3, a01);
+ Lib_IntVector_Intrinsics_vec256 a42 = Lib_IntVector_Intrinsics_vec256_mul64(r4, a01);
+ Lib_IntVector_Intrinsics_vec256
+ a03 =
+ Lib_IntVector_Intrinsics_vec256_add64(a02,
+ Lib_IntVector_Intrinsics_vec256_mul64(r54, a11));
+ Lib_IntVector_Intrinsics_vec256
+ a13 =
+ Lib_IntVector_Intrinsics_vec256_add64(a12,
+ Lib_IntVector_Intrinsics_vec256_mul64(r0, a11));
+ Lib_IntVector_Intrinsics_vec256
+ a23 =
+ Lib_IntVector_Intrinsics_vec256_add64(a22,
+ Lib_IntVector_Intrinsics_vec256_mul64(r1, a11));
+ Lib_IntVector_Intrinsics_vec256
+ a33 =
+ Lib_IntVector_Intrinsics_vec256_add64(a32,
+ Lib_IntVector_Intrinsics_vec256_mul64(r2, a11));
+ Lib_IntVector_Intrinsics_vec256
+ a43 =
+ Lib_IntVector_Intrinsics_vec256_add64(a42,
+ Lib_IntVector_Intrinsics_vec256_mul64(r3, a11));
+ Lib_IntVector_Intrinsics_vec256
+ a04 =
+ Lib_IntVector_Intrinsics_vec256_add64(a03,
+ Lib_IntVector_Intrinsics_vec256_mul64(r53, a21));
+ Lib_IntVector_Intrinsics_vec256
+ a14 =
+ Lib_IntVector_Intrinsics_vec256_add64(a13,
+ Lib_IntVector_Intrinsics_vec256_mul64(r54, a21));
+ Lib_IntVector_Intrinsics_vec256
+ a24 =
+ Lib_IntVector_Intrinsics_vec256_add64(a23,
+ Lib_IntVector_Intrinsics_vec256_mul64(r0, a21));
+ Lib_IntVector_Intrinsics_vec256
+ a34 =
+ Lib_IntVector_Intrinsics_vec256_add64(a33,
+ Lib_IntVector_Intrinsics_vec256_mul64(r1, a21));
+ Lib_IntVector_Intrinsics_vec256
+ a44 =
+ Lib_IntVector_Intrinsics_vec256_add64(a43,
+ Lib_IntVector_Intrinsics_vec256_mul64(r2, a21));
+ Lib_IntVector_Intrinsics_vec256
+ a05 =
+ Lib_IntVector_Intrinsics_vec256_add64(a04,
+ Lib_IntVector_Intrinsics_vec256_mul64(r52, a31));
+ Lib_IntVector_Intrinsics_vec256
+ a15 =
+ Lib_IntVector_Intrinsics_vec256_add64(a14,
+ Lib_IntVector_Intrinsics_vec256_mul64(r53, a31));
+ Lib_IntVector_Intrinsics_vec256
+ a25 =
+ Lib_IntVector_Intrinsics_vec256_add64(a24,
+ Lib_IntVector_Intrinsics_vec256_mul64(r54, a31));
+ Lib_IntVector_Intrinsics_vec256
+ a35 =
+ Lib_IntVector_Intrinsics_vec256_add64(a34,
+ Lib_IntVector_Intrinsics_vec256_mul64(r0, a31));
+ Lib_IntVector_Intrinsics_vec256
+ a45 =
+ Lib_IntVector_Intrinsics_vec256_add64(a44,
+ Lib_IntVector_Intrinsics_vec256_mul64(r1, a31));
+ Lib_IntVector_Intrinsics_vec256
+ a06 =
+ Lib_IntVector_Intrinsics_vec256_add64(a05,
+ Lib_IntVector_Intrinsics_vec256_mul64(r51, a41));
+ Lib_IntVector_Intrinsics_vec256
+ a16 =
+ Lib_IntVector_Intrinsics_vec256_add64(a15,
+ Lib_IntVector_Intrinsics_vec256_mul64(r52, a41));
+ Lib_IntVector_Intrinsics_vec256
+ a26 =
+ Lib_IntVector_Intrinsics_vec256_add64(a25,
+ Lib_IntVector_Intrinsics_vec256_mul64(r53, a41));
+ Lib_IntVector_Intrinsics_vec256
+ a36 =
+ Lib_IntVector_Intrinsics_vec256_add64(a35,
+ Lib_IntVector_Intrinsics_vec256_mul64(r54, a41));
+ Lib_IntVector_Intrinsics_vec256
+ a46 =
+ Lib_IntVector_Intrinsics_vec256_add64(a45,
+ Lib_IntVector_Intrinsics_vec256_mul64(r0, a41));
+ Lib_IntVector_Intrinsics_vec256 t01 = a06;
+ Lib_IntVector_Intrinsics_vec256 t11 = a16;
+ Lib_IntVector_Intrinsics_vec256 t2 = a26;
+ Lib_IntVector_Intrinsics_vec256 t3 = a36;
+ Lib_IntVector_Intrinsics_vec256 t4 = a46;
+ Lib_IntVector_Intrinsics_vec256
+ mask26 = Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU);
+ Lib_IntVector_Intrinsics_vec256
+ z0 = Lib_IntVector_Intrinsics_vec256_shift_right64(t01, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec256
+ z1 = Lib_IntVector_Intrinsics_vec256_shift_right64(t3, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec256 x0 = Lib_IntVector_Intrinsics_vec256_and(t01, mask26);
+ Lib_IntVector_Intrinsics_vec256 x3 = Lib_IntVector_Intrinsics_vec256_and(t3, mask26);
+ Lib_IntVector_Intrinsics_vec256 x1 = Lib_IntVector_Intrinsics_vec256_add64(t11, z0);
+ Lib_IntVector_Intrinsics_vec256 x4 = Lib_IntVector_Intrinsics_vec256_add64(t4, z1);
+ Lib_IntVector_Intrinsics_vec256
+ z01 = Lib_IntVector_Intrinsics_vec256_shift_right64(x1, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec256
+ z11 = Lib_IntVector_Intrinsics_vec256_shift_right64(x4, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec256
+ t = Lib_IntVector_Intrinsics_vec256_shift_left64(z11, (uint32_t)2U);
+ Lib_IntVector_Intrinsics_vec256 z12 = Lib_IntVector_Intrinsics_vec256_add64(z11, t);
+ Lib_IntVector_Intrinsics_vec256 x11 = Lib_IntVector_Intrinsics_vec256_and(x1, mask26);
+ Lib_IntVector_Intrinsics_vec256 x41 = Lib_IntVector_Intrinsics_vec256_and(x4, mask26);
+ Lib_IntVector_Intrinsics_vec256 x2 = Lib_IntVector_Intrinsics_vec256_add64(t2, z01);
+ Lib_IntVector_Intrinsics_vec256 x01 = Lib_IntVector_Intrinsics_vec256_add64(x0, z12);
+ Lib_IntVector_Intrinsics_vec256
+ z02 = Lib_IntVector_Intrinsics_vec256_shift_right64(x2, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec256
+ z13 = Lib_IntVector_Intrinsics_vec256_shift_right64(x01, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec256 x21 = Lib_IntVector_Intrinsics_vec256_and(x2, mask26);
+ Lib_IntVector_Intrinsics_vec256 x02 = Lib_IntVector_Intrinsics_vec256_and(x01, mask26);
+ Lib_IntVector_Intrinsics_vec256 x31 = Lib_IntVector_Intrinsics_vec256_add64(x3, z02);
+ Lib_IntVector_Intrinsics_vec256 x12 = Lib_IntVector_Intrinsics_vec256_add64(x11, z13);
+ Lib_IntVector_Intrinsics_vec256
+ z03 = Lib_IntVector_Intrinsics_vec256_shift_right64(x31, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec256 x32 = Lib_IntVector_Intrinsics_vec256_and(x31, mask26);
+ Lib_IntVector_Intrinsics_vec256 x42 = Lib_IntVector_Intrinsics_vec256_add64(x41, z03);
+ Lib_IntVector_Intrinsics_vec256 o0 = x02;
+ Lib_IntVector_Intrinsics_vec256 o1 = x12;
+ Lib_IntVector_Intrinsics_vec256 o2 = x21;
+ Lib_IntVector_Intrinsics_vec256 o3 = x32;
+ Lib_IntVector_Intrinsics_vec256 o4 = x42;
+ acc[0U] = o0;
+ acc[1U] = o1;
+ acc[2U] = o2;
+ acc[3U] = o3;
+ acc[4U] = o4;
+ }
+ if (rem > (uint32_t)0U) {
+ uint8_t *last = t1 + nb * (uint32_t)16U;
+ Lib_IntVector_Intrinsics_vec256 e[5U];
+ for (uint32_t _i = 0U; _i < (uint32_t)5U; ++_i)
+ e[_i] = Lib_IntVector_Intrinsics_vec256_zero;
+ uint8_t tmp[16U] = { 0U };
+ memcpy(tmp, last, rem * sizeof(last[0U]));
+ uint64_t u0 = load64_le(tmp);
+ uint64_t lo = u0;
+ uint64_t u = load64_le(tmp + (uint32_t)8U);
+ uint64_t hi = u;
+ Lib_IntVector_Intrinsics_vec256 f0 = Lib_IntVector_Intrinsics_vec256_load64(lo);
+ Lib_IntVector_Intrinsics_vec256 f1 = Lib_IntVector_Intrinsics_vec256_load64(hi);
+ Lib_IntVector_Intrinsics_vec256
+ f010 =
+ Lib_IntVector_Intrinsics_vec256_and(f0,
+ Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU));
+ Lib_IntVector_Intrinsics_vec256
+ f110 =
+ Lib_IntVector_Intrinsics_vec256_and(Lib_IntVector_Intrinsics_vec256_shift_right64(f0,
+ (uint32_t)26U),
+ Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU));
+ Lib_IntVector_Intrinsics_vec256
+ f20 =
+ Lib_IntVector_Intrinsics_vec256_or(Lib_IntVector_Intrinsics_vec256_shift_right64(f0,
+ (uint32_t)52U),
+ Lib_IntVector_Intrinsics_vec256_shift_left64(Lib_IntVector_Intrinsics_vec256_and(f1,
+ Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3fffU)),
+ (uint32_t)12U));
+ Lib_IntVector_Intrinsics_vec256
+ f30 =
+ Lib_IntVector_Intrinsics_vec256_and(Lib_IntVector_Intrinsics_vec256_shift_right64(f1,
+ (uint32_t)14U),
+ Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU));
+ Lib_IntVector_Intrinsics_vec256
+ f40 = Lib_IntVector_Intrinsics_vec256_shift_right64(f1, (uint32_t)40U);
+ Lib_IntVector_Intrinsics_vec256 f01 = f010;
+ Lib_IntVector_Intrinsics_vec256 f111 = f110;
+ Lib_IntVector_Intrinsics_vec256 f2 = f20;
+ Lib_IntVector_Intrinsics_vec256 f3 = f30;
+ Lib_IntVector_Intrinsics_vec256 f4 = f40;
+ e[0U] = f01;
+ e[1U] = f111;
+ e[2U] = f2;
+ e[3U] = f3;
+ e[4U] = f4;
+ uint64_t b = (uint64_t)1U << rem * (uint32_t)8U % (uint32_t)26U;
+ Lib_IntVector_Intrinsics_vec256 mask = Lib_IntVector_Intrinsics_vec256_load64(b);
+ Lib_IntVector_Intrinsics_vec256 fi = e[rem * (uint32_t)8U / (uint32_t)26U];
+ e[rem * (uint32_t)8U / (uint32_t)26U] = Lib_IntVector_Intrinsics_vec256_or(fi, mask);
+ Lib_IntVector_Intrinsics_vec256 *r = pre;
+ Lib_IntVector_Intrinsics_vec256 *r5 = pre + (uint32_t)5U;
+ Lib_IntVector_Intrinsics_vec256 r0 = r[0U];
+ Lib_IntVector_Intrinsics_vec256 r1 = r[1U];
+ Lib_IntVector_Intrinsics_vec256 r2 = r[2U];
+ Lib_IntVector_Intrinsics_vec256 r3 = r[3U];
+ Lib_IntVector_Intrinsics_vec256 r4 = r[4U];
+ Lib_IntVector_Intrinsics_vec256 r51 = r5[1U];
+ Lib_IntVector_Intrinsics_vec256 r52 = r5[2U];
+ Lib_IntVector_Intrinsics_vec256 r53 = r5[3U];
+ Lib_IntVector_Intrinsics_vec256 r54 = r5[4U];
+ Lib_IntVector_Intrinsics_vec256 f10 = e[0U];
+ Lib_IntVector_Intrinsics_vec256 f11 = e[1U];
+ Lib_IntVector_Intrinsics_vec256 f12 = e[2U];
+ Lib_IntVector_Intrinsics_vec256 f13 = e[3U];
+ Lib_IntVector_Intrinsics_vec256 f14 = e[4U];
+ Lib_IntVector_Intrinsics_vec256 a0 = acc[0U];
+ Lib_IntVector_Intrinsics_vec256 a1 = acc[1U];
+ Lib_IntVector_Intrinsics_vec256 a2 = acc[2U];
+ Lib_IntVector_Intrinsics_vec256 a3 = acc[3U];
+ Lib_IntVector_Intrinsics_vec256 a4 = acc[4U];
+ Lib_IntVector_Intrinsics_vec256 a01 = Lib_IntVector_Intrinsics_vec256_add64(a0, f10);
+ Lib_IntVector_Intrinsics_vec256 a11 = Lib_IntVector_Intrinsics_vec256_add64(a1, f11);
+ Lib_IntVector_Intrinsics_vec256 a21 = Lib_IntVector_Intrinsics_vec256_add64(a2, f12);
+ Lib_IntVector_Intrinsics_vec256 a31 = Lib_IntVector_Intrinsics_vec256_add64(a3, f13);
+ Lib_IntVector_Intrinsics_vec256 a41 = Lib_IntVector_Intrinsics_vec256_add64(a4, f14);
+ Lib_IntVector_Intrinsics_vec256 a02 = Lib_IntVector_Intrinsics_vec256_mul64(r0, a01);
+ Lib_IntVector_Intrinsics_vec256 a12 = Lib_IntVector_Intrinsics_vec256_mul64(r1, a01);
+ Lib_IntVector_Intrinsics_vec256 a22 = Lib_IntVector_Intrinsics_vec256_mul64(r2, a01);
+ Lib_IntVector_Intrinsics_vec256 a32 = Lib_IntVector_Intrinsics_vec256_mul64(r3, a01);
+ Lib_IntVector_Intrinsics_vec256 a42 = Lib_IntVector_Intrinsics_vec256_mul64(r4, a01);
+ Lib_IntVector_Intrinsics_vec256
+ a03 =
+ Lib_IntVector_Intrinsics_vec256_add64(a02,
+ Lib_IntVector_Intrinsics_vec256_mul64(r54, a11));
+ Lib_IntVector_Intrinsics_vec256
+ a13 =
+ Lib_IntVector_Intrinsics_vec256_add64(a12,
+ Lib_IntVector_Intrinsics_vec256_mul64(r0, a11));
+ Lib_IntVector_Intrinsics_vec256
+ a23 =
+ Lib_IntVector_Intrinsics_vec256_add64(a22,
+ Lib_IntVector_Intrinsics_vec256_mul64(r1, a11));
+ Lib_IntVector_Intrinsics_vec256
+ a33 =
+ Lib_IntVector_Intrinsics_vec256_add64(a32,
+ Lib_IntVector_Intrinsics_vec256_mul64(r2, a11));
+ Lib_IntVector_Intrinsics_vec256
+ a43 =
+ Lib_IntVector_Intrinsics_vec256_add64(a42,
+ Lib_IntVector_Intrinsics_vec256_mul64(r3, a11));
+ Lib_IntVector_Intrinsics_vec256
+ a04 =
+ Lib_IntVector_Intrinsics_vec256_add64(a03,
+ Lib_IntVector_Intrinsics_vec256_mul64(r53, a21));
+ Lib_IntVector_Intrinsics_vec256
+ a14 =
+ Lib_IntVector_Intrinsics_vec256_add64(a13,
+ Lib_IntVector_Intrinsics_vec256_mul64(r54, a21));
+ Lib_IntVector_Intrinsics_vec256
+ a24 =
+ Lib_IntVector_Intrinsics_vec256_add64(a23,
+ Lib_IntVector_Intrinsics_vec256_mul64(r0, a21));
+ Lib_IntVector_Intrinsics_vec256
+ a34 =
+ Lib_IntVector_Intrinsics_vec256_add64(a33,
+ Lib_IntVector_Intrinsics_vec256_mul64(r1, a21));
+ Lib_IntVector_Intrinsics_vec256
+ a44 =
+ Lib_IntVector_Intrinsics_vec256_add64(a43,
+ Lib_IntVector_Intrinsics_vec256_mul64(r2, a21));
+ Lib_IntVector_Intrinsics_vec256
+ a05 =
+ Lib_IntVector_Intrinsics_vec256_add64(a04,
+ Lib_IntVector_Intrinsics_vec256_mul64(r52, a31));
+ Lib_IntVector_Intrinsics_vec256
+ a15 =
+ Lib_IntVector_Intrinsics_vec256_add64(a14,
+ Lib_IntVector_Intrinsics_vec256_mul64(r53, a31));
+ Lib_IntVector_Intrinsics_vec256
+ a25 =
+ Lib_IntVector_Intrinsics_vec256_add64(a24,
+ Lib_IntVector_Intrinsics_vec256_mul64(r54, a31));
+ Lib_IntVector_Intrinsics_vec256
+ a35 =
+ Lib_IntVector_Intrinsics_vec256_add64(a34,
+ Lib_IntVector_Intrinsics_vec256_mul64(r0, a31));
+ Lib_IntVector_Intrinsics_vec256
+ a45 =
+ Lib_IntVector_Intrinsics_vec256_add64(a44,
+ Lib_IntVector_Intrinsics_vec256_mul64(r1, a31));
+ Lib_IntVector_Intrinsics_vec256
+ a06 =
+ Lib_IntVector_Intrinsics_vec256_add64(a05,
+ Lib_IntVector_Intrinsics_vec256_mul64(r51, a41));
+ Lib_IntVector_Intrinsics_vec256
+ a16 =
+ Lib_IntVector_Intrinsics_vec256_add64(a15,
+ Lib_IntVector_Intrinsics_vec256_mul64(r52, a41));
+ Lib_IntVector_Intrinsics_vec256
+ a26 =
+ Lib_IntVector_Intrinsics_vec256_add64(a25,
+ Lib_IntVector_Intrinsics_vec256_mul64(r53, a41));
+ Lib_IntVector_Intrinsics_vec256
+ a36 =
+ Lib_IntVector_Intrinsics_vec256_add64(a35,
+ Lib_IntVector_Intrinsics_vec256_mul64(r54, a41));
+ Lib_IntVector_Intrinsics_vec256
+ a46 =
+ Lib_IntVector_Intrinsics_vec256_add64(a45,
+ Lib_IntVector_Intrinsics_vec256_mul64(r0, a41));
+ Lib_IntVector_Intrinsics_vec256 t01 = a06;
+ Lib_IntVector_Intrinsics_vec256 t11 = a16;
+ Lib_IntVector_Intrinsics_vec256 t2 = a26;
+ Lib_IntVector_Intrinsics_vec256 t3 = a36;
+ Lib_IntVector_Intrinsics_vec256 t4 = a46;
+ Lib_IntVector_Intrinsics_vec256
+ mask26 = Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU);
+ Lib_IntVector_Intrinsics_vec256
+ z0 = Lib_IntVector_Intrinsics_vec256_shift_right64(t01, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec256
+ z1 = Lib_IntVector_Intrinsics_vec256_shift_right64(t3, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec256 x0 = Lib_IntVector_Intrinsics_vec256_and(t01, mask26);
+ Lib_IntVector_Intrinsics_vec256 x3 = Lib_IntVector_Intrinsics_vec256_and(t3, mask26);
+ Lib_IntVector_Intrinsics_vec256 x1 = Lib_IntVector_Intrinsics_vec256_add64(t11, z0);
+ Lib_IntVector_Intrinsics_vec256 x4 = Lib_IntVector_Intrinsics_vec256_add64(t4, z1);
+ Lib_IntVector_Intrinsics_vec256
+ z01 = Lib_IntVector_Intrinsics_vec256_shift_right64(x1, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec256
+ z11 = Lib_IntVector_Intrinsics_vec256_shift_right64(x4, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec256
+ t = Lib_IntVector_Intrinsics_vec256_shift_left64(z11, (uint32_t)2U);
+ Lib_IntVector_Intrinsics_vec256 z12 = Lib_IntVector_Intrinsics_vec256_add64(z11, t);
+ Lib_IntVector_Intrinsics_vec256 x11 = Lib_IntVector_Intrinsics_vec256_and(x1, mask26);
+ Lib_IntVector_Intrinsics_vec256 x41 = Lib_IntVector_Intrinsics_vec256_and(x4, mask26);
+ Lib_IntVector_Intrinsics_vec256 x2 = Lib_IntVector_Intrinsics_vec256_add64(t2, z01);
+ Lib_IntVector_Intrinsics_vec256 x01 = Lib_IntVector_Intrinsics_vec256_add64(x0, z12);
+ Lib_IntVector_Intrinsics_vec256
+ z02 = Lib_IntVector_Intrinsics_vec256_shift_right64(x2, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec256
+ z13 = Lib_IntVector_Intrinsics_vec256_shift_right64(x01, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec256 x21 = Lib_IntVector_Intrinsics_vec256_and(x2, mask26);
+ Lib_IntVector_Intrinsics_vec256 x02 = Lib_IntVector_Intrinsics_vec256_and(x01, mask26);
+ Lib_IntVector_Intrinsics_vec256 x31 = Lib_IntVector_Intrinsics_vec256_add64(x3, z02);
+ Lib_IntVector_Intrinsics_vec256 x12 = Lib_IntVector_Intrinsics_vec256_add64(x11, z13);
+ Lib_IntVector_Intrinsics_vec256
+ z03 = Lib_IntVector_Intrinsics_vec256_shift_right64(x31, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec256 x32 = Lib_IntVector_Intrinsics_vec256_and(x31, mask26);
+ Lib_IntVector_Intrinsics_vec256 x42 = Lib_IntVector_Intrinsics_vec256_add64(x41, z03);
+ Lib_IntVector_Intrinsics_vec256 o0 = x02;
+ Lib_IntVector_Intrinsics_vec256 o1 = x12;
+ Lib_IntVector_Intrinsics_vec256 o2 = x21;
+ Lib_IntVector_Intrinsics_vec256 o3 = x32;
+ Lib_IntVector_Intrinsics_vec256 o4 = x42;
+ acc[0U] = o0;
+ acc[1U] = o1;
+ acc[2U] = o2;
+ acc[3U] = o3;
+ acc[4U] = o4;
+ return;
+ }
+}
+/*
+ * Produce the final Poly1305 tag from the accumulator held in ctx.
+ *
+ * tag: output buffer; 16 bytes are written through two store64_le calls.
+ * key: 16 bytes are read starting at key + 16 and added to the result.
+ * ctx: state; its first five vectors are the accumulator, kept as 26-bit
+ *      limbs, and are overwritten with the value left after the conditional
+ *      subtraction below.
+ *
+ * Steps: two carry passes over the limbs, a branch-free conditional
+ * subtraction of p = 2^130 - 5, packing of lane 0 into two 64-bit words, and
+ * a 128-bit addition of the key half whose final carry is discarded.
+ */
+void
+Hacl_Poly1305_256_poly1305_finish(
+ uint8_t *tag,
+ uint8_t *key,
+ Lib_IntVector_Intrinsics_vec256 *ctx)
+{
+ Lib_IntVector_Intrinsics_vec256 *acc = ctx; /* accumulator limbs are ctx[0..4] */
+ uint8_t *ks = key + (uint32_t)16U; /* second 16 bytes of key, added at the end */
+ Lib_IntVector_Intrinsics_vec256 f0 = acc[0U];
+ Lib_IntVector_Intrinsics_vec256 f13 = acc[1U];
+ Lib_IntVector_Intrinsics_vec256 f23 = acc[2U];
+ Lib_IntVector_Intrinsics_vec256 f33 = acc[3U];
+ Lib_IntVector_Intrinsics_vec256 f40 = acc[4U];
+ Lib_IntVector_Intrinsics_vec256 /* carry pass 1: keep the low 26 bits of each limb, add the rest to the next limb */
+ l0 = Lib_IntVector_Intrinsics_vec256_add64(f0, Lib_IntVector_Intrinsics_vec256_zero);
+ Lib_IntVector_Intrinsics_vec256
+ tmp00 =
+ Lib_IntVector_Intrinsics_vec256_and(l0,
+ Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU));
+ Lib_IntVector_Intrinsics_vec256
+ c00 = Lib_IntVector_Intrinsics_vec256_shift_right64(l0, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec256 l1 = Lib_IntVector_Intrinsics_vec256_add64(f13, c00);
+ Lib_IntVector_Intrinsics_vec256
+ tmp10 =
+ Lib_IntVector_Intrinsics_vec256_and(l1,
+ Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU));
+ Lib_IntVector_Intrinsics_vec256
+ c10 = Lib_IntVector_Intrinsics_vec256_shift_right64(l1, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec256 l2 = Lib_IntVector_Intrinsics_vec256_add64(f23, c10);
+ Lib_IntVector_Intrinsics_vec256
+ tmp20 =
+ Lib_IntVector_Intrinsics_vec256_and(l2,
+ Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU));
+ Lib_IntVector_Intrinsics_vec256
+ c20 = Lib_IntVector_Intrinsics_vec256_shift_right64(l2, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec256 l3 = Lib_IntVector_Intrinsics_vec256_add64(f33, c20);
+ Lib_IntVector_Intrinsics_vec256
+ tmp30 =
+ Lib_IntVector_Intrinsics_vec256_and(l3,
+ Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU));
+ Lib_IntVector_Intrinsics_vec256
+ c30 = Lib_IntVector_Intrinsics_vec256_shift_right64(l3, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec256 l4 = Lib_IntVector_Intrinsics_vec256_add64(f40, c30);
+ Lib_IntVector_Intrinsics_vec256
+ tmp40 =
+ Lib_IntVector_Intrinsics_vec256_and(l4,
+ Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU));
+ Lib_IntVector_Intrinsics_vec256
+ c40 = Lib_IntVector_Intrinsics_vec256_shift_right64(l4, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec256 /* the carry out of limb 4 has weight 2^130, which equals 5 modulo 2^130 - 5, so it re-enters limb 0 times 5 */
+ f010 =
+ Lib_IntVector_Intrinsics_vec256_add64(tmp00,
+ Lib_IntVector_Intrinsics_vec256_smul64(c40, (uint64_t)5U));
+ Lib_IntVector_Intrinsics_vec256 f110 = tmp10;
+ Lib_IntVector_Intrinsics_vec256 f210 = tmp20;
+ Lib_IntVector_Intrinsics_vec256 f310 = tmp30;
+ Lib_IntVector_Intrinsics_vec256 f410 = tmp40;
+ Lib_IntVector_Intrinsics_vec256 /* carry pass 2: same sequence again on the result of pass 1 */
+ l = Lib_IntVector_Intrinsics_vec256_add64(f010, Lib_IntVector_Intrinsics_vec256_zero);
+ Lib_IntVector_Intrinsics_vec256
+ tmp0 =
+ Lib_IntVector_Intrinsics_vec256_and(l,
+ Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU));
+ Lib_IntVector_Intrinsics_vec256
+ c0 = Lib_IntVector_Intrinsics_vec256_shift_right64(l, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec256 l5 = Lib_IntVector_Intrinsics_vec256_add64(f110, c0);
+ Lib_IntVector_Intrinsics_vec256
+ tmp1 =
+ Lib_IntVector_Intrinsics_vec256_and(l5,
+ Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU));
+ Lib_IntVector_Intrinsics_vec256
+ c1 = Lib_IntVector_Intrinsics_vec256_shift_right64(l5, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec256 l6 = Lib_IntVector_Intrinsics_vec256_add64(f210, c1);
+ Lib_IntVector_Intrinsics_vec256
+ tmp2 =
+ Lib_IntVector_Intrinsics_vec256_and(l6,
+ Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU));
+ Lib_IntVector_Intrinsics_vec256
+ c2 = Lib_IntVector_Intrinsics_vec256_shift_right64(l6, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec256 l7 = Lib_IntVector_Intrinsics_vec256_add64(f310, c2);
+ Lib_IntVector_Intrinsics_vec256
+ tmp3 =
+ Lib_IntVector_Intrinsics_vec256_and(l7,
+ Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU));
+ Lib_IntVector_Intrinsics_vec256
+ c3 = Lib_IntVector_Intrinsics_vec256_shift_right64(l7, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec256 l8 = Lib_IntVector_Intrinsics_vec256_add64(f410, c3);
+ Lib_IntVector_Intrinsics_vec256
+ tmp4 =
+ Lib_IntVector_Intrinsics_vec256_and(l8,
+ Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU));
+ Lib_IntVector_Intrinsics_vec256
+ c4 = Lib_IntVector_Intrinsics_vec256_shift_right64(l8, (uint32_t)26U);
+ Lib_IntVector_Intrinsics_vec256 /* fold the top carry back into limb 0 (times 5), as after pass 1 */
+ f02 =
+ Lib_IntVector_Intrinsics_vec256_add64(tmp0,
+ Lib_IntVector_Intrinsics_vec256_smul64(c4, (uint64_t)5U));
+ Lib_IntVector_Intrinsics_vec256 f12 = tmp1;
+ Lib_IntVector_Intrinsics_vec256 f22 = tmp2;
+ Lib_IntVector_Intrinsics_vec256 f32 = tmp3;
+ Lib_IntVector_Intrinsics_vec256 f42 = tmp4;
+ Lib_IntVector_Intrinsics_vec256 /* mh: value of limbs 1..4 of p = 2^130 - 5 (all 26 bits set) */
+ mh = Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU);
+ Lib_IntVector_Intrinsics_vec256 /* ml: limb 0 of p, i.e. 2^26 - 5 */
+ ml = Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3fffffbU);
+ Lib_IntVector_Intrinsics_vec256 mask = Lib_IntVector_Intrinsics_vec256_eq64(f42, mh); /* build "value >= p" without branching; assumes eq64/gt64 yield all-ones lanes when true -- TODO confirm in libintvector.h */
+ Lib_IntVector_Intrinsics_vec256
+ mask1 =
+ Lib_IntVector_Intrinsics_vec256_and(mask,
+ Lib_IntVector_Intrinsics_vec256_eq64(f32, mh));
+ Lib_IntVector_Intrinsics_vec256
+ mask2 =
+ Lib_IntVector_Intrinsics_vec256_and(mask1,
+ Lib_IntVector_Intrinsics_vec256_eq64(f22, mh));
+ Lib_IntVector_Intrinsics_vec256
+ mask3 =
+ Lib_IntVector_Intrinsics_vec256_and(mask2,
+ Lib_IntVector_Intrinsics_vec256_eq64(f12, mh));
+ Lib_IntVector_Intrinsics_vec256 /* limbs 1..4 all equal mh AND NOT (ml > limb 0) */
+ mask4 =
+ Lib_IntVector_Intrinsics_vec256_and(mask3,
+ Lib_IntVector_Intrinsics_vec256_lognot(Lib_IntVector_Intrinsics_vec256_gt64(ml, f02)));
+ Lib_IntVector_Intrinsics_vec256 ph = Lib_IntVector_Intrinsics_vec256_and(mask4, mh); /* limbs of p where mask4 is set, zero elsewhere */
+ Lib_IntVector_Intrinsics_vec256 pl = Lib_IntVector_Intrinsics_vec256_and(mask4, ml);
+ Lib_IntVector_Intrinsics_vec256 o0 = Lib_IntVector_Intrinsics_vec256_sub64(f02, pl); /* conditional subtraction of p, limb by limb */
+ Lib_IntVector_Intrinsics_vec256 o1 = Lib_IntVector_Intrinsics_vec256_sub64(f12, ph);
+ Lib_IntVector_Intrinsics_vec256 o2 = Lib_IntVector_Intrinsics_vec256_sub64(f22, ph);
+ Lib_IntVector_Intrinsics_vec256 o3 = Lib_IntVector_Intrinsics_vec256_sub64(f32, ph);
+ Lib_IntVector_Intrinsics_vec256 o4 = Lib_IntVector_Intrinsics_vec256_sub64(f42, ph);
+ Lib_IntVector_Intrinsics_vec256 f011 = o0;
+ Lib_IntVector_Intrinsics_vec256 f111 = o1;
+ Lib_IntVector_Intrinsics_vec256 f211 = o2;
+ Lib_IntVector_Intrinsics_vec256 f311 = o3;
+ Lib_IntVector_Intrinsics_vec256 f411 = o4;
+ acc[0U] = f011; /* the accumulator in ctx is overwritten with the subtracted value */
+ acc[1U] = f111;
+ acc[2U] = f211;
+ acc[3U] = f311;
+ acc[4U] = f411;
+ Lib_IntVector_Intrinsics_vec256 f00 = acc[0U];
+ Lib_IntVector_Intrinsics_vec256 f1 = acc[1U];
+ Lib_IntVector_Intrinsics_vec256 f2 = acc[2U];
+ Lib_IntVector_Intrinsics_vec256 f3 = acc[3U];
+ Lib_IntVector_Intrinsics_vec256 f4 = acc[4U];
+ uint64_t f01 = Lib_IntVector_Intrinsics_vec256_extract64(f00, (uint32_t)0U); /* only element 0 of each vector feeds the tag */
+ uint64_t f112 = Lib_IntVector_Intrinsics_vec256_extract64(f1, (uint32_t)0U);
+ uint64_t f212 = Lib_IntVector_Intrinsics_vec256_extract64(f2, (uint32_t)0U);
+ uint64_t f312 = Lib_IntVector_Intrinsics_vec256_extract64(f3, (uint32_t)0U);
+ uint64_t f41 = Lib_IntVector_Intrinsics_vec256_extract64(f4, (uint32_t)0U);
+ uint64_t lo = (f01 | f112 << (uint32_t)26U) | f212 << (uint32_t)52U; /* pack limbs at bit offsets 0, 26, 52 into the low word */
+ uint64_t hi = (f212 >> (uint32_t)12U | f312 << (uint32_t)14U) | f41 << (uint32_t)40U; /* rest of limb 2, then limbs 3 and 4, into the high word */
+ uint64_t f10 = lo;
+ uint64_t f11 = hi;
+ uint64_t u0 = load64_le(ks); /* key half as two 64-bit words read with load64_le */
+ uint64_t lo0 = u0;
+ uint64_t u = load64_le(ks + (uint32_t)8U);
+ uint64_t hi0 = u;
+ uint64_t f20 = lo0;
+ uint64_t f21 = hi0;
+ uint64_t r0 = f10 + f20; /* 128-bit addition done as two 64-bit additions plus a carry */
+ uint64_t r1 = f11 + f21;
+ uint64_t c = (r0 ^ ((r0 ^ f20) | ((r0 - f20) ^ f20))) >> (uint32_t)63U; /* branch-free r0 < f20: 1 when the low-word addition wrapped */
+ uint64_t r11 = r1 + c; /* any carry out of the high word is dropped */
+ uint64_t f30 = r0;
+ uint64_t f31 = r11;
+ store64_le(tag, f30);
+ store64_le(tag + (uint32_t)8U, f31);
+}
+
+/*
+ * One-shot Poly1305: authenticates the `len` bytes at `text` under `key` and
+ * lets the finish routine write the resulting tag to `tag`.
+ *
+ * A 25-vector state is kept on the stack for the duration of the call; it is
+ * cleared, initialised from `key`, fed the whole message, then finalised.
+ */
+void
+Hacl_Poly1305_256_poly1305_mac(uint8_t *tag, uint32_t len, uint8_t *text, uint8_t *key)
+{
+    Lib_IntVector_Intrinsics_vec256 state[25U];
+    uint32_t slot = 0U;
+
+    /* Clear every vector of the local state before it is initialised. */
+    while (slot < (uint32_t)25U) {
+        state[slot] = Lib_IntVector_Intrinsics_vec256_zero;
+        ++slot;
+    }
+
+    Hacl_Poly1305_256_poly1305_init(state, key);
+    Hacl_Poly1305_256_poly1305_update(state, len, text);
+    Hacl_Poly1305_256_poly1305_finish(tag, key, state);
+}
diff --git a/security/nss/lib/freebl/verified/Hacl_Poly1305_256.h b/security/nss/lib/freebl/verified/Hacl_Poly1305_256.h
new file mode 100644
index 0000000000..9d5ff5728b
--- /dev/null
+++ b/security/nss/lib/freebl/verified/Hacl_Poly1305_256.h
@@ -0,0 +1,66 @@
+/* MIT License
+ *
+ * Copyright (c) 2016-2020 INRIA, CMU and Microsoft Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "libintvector.h"
+#include "kremlin/internal/types.h"
+#include "kremlin/lowstar_endianness.h"
+#include <string.h>
+#include <stdbool.h>
+
+#ifndef __Hacl_Poly1305_256_H
+#define __Hacl_Poly1305_256_H
+
+#include "Hacl_Kremlib.h"
+/*
+ * NOTE(review): defined outside this header. Judging by the name and
+ * parameters it loads message bytes from `b` into the accumulator `acc` --
+ * confirm against the definition.
+ */
+void
+Hacl_Impl_Poly1305_Field32xN_256_load_acc4(Lib_IntVector_Intrinsics_vec256 *acc, uint8_t *b);
+/*
+ * NOTE(review): defined outside this header. Presumably multiplies by the
+ * precomputed values in `p` and normalises the result into `out` -- confirm
+ * against the definition.
+ */
+void
+Hacl_Impl_Poly1305_Field32xN_256_fmul_r4_normalize(
+ Lib_IntVector_Intrinsics_vec256 *out,
+ Lib_IntVector_Intrinsics_vec256 *p);
+/*
+ * Block length constant exported by the implementation; its value is not
+ * visible from this header.
+ */
+extern uint32_t Hacl_Poly1305_256_blocklen;
+/*
+ * Poly1305 state: a pointer to an array of 256-bit vectors. The one-shot
+ * Hacl_Poly1305_256_poly1305_mac allocates 25 vectors for it; the first five
+ * are the accumulator read by Hacl_Poly1305_256_poly1305_finish.
+ */
+typedef Lib_IntVector_Intrinsics_vec256 *Hacl_Poly1305_256_poly1305_ctx;
+/*
+ * Prepare `ctx` from `key`. NOTE(review): presumably clears the accumulator
+ * and precomputes key-dependent values -- confirm against the definition.
+ */
+void Hacl_Poly1305_256_poly1305_init(Lib_IntVector_Intrinsics_vec256 *ctx, uint8_t *key);
+/*
+ * NOTE(review): presumably absorbs a single block starting at `text` into
+ * `ctx` -- confirm against the definition.
+ */
+void Hacl_Poly1305_256_poly1305_update1(Lib_IntVector_Intrinsics_vec256 *ctx, uint8_t *text);
+/*
+ * Absorb `len` bytes of `text` into `ctx`. The definition handles a trailing
+ * partial block by copying it into a zeroed 16-byte buffer before use.
+ */
+void
+Hacl_Poly1305_256_poly1305_update(
+ Lib_IntVector_Intrinsics_vec256 *ctx,
+ uint32_t len,
+ uint8_t *text);
+/*
+ * Write the 16-byte tag to `tag`: reduces the accumulator in `ctx` (which is
+ * overwritten) and adds the 16 bytes found at `key + 16`.
+ */
+void
+Hacl_Poly1305_256_poly1305_finish(
+ uint8_t *tag,
+ uint8_t *key,
+ Lib_IntVector_Intrinsics_vec256 *ctx);
+/*
+ * One-shot MAC: runs init, update and finish on a local 25-vector state and
+ * writes the tag for the `len` bytes at `text` to `tag`.
+ */
+void Hacl_Poly1305_256_poly1305_mac(uint8_t *tag, uint32_t len, uint8_t *text, uint8_t *key);
+
+#define __Hacl_Poly1305_256_H_DEFINED
+#endif
diff --git a/security/nss/lib/freebl/verified/Hacl_Poly1305_32.c b/security/nss/lib/freebl/verified/Hacl_Poly1305_32.c
new file mode 100644
index 0000000000..b5b118333e
--- /dev/null
+++ b/security/nss/lib/freebl/verified/Hacl_Poly1305_32.c
@@ -0,0 +1,574 @@
+/* MIT License
+ *
+ * Copyright (c) 2016-2020 INRIA, CMU and Microsoft Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "Hacl_Poly1305_32.h"
+
+uint32_t Hacl_Poly1305_32_blocklen = (uint32_t)16U; /* 16: the same step, in bytes, that poly1305_update uses to split its input */
+
+void /* Initialise a Poly1305 state: zero the accumulator and derive the multiplier r from key[0..15]. */
+Hacl_Poly1305_32_poly1305_init(uint64_t *ctx, uint8_t *key)
+{ /* ctx is written at indices 0..24: acc[5] | r[5] | 5*r[5] | rn[5] | 5*rn[5] */
+ uint64_t *acc = ctx; /* accumulator: five limbs in radix 2^26 */
+ uint64_t *pre = ctx + (uint32_t)5U; /* precomputed key limbs start right after the accumulator */
+ uint8_t *kr = key; /* only the first 16 key bytes are read in this function */
+ acc[0U] = (uint64_t)0U; /* accumulator starts at zero */
+ acc[1U] = (uint64_t)0U;
+ acc[2U] = (uint64_t)0U;
+ acc[3U] = (uint64_t)0U;
+ acc[4U] = (uint64_t)0U;
+ uint64_t u0 = load64_le(kr); /* key[0..7]; load64_le is defined elsewhere, little-endian per its name */
+ uint64_t lo = u0;
+ uint64_t u = load64_le(kr + (uint32_t)8U); /* key[8..15] */
+ uint64_t hi = u;
+ uint64_t mask0 = (uint64_t)0x0ffffffc0fffffffU; /* low half of the r clamp mask (matches RFC 8439 section 2.5) */
+ uint64_t mask1 = (uint64_t)0x0ffffffc0ffffffcU; /* high half of the r clamp mask */
+ uint64_t lo1 = lo & mask0;
+ uint64_t hi1 = hi & mask1;
+ uint64_t *r = pre; /* pre[0..4] */
+ uint64_t *r5 = pre + (uint32_t)5U; /* pre[5..9] */
+ uint64_t *rn = pre + (uint32_t)10U; /* pre[10..14] */
+ uint64_t *rn_5 = pre + (uint32_t)15U; /* pre[15..19] */
+ uint64_t r_vec0 = lo1;
+ uint64_t r_vec1 = hi1;
+ uint64_t f00 = r_vec0 & (uint64_t)0x3ffffffU; /* bits 0..25 of clamped r */
+ uint64_t f10 = r_vec0 >> (uint32_t)26U & (uint64_t)0x3ffffffU; /* bits 26..51 */
+ uint64_t f20 = r_vec0 >> (uint32_t)52U | (r_vec1 & (uint64_t)0x3fffU) << (uint32_t)12U; /* bits 52..77: 12 from lo, 14 from hi */
+ uint64_t f30 = r_vec1 >> (uint32_t)14U & (uint64_t)0x3ffffffU; /* bits 78..103 */
+ uint64_t f40 = r_vec1 >> (uint32_t)40U; /* bits 104..127 */
+ uint64_t f0 = f00;
+ uint64_t f1 = f10;
+ uint64_t f2 = f20;
+ uint64_t f3 = f30;
+ uint64_t f4 = f40;
+ r[0U] = f0; /* store r as five 26-bit limbs */
+ r[1U] = f1;
+ r[2U] = f2;
+ r[3U] = f3;
+ r[4U] = f4;
+ uint64_t f200 = r[0U];
+ uint64_t f21 = r[1U];
+ uint64_t f22 = r[2U];
+ uint64_t f23 = r[3U];
+ uint64_t f24 = r[4U];
+ r5[0U] = f200 * (uint64_t)5U; /* limb-wise 5*r, used by the multiply to fold 2^130 back as 5 */
+ r5[1U] = f21 * (uint64_t)5U;
+ r5[2U] = f22 * (uint64_t)5U;
+ r5[3U] = f23 * (uint64_t)5U;
+ r5[4U] = f24 * (uint64_t)5U;
+ rn[0U] = r[0U]; /* rn is a plain copy of r here; update1/update/finish in this file never read it */
+ rn[1U] = r[1U];
+ rn[2U] = r[2U];
+ rn[3U] = r[3U];
+ rn[4U] = r[4U];
+ rn_5[0U] = r5[0U]; /* likewise a plain copy of 5*r */
+ rn_5[1U] = r5[1U];
+ rn_5[2U] = r5[2U];
+ rn_5[3U] = r5[3U];
+ rn_5[4U] = r5[4U];
+}
+
+void /* Absorb exactly one 16-byte block: acc = (acc + block + 2^128) * r, partially reduced modulo 2^130 - 5. */
+Hacl_Poly1305_32_poly1305_update1(uint64_t *ctx, uint8_t *text)
+{ /* reads text[0..15]; reads ctx[5..14] (r and 5*r); reads and rewrites ctx[0..4] */
+ uint64_t *pre = ctx + (uint32_t)5U; /* r limbs at pre[0..4], 5*r limbs at pre[5..9] */
+ uint64_t *acc = ctx; /* five accumulator limbs, radix 2^26 */
+ uint64_t e[5U] = { 0U }; /* the message block expanded to five limbs */
+ uint64_t u0 = load64_le(text); /* text[0..7] */
+ uint64_t lo = u0;
+ uint64_t u = load64_le(text + (uint32_t)8U); /* text[8..15] */
+ uint64_t hi = u;
+ uint64_t f0 = lo;
+ uint64_t f1 = hi;
+ uint64_t f010 = f0 & (uint64_t)0x3ffffffU; /* bits 0..25 */
+ uint64_t f110 = f0 >> (uint32_t)26U & (uint64_t)0x3ffffffU; /* bits 26..51 */
+ uint64_t f20 = f0 >> (uint32_t)52U | (f1 & (uint64_t)0x3fffU) << (uint32_t)12U; /* bits 52..77, straddling lo and hi */
+ uint64_t f30 = f1 >> (uint32_t)14U & (uint64_t)0x3ffffffU; /* bits 78..103 */
+ uint64_t f40 = f1 >> (uint32_t)40U; /* bits 104..127 */
+ uint64_t f01 = f010;
+ uint64_t f111 = f110;
+ uint64_t f2 = f20;
+ uint64_t f3 = f30;
+ uint64_t f41 = f40;
+ e[0U] = f01;
+ e[1U] = f111;
+ e[2U] = f2;
+ e[3U] = f3;
+ e[4U] = f41;
+ uint64_t b = (uint64_t)0x1000000U; /* 2^24 inside limb 4 (weight 2^104) = 2^128: the pad bit just past a full block */
+ uint64_t mask = b;
+ uint64_t f4 = e[4U];
+ e[4U] = f4 | mask;
+ uint64_t *r = pre;
+ uint64_t *r5 = pre + (uint32_t)5U;
+ uint64_t r0 = r[0U];
+ uint64_t r1 = r[1U];
+ uint64_t r2 = r[2U];
+ uint64_t r3 = r[3U];
+ uint64_t r4 = r[4U];
+ uint64_t r51 = r5[1U]; /* r5[0] is never needed by the product below */
+ uint64_t r52 = r5[2U];
+ uint64_t r53 = r5[3U];
+ uint64_t r54 = r5[4U];
+ uint64_t f10 = e[0U];
+ uint64_t f11 = e[1U];
+ uint64_t f12 = e[2U];
+ uint64_t f13 = e[3U];
+ uint64_t f14 = e[4U];
+ uint64_t a0 = acc[0U];
+ uint64_t a1 = acc[1U];
+ uint64_t a2 = acc[2U];
+ uint64_t a3 = acc[3U];
+ uint64_t a4 = acc[4U];
+ uint64_t a01 = a0 + f10; /* acc + block, limb by limb, no carry yet */
+ uint64_t a11 = a1 + f11;
+ uint64_t a21 = a2 + f12;
+ uint64_t a31 = a3 + f13;
+ uint64_t a41 = a4 + f14;
+ uint64_t a02 = r0 * a01; /* schoolbook product by r, one input limb per group of five lines */
+ uint64_t a12 = r1 * a01;
+ uint64_t a22 = r2 * a01;
+ uint64_t a32 = r3 * a01;
+ uint64_t a42 = r4 * a01;
+ uint64_t a03 = a02 + r54 * a11; /* terms that overflow past limb 4 wrap to the bottom using 5*r */
+ uint64_t a13 = a12 + r0 * a11;
+ uint64_t a23 = a22 + r1 * a11;
+ uint64_t a33 = a32 + r2 * a11;
+ uint64_t a43 = a42 + r3 * a11;
+ uint64_t a04 = a03 + r53 * a21;
+ uint64_t a14 = a13 + r54 * a21;
+ uint64_t a24 = a23 + r0 * a21;
+ uint64_t a34 = a33 + r1 * a21;
+ uint64_t a44 = a43 + r2 * a21;
+ uint64_t a05 = a04 + r52 * a31;
+ uint64_t a15 = a14 + r53 * a31;
+ uint64_t a25 = a24 + r54 * a31;
+ uint64_t a35 = a34 + r0 * a31;
+ uint64_t a45 = a44 + r1 * a31;
+ uint64_t a06 = a05 + r51 * a41;
+ uint64_t a16 = a15 + r52 * a41;
+ uint64_t a26 = a25 + r53 * a41;
+ uint64_t a36 = a35 + r54 * a41;
+ uint64_t a46 = a45 + r0 * a41;
+ uint64_t t0 = a06;
+ uint64_t t1 = a16;
+ uint64_t t2 = a26;
+ uint64_t t3 = a36;
+ uint64_t t4 = a46;
+ uint64_t mask26 = (uint64_t)0x3ffffffU;
+ uint64_t z0 = t0 >> (uint32_t)26U; /* carry propagation runs as two interleaved chains, from limb 0 and from limb 3 */
+ uint64_t z1 = t3 >> (uint32_t)26U;
+ uint64_t x0 = t0 & mask26;
+ uint64_t x3 = t3 & mask26;
+ uint64_t x1 = t1 + z0;
+ uint64_t x4 = t4 + z1;
+ uint64_t z01 = x1 >> (uint32_t)26U;
+ uint64_t z11 = x4 >> (uint32_t)26U; /* carry out of the top limb */
+ uint64_t t = z11 << (uint32_t)2U;
+ uint64_t z12 = z11 + t; /* 5 * z11: the top carry re-enters at limb 0 */
+ uint64_t x11 = x1 & mask26;
+ uint64_t x41 = x4 & mask26;
+ uint64_t x2 = t2 + z01;
+ uint64_t x01 = x0 + z12;
+ uint64_t z02 = x2 >> (uint32_t)26U;
+ uint64_t z13 = x01 >> (uint32_t)26U;
+ uint64_t x21 = x2 & mask26;
+ uint64_t x02 = x01 & mask26;
+ uint64_t x31 = x3 + z02;
+ uint64_t x12 = x11 + z13; /* not re-masked after this last carry */
+ uint64_t z03 = x31 >> (uint32_t)26U;
+ uint64_t x32 = x31 & mask26;
+ uint64_t x42 = x41 + z03; /* not re-masked after this last carry */
+ uint64_t o0 = x02;
+ uint64_t o1 = x12;
+ uint64_t o2 = x21;
+ uint64_t o3 = x32;
+ uint64_t o4 = x42;
+ acc[0U] = o0; /* write the new accumulator back into ctx[0..4] */
+ acc[1U] = o1;
+ acc[2U] = o2;
+ acc[3U] = o3;
+ acc[4U] = o4;
+}
+
+void /* Absorb len bytes of text: every full 16-byte block, then a zero-padded partial block if len % 16 != 0. */
+Hacl_Poly1305_32_poly1305_update(uint64_t *ctx, uint32_t len, uint8_t *text)
+{ /* nothing is buffered between calls: a trailing partial block is padded and absorbed immediately */
+ uint64_t *pre = ctx + (uint32_t)5U; /* r limbs at pre[0..4], 5*r limbs at pre[5..9] */
+ uint64_t *acc = ctx; /* five accumulator limbs, radix 2^26 */
+ uint32_t nb = len / (uint32_t)16U; /* number of full blocks */
+ uint32_t rem = len % (uint32_t)16U; /* trailing bytes, 0..15 */
+ for (uint32_t i = (uint32_t)0U; i < nb; i++) { /* loop body repeats the arithmetic of poly1305_update1 inline */
+ uint8_t *block = text + i * (uint32_t)16U;
+ uint64_t e[5U] = { 0U }; /* the message block expanded to five limbs */
+ uint64_t u0 = load64_le(block); /* block[0..7] */
+ uint64_t lo = u0;
+ uint64_t u = load64_le(block + (uint32_t)8U); /* block[8..15] */
+ uint64_t hi = u;
+ uint64_t f0 = lo;
+ uint64_t f1 = hi;
+ uint64_t f010 = f0 & (uint64_t)0x3ffffffU; /* bits 0..25 */
+ uint64_t f110 = f0 >> (uint32_t)26U & (uint64_t)0x3ffffffU; /* bits 26..51 */
+ uint64_t f20 = f0 >> (uint32_t)52U | (f1 & (uint64_t)0x3fffU) << (uint32_t)12U; /* bits 52..77 */
+ uint64_t f30 = f1 >> (uint32_t)14U & (uint64_t)0x3ffffffU; /* bits 78..103 */
+ uint64_t f40 = f1 >> (uint32_t)40U; /* bits 104..127 */
+ uint64_t f01 = f010;
+ uint64_t f111 = f110;
+ uint64_t f2 = f20;
+ uint64_t f3 = f30;
+ uint64_t f41 = f40;
+ e[0U] = f01;
+ e[1U] = f111;
+ e[2U] = f2;
+ e[3U] = f3;
+ e[4U] = f41;
+ uint64_t b = (uint64_t)0x1000000U; /* 2^24 inside limb 4 = 2^128: pad bit for a full block */
+ uint64_t mask = b;
+ uint64_t f4 = e[4U];
+ e[4U] = f4 | mask;
+ uint64_t *r = pre;
+ uint64_t *r5 = pre + (uint32_t)5U;
+ uint64_t r0 = r[0U];
+ uint64_t r1 = r[1U];
+ uint64_t r2 = r[2U];
+ uint64_t r3 = r[3U];
+ uint64_t r4 = r[4U];
+ uint64_t r51 = r5[1U];
+ uint64_t r52 = r5[2U];
+ uint64_t r53 = r5[3U];
+ uint64_t r54 = r5[4U];
+ uint64_t f10 = e[0U];
+ uint64_t f11 = e[1U];
+ uint64_t f12 = e[2U];
+ uint64_t f13 = e[3U];
+ uint64_t f14 = e[4U];
+ uint64_t a0 = acc[0U];
+ uint64_t a1 = acc[1U];
+ uint64_t a2 = acc[2U];
+ uint64_t a3 = acc[3U];
+ uint64_t a4 = acc[4U];
+ uint64_t a01 = a0 + f10; /* acc + block, limb by limb */
+ uint64_t a11 = a1 + f11;
+ uint64_t a21 = a2 + f12;
+ uint64_t a31 = a3 + f13;
+ uint64_t a41 = a4 + f14;
+ uint64_t a02 = r0 * a01; /* schoolbook product by r; wrapped terms use 5*r */
+ uint64_t a12 = r1 * a01;
+ uint64_t a22 = r2 * a01;
+ uint64_t a32 = r3 * a01;
+ uint64_t a42 = r4 * a01;
+ uint64_t a03 = a02 + r54 * a11;
+ uint64_t a13 = a12 + r0 * a11;
+ uint64_t a23 = a22 + r1 * a11;
+ uint64_t a33 = a32 + r2 * a11;
+ uint64_t a43 = a42 + r3 * a11;
+ uint64_t a04 = a03 + r53 * a21;
+ uint64_t a14 = a13 + r54 * a21;
+ uint64_t a24 = a23 + r0 * a21;
+ uint64_t a34 = a33 + r1 * a21;
+ uint64_t a44 = a43 + r2 * a21;
+ uint64_t a05 = a04 + r52 * a31;
+ uint64_t a15 = a14 + r53 * a31;
+ uint64_t a25 = a24 + r54 * a31;
+ uint64_t a35 = a34 + r0 * a31;
+ uint64_t a45 = a44 + r1 * a31;
+ uint64_t a06 = a05 + r51 * a41;
+ uint64_t a16 = a15 + r52 * a41;
+ uint64_t a26 = a25 + r53 * a41;
+ uint64_t a36 = a35 + r54 * a41;
+ uint64_t a46 = a45 + r0 * a41;
+ uint64_t t0 = a06;
+ uint64_t t1 = a16;
+ uint64_t t2 = a26;
+ uint64_t t3 = a36;
+ uint64_t t4 = a46;
+ uint64_t mask26 = (uint64_t)0x3ffffffU;
+ uint64_t z0 = t0 >> (uint32_t)26U; /* two interleaved carry chains, from limb 0 and from limb 3 */
+ uint64_t z1 = t3 >> (uint32_t)26U;
+ uint64_t x0 = t0 & mask26;
+ uint64_t x3 = t3 & mask26;
+ uint64_t x1 = t1 + z0;
+ uint64_t x4 = t4 + z1;
+ uint64_t z01 = x1 >> (uint32_t)26U;
+ uint64_t z11 = x4 >> (uint32_t)26U; /* carry out of the top limb */
+ uint64_t t = z11 << (uint32_t)2U;
+ uint64_t z12 = z11 + t; /* 5 * z11 re-enters at limb 0 */
+ uint64_t x11 = x1 & mask26;
+ uint64_t x41 = x4 & mask26;
+ uint64_t x2 = t2 + z01;
+ uint64_t x01 = x0 + z12;
+ uint64_t z02 = x2 >> (uint32_t)26U;
+ uint64_t z13 = x01 >> (uint32_t)26U;
+ uint64_t x21 = x2 & mask26;
+ uint64_t x02 = x01 & mask26;
+ uint64_t x31 = x3 + z02;
+ uint64_t x12 = x11 + z13;
+ uint64_t z03 = x31 >> (uint32_t)26U;
+ uint64_t x32 = x31 & mask26;
+ uint64_t x42 = x41 + z03;
+ uint64_t o0 = x02;
+ uint64_t o1 = x12;
+ uint64_t o2 = x21;
+ uint64_t o3 = x32;
+ uint64_t o4 = x42;
+ acc[0U] = o0; /* accumulator updated in place before the next block */
+ acc[1U] = o1;
+ acc[2U] = o2;
+ acc[3U] = o3;
+ acc[4U] = o4;
+ }
+ if (rem > (uint32_t)0U) { /* partial final block: same arithmetic, different pad-bit position */
+ uint8_t *last = text + nb * (uint32_t)16U; /* first byte after the full blocks */
+ uint64_t e[5U] = { 0U };
+ uint8_t tmp[16U] = { 0U }; /* zero-filled so bytes past rem stay 0 */
+ memcpy(tmp, last, rem * sizeof(last[0U])); /* copies only the rem valid bytes; text is never read past len */
+ uint64_t u0 = load64_le(tmp);
+ uint64_t lo = u0;
+ uint64_t u = load64_le(tmp + (uint32_t)8U);
+ uint64_t hi = u;
+ uint64_t f0 = lo;
+ uint64_t f1 = hi;
+ uint64_t f010 = f0 & (uint64_t)0x3ffffffU;
+ uint64_t f110 = f0 >> (uint32_t)26U & (uint64_t)0x3ffffffU;
+ uint64_t f20 = f0 >> (uint32_t)52U | (f1 & (uint64_t)0x3fffU) << (uint32_t)12U;
+ uint64_t f30 = f1 >> (uint32_t)14U & (uint64_t)0x3ffffffU;
+ uint64_t f40 = f1 >> (uint32_t)40U;
+ uint64_t f01 = f010;
+ uint64_t f111 = f110;
+ uint64_t f2 = f20;
+ uint64_t f3 = f30;
+ uint64_t f4 = f40;
+ e[0U] = f01;
+ e[1U] = f111;
+ e[2U] = f2;
+ e[3U] = f3;
+ e[4U] = f4;
+ uint64_t b = (uint64_t)1U << rem * (uint32_t)8U % (uint32_t)26U; /* shift count is (rem*8) % 26: bit offset of 2^(8*rem) inside its limb */
+ uint64_t mask = b;
+ uint64_t fi = e[rem * (uint32_t)8U / (uint32_t)26U]; /* limb holding bit 8*rem; index is at most 120/26 = 4 */
+ e[rem * (uint32_t)8U / (uint32_t)26U] = fi | mask; /* pad bit sits directly after the last message byte */
+ uint64_t *r = pre;
+ uint64_t *r5 = pre + (uint32_t)5U;
+ uint64_t r0 = r[0U];
+ uint64_t r1 = r[1U];
+ uint64_t r2 = r[2U];
+ uint64_t r3 = r[3U];
+ uint64_t r4 = r[4U];
+ uint64_t r51 = r5[1U];
+ uint64_t r52 = r5[2U];
+ uint64_t r53 = r5[3U];
+ uint64_t r54 = r5[4U];
+ uint64_t f10 = e[0U];
+ uint64_t f11 = e[1U];
+ uint64_t f12 = e[2U];
+ uint64_t f13 = e[3U];
+ uint64_t f14 = e[4U];
+ uint64_t a0 = acc[0U];
+ uint64_t a1 = acc[1U];
+ uint64_t a2 = acc[2U];
+ uint64_t a3 = acc[3U];
+ uint64_t a4 = acc[4U];
+ uint64_t a01 = a0 + f10;
+ uint64_t a11 = a1 + f11;
+ uint64_t a21 = a2 + f12;
+ uint64_t a31 = a3 + f13;
+ uint64_t a41 = a4 + f14;
+ uint64_t a02 = r0 * a01;
+ uint64_t a12 = r1 * a01;
+ uint64_t a22 = r2 * a01;
+ uint64_t a32 = r3 * a01;
+ uint64_t a42 = r4 * a01;
+ uint64_t a03 = a02 + r54 * a11;
+ uint64_t a13 = a12 + r0 * a11;
+ uint64_t a23 = a22 + r1 * a11;
+ uint64_t a33 = a32 + r2 * a11;
+ uint64_t a43 = a42 + r3 * a11;
+ uint64_t a04 = a03 + r53 * a21;
+ uint64_t a14 = a13 + r54 * a21;
+ uint64_t a24 = a23 + r0 * a21;
+ uint64_t a34 = a33 + r1 * a21;
+ uint64_t a44 = a43 + r2 * a21;
+ uint64_t a05 = a04 + r52 * a31;
+ uint64_t a15 = a14 + r53 * a31;
+ uint64_t a25 = a24 + r54 * a31;
+ uint64_t a35 = a34 + r0 * a31;
+ uint64_t a45 = a44 + r1 * a31;
+ uint64_t a06 = a05 + r51 * a41;
+ uint64_t a16 = a15 + r52 * a41;
+ uint64_t a26 = a25 + r53 * a41;
+ uint64_t a36 = a35 + r54 * a41;
+ uint64_t a46 = a45 + r0 * a41;
+ uint64_t t0 = a06;
+ uint64_t t1 = a16;
+ uint64_t t2 = a26;
+ uint64_t t3 = a36;
+ uint64_t t4 = a46;
+ uint64_t mask26 = (uint64_t)0x3ffffffU;
+ uint64_t z0 = t0 >> (uint32_t)26U;
+ uint64_t z1 = t3 >> (uint32_t)26U;
+ uint64_t x0 = t0 & mask26;
+ uint64_t x3 = t3 & mask26;
+ uint64_t x1 = t1 + z0;
+ uint64_t x4 = t4 + z1;
+ uint64_t z01 = x1 >> (uint32_t)26U;
+ uint64_t z11 = x4 >> (uint32_t)26U;
+ uint64_t t = z11 << (uint32_t)2U;
+ uint64_t z12 = z11 + t;
+ uint64_t x11 = x1 & mask26;
+ uint64_t x41 = x4 & mask26;
+ uint64_t x2 = t2 + z01;
+ uint64_t x01 = x0 + z12;
+ uint64_t z02 = x2 >> (uint32_t)26U;
+ uint64_t z13 = x01 >> (uint32_t)26U;
+ uint64_t x21 = x2 & mask26;
+ uint64_t x02 = x01 & mask26;
+ uint64_t x31 = x3 + z02;
+ uint64_t x12 = x11 + z13;
+ uint64_t z03 = x31 >> (uint32_t)26U;
+ uint64_t x32 = x31 & mask26;
+ uint64_t x42 = x41 + z03;
+ uint64_t o0 = x02;
+ uint64_t o1 = x12;
+ uint64_t o2 = x21;
+ uint64_t o3 = x32;
+ uint64_t o4 = x42;
+ acc[0U] = o0;
+ acc[1U] = o1;
+ acc[2U] = o2;
+ acc[3U] = o3;
+ acc[4U] = o4;
+ return; /* redundant: nothing follows the if-block */
+ }
+}
+
+void /* Finalise: fully reduce acc modulo 2^130 - 5, add key[16..31] modulo 2^128, write the 16-byte tag. */
+Hacl_Poly1305_32_poly1305_finish(uint8_t *tag, uint8_t *key, uint64_t *ctx)
+{ /* key is the same 32-byte key given to init; ctx[0..4] is overwritten with the reduced accumulator */
+ uint64_t *acc = ctx;
+ uint8_t *ks = key + (uint32_t)16U; /* second key half, added to the result at the end */
+ uint64_t f0 = acc[0U];
+ uint64_t f13 = acc[1U];
+ uint64_t f23 = acc[2U];
+ uint64_t f33 = acc[3U];
+ uint64_t f40 = acc[4U];
+ uint64_t l0 = f0 + (uint64_t)0U; /* first carry pass: limb 0 up to limb 4, each masked to 26 bits */
+ uint64_t tmp00 = l0 & (uint64_t)0x3ffffffU;
+ uint64_t c00 = l0 >> (uint32_t)26U;
+ uint64_t l1 = f13 + c00;
+ uint64_t tmp10 = l1 & (uint64_t)0x3ffffffU;
+ uint64_t c10 = l1 >> (uint32_t)26U;
+ uint64_t l2 = f23 + c10;
+ uint64_t tmp20 = l2 & (uint64_t)0x3ffffffU;
+ uint64_t c20 = l2 >> (uint32_t)26U;
+ uint64_t l3 = f33 + c20;
+ uint64_t tmp30 = l3 & (uint64_t)0x3ffffffU;
+ uint64_t c30 = l3 >> (uint32_t)26U;
+ uint64_t l4 = f40 + c30;
+ uint64_t tmp40 = l4 & (uint64_t)0x3ffffffU;
+ uint64_t c40 = l4 >> (uint32_t)26U;
+ uint64_t f010 = tmp00 + c40 * (uint64_t)5U; /* carry out of bit 130 folds back into limb 0 as 5 * carry */
+ uint64_t f110 = tmp10;
+ uint64_t f210 = tmp20;
+ uint64_t f310 = tmp30;
+ uint64_t f410 = tmp40;
+ uint64_t l = f010 + (uint64_t)0U; /* second carry pass, identical in shape to the first */
+ uint64_t tmp0 = l & (uint64_t)0x3ffffffU;
+ uint64_t c0 = l >> (uint32_t)26U;
+ uint64_t l5 = f110 + c0;
+ uint64_t tmp1 = l5 & (uint64_t)0x3ffffffU;
+ uint64_t c1 = l5 >> (uint32_t)26U;
+ uint64_t l6 = f210 + c1;
+ uint64_t tmp2 = l6 & (uint64_t)0x3ffffffU;
+ uint64_t c2 = l6 >> (uint32_t)26U;
+ uint64_t l7 = f310 + c2;
+ uint64_t tmp3 = l7 & (uint64_t)0x3ffffffU;
+ uint64_t c3 = l7 >> (uint32_t)26U;
+ uint64_t l8 = f410 + c3;
+ uint64_t tmp4 = l8 & (uint64_t)0x3ffffffU;
+ uint64_t c4 = l8 >> (uint32_t)26U;
+ uint64_t f02 = tmp0 + c4 * (uint64_t)5U;
+ uint64_t f12 = tmp1;
+ uint64_t f22 = tmp2;
+ uint64_t f32 = tmp3;
+ uint64_t f42 = tmp4;
+ uint64_t mh = (uint64_t)0x3ffffffU; /* limbs 1..4 of p = 2^130 - 5 */
+ uint64_t ml = (uint64_t)0x3fffffbU; /* limb 0 of p: 2^26 - 5 */
+ uint64_t mask = FStar_UInt64_eq_mask(f42, mh); /* helper defined elsewhere; presumably all-ones when equal, else 0 - confirm */
+ uint64_t mask1 = mask & FStar_UInt64_eq_mask(f32, mh);
+ uint64_t mask2 = mask1 & FStar_UInt64_eq_mask(f22, mh);
+ uint64_t mask3 = mask2 & FStar_UInt64_eq_mask(f12, mh);
+ uint64_t mask4 = mask3 & ~~FStar_UInt64_gte_mask(f02, ml); /* the double ~ cancels out; mask4 selects the case acc >= p without branching */
+ uint64_t ph = mask4 & mh; /* p's limbs, or zeros when no subtraction is needed */
+ uint64_t pl = mask4 & ml;
+ uint64_t o0 = f02 - pl; /* conditional subtraction of p */
+ uint64_t o1 = f12 - ph;
+ uint64_t o2 = f22 - ph;
+ uint64_t o3 = f32 - ph;
+ uint64_t o4 = f42 - ph;
+ uint64_t f011 = o0;
+ uint64_t f111 = o1;
+ uint64_t f211 = o2;
+ uint64_t f311 = o3;
+ uint64_t f411 = o4;
+ acc[0U] = f011; /* reduced value is stored back into ctx[0..4] */
+ acc[1U] = f111;
+ acc[2U] = f211;
+ acc[3U] = f311;
+ acc[4U] = f411;
+ uint64_t f00 = acc[0U];
+ uint64_t f1 = acc[1U];
+ uint64_t f2 = acc[2U];
+ uint64_t f3 = acc[3U];
+ uint64_t f4 = acc[4U];
+ uint64_t f01 = f00;
+ uint64_t f112 = f1;
+ uint64_t f212 = f2;
+ uint64_t f312 = f3;
+ uint64_t f41 = f4;
+ uint64_t lo = (f01 | f112 << (uint32_t)26U) | f212 << (uint32_t)52U; /* repack limbs into bits 0..63 */
+ uint64_t hi = (f212 >> (uint32_t)12U | f312 << (uint32_t)14U) | f41 << (uint32_t)40U; /* bits 64..127; anything at bit 128 or above shifts out */
+ uint64_t f10 = lo;
+ uint64_t f11 = hi;
+ uint64_t u0 = load64_le(ks); /* key[16..23] */
+ uint64_t lo0 = u0;
+ uint64_t u = load64_le(ks + (uint32_t)8U); /* key[24..31] */
+ uint64_t hi0 = u;
+ uint64_t f20 = lo0;
+ uint64_t f21 = hi0;
+ uint64_t r0 = f10 + f20; /* 128-bit addition done as two 64-bit halves */
+ uint64_t r1 = f11 + f21;
+ uint64_t c = (r0 ^ ((r0 ^ f20) | ((r0 - f20) ^ f20))) >> (uint32_t)63U; /* branch-free (r0 < f20), i.e. the carry out of the low half */
+ uint64_t r11 = r1 + c;
+ uint64_t f30 = r0;
+ uint64_t f31 = r11;
+ store64_le(tag, f30); /* tag[0..7]; store64_le is defined elsewhere, little-endian per its name */
+ store64_le(tag + (uint32_t)8U, f31); /* tag[8..15] */
+}
+
+void /* One-shot Poly1305: authenticate text[0..len) under the 32-byte key and write 16 bytes to tag. */
+Hacl_Poly1305_32_poly1305_mac(uint8_t *tag, uint32_t len, uint8_t *text, uint8_t *key)
+{
+ uint64_t state[25U] = { 0U }; /* 5 accumulator limbs followed by 20 precomputed key limbs */
+ Hacl_Poly1305_32_poly1305_init(state, key); /* zero the accumulator, derive r from key[0..15] */
+ Hacl_Poly1305_32_poly1305_update(state, len, text); /* absorb all full blocks plus any partial tail */
+ Hacl_Poly1305_32_poly1305_finish(tag, key, state); /* reduce, add key[16..31], emit the tag */
+}
diff --git a/security/nss/lib/freebl/verified/Hacl_Poly1305_32.h b/security/nss/lib/freebl/verified/Hacl_Poly1305_32.h
new file mode 100644
index 0000000000..442b5db429
--- /dev/null
+++ b/security/nss/lib/freebl/verified/Hacl_Poly1305_32.h
@@ -0,0 +1,49 @@
+/* MIT License
+ *
+ * Copyright (c) 2016-2020 INRIA, CMU and Microsoft Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "kremlin/internal/types.h"
+#include "kremlin/lowstar_endianness.h"
+#include <string.h>
+#include <stdbool.h>
+
+#ifndef __Hacl_Poly1305_32_H /* include guard; the #include lines above it sit outside the guard */
+#define __Hacl_Poly1305_32_H
+
+#include "Hacl_Kremlib.h"
+
+extern uint32_t Hacl_Poly1305_32_blocklen; /* defined as 16 in Hacl_Poly1305_32.c */
+
+typedef uint64_t *Hacl_Poly1305_32_poly1305_ctx; /* a context is a uint64_t array; init writes indices 0..24, so it needs 25 words */
+
+void Hacl_Poly1305_32_poly1305_init(uint64_t *ctx, uint8_t *key); /* zeroes the accumulator and derives r from key[0..15] */
+
+void Hacl_Poly1305_32_poly1305_update1(uint64_t *ctx, uint8_t *text); /* absorbs exactly 16 bytes from text */
+
+void Hacl_Poly1305_32_poly1305_update(uint64_t *ctx, uint32_t len, uint8_t *text); /* absorbs len bytes; a partial last block is padded at once, not buffered */
+
+void Hacl_Poly1305_32_poly1305_finish(uint8_t *tag, uint8_t *key, uint64_t *ctx); /* writes 16 bytes to tag; reads key[16..31]; rewrites ctx[0..4] */
+
+void Hacl_Poly1305_32_poly1305_mac(uint8_t *tag, uint32_t len, uint8_t *text, uint8_t *key); /* init + update + finish on a stack-local context */
+
+#define __Hacl_Poly1305_32_H_DEFINED /* defined but never tested in this header */
+#endif /* __Hacl_Poly1305_32_H */
diff --git a/security/nss/lib/freebl/verified/kremlin/include/kremlin/internal/callconv.h b/security/nss/lib/freebl/verified/kremlin/include/kremlin/internal/callconv.h
new file mode 100644
index 0000000000..8278b157d3
--- /dev/null
+++ b/security/nss/lib/freebl/verified/kremlin/include/kremlin/internal/callconv.h
@@ -0,0 +1,46 @@
+/* Copyright (c) INRIA and Microsoft Corporation. All rights reserved.
+ Licensed under the Apache 2.0 License. */
+
+#ifndef __KREMLIN_CALLCONV_H
+#define __KREMLIN_CALLCONV_H
+
+/******************************************************************************/
+/* Some macros to ease compatibility */
+/******************************************************************************/
+
+/* We want to generate __cdecl safely without worrying about it being undefined.
+ * When using MSVC, these are always available. When using MinGW, these are
+ * defined too. They have no meaning for other platforms, so we define them to
+ * be empty macros in other situations. */
+#ifndef _MSC_VER /* MSVC is skipped entirely; elsewhere each name is only defined if still missing */
+#ifndef __cdecl
+#define __cdecl /* expands to nothing */
+#endif /* __cdecl */
+#ifndef __stdcall
+#define __stdcall /* expands to nothing */
+#endif /* __stdcall */
+#ifndef __fastcall
+#define __fastcall /* expands to nothing */
+#endif /* __fastcall */
+#endif /* !_MSC_VER */
+
+/* Since KreMLin emits the inline keyword unconditionally, we follow the
+ * guidelines at https://gcc.gnu.org/onlinedocs/gcc/Inline.html and make this
+ * __inline__ to ensure the code compiles with -std=c90 and earlier. */
+#ifdef __GNUC__
+#define inline __inline__ /* affects every later use of inline in any file that includes this header */
+#endif /* __GNUC__ */
+
+/* GCC gets always_inline; Clang and every other compiler get plain inline.
+ * The test must be __GNUC__: no compiler predefines __GNU_C__. */
+#ifdef __GNUC__
+#ifndef __clang__ /* Clang also defines __GNUC__, so it is excluded explicitly */
+#define force_inline inline __attribute__((always_inline))
+#else
+#define force_inline inline
+#endif /* !__clang__ */
+#else
+#define force_inline inline
+#endif /* __GNUC__ */
+
+#endif
diff --git a/security/nss/lib/freebl/verified/kremlin/include/kremlin/internal/compat.h b/security/nss/lib/freebl/verified/kremlin/include/kremlin/internal/compat.h
new file mode 100644
index 0000000000..964d1c52aa
--- /dev/null
+++ b/security/nss/lib/freebl/verified/kremlin/include/kremlin/internal/compat.h
@@ -0,0 +1,32 @@
+/* Copyright (c) INRIA and Microsoft Corporation. All rights reserved.
+ Licensed under the Apache 2.0 License. */
+
+#ifndef KRML_COMPAT_H
+#define KRML_COMPAT_H
+
+#include <inttypes.h>
+
+/* A series of typedefs and macros that give C implementations of types that
+ * are not Low*, to facilitate porting programs to Low*. */
+
+typedef struct { /* a length plus a read-only data pointer */
+ uint32_t length;
+ const char *data;
+} FStar_Bytes_bytes;
+
+typedef int32_t Prims_pos, Prims_nat, Prims_nonzero, Prims_int, /* all five names alias int32_t */
+ krml_checked_int_t;
+
+#define RETURN_OR(x) /* return x as int32_t, or print and exit(252) if it does not fit */ \
+ do { \
+ int64_t __ret = (x); /* parenthesized so a comma expression cannot declare a second variable */ \
+ if (__ret < INT32_MIN || INT32_MAX < __ret) { \
+ KRML_HOST_PRINTF( /* KRML_HOST_PRINTF and KRML_HOST_EXIT are not defined in this header */ \
+ "Prims.{int,nat,pos} integer overflow at %s:%d\n", __FILE__, \
+ __LINE__); \
+ KRML_HOST_EXIT(252); \
+ } \
+ return (int32_t)__ret; /* usable only inside a function whose return type accepts int32_t */ \
+ } while (0)
+
+#endif
diff --git a/security/nss/lib/freebl/verified/kremlin/include/kremlin/internal/target.h b/security/nss/lib/freebl/verified/kremlin/include/kremlin/internal/target.h
new file mode 100644
index 0000000000..25f0fd0ac4
--- /dev/null
+++ b/security/nss/lib/freebl/verified/kremlin/include/kremlin/internal/target.h
@@ -0,0 +1,113 @@
+/* Copyright (c) INRIA and Microsoft Corporation. All rights reserved.
+ Licensed under the Apache 2.0 License. */
+
+#ifndef __KREMLIN_TARGET_H
+#define __KREMLIN_TARGET_H
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <stdbool.h>
+#include <inttypes.h>
+#include <limits.h>
+
+#include "kremlin/internal/callconv.h"
+
+/******************************************************************************/
+/* Macros that KreMLin will generate. */
+/******************************************************************************/
+
+/* For "bare" targets that do not have a C stdlib, the user might want to use
+ * [-add-early-include '"mydefinitions.h"'] and override these. */
+#ifndef KRML_HOST_PRINTF
+#define KRML_HOST_PRINTF printf /* default: stdio printf */
+#endif /* KRML_HOST_PRINTF */
+
+#if ( /* variadic macro, so only provided for C99 and later */ \
+ (defined __STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) && \
+ (!(defined KRML_HOST_EPRINTF)))
+#define KRML_HOST_EPRINTF(...) fprintf(stderr, __VA_ARGS__)
+#endif /* C99 && !KRML_HOST_EPRINTF */
+
+#ifndef KRML_HOST_EXIT
+#define KRML_HOST_EXIT exit /* default: stdlib exit */
+#endif /* KRML_HOST_EXIT */
+
+#ifndef KRML_HOST_MALLOC
+#define KRML_HOST_MALLOC malloc /* default: stdlib malloc */
+#endif /* KRML_HOST_MALLOC */
+
+#ifndef KRML_HOST_CALLOC
+#define KRML_HOST_CALLOC calloc /* default: stdlib calloc */
+#endif /* KRML_HOST_CALLOC */
+
+#ifndef KRML_HOST_FREE
+#define KRML_HOST_FREE free /* default: stdlib free */
+#endif /* KRML_HOST_FREE */
+
+#ifndef KRML_HOST_TIME /* skipped when the host already supplies its own time function */
+
+#include <time.h>
+
+/* Default KRML_HOST_TIME. Returns int32_t because Prims_nat is not yet in scope. */
+inline static int32_t
+krml_time(void) /* (void) gives a real prototype, so calls with arguments are diagnosed */
+{
+ return (int32_t)time(NULL); /* time_t is narrowed to 32 bits here */
+}
+
+#define KRML_HOST_TIME krml_time
+#endif /* KRML_HOST_TIME */
+
+/* In statement position, exiting is easy: print the location, then exit(254). */
+#define KRML_EXIT \
+ do { \
+ KRML_HOST_PRINTF("Unimplemented function at %s:%d\n", __FILE__, __LINE__); \
+ KRML_HOST_EXIT(254); \
+ } while (0)
+
+/* In expression position, use the comma-operator and a malloc to return an
+ * expression of the right size. KreMLin passes t as the parameter to the macro.
+ * The malloc operand only gives the expression type t; it follows the exit. */
+#define KRML_EABORT(t, msg) \
+ (KRML_HOST_PRINTF("KreMLin abort at %s:%d\n%s\n", __FILE__, __LINE__, msg), \
+ KRML_HOST_EXIT(255), *((t *)KRML_HOST_MALLOC(sizeof(t))))
+
+/* In FStar.Buffer.fst, the size of arrays is uint32_t, but it's a number of
+ * *elements*. Do an ugly, run-time check (some of which KreMLin can eliminate).
+ */
+
+#ifdef __GNUC__
+#define _KRML_CHECK_SIZE_PRAGMA /* NOTE(review): no diagnostic push/pop, so -Wtype-limits stays off after first use */ \
+ _Pragma("GCC diagnostic ignored \"-Wtype-limits\"")
+#else
+#define _KRML_CHECK_SIZE_PRAGMA
+#endif /* __GNUC__ */
+
+#define KRML_CHECK_SIZE(size_elt, sz) /* exit(253) if sz elements of size_elt bytes would exceed SIZE_MAX */ \
+ do { \
+ _KRML_CHECK_SIZE_PRAGMA \
+ if (((size_t)(sz)) > ((size_t)(SIZE_MAX / (size_elt)))) { \
+ KRML_HOST_PRINTF( \
+ "Maximum allocatable size exceeded, aborting before overflow at " \
+ "%s:%d\n", \
+ __FILE__, __LINE__); \
+ KRML_HOST_EXIT(253); \
+ } \
+ } while (0)
+
+#if defined(_MSC_VER) && _MSC_VER < 1900 /* older MSVC: use _snprintf_s with _TRUNCATE instead of snprintf */
+#define KRML_HOST_SNPRINTF(buf, sz, fmt, arg) _snprintf_s(buf, sz, _TRUNCATE, fmt, arg)
+#else
+#define KRML_HOST_SNPRINTF(buf, sz, fmt, arg) snprintf(buf, sz, fmt, arg)
+#endif /* _MSC_VER < 1900 */
+
+#if defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ > 4))
+#define KRML_DEPRECATED(x) __attribute__((deprecated(x))) /* GCC 4.5 and later, any major version */
+#elif defined(_MSC_VER) && !defined(__GNUC__)
+#define KRML_DEPRECATED(x) __declspec(deprecated(x))
+#else
+/* GCC < 4.5 lacks deprecated(msg); it and unknown compilers get an empty expansion. */
+#define KRML_DEPRECATED(x)
+#endif /* KRML_DEPRECATED selection */
+
+#endif
diff --git a/security/nss/lib/freebl/verified/kremlin/include/kremlin/internal/types.h b/security/nss/lib/freebl/verified/kremlin/include/kremlin/internal/types.h
new file mode 100644
index 0000000000..4654b8c017
--- /dev/null
+++ b/security/nss/lib/freebl/verified/kremlin/include/kremlin/internal/types.h
@@ -0,0 +1,99 @@
+/* Copyright (c) INRIA and Microsoft Corporation. All rights reserved.
+ Licensed under the Apache 2.0 License. */
+
+#ifndef KRML_TYPES_H
+#define KRML_TYPES_H
+
+#include <inttypes.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+
+/* Types which are either abstract, meaning they have to be implemented in C, or
+ * which are models, meaning that they are swapped out at compile-time for
+ * hand-written C types (in which case they're marked as noextract). */
+
+typedef uint64_t FStar_UInt64_t, FStar_UInt64_t_; /* each line declares two aliases: the name and the name with a trailing underscore */
+typedef int64_t FStar_Int64_t, FStar_Int64_t_;
+typedef uint32_t FStar_UInt32_t, FStar_UInt32_t_;
+typedef int32_t FStar_Int32_t, FStar_Int32_t_;
+typedef uint16_t FStar_UInt16_t, FStar_UInt16_t_;
+typedef int16_t FStar_Int16_t, FStar_Int16_t_;
+typedef uint8_t FStar_UInt8_t, FStar_UInt8_t_;
+typedef int8_t FStar_Int8_t, FStar_Int8_t_;
+
+/* Only useful when building Kremlib, because it's in the dependency graph of
+ * FStar.Int.Cast. */
+typedef uint64_t FStar_UInt63_t, FStar_UInt63_t_; /* the 63-bit types are stored in full 64-bit integers */
+typedef int64_t FStar_Int63_t, FStar_Int63_t_;
+
+typedef double FStar_Float_float;
+typedef uint32_t FStar_Char_char; /* a char is a 32-bit unsigned value here, not a C char */
+typedef FILE *FStar_IO_fd_read, *FStar_IO_fd_write; /* both descriptors are stdio FILE pointers */
+
+typedef void *FStar_Dyn_dyn;
+
+typedef const char *C_String_t, *C_String_t_, *C_Compat_String_t, *C_Compat_String_t_; /* four aliases for const char * */
+
+typedef int exit_code;
+typedef FILE *channel;
+
+typedef unsigned long long TestLib_cycles;
+
+typedef uint64_t FStar_Date_dateTime, FStar_Date_timeSpan;
+
+/* Prims.string is no longer illegal with the new model in LowStar.Printf;
+ * only operations that produce a Prims_string are illegal. Bring the
+ * definition into scope by default. */
+typedef const char *Prims_string;
+
+#if (defined(_MSC_VER) && defined(_M_X64) && !defined(__clang__)) /* 64-bit MSVC proper, not Clang in MSVC mode */
+#define IS_MSVC64 1
+#endif /* IS_MSVC64 */
+
+/* This code makes a number of assumptions and should be refined. In particular,
+ * it assumes that any non-MSVC amd64 compiler supports int128. Maybe it would
+ * be easier to just test for defined(__SIZEOF_INT128__) only? */
+#if (defined(__x86_64__) || \
+ defined(__x86_64) || \
+ defined(__aarch64__) || \
+ (defined(__powerpc64__) && defined(__LITTLE_ENDIAN__)) || \
+ defined(__s390x__) || \
+ (defined(_MSC_VER) && !defined(_M_X64) && defined(__clang__)) || \
+ (defined(__mips__) && defined(__LP64__)) || \
+ (defined(__riscv) && __riscv_xlen == 64) || \
+ defined(__SIZEOF_INT128__))
+#define HAS_INT128 1 /* NOTE(review): the _MSC_VER clause above selects Clang on non-x64 MSVC targets - confirm __int128 exists there */
+#endif /* HAS_INT128 */
+
+/* The uint128 type is a special case since we offer several implementations of
+ * it, depending on the compiler and whether the user wants the verified
+ * implementation or not. */
+#if !defined(KRML_VERIFIED_UINT128) && defined(IS_MSVC64)
+#include <emmintrin.h>
+typedef __m128i FStar_UInt128_uint128; /* 64-bit MSVC: SSE2 vector type */
+#elif !defined(KRML_VERIFIED_UINT128) && defined(HAS_INT128)
+typedef unsigned __int128 FStar_UInt128_uint128; /* compiler-native 128-bit integer */
+#else
+typedef struct FStar_UInt128_uint128_s { /* fallback, and always used when KRML_VERIFIED_UINT128 is defined */
+ uint64_t low;
+ uint64_t high;
+} FStar_UInt128_uint128;
+#endif /* uint128 representation */
+
+/* The former is defined once, here (otherwise, conflicts for test-c89). The
+ * latter is for internal use. */
+typedef FStar_UInt128_uint128 FStar_UInt128_t, uint128_t;
+
+#include "kremlin/lowstar_endianness.h"
+
+#if !defined(KRML_VERIFIED_UINT128) && defined(IS_MSVC64)
+#include "fstar_uint128_msvc.h"
+#elif !defined(KRML_VERIFIED_UINT128) && defined(HAS_INT128)
+#include "fstar_uint128_gcc64.h"
+#else
+#include "FStar_UInt128_Verified.h"
+#include "fstar_uint128_struct_endianness.h"
+#endif
+
+#endif
diff --git a/security/nss/lib/freebl/verified/kremlin/include/kremlin/lowstar_endianness.h b/security/nss/lib/freebl/verified/kremlin/include/kremlin/lowstar_endianness.h
new file mode 100644
index 0000000000..2a13cc9f15
--- /dev/null
+++ b/security/nss/lib/freebl/verified/kremlin/include/kremlin/lowstar_endianness.h
@@ -0,0 +1,242 @@
+/* Copyright (c) INRIA and Microsoft Corporation. All rights reserved.
+ Licensed under the Apache 2.0 License. */
+
+#ifndef __LOWSTAR_ENDIANNESS_H
+#define __LOWSTAR_ENDIANNESS_H
+
+#include <string.h>
+#include <inttypes.h>
+
+/******************************************************************************/
+/* Implementing C.fst (part 2: endian-ness macros) */
+/******************************************************************************/
+
+/* ... for Linux */
+#if defined(__linux__) || defined(__CYGWIN__) || defined(__USE_SYSTEM_ENDIAN_H__)
+#include <endian.h>
+
+/* ... for OSX */
+#elif defined(__APPLE__)
+#include <libkern/OSByteOrder.h>
+/* Map the htoleNN/leNNtoh/htobeNN/beNNtoh names onto the OSSwap* macros
+ * provided by <libkern/OSByteOrder.h>. */
+#define htole64(x) OSSwapHostToLittleInt64(x)
+#define le64toh(x) OSSwapLittleToHostInt64(x)
+#define htobe64(x) OSSwapHostToBigInt64(x)
+#define be64toh(x) OSSwapBigToHostInt64(x)
+
+#define htole16(x) OSSwapHostToLittleInt16(x)
+#define le16toh(x) OSSwapLittleToHostInt16(x)
+#define htobe16(x) OSSwapHostToBigInt16(x)
+#define be16toh(x) OSSwapBigToHostInt16(x)
+
+#define htole32(x) OSSwapHostToLittleInt32(x)
+#define le32toh(x) OSSwapLittleToHostInt32(x)
+#define htobe32(x) OSSwapHostToBigInt32(x)
+#define be32toh(x) OSSwapBigToHostInt32(x)
+
+/* ... for Solaris */
+#elif defined(__sun__)
+#include <sys/byteorder.h>
+/* The same LE_NN / BE_NN macro is used for both directions (host-to-X and
+ * X-to-host) of each width. */
+#define htole64(x) LE_64(x)
+#define le64toh(x) LE_64(x)
+#define htobe64(x) BE_64(x)
+#define be64toh(x) BE_64(x)
+
+#define htole16(x) LE_16(x)
+#define le16toh(x) LE_16(x)
+#define htobe16(x) BE_16(x)
+#define be16toh(x) BE_16(x)
+
+#define htole32(x) LE_32(x)
+#define le32toh(x) LE_32(x)
+#define htobe32(x) BE_32(x)
+#define be32toh(x) BE_32(x)
+
+/* ... for the BSDs */
+#elif defined(__FreeBSD__) || defined(__NetBSD__) || defined(__DragonFly__)
+#include <sys/endian.h>
+#elif defined(__OpenBSD__)
+#include <endian.h>
+
+/* ... for Windows (MSVC)... not targeting XBOX 360! */
+#elif defined(_MSC_VER)
+
+#include <stdlib.h>
+/* This branch treats the host as little-endian: the "le" conversions are the
+ * identity and the "be" conversions call the _byteswap_* routines. */
+#define htobe16(x) _byteswap_ushort(x)
+#define htole16(x) (x)
+#define be16toh(x) _byteswap_ushort(x)
+#define le16toh(x) (x)
+
+#define htobe32(x) _byteswap_ulong(x)
+#define htole32(x) (x)
+#define be32toh(x) _byteswap_ulong(x)
+#define le32toh(x) (x)
+
+#define htobe64(x) _byteswap_uint64(x)
+#define htole64(x) (x)
+#define be64toh(x) _byteswap_uint64(x)
+#define le64toh(x) (x)
+
+/* ... for Windows (GCC-like, e.g. mingw or clang) */
+#elif (defined(_WIN32) || defined(_WIN64)) && \
+ (defined(__GNUC__) || defined(__clang__))
+
+/* This branch treats the host as little-endian: the "le" conversions are the
+ * identity and the "be" conversions use the __builtin_bswapNN builtins. */
+#define htobe16(x) __builtin_bswap16(x)
+#define htole16(x) (x)
+#define be16toh(x) __builtin_bswap16(x)
+#define le16toh(x) (x)
+
+#define htobe32(x) __builtin_bswap32(x)
+#define htole32(x) (x)
+#define be32toh(x) __builtin_bswap32(x)
+#define le32toh(x) (x)
+
+#define htobe64(x) __builtin_bswap64(x)
+#define htole64(x) (x)
+#define be64toh(x) __builtin_bswap64(x)
+#define le64toh(x) (x)
+
+/* ... generic big-endian fallback code */
+#elif defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+
+/* byte swapping code inspired by:
+ * https://github.com/rweather/arduinolibs/blob/master/libraries/Crypto/utility/EndianUtil.h
+ * */
+
+/* Big-endian host: the "be" conversions are the identity, the "le"
+ * conversions reverse the byte order of their argument. The 16-bit variants
+ * are provided because the 16-bit load/store accessors of this header expand
+ * to them. */
+#define htobe16(x) (x)
+#define be16toh(x) (x)
+#define htole16(x) \
+    (__extension__({ \
+        uint16_t _temp16 = (x); \
+        (uint16_t)((_temp16 >> 8) | (_temp16 << 8)); \
+    }))
+#define le16toh(x) (htole16((x)))
+
+#define htobe32(x) (x)
+#define be32toh(x) (x)
+#define htole32(x) \
+    (__extension__({ \
+        uint32_t _temp = (x); \
+        ((_temp >> 24) & 0x000000FF) | ((_temp >> 8) & 0x0000FF00) | \
+            ((_temp << 8) & 0x00FF0000) | ((_temp << 24) & 0xFF000000); \
+    }))
+#define le32toh(x) (htole32((x)))
+
+#define htobe64(x) (x)
+#define be64toh(x) (x)
+/* Each 32-bit half must itself be byte-reversed (htole32, which swaps in this
+ * branch) before the halves are exchanged. Using htobe32 here would be a
+ * no-op and would only swap the two words. */
+#define htole64(x) \
+    (__extension__({ \
+        uint64_t __temp = (x); \
+        uint32_t __low = htole32((uint32_t)__temp); \
+        uint32_t __high = htole32((uint32_t)(__temp >> 32)); \
+        (((uint64_t)__low) << 32) | __high; \
+    }))
+#define le64toh(x) (htole64((x)))
+
+/* ... generic little-endian fallback code */
+#elif defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+
+/* Little-endian host: the "le" conversions are the identity, the "be"
+ * conversions reverse the byte order of their argument. The 16-bit variants
+ * are provided because the 16-bit load/store accessors of this header expand
+ * to them. */
+#define htole16(x) (x)
+#define le16toh(x) (x)
+#define htobe16(x) \
+    (__extension__({ \
+        uint16_t _temp16 = (x); \
+        (uint16_t)((_temp16 >> 8) | (_temp16 << 8)); \
+    }))
+#define be16toh(x) (htobe16((x)))
+
+#define htole32(x) (x)
+#define le32toh(x) (x)
+#define htobe32(x) \
+    (__extension__({ \
+        uint32_t _temp = (x); \
+        ((_temp >> 24) & 0x000000FF) | ((_temp >> 8) & 0x0000FF00) | \
+            ((_temp << 8) & 0x00FF0000) | ((_temp << 24) & 0xFF000000); \
+    }))
+#define be32toh(x) (htobe32((x)))
+
+#define htole64(x) (x)
+#define le64toh(x) (x)
+/* Byte-reverse each 32-bit half, then exchange the halves. */
+#define htobe64(x) \
+    (__extension__({ \
+        uint64_t __temp = (x); \
+        uint32_t __low = htobe32((uint32_t)__temp); \
+        uint32_t __high = htobe32((uint32_t)(__temp >> 32)); \
+        (((uint64_t)__low) << 32) | __high; \
+    }))
+#define be64toh(x) (htobe64((x)))
+
+/* ... couldn't determine endian-ness of the target platform */
+#else
+#error "Please define __BYTE_ORDER__!"
+
+#endif /* defined(__linux__) || ... */
+
+/* Loads and stores. These avoid undefined behavior due to unaligned memory
+ * accesses, via memcpy. */
+
+/* Reads sizeof(uint16_t) bytes starting at b and returns them as a uint16_t
+ * in host byte order. The copy goes through memcpy, so b does not need to be
+ * aligned. The buffer is only read, hence the const qualifier. */
+static inline uint16_t
+load16(const uint8_t *b)
+{
+    uint16_t x;
+    memcpy(&x, b, sizeof x);
+    return x;
+}
+
+/* Reads sizeof(uint32_t) bytes starting at b and returns them as a uint32_t
+ * in host byte order. The copy goes through memcpy, so b does not need to be
+ * aligned. The buffer is only read, hence the const qualifier. */
+static inline uint32_t
+load32(const uint8_t *b)
+{
+    uint32_t x;
+    memcpy(&x, b, sizeof x);
+    return x;
+}
+
+/* Reads sizeof(uint64_t) bytes starting at b and returns them as a uint64_t
+ * in host byte order. The copy goes through memcpy, so b does not need to be
+ * aligned. The buffer is only read, hence the const qualifier. */
+static inline uint64_t
+load64(const uint8_t *b)
+{
+    uint64_t x;
+    memcpy(&x, b, sizeof x);
+    return x;
+}
+
+/* Writes the object representation of i (host byte order) to the two bytes
+ * starting at b; memcpy keeps the store valid for unaligned b. */
+static inline void
+store16(uint8_t *b, uint16_t i)
+{
+    memcpy(b, &i, sizeof i);
+}
+
+/* Writes the object representation of i (host byte order) to the four bytes
+ * starting at b; memcpy keeps the store valid for unaligned b. */
+static inline void
+store32(uint8_t *b, uint32_t i)
+{
+    memcpy(b, &i, sizeof i);
+}
+
+/* Writes the object representation of i (host byte order) to the eight bytes
+ * starting at b; memcpy keeps the store valid for unaligned b. */
+static inline void
+store64(uint8_t *b, uint64_t i)
+{
+    memcpy(b, &i, sizeof i);
+}
+
+/* Legacy accessors so that this header can serve as an implementation of
+ * C.Endianness */
+/* Every accessor pairs a host-order load/store with the byte-order conversion
+ * macro of the same width: loads convert after reading, stores convert before
+ * writing. */
+#define load16_le(b) (le16toh(load16(b)))
+#define store16_le(b, i) (store16(b, htole16(i)))
+#define load16_be(b) (be16toh(load16(b)))
+#define store16_be(b, i) (store16(b, htobe16(i)))
+
+#define load32_le(b) (le32toh(load32(b)))
+#define store32_le(b, i) (store32(b, htole32(i)))
+#define load32_be(b) (be32toh(load32(b)))
+#define store32_be(b, i) (store32(b, htobe32(i)))
+
+#define load64_le(b) (le64toh(load64(b)))
+#define store64_le(b, i) (store64(b, htole64(i)))
+#define load64_be(b) (be64toh(load64(b)))
+#define store64_be(b, i) (store64(b, htobe64(i)))
+
+/* Co-existence of LowStar.Endianness and FStar.Endianness generates name
+ * conflicts, because of course both insist on having no prefixes. Until a
+ * prefix is added, or until we truly retire FStar.Endianness, solve this issue
+ * in an elegant way. */
+/* Each "...0" name expands to the accessor without the suffix. */
+#define load16_le0 load16_le
+#define store16_le0 store16_le
+#define load16_be0 load16_be
+#define store16_be0 store16_be
+
+#define load32_le0 load32_le
+#define store32_le0 store32_le
+#define load32_be0 load32_be
+#define store32_be0 store32_be
+
+#define load64_le0 load64_le
+#define store64_le0 store64_le
+#define load64_be0 load64_be
+#define store64_be0 store64_be
+
+/* The 128-bit accessors named here are not defined in this header. */
+#define load128_le0 load128_le
+#define store128_le0 store128_le
+#define load128_be0 load128_be
+#define store128_be0 store128_be
+
+#endif
diff --git a/security/nss/lib/freebl/verified/kremlin/kremlib/dist/minimal/FStar_UInt128.h b/security/nss/lib/freebl/verified/kremlin/kremlib/dist/minimal/FStar_UInt128.h
new file mode 100644
index 0000000000..6f254639f1
--- /dev/null
+++ b/security/nss/lib/freebl/verified/kremlin/kremlib/dist/minimal/FStar_UInt128.h
@@ -0,0 +1,76 @@
+/*
+ Copyright (c) INRIA and Microsoft Corporation. All rights reserved.
+ Licensed under the Apache 2.0 License.
+*/
+
+#include <inttypes.h>
+#include <stdbool.h>
+#include "kremlin/internal/compat.h"
+#include "kremlin/lowstar_endianness.h"
+#include "kremlin/internal/types.h"
+#include "kremlin/internal/target.h"
+
+#ifndef __FStar_UInt128_H
+#define __FStar_UInt128_H
+
+static inline FStar_UInt128_uint128
+FStar_UInt128_add(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b);
+
+static inline FStar_UInt128_uint128
+FStar_UInt128_add_underspec(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b);
+
+static inline FStar_UInt128_uint128
+FStar_UInt128_add_mod(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b);
+
+static inline FStar_UInt128_uint128
+FStar_UInt128_sub(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b);
+
+static inline FStar_UInt128_uint128
+FStar_UInt128_sub_underspec(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b);
+
+static inline FStar_UInt128_uint128
+FStar_UInt128_sub_mod(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b);
+
+static inline FStar_UInt128_uint128
+FStar_UInt128_logand(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b);
+
+static inline FStar_UInt128_uint128
+FStar_UInt128_logxor(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b);
+
+static inline FStar_UInt128_uint128
+FStar_UInt128_logor(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b);
+
+static inline FStar_UInt128_uint128 FStar_UInt128_lognot(FStar_UInt128_uint128 a);
+
+static inline FStar_UInt128_uint128
+FStar_UInt128_shift_left(FStar_UInt128_uint128 a, uint32_t s);
+
+static inline FStar_UInt128_uint128
+FStar_UInt128_shift_right(FStar_UInt128_uint128 a, uint32_t s);
+
+static inline bool FStar_UInt128_eq(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b);
+
+static inline bool FStar_UInt128_gt(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b);
+
+static inline bool FStar_UInt128_lt(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b);
+
+static inline bool FStar_UInt128_gte(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b);
+
+static inline bool FStar_UInt128_lte(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b);
+
+static inline FStar_UInt128_uint128
+FStar_UInt128_eq_mask(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b);
+
+static inline FStar_UInt128_uint128
+FStar_UInt128_gte_mask(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b);
+
+static inline FStar_UInt128_uint128 FStar_UInt128_uint64_to_uint128(uint64_t a);
+
+static inline uint64_t FStar_UInt128_uint128_to_uint64(FStar_UInt128_uint128 a);
+
+static inline FStar_UInt128_uint128 FStar_UInt128_mul32(uint64_t x, uint32_t y);
+
+static inline FStar_UInt128_uint128 FStar_UInt128_mul_wide(uint64_t x, uint64_t y);
+
+#define __FStar_UInt128_H_DEFINED
+#endif
diff --git a/security/nss/lib/freebl/verified/kremlin/kremlib/dist/minimal/FStar_UInt128_Verified.h b/security/nss/lib/freebl/verified/kremlin/kremlib/dist/minimal/FStar_UInt128_Verified.h
new file mode 100644
index 0000000000..a7da435c31
--- /dev/null
+++ b/security/nss/lib/freebl/verified/kremlin/kremlib/dist/minimal/FStar_UInt128_Verified.h
@@ -0,0 +1,329 @@
+/*
+ Copyright (c) INRIA and Microsoft Corporation. All rights reserved.
+ Licensed under the Apache 2.0 License.
+*/
+
+#include <inttypes.h>
+#include <stdbool.h>
+#include "kremlin/internal/types.h"
+#include "kremlin/internal/target.h"
+
+#ifndef __FStar_UInt128_Verified_H
+#define __FStar_UInt128_Verified_H
+
+#include "FStar_UInt_8_16_32_64.h"
+
+/* Branch-free unsigned comparison: evaluates to 1 when a < b and to 0
+ * otherwise. The expression uses only XOR, OR, subtraction and a shift, so
+ * the source contains no data-dependent branch. */
+static inline uint64_t
+FStar_UInt128_constant_time_carry(uint64_t a, uint64_t b)
+{
+ /* Bit 63 of the inner expression is the borrow out of a - b. */
+ return (a ^ ((a ^ b) | ((a - b) ^ b))) >> (uint32_t)63U;
+}
+
+/* Forwards both arguments, unchanged and in order, to
+ * FStar_UInt128_constant_time_carry and returns its result. */
+static inline uint64_t
+FStar_UInt128_carry(uint64_t a, uint64_t b)
+{
+    uint64_t carry_bit = FStar_UInt128_constant_time_carry(a, b);
+    return carry_bit;
+}
+
+/* Adds two 128-bit values held as (low, high) pairs. The low halves are
+ * added first; the carry out of that addition (detected by comparing the
+ * wrapped sum with b.low) is then added into the high half. */
+static inline FStar_UInt128_uint128
+FStar_UInt128_add(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b)
+{
+    FStar_UInt128_uint128 sum;
+    uint64_t low_sum = a.low + b.low;
+    sum.low = low_sum;
+    sum.high = a.high + b.high + FStar_UInt128_carry(low_sum, b.low);
+    return sum;
+}
+
+/* Same computation as FStar_UInt128_add: low halves are added, and the carry
+ * out of the low addition is folded into the sum of the high halves. */
+static inline FStar_UInt128_uint128
+FStar_UInt128_add_underspec(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b)
+{
+    FStar_UInt128_uint128 sum;
+    uint64_t low_sum = a.low + b.low;
+    sum.low = low_sum;
+    sum.high = a.high + b.high + FStar_UInt128_carry(low_sum, b.low);
+    return sum;
+}
+
+/* Addition of two (low, high) pairs. Both 64-bit additions use unsigned
+ * arithmetic, so a carry out of the high half is simply discarded. */
+static inline FStar_UInt128_uint128
+FStar_UInt128_add_mod(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b)
+{
+    FStar_UInt128_uint128 sum;
+    uint64_t low_sum = a.low + b.low;
+    sum.low = low_sum;
+    sum.high = a.high + b.high + FStar_UInt128_carry(low_sum, b.low);
+    return sum;
+}
+
+/* Subtracts b from a, both held as (low, high) pairs. The low halves are
+ * subtracted first; a borrow (detected by a.low being smaller than the
+ * wrapped difference) is then taken from the high half. */
+static inline FStar_UInt128_uint128
+FStar_UInt128_sub(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b)
+{
+    FStar_UInt128_uint128 diff;
+    uint64_t low_diff = a.low - b.low;
+    diff.low = low_diff;
+    diff.high = a.high - b.high - FStar_UInt128_carry(a.low, low_diff);
+    return diff;
+}
+
+/* Same computation as FStar_UInt128_sub: low halves are subtracted, and the
+ * borrow out of the low subtraction is removed from the high difference. */
+static inline FStar_UInt128_uint128
+FStar_UInt128_sub_underspec(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b)
+{
+    FStar_UInt128_uint128 diff;
+    uint64_t low_diff = a.low - b.low;
+    diff.low = low_diff;
+    diff.high = a.high - b.high - FStar_UInt128_carry(a.low, low_diff);
+    return diff;
+}
+
+/* Worker behind FStar_UInt128_sub_mod. Subtraction of (low, high) pairs in
+ * unsigned arithmetic, so a borrow out of the high half is discarded. */
+static inline FStar_UInt128_uint128
+FStar_UInt128_sub_mod_impl(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b)
+{
+    FStar_UInt128_uint128 diff;
+    uint64_t low_diff = a.low - b.low;
+    diff.low = low_diff;
+    diff.high = a.high - b.high - FStar_UInt128_carry(a.low, low_diff);
+    return diff;
+}
+
+/* Public entry point; the work is done by FStar_UInt128_sub_mod_impl. */
+static inline FStar_UInt128_uint128
+FStar_UInt128_sub_mod(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b)
+{
+    FStar_UInt128_uint128 diff = FStar_UInt128_sub_mod_impl(a, b);
+    return diff;
+}
+
+/* Bitwise AND, applied independently to the low and the high half. */
+static inline FStar_UInt128_uint128
+FStar_UInt128_logand(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b)
+{
+    FStar_UInt128_uint128 conj;
+    conj.high = a.high & b.high;
+    conj.low = a.low & b.low;
+    return conj;
+}
+
+/* Bitwise XOR, applied independently to the low and the high half. */
+static inline FStar_UInt128_uint128
+FStar_UInt128_logxor(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b)
+{
+    FStar_UInt128_uint128 mixed;
+    mixed.high = a.high ^ b.high;
+    mixed.low = a.low ^ b.low;
+    return mixed;
+}
+
+/* Bitwise OR, applied independently to the low and the high half. */
+static inline FStar_UInt128_uint128
+FStar_UInt128_logor(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b)
+{
+    FStar_UInt128_uint128 disj;
+    disj.high = a.high | b.high;
+    disj.low = a.low | b.low;
+    return disj;
+}
+
+/* Bitwise complement of both halves. */
+static inline FStar_UInt128_uint128
+FStar_UInt128_lognot(FStar_UInt128_uint128 a)
+{
+    FStar_UInt128_uint128 flipped;
+    flipped.high = ~a.high;
+    flipped.low = ~a.low;
+    return flipped;
+}
+
+/* Width in bits of one 64-bit half. The shift helpers in this header compare
+ * shift counts against it. Declared const: it is defined in a header and is
+ * never meant to be written. */
+static const uint32_t FStar_UInt128_u32_64 = (uint32_t)64U;
+
+/* High half of a left shift by s: hi shifted up, plus the top s bits of lo
+ * that spill over. The spill is shifted right by 64 - s, so s must be
+ * non-zero (a 64-bit shift by 64 is undefined in C). The visible caller,
+ * FStar_UInt128_shift_left_small, handles s == 0 before calling. */
+static inline uint64_t
+FStar_UInt128_add_u64_shift_left(uint64_t hi, uint64_t lo, uint32_t s)
+{
+    uint64_t kept = hi << s;
+    uint64_t spill = lo >> (FStar_UInt128_u32_64 - s);
+    return kept + spill;
+}
+
+/* Forwards to FStar_UInt128_add_u64_shift_left with identical arguments. */
+static inline uint64_t
+FStar_UInt128_add_u64_shift_left_respec(uint64_t hi, uint64_t lo, uint32_t s)
+{
+    uint64_t merged = FStar_UInt128_add_u64_shift_left(hi, lo, s);
+    return merged;
+}
+
+/* Left shift for counts handled within the two halves (the dispatcher
+ * FStar_UInt128_shift_left calls this for s < 64). A zero count returns the
+ * input untouched, which also keeps the helper below away from a shift by 64. */
+static inline FStar_UInt128_uint128
+FStar_UInt128_shift_left_small(FStar_UInt128_uint128 a, uint32_t s)
+{
+    FStar_UInt128_uint128 shifted;
+    if (s == (uint32_t)0U) {
+        return a;
+    }
+    shifted.low = a.low << s;
+    shifted.high = FStar_UInt128_add_u64_shift_left_respec(a.high, a.low, s);
+    return shifted;
+}
+
+/* Left shift for counts of 64 and above (as dispatched by
+ * FStar_UInt128_shift_left): the low half becomes zero and the old low half,
+ * shifted by s - 64, becomes the high half. */
+static inline FStar_UInt128_uint128
+FStar_UInt128_shift_left_large(FStar_UInt128_uint128 a, uint32_t s)
+{
+    FStar_UInt128_uint128 shifted;
+    uint32_t residual = s - FStar_UInt128_u32_64;
+    shifted.high = a.low << residual;
+    shifted.low = (uint64_t)0U;
+    return shifted;
+}
+
+/* Left shift by s bits. Counts of 64 and above go to the "large" helper,
+ * everything else to the "small" one. */
+static inline FStar_UInt128_uint128
+FStar_UInt128_shift_left(FStar_UInt128_uint128 a, uint32_t s)
+{
+    if (s >= FStar_UInt128_u32_64) {
+        return FStar_UInt128_shift_left_large(a, s);
+    }
+    return FStar_UInt128_shift_left_small(a, s);
+}
+
+/* Low half of a right shift by s: lo shifted down, plus the bottom s bits of
+ * hi moved into the top. hi is shifted left by 64 - s, so s must be non-zero
+ * (a 64-bit shift by 64 is undefined in C). The visible caller,
+ * FStar_UInt128_shift_right_small, handles s == 0 before calling. */
+static inline uint64_t
+FStar_UInt128_add_u64_shift_right(uint64_t hi, uint64_t lo, uint32_t s)
+{
+    uint64_t kept = lo >> s;
+    uint64_t spill = hi << (FStar_UInt128_u32_64 - s);
+    return kept + spill;
+}
+
+/* Forwards to FStar_UInt128_add_u64_shift_right with identical arguments. */
+static inline uint64_t
+FStar_UInt128_add_u64_shift_right_respec(uint64_t hi, uint64_t lo, uint32_t s)
+{
+    uint64_t merged = FStar_UInt128_add_u64_shift_right(hi, lo, s);
+    return merged;
+}
+
+/* Right shift for counts handled within the two halves (the dispatcher
+ * FStar_UInt128_shift_right calls this for s < 64). A zero count returns the
+ * input untouched, which also keeps the helper below away from a shift by 64. */
+static inline FStar_UInt128_uint128
+FStar_UInt128_shift_right_small(FStar_UInt128_uint128 a, uint32_t s)
+{
+    FStar_UInt128_uint128 shifted;
+    if (s == (uint32_t)0U) {
+        return a;
+    }
+    shifted.low = FStar_UInt128_add_u64_shift_right_respec(a.high, a.low, s);
+    shifted.high = a.high >> s;
+    return shifted;
+}
+
+/* Right shift for counts of 64 and above (as dispatched by
+ * FStar_UInt128_shift_right): the high half becomes zero and the old high
+ * half, shifted by s - 64, becomes the low half. */
+static inline FStar_UInt128_uint128
+FStar_UInt128_shift_right_large(FStar_UInt128_uint128 a, uint32_t s)
+{
+    FStar_UInt128_uint128 shifted;
+    uint32_t residual = s - FStar_UInt128_u32_64;
+    shifted.low = a.high >> residual;
+    shifted.high = (uint64_t)0U;
+    return shifted;
+}
+
+/* Right shift by s bits. Counts of 64 and above go to the "large" helper,
+ * everything else to the "small" one. */
+static inline FStar_UInt128_uint128
+FStar_UInt128_shift_right(FStar_UInt128_uint128 a, uint32_t s)
+{
+    if (s >= FStar_UInt128_u32_64) {
+        return FStar_UInt128_shift_right_large(a, s);
+    }
+    return FStar_UInt128_shift_right_small(a, s);
+}
+
+/* True when both halves match. Uses ordinary comparisons and an early exit,
+ * unlike the mask-returning FStar_UInt128_eq_mask. */
+static inline bool
+FStar_UInt128_eq(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b)
+{
+    if (a.low != b.low) {
+        return false;
+    }
+    return a.high == b.high;
+}
+
+/* a > b: decided by the high halves when they differ, otherwise by the low
+ * halves. */
+static inline bool
+FStar_UInt128_gt(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b)
+{
+    if (a.high != b.high) {
+        return a.high > b.high;
+    }
+    return a.low > b.low;
+}
+
+/* a < b: decided by the high halves when they differ, otherwise by the low
+ * halves. */
+static inline bool
+FStar_UInt128_lt(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b)
+{
+    if (a.high != b.high) {
+        return a.high < b.high;
+    }
+    return a.low < b.low;
+}
+
+/* a >= b: decided by the high halves when they differ, otherwise by the low
+ * halves (equality included). */
+static inline bool
+FStar_UInt128_gte(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b)
+{
+    if (a.high != b.high) {
+        return a.high > b.high;
+    }
+    return a.low >= b.low;
+}
+
+/* a <= b: decided by the high halves when they differ, otherwise by the low
+ * halves (equality included). */
+static inline bool
+FStar_UInt128_lte(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b)
+{
+    if (a.high != b.high) {
+        return a.high < b.high;
+    }
+    return a.low <= b.low;
+}
+
+/* Mask form of equality: both halves of the result carry the AND of the
+ * 64-bit equality masks of the low and the high halves. */
+static inline FStar_UInt128_uint128
+FStar_UInt128_eq_mask(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b)
+{
+    FStar_UInt128_uint128 mask;
+    uint64_t both_equal =
+        FStar_UInt64_eq_mask(a.low, b.low) & FStar_UInt64_eq_mask(a.high, b.high);
+    mask.low = both_equal;
+    mask.high = both_equal;
+    return mask;
+}
+
+/* Mask form of a >= b. The mask is set when the high halves compare
+ * "greater-or-equal but not equal", or when they are equal and the low halves
+ * compare greater-or-equal. The same 64-bit mask fills both result halves. */
+static inline FStar_UInt128_uint128
+FStar_UInt128_gte_mask(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b)
+{
+    FStar_UInt128_uint128 mask;
+    uint64_t hi_gte = FStar_UInt64_gte_mask(a.high, b.high);
+    uint64_t hi_eq = FStar_UInt64_eq_mask(a.high, b.high);
+    uint64_t lo_gte = FStar_UInt64_gte_mask(a.low, b.low);
+    uint64_t combined = (hi_gte & ~hi_eq) | (hi_eq & lo_gte);
+    mask.low = combined;
+    mask.high = combined;
+    return mask;
+}
+
+/* Widens a 64-bit value: it becomes the low half, the high half is zero. */
+static inline FStar_UInt128_uint128
+FStar_UInt128_uint64_to_uint128(uint64_t a)
+{
+    FStar_UInt128_uint128 widened;
+    widened.high = (uint64_t)0U;
+    widened.low = a;
+    return widened;
+}
+
+/* Narrows to 64 bits by returning the low half; the high half is dropped. */
+static inline uint64_t
+FStar_UInt128_uint128_to_uint64(FStar_UInt128_uint128 a)
+{
+    uint64_t narrowed = a.low;
+    return narrowed;
+}
+
+/* Keeps the low 32 bits of a (a modulo 2^32) in a 64-bit result. */
+static inline uint64_t
+FStar_UInt128_u64_mod_32(uint64_t a)
+{
+    uint64_t low32_mask = (uint64_t)0xffffffffU;
+    return a & low32_mask;
+}
+
+/* Shift count used by the multiplication helpers in this header to split
+ * 64-bit words into 32-bit halves. Declared const: it is defined in a header
+ * and is never meant to be written. */
+static const uint32_t FStar_UInt128_u32_32 = (uint32_t)32U;
+
+/* Returns lo + hi * 2^32, computed in wrapping 64-bit arithmetic. */
+static inline uint64_t
+FStar_UInt128_u32_combine(uint64_t hi, uint64_t lo)
+{
+    uint64_t raised = hi << FStar_UInt128_u32_32;
+    return lo + raised;
+}
+
+/* 64-bit by 32-bit multiplication with a 128-bit result, using only 64-bit
+ * arithmetic. x is split as x_hi * 2^32 + x_lo; the two partial products are
+ * accumulated as:
+ *   lo_prod = x_lo * y
+ *   mid     = x_hi * y + (lo_prod >> 32)
+ * The low result half is the low 32 bits of lo_prod combined with mid << 32;
+ * the high result half is mid >> 32. */
+static inline FStar_UInt128_uint128
+FStar_UInt128_mul32(uint64_t x, uint32_t y)
+{
+    FStar_UInt128_uint128 product;
+    uint64_t y64 = (uint64_t)y;
+    uint64_t x_lo = FStar_UInt128_u64_mod_32(x);
+    uint64_t x_hi = x >> FStar_UInt128_u32_32;
+    uint64_t lo_prod = x_lo * y64;
+    uint64_t mid = x_hi * y64 + (lo_prod >> FStar_UInt128_u32_32);
+    product.low = FStar_UInt128_u32_combine(mid, FStar_UInt128_u64_mod_32(lo_prod));
+    product.high = mid >> FStar_UInt128_u32_32;
+    return product;
+}
+
+/* Returns lo + hi * 2^32, computed in wrapping 64-bit arithmetic. Same body
+ * as FStar_UInt128_u32_combine; used by FStar_UInt128_mul_wide. */
+static inline uint64_t
+FStar_UInt128_u32_combine_(uint64_t hi, uint64_t lo)
+{
+    uint64_t raised = hi << FStar_UInt128_u32_32;
+    return lo + raised;
+}
+
+/* 64-bit by 64-bit multiplication with a 128-bit result, using only 64-bit
+ * arithmetic. Both operands are split into 32-bit halves and the partial
+ * products are accumulated as:
+ *   ll    = x_lo * y_lo
+ *   cross = x_hi * y_lo + (ll >> 32)
+ *   mid   = x_lo * y_hi + (cross mod 2^32)
+ * The low result half is (ll mod 2^32) combined with mid << 32; the high
+ * result half is x_hi * y_hi + (cross >> 32) + (mid >> 32). */
+static inline FStar_UInt128_uint128
+FStar_UInt128_mul_wide(uint64_t x, uint64_t y)
+{
+    FStar_UInt128_uint128 product;
+    uint64_t x_lo = FStar_UInt128_u64_mod_32(x);
+    uint64_t x_hi = x >> FStar_UInt128_u32_32;
+    uint64_t y_lo = FStar_UInt128_u64_mod_32(y);
+    uint64_t y_hi = y >> FStar_UInt128_u32_32;
+    uint64_t ll = x_lo * y_lo;
+    uint64_t cross = x_hi * y_lo + (ll >> FStar_UInt128_u32_32);
+    uint64_t mid = x_lo * y_hi + FStar_UInt128_u64_mod_32(cross);
+    product.low = FStar_UInt128_u32_combine_(mid, FStar_UInt128_u64_mod_32(ll));
+    product.high =
+        x_hi * y_hi + (cross >> FStar_UInt128_u32_32) + (mid >> FStar_UInt128_u32_32);
+    return product;
+}
+
+#define __FStar_UInt128_Verified_H_DEFINED
+#endif
diff --git a/security/nss/lib/freebl/verified/kremlin/kremlib/dist/minimal/FStar_UInt_8_16_32_64.h b/security/nss/lib/freebl/verified/kremlin/kremlib/dist/minimal/FStar_UInt_8_16_32_64.h
new file mode 100644
index 0000000000..809ea58ac2
--- /dev/null
+++ b/security/nss/lib/freebl/verified/kremlin/kremlib/dist/minimal/FStar_UInt_8_16_32_64.h
@@ -0,0 +1,203 @@
+/*
+ Copyright (c) INRIA and Microsoft Corporation. All rights reserved.
+ Licensed under the Apache 2.0 License.
+*/
+
+#include <inttypes.h>
+#include <stdbool.h>
+#include "kremlin/internal/compat.h"
+#include "kremlin/lowstar_endianness.h"
+#include "kremlin/internal/types.h"
+#include "kremlin/internal/target.h"
+
+#ifndef __FStar_UInt_8_16_32_64_H
+#define __FStar_UInt_8_16_32_64_H
+
+extern Prims_int FStar_UInt64_n;
+
+extern bool FStar_UInt64_uu___is_Mk(uint64_t projectee);
+
+extern Prims_int FStar_UInt64___proj__Mk__item__v(uint64_t projectee);
+
+extern Prims_int FStar_UInt64_v(uint64_t x);
+
+extern uint64_t FStar_UInt64_uint_to_t(Prims_int x);
+
+extern uint64_t FStar_UInt64_minus(uint64_t a);
+
+extern uint32_t FStar_UInt64_n_minus_one;
+
+/* Branch-free equality mask: returns all ones when a == b and 0 otherwise. */
+static inline uint64_t
+FStar_UInt64_eq_mask(uint64_t a, uint64_t b)
+{
+ uint64_t x = a ^ b; /* zero iff a == b */
+ uint64_t minus_x = ~x + (uint64_t)1U; /* two's-complement negation of x */
+ uint64_t x_or_minus_x = x | minus_x; /* top bit set iff x != 0 */
+ uint64_t xnx = x_or_minus_x >> (uint32_t)63U; /* 1 if different, 0 if equal */
+ return xnx - (uint64_t)1U; /* 0 - 1 wraps to all ones */
+}
+
+/* Branch-free comparison mask: returns all ones when a >= b (unsigned) and 0
+ * otherwise. */
+static inline uint64_t
+FStar_UInt64_gte_mask(uint64_t a, uint64_t b)
+{
+ uint64_t x = a;
+ uint64_t y = b;
+ uint64_t x_xor_y = x ^ y;
+ uint64_t x_sub_y = x - y;
+ uint64_t x_sub_y_xor_y = x_sub_y ^ y;
+ uint64_t q = x_xor_y | x_sub_y_xor_y;
+ uint64_t x_xor_q = x ^ q; /* top bit is the borrow of x - y */
+ uint64_t x_xor_q_ = x_xor_q >> (uint32_t)63U; /* 1 when a < b, else 0 */
+ return x_xor_q_ - (uint64_t)1U; /* 0 - 1 wraps to all ones */
+}
+
+extern Prims_string FStar_UInt64_to_string(uint64_t uu____888);
+
+extern Prims_string FStar_UInt64_to_string_hex(uint64_t uu____899);
+
+extern Prims_string FStar_UInt64_to_string_hex_pad(uint64_t uu____910);
+
+extern uint64_t FStar_UInt64_of_string(Prims_string uu____921);
+
+extern Prims_int FStar_UInt32_n;
+
+extern bool FStar_UInt32_uu___is_Mk(uint32_t projectee);
+
+extern Prims_int FStar_UInt32___proj__Mk__item__v(uint32_t projectee);
+
+extern Prims_int FStar_UInt32_v(uint32_t x);
+
+extern uint32_t FStar_UInt32_uint_to_t(Prims_int x);
+
+extern uint32_t FStar_UInt32_minus(uint32_t a);
+
+extern uint32_t FStar_UInt32_n_minus_one;
+
+/* Branch-free equality mask: returns all ones when a == b and 0 otherwise. */
+static inline uint32_t
+FStar_UInt32_eq_mask(uint32_t a, uint32_t b)
+{
+ uint32_t x = a ^ b; /* zero iff a == b */
+ uint32_t minus_x = ~x + (uint32_t)1U; /* two's-complement negation of x */
+ uint32_t x_or_minus_x = x | minus_x; /* top bit set iff x != 0 */
+ uint32_t xnx = x_or_minus_x >> (uint32_t)31U; /* 1 if different, 0 if equal */
+ return xnx - (uint32_t)1U; /* 0 - 1 wraps to all ones */
+}
+
+/* Branch-free comparison mask: returns all ones when a >= b (unsigned) and 0
+ * otherwise. */
+static inline uint32_t
+FStar_UInt32_gte_mask(uint32_t a, uint32_t b)
+{
+ uint32_t x = a;
+ uint32_t y = b;
+ uint32_t x_xor_y = x ^ y;
+ uint32_t x_sub_y = x - y;
+ uint32_t x_sub_y_xor_y = x_sub_y ^ y;
+ uint32_t q = x_xor_y | x_sub_y_xor_y;
+ uint32_t x_xor_q = x ^ q; /* top bit is the borrow of x - y */
+ uint32_t x_xor_q_ = x_xor_q >> (uint32_t)31U; /* 1 when a < b, else 0 */
+ return x_xor_q_ - (uint32_t)1U; /* 0 - 1 wraps to all ones */
+}
+
+extern Prims_string FStar_UInt32_to_string(uint32_t uu____888);
+
+extern Prims_string FStar_UInt32_to_string_hex(uint32_t uu____899);
+
+extern Prims_string FStar_UInt32_to_string_hex_pad(uint32_t uu____910);
+
+extern uint32_t FStar_UInt32_of_string(Prims_string uu____921);
+
+extern Prims_int FStar_UInt16_n;
+
+extern bool FStar_UInt16_uu___is_Mk(uint16_t projectee);
+
+extern Prims_int FStar_UInt16___proj__Mk__item__v(uint16_t projectee);
+
+extern Prims_int FStar_UInt16_v(uint16_t x);
+
+extern uint16_t FStar_UInt16_uint_to_t(Prims_int x);
+
+extern uint16_t FStar_UInt16_minus(uint16_t a);
+
+extern uint32_t FStar_UInt16_n_minus_one;
+
+/* Branch-free equality mask: returns 0xFFFF when a == b and 0 otherwise.
+ * The operands are narrower than int, so each expression is evaluated after
+ * integer promotion and narrowed back to uint16_t on assignment/return. */
+static inline uint16_t
+FStar_UInt16_eq_mask(uint16_t a, uint16_t b)
+{
+ uint16_t x = a ^ b; /* zero iff a == b */
+ uint16_t minus_x = ~x + (uint16_t)1U; /* negation of x, truncated to 16 bits */
+ uint16_t x_or_minus_x = x | minus_x; /* bit 15 set iff x != 0 */
+ uint16_t xnx = x_or_minus_x >> (uint32_t)15U; /* 1 if different, 0 if equal */
+ return xnx - (uint16_t)1U; /* 0 - 1 narrows to 0xFFFF */
+}
+
+/* Branch-free comparison mask: returns 0xFFFF when a >= b (unsigned) and 0
+ * otherwise. The operands are narrower than int, so each expression is
+ * evaluated after integer promotion and narrowed back to uint16_t. */
+static inline uint16_t
+FStar_UInt16_gte_mask(uint16_t a, uint16_t b)
+{
+ uint16_t x = a;
+ uint16_t y = b;
+ uint16_t x_xor_y = x ^ y;
+ uint16_t x_sub_y = x - y; /* difference truncated to 16 bits */
+ uint16_t x_sub_y_xor_y = x_sub_y ^ y;
+ uint16_t q = x_xor_y | x_sub_y_xor_y;
+ uint16_t x_xor_q = x ^ q; /* bit 15 is the borrow of x - y */
+ uint16_t x_xor_q_ = x_xor_q >> (uint32_t)15U; /* 1 when a < b, else 0 */
+ return x_xor_q_ - (uint16_t)1U; /* 0 - 1 narrows to 0xFFFF */
+}
+
+extern Prims_string FStar_UInt16_to_string(uint16_t uu____888);
+
+extern Prims_string FStar_UInt16_to_string_hex(uint16_t uu____899);
+
+extern Prims_string FStar_UInt16_to_string_hex_pad(uint16_t uu____910);
+
+extern uint16_t FStar_UInt16_of_string(Prims_string uu____921);
+
+extern Prims_int FStar_UInt8_n;
+
+extern bool FStar_UInt8_uu___is_Mk(uint8_t projectee);
+
+extern Prims_int FStar_UInt8___proj__Mk__item__v(uint8_t projectee);
+
+extern Prims_int FStar_UInt8_v(uint8_t x);
+
+extern uint8_t FStar_UInt8_uint_to_t(Prims_int x);
+
+extern uint8_t FStar_UInt8_minus(uint8_t a);
+
+extern uint32_t FStar_UInt8_n_minus_one;
+
+/* Branch-free equality mask: returns 0xFF when a == b and 0 otherwise.
+ * The operands are narrower than int, so each expression is evaluated after
+ * integer promotion and narrowed back to uint8_t on assignment/return. */
+static inline uint8_t
+FStar_UInt8_eq_mask(uint8_t a, uint8_t b)
+{
+ uint8_t x = a ^ b; /* zero iff a == b */
+ uint8_t minus_x = ~x + (uint8_t)1U; /* negation of x, truncated to 8 bits */
+ uint8_t x_or_minus_x = x | minus_x; /* bit 7 set iff x != 0 */
+ uint8_t xnx = x_or_minus_x >> (uint32_t)7U; /* 1 if different, 0 if equal */
+ return xnx - (uint8_t)1U; /* 0 - 1 narrows to 0xFF */
+}
+
+/* Branch-free comparison mask: returns 0xFF when a >= b (unsigned) and 0
+ * otherwise. The operands are narrower than int, so each expression is
+ * evaluated after integer promotion and narrowed back to uint8_t. */
+static inline uint8_t
+FStar_UInt8_gte_mask(uint8_t a, uint8_t b)
+{
+ uint8_t x = a;
+ uint8_t y = b;
+ uint8_t x_xor_y = x ^ y;
+ uint8_t x_sub_y = x - y; /* difference truncated to 8 bits */
+ uint8_t x_sub_y_xor_y = x_sub_y ^ y;
+ uint8_t q = x_xor_y | x_sub_y_xor_y;
+ uint8_t x_xor_q = x ^ q; /* bit 7 is the borrow of x - y */
+ uint8_t x_xor_q_ = x_xor_q >> (uint32_t)7U; /* 1 when a < b, else 0 */
+ return x_xor_q_ - (uint8_t)1U; /* 0 - 1 narrows to 0xFF */
+}
+
+extern Prims_string FStar_UInt8_to_string(uint8_t uu____888);
+
+extern Prims_string FStar_UInt8_to_string_hex(uint8_t uu____899);
+
+extern Prims_string FStar_UInt8_to_string_hex_pad(uint8_t uu____910);
+
+extern uint8_t FStar_UInt8_of_string(Prims_string uu____921);
+
+typedef uint8_t FStar_UInt8_byte;
+
+#define __FStar_UInt_8_16_32_64_H_DEFINED
+#endif
diff --git a/security/nss/lib/freebl/verified/kremlin/kremlib/dist/minimal/LowStar_Endianness.h b/security/nss/lib/freebl/verified/kremlin/kremlib/dist/minimal/LowStar_Endianness.h
new file mode 100644
index 0000000000..a6bff78d99
--- /dev/null
+++ b/security/nss/lib/freebl/verified/kremlin/kremlib/dist/minimal/LowStar_Endianness.h
@@ -0,0 +1,27 @@
+/*
+ Copyright (c) INRIA and Microsoft Corporation. All rights reserved.
+ Licensed under the Apache 2.0 License.
+*/
+
+#include <inttypes.h>
+#include <stdbool.h>
+#include "kremlin/internal/compat.h"
+#include "kremlin/lowstar_endianness.h"
+#include "kremlin/internal/types.h"
+#include "kremlin/internal/target.h"
+
+#ifndef __LowStar_Endianness_H
+#define __LowStar_Endianness_H
+
+#include "FStar_UInt128.h"
+
+static inline void store128_le(uint8_t *x0, FStar_UInt128_uint128 x1);
+
+static inline FStar_UInt128_uint128 load128_le(uint8_t *x0);
+
+static inline void store128_be(uint8_t *x0, FStar_UInt128_uint128 x1);
+
+static inline FStar_UInt128_uint128 load128_be(uint8_t *x0);
+
+#define __LowStar_Endianness_H_DEFINED
+#endif
diff --git a/security/nss/lib/freebl/verified/kremlin/kremlib/dist/minimal/fstar_uint128_gcc64.h b/security/nss/lib/freebl/verified/kremlin/kremlib/dist/minimal/fstar_uint128_gcc64.h
new file mode 100644
index 0000000000..5372de42a7
--- /dev/null
+++ b/security/nss/lib/freebl/verified/kremlin/kremlib/dist/minimal/fstar_uint128_gcc64.h
@@ -0,0 +1,220 @@
+/* Copyright (c) INRIA and Microsoft Corporation. All rights reserved.
+ Licensed under the Apache 2.0 License. */
+
+/******************************************************************************/
+/* Machine integers (128-bit arithmetic) */
+/******************************************************************************/
+
+/* This header contains two things.
+ *
+ * First, an implementation of 128-bit arithmetic suitable for 64-bit GCC and
+ * Clang, i.e. all the operations from FStar.UInt128.
+ *
+ * Second, 128-bit operations from C.Endianness (or LowStar.Endianness),
+ * suitable for any compiler and platform (via a series of ifdefs). This second
+ * part is unfortunate, and should be fixed by moving {load,store}128_{be,le} to
+ * FStar.UInt128 to avoid a maze of preprocessor guards and hand-written code.
+ * */
+
+/* This file is used for both the minimal and generic kremlib distributions. As
+ * such, it assumes that the machine integers have been bundled the exact same
+ * way in both cases. */
+
+#include "FStar_UInt128.h"
+#include "FStar_UInt_8_16_32_64.h"
+#include "LowStar_Endianness.h"
+
+/* GCC + using native unsigned __int128 support */
+
+/* Reads 16 bytes at b as a little-endian 128-bit value: bytes 0..7 form the
+ * low 64 bits, bytes 8..15 the high 64 bits. */
+static inline uint128_t
+load128_le(uint8_t *b)
+{
+    uint128_t lo_half = (uint128_t)load64_le(b);
+    uint128_t hi_half = (uint128_t)load64_le(b + 8);
+    return (hi_half << 64) | lo_half;
+}
+
+/* Writes n to the 16 bytes at b in little-endian order: the low 64 bits go
+ * to bytes 0..7, the high 64 bits to bytes 8..15. */
+static inline void
+store128_le(uint8_t *b, uint128_t n)
+{
+    uint64_t lo_half = (uint64_t)n;
+    uint64_t hi_half = (uint64_t)(n >> 64);
+    store64_le(b, lo_half);
+    store64_le(b + 8, hi_half);
+}
+
+/* Reads 16 bytes at b as a big-endian 128-bit value: bytes 0..7 form the
+ * high 64 bits, bytes 8..15 the low 64 bits. */
+static inline uint128_t
+load128_be(uint8_t *b)
+{
+    uint128_t hi_half = (uint128_t)load64_be(b);
+    uint128_t lo_half = (uint128_t)load64_be(b + 8);
+    return (hi_half << 64) | lo_half;
+}
+
+/* Writes n to the 16 bytes at b in big-endian order: the high 64 bits go to
+ * bytes 0..7, the low 64 bits to bytes 8..15. */
+static inline void
+store128_be(uint8_t *b, uint128_t n)
+{
+    uint64_t hi_half = (uint64_t)(n >> 64);
+    uint64_t lo_half = (uint64_t)n;
+    store64_be(b, hi_half);
+    store64_be(b + 8, lo_half);
+}
+
+/* Addition on the native 128-bit type. */
+static inline uint128_t
+FStar_UInt128_add(uint128_t x, uint128_t y)
+{
+    uint128_t sum = x + y;
+    return sum;
+}
+
+/* Multiplication on the native 128-bit type; the result is the product
+ * truncated to the width of uint128_t. */
+static inline uint128_t
+FStar_UInt128_mul(uint128_t x, uint128_t y)
+{
+    uint128_t product = x * y;
+    return product;
+}
+
+/* Addition on the native 128-bit type; identical body to FStar_UInt128_add. */
+static inline uint128_t
+FStar_UInt128_add_mod(uint128_t x, uint128_t y)
+{
+    uint128_t sum = x + y;
+    return sum;
+}
+
+/* Subtraction on the native 128-bit type. */
+static inline uint128_t
+FStar_UInt128_sub(uint128_t x, uint128_t y)
+{
+    uint128_t diff = x - y;
+    return diff;
+}
+
+/* Subtraction on the native 128-bit type; identical body to
+ * FStar_UInt128_sub. */
+static inline uint128_t
+FStar_UInt128_sub_mod(uint128_t x, uint128_t y)
+{
+    uint128_t diff = x - y;
+    return diff;
+}
+
+/* Bitwise AND on the native 128-bit type. */
+static inline uint128_t
+FStar_UInt128_logand(uint128_t x, uint128_t y)
+{
+    uint128_t conj = x & y;
+    return conj;
+}
+
+/* Bitwise OR on the native 128-bit type. */
+static inline uint128_t
+FStar_UInt128_logor(uint128_t x, uint128_t y)
+{
+    uint128_t disj = x | y;
+    return disj;
+}
+
+/* Bitwise XOR on the native 128-bit type. */
+static inline uint128_t
+FStar_UInt128_logxor(uint128_t x, uint128_t y)
+{
+    uint128_t mixed = x ^ y;
+    return mixed;
+}
+
+/* Bitwise complement on the native 128-bit type. */
+static inline uint128_t
+FStar_UInt128_lognot(uint128_t x)
+{
+    uint128_t flipped = ~x;
+    return flipped;
+}
+
+/* Left shift on the native 128-bit type. The count is passed straight to the
+ * C shift operator, so it must be smaller than the width of uint128_t. */
+static inline uint128_t
+FStar_UInt128_shift_left(uint128_t x, uint32_t y)
+{
+    uint128_t shifted = x << y;
+    return shifted;
+}
+
+/* Right shift on the native 128-bit type. The count is passed straight to
+ * the C shift operator, so it must be smaller than the width of uint128_t. */
+static inline uint128_t
+FStar_UInt128_shift_right(uint128_t x, uint32_t y)
+{
+    uint128_t shifted = x >> y;
+    return shifted;
+}
+
+/* Zero-extends a 64-bit value to the native 128-bit type. */
+static inline uint128_t
+FStar_UInt128_uint64_to_uint128(uint64_t x)
+{
+    uint128_t widened = (uint128_t)x;
+    return widened;
+}
+
+/* Truncates to the low 64 bits. */
+static inline uint64_t
+FStar_UInt128_uint128_to_uint64(uint128_t x)
+{
+    uint64_t narrowed = (uint64_t)x;
+    return narrowed;
+}
+
+/* Full 64x64 -> 128-bit product: x is widened first so the multiplication is
+ * carried out on the 128-bit type. */
+static inline uint128_t
+FStar_UInt128_mul_wide(uint64_t x, uint64_t y)
+{
+    uint128_t wide_x = (uint128_t)x;
+    return wide_x * y;
+}
+
+/* Mask form of equality: combines the 64-bit equality masks of the high and
+ * the low halves with AND and replicates the result into both halves of the
+ * 128-bit return value. */
+static inline uint128_t
+FStar_UInt128_eq_mask(uint128_t x, uint128_t y)
+{
+    uint64_t hi_eq = FStar_UInt64_eq_mask((uint64_t)(x >> 64), (uint64_t)(y >> 64));
+    uint64_t lo_eq = FStar_UInt64_eq_mask((uint64_t)x, (uint64_t)y);
+    uint64_t mask = hi_eq & lo_eq;
+    return (((uint128_t)mask) << 64) | mask;
+}
+
+/* Mask form of x >= y. The mask is set when the high halves compare
+ * "greater-or-equal but not equal", or when they are equal and the low halves
+ * compare greater-or-equal. The 64-bit mask is replicated into both halves of
+ * the 128-bit return value. */
+static inline uint128_t
+FStar_UInt128_gte_mask(uint128_t x, uint128_t y)
+{
+    uint64_t x_hi = (uint64_t)(x >> 64);
+    uint64_t y_hi = (uint64_t)(y >> 64);
+    uint64_t hi_gte = FStar_UInt64_gte_mask(x_hi, y_hi);
+    uint64_t hi_eq = FStar_UInt64_eq_mask(x_hi, y_hi);
+    uint64_t lo_gte = FStar_UInt64_gte_mask((uint64_t)x, (uint64_t)y);
+    uint64_t mask = (hi_gte & ~hi_eq) | (hi_eq & lo_gte);
+    return (((uint128_t)mask) << 64) | mask;
+}
+
+/* Projection of the low 64 bits. */
+static inline uint64_t
+FStar_UInt128___proj__Mkuint128__item__low(uint128_t x)
+{
+    uint64_t lo_half = (uint64_t)x;
+    return lo_half;
+}
+
+/* Projection of the high 64 bits. */
+static inline uint64_t
+FStar_UInt128___proj__Mkuint128__item__high(uint128_t x)
+{
+    uint128_t upper = x >> 64;
+    return (uint64_t)upper;
+}
+
+/* Addition on the native 128-bit type; identical body to FStar_UInt128_add. */
+static inline uint128_t
+FStar_UInt128_add_underspec(uint128_t x, uint128_t y)
+{
+    uint128_t sum = x + y;
+    return sum;
+}
+
+/* Subtraction on the native 128-bit type; identical body to
+ * FStar_UInt128_sub. */
+static inline uint128_t
+FStar_UInt128_sub_underspec(uint128_t x, uint128_t y)
+{
+    uint128_t diff = x - y;
+    return diff;
+}
+
+/* Plain (non-mask) equality test on the native 128-bit type. */
+static inline bool
+FStar_UInt128_eq(uint128_t x, uint128_t y)
+{
+    bool same = (x == y);
+    return same;
+}
+
+/* Plain (non-mask) x > y on the native 128-bit type. */
+static inline bool
+FStar_UInt128_gt(uint128_t x, uint128_t y)
+{
+    bool greater = (x > y);
+    return greater;
+}
+
+/* Plain (non-mask) x < y on the native 128-bit type. */
+static inline bool
+FStar_UInt128_lt(uint128_t x, uint128_t y)
+{
+    bool less = (x < y);
+    return less;
+}
+
+/* Plain (non-mask) x >= y on the native 128-bit type. */
+static inline bool
+FStar_UInt128_gte(uint128_t x, uint128_t y)
+{
+    bool at_least = (x >= y);
+    return at_least;
+}
+
+/* Plain (non-mask) x <= y on the native 128-bit type. */
+static inline bool
+FStar_UInt128_lte(uint128_t x, uint128_t y)
+{
+    bool at_most = (x <= y);
+    return at_most;
+}
+
+/* 64x32 -> 128-bit product: both operands are widened before multiplying. */
+static inline uint128_t
+FStar_UInt128_mul32(uint64_t x, uint32_t y)
+{
+    uint128_t wide_x = (uint128_t)x;
+    uint128_t wide_y = (uint128_t)y;
+    return wide_x * wide_y;
+}
diff --git a/security/nss/lib/freebl/verified/kremlin/kremlib/dist/minimal/fstar_uint128_msvc.h b/security/nss/lib/freebl/verified/kremlin/kremlib/dist/minimal/fstar_uint128_msvc.h
new file mode 100644
index 0000000000..fca0e2d962
--- /dev/null
+++ b/security/nss/lib/freebl/verified/kremlin/kremlib/dist/minimal/fstar_uint128_msvc.h
@@ -0,0 +1,528 @@
+/* Copyright (c) INRIA and Microsoft Corporation. All rights reserved.
+ Licensed under the Apache 2.0 License. */
+
+/* This file was generated by KreMLin <https://github.com/FStarLang/kremlin>
+ * then hand-edited to use MSVC intrinsics. KreMLin invocation:
+ * C:\users\barrybo\mitls2c\kremlin\_build\src\Kremlin.native -minimal -fnouint128 C:/users/barrybo/mitls2c/FStar/ulib/FStar.UInt128.fst -tmpdir ../secure_api/out/runtime_switch/uint128 -skip-compilation -add-include "kremlib0.h" -drop FStar.Int.Cast.Full -bundle FStar.UInt128=FStar.*,Prims
+ * F* version: 15104ff8
+ * KreMLin version: 318b7fa8
+ */
+#include "kremlin/internal/types.h"
+#include "FStar_UInt128.h"
+#include "FStar_UInt_8_16_32_64.h"
+
+#ifndef _MSC_VER
+#error This file only works with the MSVC compiler
+#endif
+
+/* JP: need to rip out HAS_OPTIMIZED since the header guards in types.h are now
+ * done properly and only include this file when we know for sure we are on
+ * 64-bit MSVC. */
+
+#if defined(_M_X64) && !defined(KRML_VERIFIED_UINT128)
+#define HAS_OPTIMIZED 1
+#else
+#define HAS_OPTIMIZED 0
+#endif
+
+// Define .low and .high in terms of the __m128i fields, to reduce
+// the amount of churn in this file.
+#if HAS_OPTIMIZED
+#include <intrin.h>
+#include <immintrin.h>
+#define low m128i_u64[0]
+#define high m128i_u64[1]
+#endif
+
+/* Reads the 16 bytes at b as a little-endian 128-bit value. */
+inline static FStar_UInt128_uint128
+load128_le(uint8_t *b)
+{
+#if HAS_OPTIMIZED
+    /* Unaligned 128-bit load straight from memory. */
+    FStar_UInt128_uint128 loaded = _mm_loadu_si128((__m128i *)b);
+    return loaded;
+#else
+    uint64_t lo = load64_le(b);
+    uint64_t hi = load64_le(b + 8);
+    return ((FStar_UInt128_uint128){.low = lo, .high = hi });
+#endif
+}
+
+/* Writes n to the 16 bytes at b in little-endian order: low word first. */
+inline static void
+store128_le(uint8_t *b, FStar_UInt128_uint128 n)
+{
+    uint8_t *lo_dst = b;
+    uint8_t *hi_dst = b + 8;
+    store64_le(lo_dst, n.low);
+    store64_le(hi_dst, n.high);
+}
+
+/* Reads the 16 bytes at b as a big-endian 128-bit value: the first eight
+ * bytes form the high word, the last eight the low word. */
+inline static FStar_UInt128_uint128
+load128_be(uint8_t *b)
+{
+    uint64_t lo = load64_be(b + 8);
+    uint64_t hi = load64_be(b);
+#if HAS_OPTIMIZED
+    /* _mm_set_epi64x takes (high, low). */
+    FStar_UInt128_uint128 packed = _mm_set_epi64x(hi, lo);
+    return packed;
+#else
+    return ((FStar_UInt128_uint128){.low = lo, .high = hi });
+#endif
+}
+
+/* Writes n to the 16 bytes at b in big-endian order: high word first. */
+inline static void
+store128_be(uint8_t *b, uint128_t n)
+{
+    uint8_t *hi_dst = b;
+    uint8_t *lo_dst = b + 8;
+    store64_be(hi_dst, n.high);
+    store64_be(lo_dst, n.low);
+}
+
+/* Branch-free unsigned comparison: returns 1 when a < b and 0 otherwise.
+ * The verdict ends up in the top bit of the mixed value, which the final
+ * shift moves down to bit 0. */
+inline static uint64_t
+FStar_UInt128_constant_time_carry(uint64_t a, uint64_t b)
+{
+    uint64_t diff = a - b;
+    uint64_t mix = (a ^ b) | (diff ^ b);
+    return (a ^ mix) >> (uint32_t)63U;
+}
+
+/* Alias of FStar_UInt128_constant_time_carry. */
+inline static uint64_t
+FStar_UInt128_carry(uint64_t a, uint64_t b)
+{
+    uint64_t carry_bit = FStar_UInt128_constant_time_carry(a, b);
+    return carry_bit;
+}
+
+/* 128-bit addition: add the low words, then the high words plus the carry
+ * out of the low addition. */
+inline static FStar_UInt128_uint128
+FStar_UInt128_add(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b)
+{
+#if HAS_OPTIMIZED
+    uint64_t sum_lo, sum_hi;
+
+    /* The first add produces the carry flag consumed by the second. */
+    unsigned char cf = _addcarry_u64(0, a.low, b.low, &sum_lo);
+    _addcarry_u64(cf, a.high, b.high, &sum_hi);
+    return _mm_set_epi64x(sum_hi, sum_lo);
+#else
+    uint64_t sum_lo = a.low + b.low;
+    /* The low sum wrapped exactly when it is smaller than an addend. */
+    uint64_t sum_hi = a.high + b.high + FStar_UInt128_carry(sum_lo, b.low);
+    return ((FStar_UInt128_uint128){.low = sum_lo, .high = sum_hi });
+#endif
+}
+
+/* Same computation as FStar_UInt128_add. */
+inline static FStar_UInt128_uint128
+FStar_UInt128_add_underspec(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b)
+{
+#if HAS_OPTIMIZED
+    return FStar_UInt128_add(a, b);
+#else
+    uint64_t sum_lo = a.low + b.low;
+    uint64_t sum_hi = a.high + b.high + FStar_UInt128_carry(sum_lo, b.low);
+    return ((FStar_UInt128_uint128){.low = sum_lo, .high = sum_hi });
+#endif
+}
+
+/* Same computation as FStar_UInt128_add. */
+inline static FStar_UInt128_uint128
+FStar_UInt128_add_mod(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b)
+{
+#if HAS_OPTIMIZED
+    return FStar_UInt128_add(a, b);
+#else
+    uint64_t sum_lo = a.low + b.low;
+    uint64_t sum_hi = a.high + b.high + FStar_UInt128_carry(sum_lo, b.low);
+    return ((FStar_UInt128_uint128){.low = sum_lo, .high = sum_hi });
+#endif
+}
+
+/* 128-bit subtraction: subtract the low words, then the high words minus
+ * the borrow out of the low subtraction. */
+inline static FStar_UInt128_uint128
+FStar_UInt128_sub(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b)
+{
+#if HAS_OPTIMIZED
+    uint64_t diff_lo, diff_hi;
+
+    /* The first subtract produces the borrow consumed by the second. */
+    unsigned char bf = _subborrow_u64(0, a.low, b.low, &diff_lo);
+    _subborrow_u64(bf, a.high, b.high, &diff_hi);
+    return _mm_set_epi64x(diff_hi, diff_lo);
+#else
+    uint64_t diff_lo = a.low - b.low;
+    /* A borrow occurred exactly when a.low is smaller than the wrapped difference. */
+    uint64_t diff_hi = a.high - b.high - FStar_UInt128_carry(a.low, diff_lo);
+    return ((FStar_UInt128_uint128){.low = diff_lo, .high = diff_hi });
+#endif
+}
+
+/* Same computation as FStar_UInt128_sub. */
+inline static FStar_UInt128_uint128
+FStar_UInt128_sub_underspec(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b)
+{
+#if HAS_OPTIMIZED
+    return FStar_UInt128_sub(a, b);
+#else
+    uint64_t diff_lo = a.low - b.low;
+    uint64_t diff_hi = a.high - b.high - FStar_UInt128_carry(a.low, diff_lo);
+    return ((FStar_UInt128_uint128){.low = diff_lo, .high = diff_hi });
+#endif
+}
+
+/* Portable subtraction on the two 64-bit words with an explicit borrow. */
+inline static FStar_UInt128_uint128
+FStar_UInt128_sub_mod_impl(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b)
+{
+    uint64_t diff_lo = a.low - b.low;
+    uint64_t diff_hi = a.high - b.high - FStar_UInt128_carry(a.low, diff_lo);
+    return ((FStar_UInt128_uint128){.low = diff_lo, .high = diff_hi });
+}
+
+/* Subtraction: intrinsic-based when available, otherwise the portable
+ * implementation above. */
+inline static FStar_UInt128_uint128
+FStar_UInt128_sub_mod(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b)
+{
+#if HAS_OPTIMIZED
+    FStar_UInt128_uint128 diff = FStar_UInt128_sub(a, b);
+#else
+    FStar_UInt128_uint128 diff = FStar_UInt128_sub_mod_impl(a, b);
+#endif
+    return diff;
+}
+
+/* Bitwise AND of two 128-bit values. */
+inline static FStar_UInt128_uint128
+FStar_UInt128_logand(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b)
+{
+#if HAS_OPTIMIZED
+    return _mm_and_si128(a, b);
+#else
+    uint64_t lo = a.low & b.low;
+    uint64_t hi = a.high & b.high;
+    return ((FStar_UInt128_uint128){.low = lo, .high = hi });
+#endif
+}
+
+/* Bitwise XOR of two 128-bit values. */
+inline static FStar_UInt128_uint128
+FStar_UInt128_logxor(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b)
+{
+#if HAS_OPTIMIZED
+    return _mm_xor_si128(a, b);
+#else
+    uint64_t lo = a.low ^ b.low;
+    uint64_t hi = a.high ^ b.high;
+    return ((FStar_UInt128_uint128){.low = lo, .high = hi });
+#endif
+}
+
+/* Bitwise OR of two 128-bit values. */
+inline static FStar_UInt128_uint128
+FStar_UInt128_logor(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b)
+{
+#if HAS_OPTIMIZED
+    return _mm_or_si128(a, b);
+#else
+    uint64_t lo = a.low | b.low;
+    uint64_t hi = a.high | b.high;
+    return ((FStar_UInt128_uint128){.low = lo, .high = hi });
+#endif
+}
+
+/* Bitwise complement of a 128-bit value. */
+inline static FStar_UInt128_uint128
+FStar_UInt128_lognot(FStar_UInt128_uint128 a)
+{
+#if HAS_OPTIMIZED
+    /* _mm_andnot_si128(x, y) computes (~x) & y, so calling it with (a, a)
+     * would always give zero. XOR with an all-ones vector flips every bit. */
+    return _mm_xor_si128(a, _mm_set1_epi32(-1));
+#else
+    return ((FStar_UInt128_uint128){.low = ~a.low, .high = ~a.high });
+#endif
+}
+
+static const uint32_t FStar_UInt128_u32_64 = (uint32_t)64U;
+
+/* High word of a 128-bit left shift by s: hi shifted left, plus the top s
+ * bits of lo that cross into the high word. s == 0 would shift lo by 64
+ * here; the visible caller handles s == 0 before reaching this helper. */
+inline static uint64_t
+FStar_UInt128_add_u64_shift_left(uint64_t hi, uint64_t lo, uint32_t s)
+{
+    uint64_t carried_in = lo >> (FStar_UInt128_u32_64 - s);
+    return (hi << s) + carried_in;
+}
+
+/* Alias of FStar_UInt128_add_u64_shift_left. */
+inline static uint64_t
+FStar_UInt128_add_u64_shift_left_respec(uint64_t hi, uint64_t lo, uint32_t s)
+{
+    uint64_t shifted = FStar_UInt128_add_u64_shift_left(hi, lo, s);
+    return shifted;
+}
+
+/* Left shift for shift amounts below 64; s == 0 returns a unchanged
+ * because the helper would otherwise shift a 64-bit word by 64. */
+inline static FStar_UInt128_uint128
+FStar_UInt128_shift_left_small(FStar_UInt128_uint128 a, uint32_t s)
+{
+    if (s != (uint32_t)0U) {
+        uint64_t new_lo = a.low << s;
+        uint64_t new_hi = FStar_UInt128_add_u64_shift_left_respec(a.high, a.low, s);
+        return ((FStar_UInt128_uint128){.low = new_lo, .high = new_hi });
+    }
+    return a;
+}
+
+/* Left shift for shift amounts of 64 and above: the low word becomes zero
+ * and the old low word, shifted by the remainder, becomes the high word. */
+inline static FStar_UInt128_uint128
+FStar_UInt128_shift_left_large(FStar_UInt128_uint128 a, uint32_t s)
+{
+    uint32_t remainder = s - FStar_UInt128_u32_64;
+    uint64_t new_hi = a.low << remainder;
+    return ((FStar_UInt128_uint128){.low = (uint64_t)0U, .high = new_hi });
+}
+
+/* 128-bit left shift by s bits. */
+inline static FStar_UInt128_uint128
+FStar_UInt128_shift_left(FStar_UInt128_uint128 a, uint32_t s)
+{
+#if HAS_OPTIMIZED
+    if (s == 0) {
+        return a;
+    }
+    if (s >= FStar_UInt128_u32_64) {
+        /* The low word moves entirely into the high word. */
+        return _mm_set_epi64x(a.low << (s - FStar_UInt128_u32_64), 0);
+    }
+    /* __shiftleft128 yields the high word of the shifted 128-bit value. */
+    uint64_t hi = __shiftleft128(a.low, a.high, (unsigned char)s);
+    uint64_t lo = a.low << s;
+    return _mm_set_epi64x(hi, lo);
+#else
+    return (s < FStar_UInt128_u32_64) ? FStar_UInt128_shift_left_small(a, s)
+                                      : FStar_UInt128_shift_left_large(a, s);
+#endif
+}
+
+/* Low word of a 128-bit right shift by s: lo shifted right, plus the bottom
+ * s bits of hi that cross into the low word. */
+inline static uint64_t
+FStar_UInt128_add_u64_shift_right(uint64_t hi, uint64_t lo, uint32_t s)
+{
+    uint64_t carried_in = hi << (FStar_UInt128_u32_64 - s);
+    return (lo >> s) + carried_in;
+}
+
+/* Alias of FStar_UInt128_add_u64_shift_right. */
+inline static uint64_t
+FStar_UInt128_add_u64_shift_right_respec(uint64_t hi, uint64_t lo, uint32_t s)
+{
+    uint64_t shifted = FStar_UInt128_add_u64_shift_right(hi, lo, s);
+    return shifted;
+}
+
+/* Right shift for shift amounts below 64; s == 0 returns a unchanged
+ * because the helper would otherwise shift a 64-bit word by 64. */
+inline static FStar_UInt128_uint128
+FStar_UInt128_shift_right_small(FStar_UInt128_uint128 a, uint32_t s)
+{
+    if (s != (uint32_t)0U) {
+        uint64_t new_lo = FStar_UInt128_add_u64_shift_right_respec(a.high, a.low, s);
+        uint64_t new_hi = a.high >> s;
+        return ((FStar_UInt128_uint128){.low = new_lo, .high = new_hi });
+    }
+    return a;
+}
+
+/* Right shift for shift amounts of 64 and above: the high word becomes zero
+ * and the old high word, shifted by the remainder, becomes the low word. */
+inline static FStar_UInt128_uint128
+FStar_UInt128_shift_right_large(FStar_UInt128_uint128 a, uint32_t s)
+{
+    uint32_t remainder = s - FStar_UInt128_u32_64;
+    uint64_t new_lo = a.high >> remainder;
+    return ((FStar_UInt128_uint128){.low = new_lo, .high = (uint64_t)0U });
+}
+
+/* 128-bit logical right shift by s bits. */
+inline static FStar_UInt128_uint128
+FStar_UInt128_shift_right(FStar_UInt128_uint128 a, uint32_t s)
+{
+#if HAS_OPTIMIZED
+    if (s == 0) {
+        return a;
+    }
+    if (s >= FStar_UInt128_u32_64) {
+        /* The high word moves entirely into the low word. */
+        return _mm_set_epi64x(0, a.high >> (s - FStar_UInt128_u32_64));
+    }
+    /* __shiftright128 yields the low word of the shifted 128-bit value. */
+    uint64_t lo = __shiftright128(a.low, a.high, (unsigned char)s);
+    uint64_t hi = a.high >> s;
+    return _mm_set_epi64x(hi, lo);
+#else
+    return (s < FStar_UInt128_u32_64) ? FStar_UInt128_shift_right_small(a, s)
+                                      : FStar_UInt128_shift_right_large(a, s);
+#endif
+}
+
+/* Relational operators on the two-word representation. The high words
+ * decide unless they are equal, in which case the low words decide. */
+inline static bool
+FStar_UInt128_eq(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b)
+{
+    if (a.low != b.low)
+        return false;
+    return a.high == b.high;
+}
+
+inline static bool
+FStar_UInt128_gt(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b)
+{
+    if (a.high != b.high)
+        return a.high > b.high;
+    return a.low > b.low;
+}
+
+inline static bool
+FStar_UInt128_lt(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b)
+{
+    if (a.high != b.high)
+        return a.high < b.high;
+    return a.low < b.low;
+}
+
+inline static bool
+FStar_UInt128_gte(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b)
+{
+    if (a.high != b.high)
+        return a.high > b.high;
+    return a.low >= b.low;
+}
+
+inline static bool
+FStar_UInt128_lte(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b)
+{
+    if (a.high != b.high)
+        return a.high < b.high;
+    return a.low <= b.low;
+}
+
+/* Equality mask over 128 bits: both words of the result carry the same
+ * mask, derived from comparing every part of a and b. */
+inline static FStar_UInt128_uint128
+FStar_UInt128_eq_mask(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b)
+{
+#if HAS_OPTIMIZED
+    /* Compare the four 32-bit lanes; each becomes 0 or 0xffffffff. */
+    __m128i lanes_eq = _mm_cmpeq_epi32(a, b);
+    /* AND every lane with its neighbour inside the same 64-bit half. */
+    __m128i halves_eq =
+        _mm_and_si128(lanes_eq, _mm_shuffle_epi32(lanes_eq, _MM_SHUFFLE(2, 3, 0, 1)));
+    /* AND the two 64-bit halves so every lane holds the combined result. */
+    return _mm_and_si128(halves_eq, _mm_shuffle_epi32(halves_eq, _MM_SHUFFLE(1, 0, 3, 2)));
+#else
+    uint64_t both = FStar_UInt64_eq_mask(a.low, b.low) &
+                    FStar_UInt64_eq_mask(a.high, b.high);
+    return ((FStar_UInt128_uint128){.low = both, .high = both });
+#endif
+}
+// ">=" mask over 128 bits, assembled from FStar_UInt64_gte_mask / FStar_UInt64_eq_mask of the two words; both result words get the same value.
+inline static FStar_UInt128_uint128
+FStar_UInt128_gte_mask(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b)
+{
+#if HAS_OPTIMIZED && 0 /* "&& 0" keeps this SSE variant compiled out. NOTE(review): _mm_cmpgt_epi32 is a signed compare; confirm that is the reason. */
+ // ge - compare 3,2,1,0 for >= and generating 0 or 0xffffffff for each
+ // eq - compare 3,2,1,0 for == and generating 0 or 0xffffffff for each
+ // slot 0 = ge0 | (eq0 & ge1) | (eq0 & eq1 & ge2) | (eq0 & eq1 & eq2 & ge3)
+ // then splat slot 0 to 3,2,1,0
+ __m128i gt = _mm_cmpgt_epi32(a, b);
+ __m128i eq = _mm_cmpeq_epi32(a, b);
+ __m128i ge = _mm_or_si128(gt, eq);
+ __m128i ge0 = ge;
+ __m128i eq0 = eq;
+ __m128i ge1 = _mm_srli_si128(ge, 4); // shift ge from 3,2,1,0 to 0x0,3,2,1
+ __m128i t1 = _mm_and_si128(eq0, ge1);
+ __m128i ret = _mm_or_si128(ge, t1); // ge0 | (eq0 & ge1) is now in 0
+ __m128i eq1 = _mm_srli_si128(eq, 4); // shift eq from 3,2,1,0 to 0x0,3,2,1
+ __m128i ge2 =
+ _mm_srli_si128(ge1, 4); // shift original ge from 3,2,1,0 to 0x0,0x0,3,2
+ __m128i t2 =
+ _mm_and_si128(eq0, _mm_and_si128(eq1, ge2)); // t2 = (eq0 & eq1 & ge2)
+ ret = _mm_or_si128(ret, t2);
+ __m128i eq2 = _mm_srli_si128(eq1, 4); // shift eq from 3,2,1,0 to 0x0,00,00,3
+ __m128i ge3 =
+ _mm_srli_si128(ge2, 4); // shift original ge from 3,2,1,0 to 0x0,0x0,0x0,3
+ __m128i t3 = _mm_and_si128(
+ eq0, _mm_and_si128(
+ eq1, _mm_and_si128(eq2, ge3))); // t3 = (eq0 & eq1 & eq2 & ge3)
+ ret = _mm_or_si128(ret, t3);
+ return _mm_shuffle_epi32(
+ ret,
+ _MM_SHUFFLE(0, 0, 0, 0)); // the result is in 0. Shuffle into all dwords.
+#else /* active path: (high words strictly greater) | (high words equal & low words >=) */
+ return ((FStar_UInt128_uint128){
+ .low = FStar_UInt64_gte_mask(a.high, b.high) &
+ ~FStar_UInt64_eq_mask(a.high, b.high) |
+ FStar_UInt64_eq_mask(a.high, b.high) &
+ FStar_UInt64_gte_mask(a.low, b.low),
+ .high = FStar_UInt64_gte_mask(a.high, b.high) &
+ ~FStar_UInt64_eq_mask(a.high, b.high) |
+ FStar_UInt64_eq_mask(a.high, b.high) &
+ FStar_UInt64_gte_mask(a.low, b.low) });
+#endif
+}
+
+/* Zero-extends a 64-bit value to 128 bits. */
+inline static FStar_UInt128_uint128
+FStar_UInt128_uint64_to_uint128(uint64_t a)
+{
+#if HAS_OPTIMIZED
+    /* _mm_set_epi64x takes (high, low). */
+    FStar_UInt128_uint128 widened = _mm_set_epi64x(0, a);
+    return widened;
+#else
+    return ((FStar_UInt128_uint128){.high = (uint64_t)0U, .low = a });
+#endif
+}
+
+/* Truncates a 128-bit value to its low 64 bits. */
+inline static uint64_t
+FStar_UInt128_uint128_to_uint64(FStar_UInt128_uint128 a)
+{
+    uint64_t lo = a.low;
+    return lo;
+}
+
+/* Keeps the low 32 bits of a (a mod 2^32). */
+inline static uint64_t
+FStar_UInt128_u64_mod_32(uint64_t a)
+{
+    const uint64_t low32_mask = (uint64_t)0xffffffffU;
+    return a & low32_mask;
+}
+
+/* Shift amount used to split and recombine 32-bit halves. Declared const,
+ * like FStar_UInt128_u32_64, so this header-level value cannot be modified. */
+static const uint32_t FStar_UInt128_u32_32 = (uint32_t)32U;
+
+/* Recombines two parts into one 64-bit word: lo + hi * 2^32, wrapping
+ * modulo 2^64. */
+inline static uint64_t
+FStar_UInt128_u32_combine(uint64_t hi, uint64_t lo)
+{
+    return lo + (hi << FStar_UInt128_u32_32);
+}
+
+/* 64x32 -> 128-bit product. */
+inline static FStar_UInt128_uint128
+FStar_UInt128_mul32(uint64_t x, uint32_t y)
+{
+#if HAS_OPTIMIZED
+    uint64_t hi;
+    uint64_t lo = _umul128(x, (uint64_t)y, &hi);
+    return _mm_set_epi64x(hi, lo);
+#else
+    uint64_t y64 = (uint64_t)y;
+    /* Product of the low 32 bits of x with y. */
+    uint64_t lo_prod = FStar_UInt128_u64_mod_32(x) * y64;
+    /* Product of the high 32 bits of x with y, plus the carry from lo_prod. */
+    uint64_t mid = (x >> FStar_UInt128_u32_32) * y64 + (lo_prod >> FStar_UInt128_u32_32);
+    return ((FStar_UInt128_uint128){
+        .low = FStar_UInt128_u32_combine(mid, FStar_UInt128_u64_mod_32(lo_prod)),
+        .high = mid >> FStar_UInt128_u32_32 });
+#endif
+}
+
+/* Note: static headers bring scope collision issues when they define types!
+ * Because now client (kremlin-generated) code will include this header and
+ * there might be type collisions if the client code uses quadruples of uint64s.
+ * So, we cannot use the kremlin-generated name.
+ *
+ * The fields hold the intermediate values produced by
+ * FStar_UInt128_mul_wide_impl_t_(x, y) in this file. */
+typedef struct K_quad_s {
+ uint64_t fst; /* low 32 bits of x */
+ uint64_t snd; /* low 32 bits of (low32(x) * low32(y)) */
+ uint64_t thd; /* x >> 32 */
+ uint64_t f3; /* (x >> 32) * low32(y) + ((low32(x) * low32(y)) >> 32) */
+} K_quad;
+
+/* First stage of the portable 64x64 multiplication: splits x into 32-bit
+ * halves and forms the partial products that involve the low half of y. */
+inline static K_quad
+FStar_UInt128_mul_wide_impl_t_(uint64_t x, uint64_t y)
+{
+    uint64_t x_lo = FStar_UInt128_u64_mod_32(x);
+    uint64_t y_lo = FStar_UInt128_u64_mod_32(y);
+    uint64_t x_hi = x >> FStar_UInt128_u32_32;
+    uint64_t lo_prod = x_lo * y_lo;
+    return ((K_quad){
+        .fst = x_lo,
+        .snd = FStar_UInt128_u64_mod_32(lo_prod),
+        .thd = x_hi,
+        .f3 = x_hi * y_lo + (lo_prod >> FStar_UInt128_u32_32) });
+}
+
+/* Recombines two parts into one 64-bit word: lo + hi * 2^32, wrapping
+ * modulo 2^64. Marked inline like every other helper in this header, so an
+ * including translation unit that never calls it does not get an
+ * unused-function warning. */
+inline static uint64_t
+FStar_UInt128_u32_combine_(uint64_t hi, uint64_t lo)
+{
+    return lo + (hi << FStar_UInt128_u32_32);
+}
+
+/* Portable 64x64 -> 128-bit multiplication from 32-bit partial products:
+ * finishes the work started in FStar_UInt128_mul_wide_impl_t_ by adding
+ * the partial products that involve the high half of y. */
+inline static FStar_UInt128_uint128
+FStar_UInt128_mul_wide_impl(uint64_t x, uint64_t y)
+{
+    K_quad parts = FStar_UInt128_mul_wide_impl_t_(x, y);
+    uint64_t y_hi = y >> FStar_UInt128_u32_32;
+    /* low32(x) * high32(y) plus the low half of the earlier cross term. */
+    uint64_t cross = parts.fst * y_hi + FStar_UInt128_u64_mod_32(parts.f3);
+    return ((FStar_UInt128_uint128){
+        .low = FStar_UInt128_u32_combine_(cross, parts.snd),
+        .high = parts.thd * y_hi + (parts.f3 >> FStar_UInt128_u32_32) +
+                (cross >> FStar_UInt128_u32_32) });
+}
+
+/* Full 64x64 -> 128-bit product: _umul128 when available, otherwise the
+ * portable implementation. */
+inline static FStar_UInt128_uint128
+FStar_UInt128_mul_wide(uint64_t x, uint64_t y)
+{
+#if HAS_OPTIMIZED
+    uint64_t hi;
+    /* _umul128 returns the low word and stores the high word through &hi. */
+    uint64_t lo = _umul128(x, y, &hi);
+    return _mm_set_epi64x(hi, lo);
+#else
+    FStar_UInt128_uint128 product = FStar_UInt128_mul_wide_impl(x, y);
+    return product;
+#endif
+}
diff --git a/security/nss/lib/freebl/verified/kremlin/kremlib/dist/minimal/fstar_uint128_struct_endianness.h b/security/nss/lib/freebl/verified/kremlin/kremlib/dist/minimal/fstar_uint128_struct_endianness.h
new file mode 100644
index 0000000000..61fe85c49e
--- /dev/null
+++ b/security/nss/lib/freebl/verified/kremlin/kremlib/dist/minimal/fstar_uint128_struct_endianness.h
@@ -0,0 +1,84 @@
+/* Copyright (c) INRIA and Microsoft Corporation. All rights reserved.
+ Licensed under the Apache 2.0 License. */
+
+#ifndef FSTAR_UINT128_STRUCT_ENDIANNESS_H
+#define FSTAR_UINT128_STRUCT_ENDIANNESS_H
+
+/* Hand-written implementation of endianness-related uint128 functions
+ * for the extracted uint128 implementation */
+
+/* Access 64-bit fields within the int128. x is a pointer to the value; each
+ * macro expands to an lvalue naming the corresponding struct member. */
+#define HIGH64_OF(x) ((x)->high)
+#define LOW64_OF(x) ((x)->low)
+
+/* A series of definitions written using pointers. */
+
+/* Reads 16 little-endian bytes at b into *r (low word first). */
+inline static void
+load128_le_(uint8_t *b, uint128_t *r)
+{
+    r->low = load64_le(b);
+    r->high = load64_le(b + 8);
+}
+
+/* Writes *n to the 16 bytes at b in little-endian order (low word first). */
+inline static void
+store128_le_(uint8_t *b, uint128_t *n)
+{
+    store64_le(b, n->low);
+    store64_le(b + 8, n->high);
+}
+
+/* Reads 16 big-endian bytes at b into *r (high word first). */
+inline static void
+load128_be_(uint8_t *b, uint128_t *r)
+{
+    r->high = load64_be(b);
+    r->low = load64_be(b + 8);
+}
+
+/* Writes *n to the 16 bytes at b in big-endian order (high word first). */
+inline static void
+store128_be_(uint8_t *b, uint128_t *n)
+{
+    store64_be(b, n->high);
+    store64_be(b + 8, n->low);
+}
+
+#ifndef KRML_NOSTRUCT_PASSING
+
+/* By-value wrappers around the pointer-based helpers. */
+inline static uint128_t
+load128_le(uint8_t *b)
+{
+    uint128_t value;
+    load128_le_(b, &value);
+    return value;
+}
+
+inline static void
+store128_le(uint8_t *b, uint128_t n)
+{
+    uint128_t *src = &n;
+    store128_le_(b, src);
+}
+
+inline static uint128_t
+load128_be(uint8_t *b)
+{
+    uint128_t value;
+    load128_be_(b, &value);
+    return value;
+}
+
+inline static void
+store128_be(uint8_t *b, uint128_t n)
+{
+    uint128_t *src = &n;
+    store128_be_(b, src);
+}
+
+#else /* defined(KRML_NOSTRUCT_PASSING): the plain names alias the pointer-based variants */
+
+#define print128 print128_
+#define load128_le load128_le_
+#define store128_le store128_le_
+#define load128_be load128_be_
+#define store128_be store128_be_
+
+#endif /* KRML_NOSTRUCT_PASSING */
+
+#endif
diff --git a/security/nss/lib/freebl/verified/libintvector.h b/security/nss/lib/freebl/verified/libintvector.h
new file mode 100644
index 0000000000..24a2217860
--- /dev/null
+++ b/security/nss/lib/freebl/verified/libintvector.h
@@ -0,0 +1,586 @@
+#ifndef __Vec_Intrin_H
+#define __Vec_Intrin_H
+
+#include <sys/types.h>
+// Negation of the low bit of x: 0 when the bit is clear, otherwise the negation of 1 in the type of the expression.
+#define Lib_IntVector_Intrinsics_bit_mask64(x) -((x)&1)
+
+#if defined(__x86_64__) || defined(_M_X64)
+
+// The following functions are only available on machines that support Intel AVX
+
+#include <emmintrin.h>
+#include <tmmintrin.h>
+#include <smmintrin.h>
+// 128-bit integer vector; a direct alias of __m128i.
+typedef __m128i Lib_IntVector_Intrinsics_vec128;
+// AES round, last-round and key-generation helpers: thin wrappers over the _mm_aes* intrinsics.
+#define Lib_IntVector_Intrinsics_ni_aes_enc(x0, x1) \
+ (_mm_aesenc_si128(x0, x1))
+
+#define Lib_IntVector_Intrinsics_ni_aes_enc_last(x0, x1) \
+ (_mm_aesenclast_si128(x0, x1))
+
+#define Lib_IntVector_Intrinsics_ni_aes_keygen_assist(x0, x1) \
+ (_mm_aeskeygenassist_si128(x0, x1))
+// Carry-less multiply; x2 is the immediate selecting which 64-bit halves are multiplied.
+#define Lib_IntVector_Intrinsics_ni_clmul(x0, x1, x2) \
+ (_mm_clmulepi64_si128(x0, x1, x2))
+// Bitwise logic and per-lane comparisons.
+#define Lib_IntVector_Intrinsics_vec128_xor(x0, x1) \
+ (_mm_xor_si128(x0, x1))
+
+#define Lib_IntVector_Intrinsics_vec128_eq64(x0, x1) \
+ (_mm_cmpeq_epi64(x0, x1))
+
+#define Lib_IntVector_Intrinsics_vec128_eq32(x0, x1) \
+ (_mm_cmpeq_epi32(x0, x1))
+// The gt variants map to _mm_cmpgt_*, which compare lanes as signed integers.
+#define Lib_IntVector_Intrinsics_vec128_gt64(x0, x1) \
+ (_mm_cmpgt_epi64(x0, x1))
+
+#define Lib_IntVector_Intrinsics_vec128_gt32(x0, x1) \
+ (_mm_cmpgt_epi32(x0, x1))
+
+#define Lib_IntVector_Intrinsics_vec128_or(x0, x1) \
+ (_mm_or_si128(x0, x1))
+
+#define Lib_IntVector_Intrinsics_vec128_and(x0, x1) \
+ (_mm_and_si128(x0, x1))
+// Complement, computed as XOR with an all-ones vector.
+#define Lib_IntVector_Intrinsics_vec128_lognot(x0) \
+ (_mm_xor_si128(x0, _mm_set1_epi32(-1)))
+// Whole-register shifts: x1 is a bit count, divided by 8 because the intrinsic shifts by bytes.
+#define Lib_IntVector_Intrinsics_vec128_shift_left(x0, x1) \
+ (_mm_slli_si128(x0, (x1) / 8))
+
+#define Lib_IntVector_Intrinsics_vec128_shift_right(x0, x1) \
+ (_mm_srli_si128(x0, (x1) / 8))
+// Logical shifts of every 64-bit or 32-bit lane by x1 bits.
+#define Lib_IntVector_Intrinsics_vec128_shift_left64(x0, x1) \
+ (_mm_slli_epi64(x0, x1))
+
+#define Lib_IntVector_Intrinsics_vec128_shift_right64(x0, x1) \
+ (_mm_srli_epi64(x0, x1))
+
+#define Lib_IntVector_Intrinsics_vec128_shift_left32(x0, x1) \
+ (_mm_slli_epi32(x0, x1))
+
+#define Lib_IntVector_Intrinsics_vec128_shift_right32(x0, x1) \
+ (_mm_srli_epi32(x0, x1))
+// Rotations of every 32-bit lane by 8, 16 or 24 bits are byte permutations, done with a single _mm_shuffle_epi8.
+#define Lib_IntVector_Intrinsics_vec128_rotate_left32_8(x0) \
+ (_mm_shuffle_epi8(x0, _mm_set_epi8(14, 13, 12, 15, 10, 9, 8, 11, 6, 5, 4, 7, 2, 1, 0, 3)))
+
+#define Lib_IntVector_Intrinsics_vec128_rotate_left32_16(x0) \
+ (_mm_shuffle_epi8(x0, _mm_set_epi8(13, 12, 15, 14, 9, 8, 11, 10, 5, 4, 7, 6, 1, 0, 3, 2)))
+
+#define Lib_IntVector_Intrinsics_vec128_rotate_left32_24(x0) \
+ (_mm_shuffle_epi8(x0, _mm_set_epi8(12, 15, 14, 13, 8, 11, 10, 9, 4, 7, 6, 5, 0, 3, 2, 1)))
+// Generic rotate-left: byte shuffle for 8/16/24, otherwise a left shift combined with the complementary right shift.
+#define Lib_IntVector_Intrinsics_vec128_rotate_left32(x0, x1) \
+ (((x1) == 8 ? Lib_IntVector_Intrinsics_vec128_rotate_left32_8(x0) : ((x1) == 16 ? Lib_IntVector_Intrinsics_vec128_rotate_left32_16(x0) : ((x1) == 24 ? Lib_IntVector_Intrinsics_vec128_rotate_left32_24(x0) : _mm_xor_si128(_mm_slli_epi32(x0, x1), _mm_srli_epi32(x0, 32 - (x1)))))))
+// Rotate right by x1 is expressed as rotate left by 32 - x1.
+#define Lib_IntVector_Intrinsics_vec128_rotate_right32(x0, x1) \
+ (Lib_IntVector_Intrinsics_vec128_rotate_left32(x0, 32 - (x1)))
+
+// Lane permutations built on _mm_shuffle_epi32. Lane-index arguments are
+// parenthesized wherever they take part in arithmetic, so that an
+// expression argument such as (i + 1) expands with the intended precedence.
+#define Lib_IntVector_Intrinsics_vec128_shuffle32(x0, x1, x2, x3, x4) \
+    (_mm_shuffle_epi32(x0, _MM_SHUFFLE(x4, x3, x2, x1)))
+
+// Each 64-bit lane index expands to the pair of 32-bit lanes 2*i and 2*i + 1.
+#define Lib_IntVector_Intrinsics_vec128_shuffle64(x0, x1, x2) \
+    (_mm_shuffle_epi32(x0, _MM_SHUFFLE(2 * (x1) + 1, 2 * (x1), 2 * (x2) + 1, 2 * (x2))))
+
+// Result lane i takes source lane (x1 + i) mod 4.
+#define Lib_IntVector_Intrinsics_vec128_rotate_right_lanes32(x0, x1) \
+    (_mm_shuffle_epi32(x0, _MM_SHUFFLE(((x1) + 3) % 4, ((x1) + 2) % 4, ((x1) + 1) % 4, (x1) % 4)))
+
+// Same rotation in units of 64-bit lanes (two 32-bit lanes per step).
+#define Lib_IntVector_Intrinsics_vec128_rotate_right_lanes64(x0, x1) \
+    (_mm_shuffle_epi32(x0, _MM_SHUFFLE((2 * (x1) + 3) % 4, (2 * (x1) + 2) % 4, (2 * (x1) + 1) % 4, (2 * (x1)) % 4)))
+// Unaligned 16-byte load and store in memory order.
+#define Lib_IntVector_Intrinsics_vec128_load_le(x0) \
+ (_mm_loadu_si128((__m128i*)(x0)))
+
+#define Lib_IntVector_Intrinsics_vec128_store_le(x0, x1) \
+ (_mm_storeu_si128((__m128i*)(x0), x1))
+// Big-endian variants: byte-reverse the whole vector, or each 32-/64-bit lane, with _mm_shuffle_epi8.
+#define Lib_IntVector_Intrinsics_vec128_load_be(x0) \
+ (_mm_shuffle_epi8(_mm_loadu_si128((__m128i*)(x0)), _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15)))
+
+#define Lib_IntVector_Intrinsics_vec128_load32_be(x0) \
+ (_mm_shuffle_epi8(_mm_loadu_si128((__m128i*)(x0)), _mm_set_epi8(12, 13, 14, 15, 8, 9, 10, 11, 4, 5, 6, 7, 0, 1, 2, 3)))
+
+#define Lib_IntVector_Intrinsics_vec128_load64_be(x0) \
+ (_mm_shuffle_epi8(_mm_loadu_si128((__m128i*)(x0)), _mm_set_epi8(8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7)))
+
+#define Lib_IntVector_Intrinsics_vec128_store_be(x0, x1) \
+ (_mm_storeu_si128((__m128i*)(x0), _mm_shuffle_epi8(x1, _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15))))
+
+#define Lib_IntVector_Intrinsics_vec128_store32_be(x0, x1) \
+ (_mm_storeu_si128((__m128i*)(x0), _mm_shuffle_epi8(x1, _mm_set_epi8(12, 13, 14, 15, 8, 9, 10, 11, 4, 5, 6, 7, 0, 1, 2, 3))))
+
+#define Lib_IntVector_Intrinsics_vec128_store64_be(x0, x1) \
+ (_mm_storeu_si128((__m128i*)(x0), _mm_shuffle_epi8(x1, _mm_set_epi8(8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7))))
+// Lane insert/extract; the last argument is the lane index handed straight to the intrinsic.
+#define Lib_IntVector_Intrinsics_vec128_insert8(x0, x1, x2) \
+ (_mm_insert_epi8(x0, x1, x2))
+
+#define Lib_IntVector_Intrinsics_vec128_insert32(x0, x1, x2) \
+ (_mm_insert_epi32(x0, x1, x2))
+
+#define Lib_IntVector_Intrinsics_vec128_insert64(x0, x1, x2) \
+ (_mm_insert_epi64(x0, x1, x2))
+
+#define Lib_IntVector_Intrinsics_vec128_extract8(x0, x1) \
+ (_mm_extract_epi8(x0, x1))
+
+#define Lib_IntVector_Intrinsics_vec128_extract32(x0, x1) \
+ (_mm_extract_epi32(x0, x1))
+
+#define Lib_IntVector_Intrinsics_vec128_extract64(x0, x1) \
+ (_mm_extract_epi64(x0, x1))
+// All-zero vector.
+#define Lib_IntVector_Intrinsics_vec128_zero \
+ (_mm_setzero_si128())
+// Lane-wise 64-bit addition and subtraction.
+#define Lib_IntVector_Intrinsics_vec128_add64(x0, x1) \
+ (_mm_add_epi64(x0, x1))
+
+#define Lib_IntVector_Intrinsics_vec128_sub64(x0, x1) \
+ (_mm_sub_epi64(x0, x1))
+// mul64/smul64 map to _mm_mul_epu32, which multiplies only the low 32 bits of each 64-bit lane.
+#define Lib_IntVector_Intrinsics_vec128_mul64(x0, x1) \
+ (_mm_mul_epu32(x0, x1))
+
+#define Lib_IntVector_Intrinsics_vec128_smul64(x0, x1) \
+ (_mm_mul_epu32(x0, _mm_set1_epi64x(x1)))
+
+#define Lib_IntVector_Intrinsics_vec128_add32(x0, x1) \
+ (_mm_add_epi32(x0, x1))
+
+#define Lib_IntVector_Intrinsics_vec128_sub32(x0, x1) \
+ (_mm_sub_epi32(x0, x1))
+// 32-bit lane multiply keeping the low 32 bits of each product; smul32 broadcasts the scalar first.
+#define Lib_IntVector_Intrinsics_vec128_mul32(x0, x1) \
+ (_mm_mullo_epi32(x0, x1))
+
+#define Lib_IntVector_Intrinsics_vec128_smul32(x0, x1) \
+ (_mm_mullo_epi32(x0, _mm_set1_epi32(x1)))
+
+// Reinterprets a 128-bit value as a vector. The argument is parenthesized so
+// that the cast applies to the whole expression, not just its first operand.
+#define Lib_IntVector_Intrinsics_vec128_load128(x) \
+    ((__m128i)(x))
+
+// Broadcasts one 64-bit value to both lanes.
+#define Lib_IntVector_Intrinsics_vec128_load64(x) \
+    (_mm_set1_epi64x(x)) /* hi lo */
+
+// Builds a vector from two 64-bit values; x0 becomes the low lane.
+#define Lib_IntVector_Intrinsics_vec128_load64s(x0, x1) \
+    (_mm_set_epi64x(x1, x0)) /* hi lo */
+
+// Broadcasts one 32-bit value to all four lanes.
+#define Lib_IntVector_Intrinsics_vec128_load32(x) \
+    (_mm_set1_epi32(x))
+
+// Builds a vector from four 32-bit values; x0 becomes the lowest lane.
+#define Lib_IntVector_Intrinsics_vec128_load32s(x0, x1, x2, x3) \
+    (_mm_set_epi32(x3, x2, x1, x0)) /* hi lo */
+// Interleave (unpack) the low or high 32-/64-bit lanes of x1 and x2.
+#define Lib_IntVector_Intrinsics_vec128_interleave_low32(x1, x2) \
+ (_mm_unpacklo_epi32(x1, x2))
+
+#define Lib_IntVector_Intrinsics_vec128_interleave_high32(x1, x2) \
+ (_mm_unpackhi_epi32(x1, x2))
+
+#define Lib_IntVector_Intrinsics_vec128_interleave_low64(x1, x2) \
+ (_mm_unpacklo_epi64(x1, x2))
+
+#define Lib_IntVector_Intrinsics_vec128_interleave_high64(x1, x2) \
+ (_mm_unpackhi_epi64(x1, x2))
+
+// The following functions are only available on machines that support Intel AVX2
+
+#include <immintrin.h>
+#include <wmmintrin.h>
+// 256-bit integer vector; a direct alias of __m256i.
+typedef __m256i Lib_IntVector_Intrinsics_vec256;
+// Per-lane comparisons; the gt variants map to the signed _mm256_cmpgt_* intrinsics.
+#define Lib_IntVector_Intrinsics_vec256_eq64(x0, x1) \
+ (_mm256_cmpeq_epi64(x0, x1))
+
+#define Lib_IntVector_Intrinsics_vec256_eq32(x0, x1) \
+ (_mm256_cmpeq_epi32(x0, x1))
+
+#define Lib_IntVector_Intrinsics_vec256_gt64(x0, x1) \
+ (_mm256_cmpgt_epi64(x0, x1))
+
+#define Lib_IntVector_Intrinsics_vec256_gt32(x0, x1) \
+ (_mm256_cmpgt_epi32(x0, x1))
+// Bitwise logic; lognot is XOR with an all-ones vector.
+#define Lib_IntVector_Intrinsics_vec256_xor(x0, x1) \
+ (_mm256_xor_si256(x0, x1))
+
+#define Lib_IntVector_Intrinsics_vec256_or(x0, x1) \
+ (_mm256_or_si256(x0, x1))
+
+#define Lib_IntVector_Intrinsics_vec256_and(x0, x1) \
+ (_mm256_and_si256(x0, x1))
+
+#define Lib_IntVector_Intrinsics_vec256_lognot(x0) \
+ (_mm256_xor_si256(x0, _mm256_set1_epi32(-1)))
+// Byte shifts (x1 in bits, divided by 8); _mm256_slli_si256/_mm256_srli_si256 shift each 128-bit half independently.
+#define Lib_IntVector_Intrinsics_vec256_shift_left(x0, x1) \
+ (_mm256_slli_si256(x0, (x1) / 8))
+
+#define Lib_IntVector_Intrinsics_vec256_shift_right(x0, x1) \
+ (_mm256_srli_si256(x0, (x1) / 8))
+// Logical shifts of every 64-bit or 32-bit lane by x1 bits.
+#define Lib_IntVector_Intrinsics_vec256_shift_left64(x0, x1) \
+ (_mm256_slli_epi64(x0, x1))
+
+#define Lib_IntVector_Intrinsics_vec256_shift_right64(x0, x1) \
+ (_mm256_srli_epi64(x0, x1))
+
+#define Lib_IntVector_Intrinsics_vec256_shift_left32(x0, x1) \
+ (_mm256_slli_epi32(x0, x1))
+
+#define Lib_IntVector_Intrinsics_vec256_shift_right32(x0, x1) \
+ (_mm256_srli_epi32(x0, x1))
+// Byte-permutation rotations of every 32-bit lane by 8, 16 or 24 bits; the pattern repeats for both 128-bit halves.
+#define Lib_IntVector_Intrinsics_vec256_rotate_left32_8(x0) \
+ (_mm256_shuffle_epi8(x0, _mm256_set_epi8(14, 13, 12, 15, 10, 9, 8, 11, 6, 5, 4, 7, 2, 1, 0, 3, 14, 13, 12, 15, 10, 9, 8, 11, 6, 5, 4, 7, 2, 1, 0, 3)))
+
+#define Lib_IntVector_Intrinsics_vec256_rotate_left32_16(x0) \
+ (_mm256_shuffle_epi8(x0, _mm256_set_epi8(13, 12, 15, 14, 9, 8, 11, 10, 5, 4, 7, 6, 1, 0, 3, 2, 13, 12, 15, 14, 9, 8, 11, 10, 5, 4, 7, 6, 1, 0, 3, 2)))
+
+#define Lib_IntVector_Intrinsics_vec256_rotate_left32_24(x0) \
+ (_mm256_shuffle_epi8(x0, _mm256_set_epi8(12, 15, 14, 13, 8, 11, 10, 9, 4, 7, 6, 5, 0, 3, 2, 1, 12, 15, 14, 13, 8, 11, 10, 9, 4, 7, 6, 5, 0, 3, 2, 1)))
+
+// Rotate every 32-bit lane left by x1 bits: byte shuffle for 8/16/24,
+// otherwise a left shift combined with the complementary right shift.
+// x1 is parenthesized in the comparisons, as in the vec128 version, so an
+// expression argument keeps its intended precedence.
+#define Lib_IntVector_Intrinsics_vec256_rotate_left32(x0, x1) \
+    (((x1) == 8 ? Lib_IntVector_Intrinsics_vec256_rotate_left32_8(x0) : ((x1) == 16 ? Lib_IntVector_Intrinsics_vec256_rotate_left32_16(x0) : ((x1) == 24 ? Lib_IntVector_Intrinsics_vec256_rotate_left32_24(x0) : _mm256_or_si256(_mm256_slli_epi32(x0, x1), _mm256_srli_epi32(x0, 32 - (x1)))))))
+
+// Rotate right by x1 is expressed as rotate left by 32 - x1.
+#define Lib_IntVector_Intrinsics_vec256_rotate_right32(x0, x1) \
+    (Lib_IntVector_Intrinsics_vec256_rotate_left32(x0, 32 - (x1)))
+// Byte-permutation rotations of every 64-bit lane to the right by 8, 16, ..., 56 bits; the pattern repeats for both 128-bit halves.
+#define Lib_IntVector_Intrinsics_vec256_rotate_right64_8(x0) \
+ (_mm256_shuffle_epi8(x0, _mm256_set_epi8(8, 15, 14, 13, 12, 11, 10, 9, 0, 7, 6, 5, 4, 3, 2, 1, 8, 15, 14, 13, 12, 11, 10, 9, 0, 7, 6, 5, 4, 3, 2, 1)))
+
+#define Lib_IntVector_Intrinsics_vec256_rotate_right64_16(x0) \
+ (_mm256_shuffle_epi8(x0, _mm256_set_epi8(9, 8, 15, 14, 13, 12, 11, 10, 1, 0, 7, 6, 5, 4, 3, 2, 9, 8, 15, 14, 13, 12, 11, 10, 1, 0, 7, 6, 5, 4, 3, 2)))
+
+#define Lib_IntVector_Intrinsics_vec256_rotate_right64_24(x0) \
+ (_mm256_shuffle_epi8(x0, _mm256_set_epi8(10, 9, 8, 15, 14, 13, 12, 11, 2, 1, 0, 7, 6, 5, 4, 3, 10, 9, 8, 15, 14, 13, 12, 11, 2, 1, 0, 7, 6, 5, 4, 3)))
+
+#define Lib_IntVector_Intrinsics_vec256_rotate_right64_32(x0) \
+ (_mm256_shuffle_epi8(x0, _mm256_set_epi8(11, 10, 9, 8, 15, 14, 13, 12, 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12, 3, 2, 1, 0, 7, 6, 5, 4)))
+
+#define Lib_IntVector_Intrinsics_vec256_rotate_right64_40(x0) \
+ (_mm256_shuffle_epi8(x0, _mm256_set_epi8(12, 11, 10, 9, 8, 15, 14, 13, 4, 3, 2, 1, 0, 7, 6, 5, 12, 11, 10, 9, 8, 15, 14, 13, 4, 3, 2, 1, 0, 7, 6, 5)))
+
+#define Lib_IntVector_Intrinsics_vec256_rotate_right64_48(x0) \
+ (_mm256_shuffle_epi8(x0, _mm256_set_epi8(13, 12, 11, 10, 9, 8, 15, 14, 5, 4, 3, 2, 1, 0, 7, 6, 13, 12, 11, 10, 9, 8, 15, 14, 5, 4, 3, 2, 1, 0, 7, 6)))
+
+#define Lib_IntVector_Intrinsics_vec256_rotate_right64_56(x0) \
+ (_mm256_shuffle_epi8(x0, _mm256_set_epi8(14, 13, 12, 11, 10, 9, 8, 15, 6, 5, 4, 3, 2, 1, 0, 7, 14, 13, 12, 11, 10, 9, 8, 15, 6, 5, 4, 3, 2, 1, 0, 7)))
+
+// Rotate every 64-bit lane right by x1 bits: byte shuffle for multiples of 8
+// up to 56, otherwise a right shift combined with the complementary left
+// shift. x1 is parenthesized in each comparison so an expression argument
+// keeps its intended precedence.
+#define Lib_IntVector_Intrinsics_vec256_rotate_right64(x0, x1) \
+    (((x1) == 8 ? Lib_IntVector_Intrinsics_vec256_rotate_right64_8(x0) : ((x1) == 16 ? Lib_IntVector_Intrinsics_vec256_rotate_right64_16(x0) : ((x1) == 24 ? Lib_IntVector_Intrinsics_vec256_rotate_right64_24(x0) : ((x1) == 32 ? Lib_IntVector_Intrinsics_vec256_rotate_right64_32(x0) : ((x1) == 40 ? Lib_IntVector_Intrinsics_vec256_rotate_right64_40(x0) : ((x1) == 48 ? Lib_IntVector_Intrinsics_vec256_rotate_right64_48(x0) : ((x1) == 56 ? Lib_IntVector_Intrinsics_vec256_rotate_right64_56(x0) : _mm256_xor_si256(_mm256_srli_epi64((x0), (x1)), _mm256_slli_epi64((x0), (64 - (x1))))))))))))
+
+// Rotate left by x1 is expressed as rotate right by 64 - x1.
+#define Lib_IntVector_Intrinsics_vec256_rotate_left64(x0, x1) \
+    (Lib_IntVector_Intrinsics_vec256_rotate_right64(x0, 64 - (x1)))
+
+// Lane permutations across the full 256-bit vector.
+#define Lib_IntVector_Intrinsics_vec256_shuffle64(x0, x1, x2, x3, x4) \
+    (_mm256_permute4x64_epi64(x0, _MM_SHUFFLE(x4, x3, x2, x1)))
+
+#define Lib_IntVector_Intrinsics_vec256_shuffle32(x0, x1, x2, x3, x4, x5, x6, x7, x8) \
+    (_mm256_permutevar8x32_epi32(x0, _mm256_set_epi32(x8, x7, x6, x5, x4, x3, x2, x1)))
+
+// Result lane i takes source lane (x1 + i) mod 8. Every index is reduced
+// modulo 8, including the "+ 3" entry, and x1 is parenthesized so an
+// expression argument keeps its intended precedence.
+#define Lib_IntVector_Intrinsics_vec256_rotate_right_lanes32(x0, x1) \
+    (_mm256_permutevar8x32_epi32(x0, _mm256_set_epi32(((x1) + 7) % 8, ((x1) + 6) % 8, ((x1) + 5) % 8, ((x1) + 4) % 8, ((x1) + 3) % 8, ((x1) + 2) % 8, ((x1) + 1) % 8, (x1) % 8)))
+
+// Result lane i takes source 64-bit lane (x1 + i) mod 4.
+#define Lib_IntVector_Intrinsics_vec256_rotate_right_lanes64(x0, x1) \
+    (_mm256_permute4x64_epi64(x0, _MM_SHUFFLE(((x1) + 3) % 4, ((x1) + 2) % 4, ((x1) + 1) % 4, (x1) % 4)))
+
+#define Lib_IntVector_Intrinsics_vec256_load_le(x0) \
+ (_mm256_loadu_si256((__m256i*)(x0)))
+
+#define Lib_IntVector_Intrinsics_vec256_load32_be(x0) \
+ (_mm256_shuffle_epi8(_mm256_loadu_si256((__m256i*)(x0)), _mm256_set_epi8(12, 13, 14, 15, 8, 9, 10, 11, 4, 5, 6, 7, 0, 1, 2, 3, 12, 13, 14, 15, 8, 9, 10, 11, 4, 5, 6, 7, 0, 1, 2, 3)))
+
+#define Lib_IntVector_Intrinsics_vec256_load64_be(x0) \
+ (_mm256_shuffle_epi8(_mm256_loadu_si256((__m256i*)(x0)), _mm256_set_epi8(8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7)))
+
+#define Lib_IntVector_Intrinsics_vec256_store_le(x0, x1) \
+ (_mm256_storeu_si256((__m256i*)(x0), x1))
+
+#define Lib_IntVector_Intrinsics_vec256_store32_be(x0, x1) \
+ (_mm256_storeu_si256((__m256i*)(x0), _mm256_shuffle_epi8(x1, _mm256_set_epi8(12, 13, 14, 15, 8, 9, 10, 11, 4, 5, 6, 7, 0, 1, 2, 3, 12, 13, 14, 15, 8, 9, 10, 11, 4, 5, 6, 7, 0, 1, 2, 3))))
+
+#define Lib_IntVector_Intrinsics_vec256_store64_be(x0, x1) \
+ (_mm256_storeu_si256((__m256i*)(x0), _mm256_shuffle_epi8(x1, _mm256_set_epi8(8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7))))
+
+/* insertN(x0, x1, x2): copy of x0 with the N-bit element at index x2 replaced by x1. */
+#define Lib_IntVector_Intrinsics_vec256_insert8(x0, x1, x2) \
+ (_mm256_insert_epi8((x0), (x1), (x2)))
+
+#define Lib_IntVector_Intrinsics_vec256_insert32(x0, x1, x2) \
+ (_mm256_insert_epi32((x0), (x1), (x2)))
+
+#define Lib_IntVector_Intrinsics_vec256_insert64(x0, x1, x2) \
+ (_mm256_insert_epi64((x0), (x1), (x2)))
+
+/* extractN(x0, x1): the N-bit element of x0 at index x1. */
+#define Lib_IntVector_Intrinsics_vec256_extract8(x0, x1) \
+ (_mm256_extract_epi8((x0), (x1)))
+
+#define Lib_IntVector_Intrinsics_vec256_extract32(x0, x1) \
+ (_mm256_extract_epi32((x0), (x1)))
+
+#define Lib_IntVector_Intrinsics_vec256_extract64(x0, x1) \
+ (_mm256_extract_epi64((x0), (x1)))
+
+/* All-zero 256-bit vector. */
+#define Lib_IntVector_Intrinsics_vec256_zero \
+ (_mm256_setzero_si256())
+
+/* Lane-wise 64-bit addition and subtraction. */
+#define Lib_IntVector_Intrinsics_vec256_add64(x0, x1) \
+ (_mm256_add_epi64((x0), (x1)))
+
+#define Lib_IntVector_Intrinsics_vec256_sub64(x0, x1) \
+ (_mm256_sub_epi64((x0), (x1)))
+
+/* Per 64-bit lane: multiply the low 32 bits of each operand into a 64-bit product. */
+#define Lib_IntVector_Intrinsics_vec256_mul64(x0, x1) \
+ (_mm256_mul_epu32((x0), (x1)))
+
+/* Same as mul64, with the scalar x1 replicated into every 64-bit lane first. */
+#define Lib_IntVector_Intrinsics_vec256_smul64(x0, x1) \
+ (_mm256_mul_epu32((x0), _mm256_set1_epi64x(x1)))
+
+/* Lane-wise 32-bit addition and subtraction. */
+#define Lib_IntVector_Intrinsics_vec256_add32(x0, x1) \
+ (_mm256_add_epi32((x0), (x1)))
+
+#define Lib_IntVector_Intrinsics_vec256_sub32(x0, x1) \
+ (_mm256_sub_epi32((x0), (x1)))
+
+/* Lane-wise 32-bit multiplication keeping the low 32 bits of each product. */
+#define Lib_IntVector_Intrinsics_vec256_mul32(x0, x1) \
+ (_mm256_mullo_epi32((x0), (x1)))
+
+/* Same as mul32, with the scalar x1 replicated into every 32-bit lane first. */
+#define Lib_IntVector_Intrinsics_vec256_smul32(x0, x1) \
+ (_mm256_mullo_epi32((x0), _mm256_set1_epi32(x1)))
+
+/* Replicate the 64-bit scalar x1 into all four lanes. */
+#define Lib_IntVector_Intrinsics_vec256_load64(x1) \
+ (_mm256_set1_epi64x(x1))
+
+/* Build a vector from four 64-bit scalars; x0 lands in the lowest lane, x3 in the highest. */
+#define Lib_IntVector_Intrinsics_vec256_load64s(x0, x1, x2, x3) \
+ (_mm256_set_epi64x((x3), (x2), (x1), (x0)))
+
+/* Replicate the 32-bit scalar x into all eight lanes. */
+#define Lib_IntVector_Intrinsics_vec256_load32(x) \
+ (_mm256_set1_epi32(x))
+
+/* Build a vector from eight 32-bit scalars; x0 lands in the lowest lane, x7 in the highest. */
+#define Lib_IntVector_Intrinsics_vec256_load32s(x0, x1, x2, x3, x4, x5, x6, x7) \
+ (_mm256_set_epi32((x7), (x6), (x5), (x4), (x3), (x2), (x1), (x0)))
+
+/* Replicate the 128-bit value x into both halves of a 256-bit vector.
+ * The previous body called _mm256_set_m128i with one argument, but that
+ * intrinsic takes (hi, lo) and would not compile once the macro was used.
+ * A broadcast gives the same "copy into every lane" behaviour as the
+ * load64/load32 macros and expands x only once. */
+#define Lib_IntVector_Intrinsics_vec256_load128(x) \
+ (_mm256_broadcastsi128_si256((__m128i)(x)))
+
+/* Build a 256-bit vector from two 128-bit values: x0 becomes the low half and
+ * x1 the high half. The arguments are parenthesized so each cast applies to
+ * the whole argument expression rather than only its first token. */
+#define Lib_IntVector_Intrinsics_vec256_load128s(x0, x1) \
+ (_mm256_set_m128i((__m128i)(x1), (__m128i)(x0)))
+
+/* Interleave 32-bit elements of x1 and x2 (unpack low / unpack high). */
+#define Lib_IntVector_Intrinsics_vec256_interleave_low32(x1, x2) \
+ (_mm256_unpacklo_epi32((x1), (x2)))
+
+#define Lib_IntVector_Intrinsics_vec256_interleave_high32(x1, x2) \
+ (_mm256_unpackhi_epi32((x1), (x2)))
+
+/* Interleave 64-bit elements of x1 and x2 (unpack low / unpack high). */
+#define Lib_IntVector_Intrinsics_vec256_interleave_low64(x1, x2) \
+ (_mm256_unpacklo_epi64((x1), (x2)))
+
+#define Lib_IntVector_Intrinsics_vec256_interleave_high64(x1, x2) \
+ (_mm256_unpackhi_epi64((x1), (x2)))
+
+/* Combine 128-bit halves: selector 0x20 yields { low(x1), low(x2) },
+ * selector 0x31 yields { high(x1), high(x2) }. */
+#define Lib_IntVector_Intrinsics_vec256_interleave_low128(x1, x2) \
+ (_mm256_permute2x128_si256((x1), (x2), 0x20))
+
+#define Lib_IntVector_Intrinsics_vec256_interleave_high128(x1, x2) \
+ (_mm256_permute2x128_si256((x1), (x2), 0x31))
+
+#elif defined(__aarch64__) || defined(_M_ARM64) || defined(__arm__) || defined(_M_ARM)
+#include <arm_neon.h>
+
+/* On NEON the 128-bit vector is represented as four unsigned 32-bit lanes;
+ * macros that work on other lane widths reinterpret to and from this type. */
+typedef uint32x4_t Lib_IntVector_Intrinsics_vec128;
+
+/* Bitwise exclusive OR. */
+#define Lib_IntVector_Intrinsics_vec128_xor(x0, x1) \
+ (veorq_u32((x0), (x1)))
+
+/* 64-bit lane equality mask: each 64-bit lane is all-ones when the lanes of x0
+ * and x1 are equal and all-zero otherwise.
+ * vceqq_u32 on its own compares 32-bit lanes, so a 64-bit lane with only one
+ * matching half would produce a half-set mask. The 32-bit result is therefore
+ * ANDed with a copy whose halves are swapped inside each 64-bit lane
+ * (vrev64q_u32), which requires both halves to match.
+ * Note: x0 and x1 are expanded twice; pass side-effect-free expressions. */
+#define Lib_IntVector_Intrinsics_vec128_eq64(x0, x1) \
+ (vandq_u32(vceqq_u32((x0), (x1)), vrev64q_u32(vceqq_u32((x0), (x1)))))
+
+/* 32-bit lane equality mask (all-ones where equal, zero elsewhere). */
+#define Lib_IntVector_Intrinsics_vec128_eq32(x0, x1) \
+ (vceqq_u32((x0), (x1)))
+
+/* 32-bit lane unsigned greater-than mask (all-ones where x0 > x1). */
+#define Lib_IntVector_Intrinsics_vec128_gt32(x0, x1) \
+ (vcgtq_u32((x0), (x1)))
+
+/* Helper: upper 32 bits of each 64-bit lane, narrowed to a uint32x2_t. */
+#define high32(x0) \
+ (vmovn_u64(vshrq_n_u64(vreinterpretq_u64_u32(x0), 32)))
+
+/* Helper: lower 32 bits of each 64-bit lane, narrowed to a uint32x2_t. */
+#define low32(x0) \
+ (vmovn_u64(vreinterpretq_u64_u32(x0)))
+
+/* 64-bit lane unsigned greater-than mask, built from 32-bit comparisons:
+ * x0 > x1 iff high(x0) > high(x1), or the high halves are equal and
+ * low(x0) > low(x1).
+ * The per-lane 32-bit mask is widened with a *signed* extension so that a true
+ * result fills all 64 bits of the lane; the previous vmovl_u32 zero-extended it
+ * and left the upper 32 bits of a "true" lane cleared.
+ * Note: x0 and x1 are expanded several times; pass side-effect-free expressions. */
+#define Lib_IntVector_Intrinsics_vec128_gt64(x0, x1) \
+ (vreinterpretq_u32_s64(vmovl_s32(vreinterpret_s32_u32(vorr_u32(vcgt_u32(high32(x0), high32(x1)), vand_u32(vceq_u32(high32(x0), high32(x1)), vcgt_u32(low32(x0), low32(x1))))))))
+
+/* Bitwise OR. */
+#define Lib_IntVector_Intrinsics_vec128_or(x0, x1) \
+ (vorrq_u32((x0), (x1)))
+
+/* Bitwise AND. */
+#define Lib_IntVector_Intrinsics_vec128_and(x0, x1) \
+ (vandq_u32((x0), (x1)))
+
+/* Bitwise complement. */
+#define Lib_IntVector_Intrinsics_vec128_lognot(x0) \
+ (vmvnq_u32(x0))
+
+/* Shift the whole 128-bit value left by x1 bits, filling with zeros.
+ * x1 must be a compile-time constant multiple of 8 in the range 8..120: the
+ * shift is done at byte granularity and the extract index must stay in 0..15.
+ * The previous body passed a uint8x16_t zero vector to vextq_u32, used a byte
+ * count as a 32-bit lane index, and placed x0 first, which cannot shift bytes
+ * toward the high end. The extract is now done on bytes with the zero vector
+ * first, so the low x1/8 bytes become zero and the rest come from x0.
+ * NOTE(review): presumably meant to mirror a byte-wise 128-bit shift such as
+ * _mm_slli_si128 - confirm against the x86 definition. */
+#define Lib_IntVector_Intrinsics_vec128_shift_left(x0, x1) \
+ (vreinterpretq_u32_u8(vextq_u8(vdupq_n_u8(0), vreinterpretq_u8_u32(x0), 16 - (x1) / 8)))
+
+/* Shift the whole 128-bit value right by x1 bits, filling with zeros.
+ * x1 must be a compile-time constant multiple of 8 in the range 0..120: the
+ * shift is done at byte granularity and the extract index must stay in 0..15.
+ * The previous body passed a uint8x16_t zero vector to vextq_u32 and used a
+ * byte count as a 32-bit lane index; the extract is now done on bytes, so the
+ * result starts at byte x1/8 of x0 and is padded with zero bytes at the top. */
+#define Lib_IntVector_Intrinsics_vec128_shift_right(x0, x1) \
+ (vreinterpretq_u32_u8(vextq_u8(vreinterpretq_u8_u32(x0), vdupq_n_u8(0), (x1) / 8)))
+
+/* Shift each 64-bit lane by the constant x1 bits (left, then logical right). */
+#define Lib_IntVector_Intrinsics_vec128_shift_left64(x0, x1) \
+ (vreinterpretq_u32_u64(vshlq_n_u64(vreinterpretq_u64_u32(x0), (x1))))
+
+#define Lib_IntVector_Intrinsics_vec128_shift_right64(x0, x1) \
+ (vreinterpretq_u32_u64(vshrq_n_u64(vreinterpretq_u64_u32(x0), (x1))))
+
+/* Shift each 32-bit lane by the constant x1 bits (left, then logical right). */
+#define Lib_IntVector_Intrinsics_vec128_shift_left32(x0, x1) \
+ (vshlq_n_u32((x0), (x1)))
+
+#define Lib_IntVector_Intrinsics_vec128_shift_right32(x0, x1) \
+ (vshrq_n_u32((x0), (x1)))
+
+/* Rotating a 32-bit lane by 16 is a swap of its two 16-bit halves. */
+#define Lib_IntVector_Intrinsics_vec128_rotate_left32_16(x1) \
+ (vreinterpretq_u32_u16(vrev32q_u16(vreinterpretq_u16_u32(x1))))
+
+/* Rotate each 32-bit lane left by x1 bits: shift left by x1, then insert the
+ * bits shifted right by 32 - x1 underneath. A rotation by 16 takes the
+ * half-swap shortcut instead. */
+#define Lib_IntVector_Intrinsics_vec128_rotate_left32(x0, x1) \
+ ((x1) == 16 ? Lib_IntVector_Intrinsics_vec128_rotate_left32_16(x0) : vsriq_n_u32(vshlq_n_u32((x0), (x1)), (x0), 32 - (x1)))
+
+/* Rotating right by 16 is the same half-swap as rotating left by 16. */
+#define Lib_IntVector_Intrinsics_vec128_rotate_right32_16(x1) \
+ (vreinterpretq_u32_u16(vrev32q_u16(vreinterpretq_u16_u32(x1))))
+
+/* Rotate each 32-bit lane right by x1 bits: shift left by 32 - x1, then insert
+ * the bits shifted right by x1 underneath. A rotation by 16 takes the
+ * half-swap shortcut instead. */
+#define Lib_IntVector_Intrinsics_vec128_rotate_right32(x0, x1) \
+ ((x1) == 16 ? Lib_IntVector_Intrinsics_vec128_rotate_right32_16(x0) : vsriq_n_u32(vshlq_n_u32((x0), 32 - (x1)), (x0), (x1)))
+
+/* Rotate the four 32-bit lanes: result lane i is taken from lane (i + x1) % 4.
+ * Implemented as an extract from x0 concatenated with itself. */
+#define Lib_IntVector_Intrinsics_vec128_rotate_right_lanes32(x0, x1) \
+ (vextq_u32((x0), (x0), (x1)))
+
+/* Rotate the two 64-bit lanes: result lane i is taken from lane (i + x1) % 2.
+ * vextq_u64 operates on uint64x2_t, so the uint32x4_t operand is reinterpreted
+ * on the way in and the result is reinterpreted back. The previous body passed
+ * the uint32x4_t directly and produced a uint64x2_t, which does not type-check
+ * without lax vector conversions. */
+#define Lib_IntVector_Intrinsics_vec128_rotate_right_lanes64(x0, x1) \
+ (vreinterpretq_u32_u64(vextq_u64(vreinterpretq_u64_u32(x0), vreinterpretq_u64_u32(x0), (x1))))
+
+/*
+#define Lib_IntVector_Intrinsics_vec128_shuffle32(x0, x1, x2, x3, x4) \
+ (_mm_shuffle_epi32(x0, _MM_SHUFFLE(x1,x2,x3,x4)))
+
+#define Lib_IntVector_Intrinsics_vec128_shuffle64(x0, x1, x2) \
+ (_mm_shuffle_epi32(x0, _MM_SHUFFLE(2*x1+1,2*x1,2*x2+1,2*x2)))
+*/
+
+/* Read 16 bytes from x0 as four 32-bit lanes. */
+#define Lib_IntVector_Intrinsics_vec128_load_le(x0) \
+ (vld1q_u32((const uint32_t *)(x0)))
+
+/* Write the four 32-bit lanes of x1 to the 16 bytes at x0. */
+#define Lib_IntVector_Intrinsics_vec128_store_le(x0, x1) \
+ (vst1q_u32((uint32_t *)(x0), (x1)))
+
+/*
+#define Lib_IntVector_Intrinsics_vec128_load_be(x0) \
+ ( Lib_IntVector_Intrinsics_vec128 l = vrev64q_u8(vld1q_u32((uint32_t*)(x0)));
+
+*/
+
+/* Read 16 bytes from x0, then reverse the bytes inside every 32-bit word. */
+#define Lib_IntVector_Intrinsics_vec128_load32_be(x0) \
+ (vreinterpretq_u32_u8(vrev32q_u8(vreinterpretq_u8_u32(vld1q_u32((const uint32_t *)(x0))))))
+
+/* Read 16 bytes from x0, then reverse the bytes inside every 64-bit word. */
+#define Lib_IntVector_Intrinsics_vec128_load64_be(x0) \
+ (vreinterpretq_u32_u8(vrev64q_u8(vreinterpretq_u8_u32(vld1q_u32((const uint32_t *)(x0))))))
+
+/*
+#define Lib_IntVector_Intrinsics_vec128_store_be(x0, x1) \
+ (_mm_storeu_si128((__m128i*)(x0), _mm_shuffle_epi8(x1, _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15))))
+*/
+
+/* Reverse the bytes inside every 32-bit word of x1, then write it to x0. */
+#define Lib_IntVector_Intrinsics_vec128_store32_be(x0, x1) \
+ (vst1q_u32((uint32_t *)(x0), vreinterpretq_u32_u8(vrev32q_u8(vreinterpretq_u8_u32(x1)))))
+
+/* Reverse the bytes inside every 64-bit word of x1, then write it to x0. */
+#define Lib_IntVector_Intrinsics_vec128_store64_be(x0, x1) \
+ (vst1q_u32((uint32_t *)(x0), vreinterpretq_u32_u8(vrev64q_u8(vreinterpretq_u8_u32(x1)))))
+
+/* Copy of x0 with the byte at index x2 (constant, 0..15) replaced by x1.
+ * vsetq_lane_u8 works on uint8x16_t, so the uint32x4_t vector is reinterpreted
+ * on the way in and back on the way out, as insert64 does for 64-bit lanes;
+ * the previous body mixed the two vector types. */
+#define Lib_IntVector_Intrinsics_vec128_insert8(x0, x1, x2) \
+ (vreinterpretq_u32_u8(vsetq_lane_u8((x1), vreinterpretq_u8_u32(x0), (x2))))
+
+/* Copy of x0 with the 32-bit lane at index x2 replaced by x1. */
+#define Lib_IntVector_Intrinsics_vec128_insert32(x0, x1, x2) \
+ (vsetq_lane_u32((x1), (x0), (x2)))
+
+/* Copy of x0 with the 64-bit lane at index x2 replaced by x1. */
+#define Lib_IntVector_Intrinsics_vec128_insert64(x0, x1, x2) \
+ (vreinterpretq_u32_u64(vsetq_lane_u64((x1), vreinterpretq_u64_u32(x0), (x2))))
+
+/* The byte of x0 at index x1 (constant, 0..15).
+ * vgetq_lane_u8 works on uint8x16_t, so the uint32x4_t vector is reinterpreted
+ * first, as extract64 does for 64-bit lanes; the previous body passed the
+ * uint32x4_t directly. */
+#define Lib_IntVector_Intrinsics_vec128_extract8(x0, x1) \
+ (vgetq_lane_u8(vreinterpretq_u8_u32(x0), (x1)))
+
+/* The 32-bit lane of x0 at index x1. */
+#define Lib_IntVector_Intrinsics_vec128_extract32(x0, x1) \
+ (vgetq_lane_u32((x0), (x1)))
+
+/* The 64-bit lane of x0 at index x1. */
+#define Lib_IntVector_Intrinsics_vec128_extract64(x0, x1) \
+ (vgetq_lane_u64(vreinterpretq_u64_u32(x0), (x1)))
+
+/* All-zero 128-bit vector. */
+#define Lib_IntVector_Intrinsics_vec128_zero \
+ (vdupq_n_u32(0))
+
+/* Lane-wise 64-bit addition and subtraction. */
+#define Lib_IntVector_Intrinsics_vec128_add64(x0, x1) \
+ (vreinterpretq_u32_u64(vaddq_u64(vreinterpretq_u64_u32(x0), vreinterpretq_u64_u32(x1))))
+
+#define Lib_IntVector_Intrinsics_vec128_sub64(x0, x1) \
+ (vreinterpretq_u32_u64(vsubq_u64(vreinterpretq_u64_u32(x0), vreinterpretq_u64_u32(x1))))
+
+/* Per 64-bit lane: narrow both operands to their low 32 bits and form the
+ * widening 32x32 -> 64-bit product. */
+#define Lib_IntVector_Intrinsics_vec128_mul64(x0, x1) \
+ (vreinterpretq_u32_u64(vmull_u32(vmovn_u64(vreinterpretq_u64_u32(x0)), vmovn_u64(vreinterpretq_u64_u32(x1)))))
+
+/* Per 64-bit lane: multiply the low 32 bits of x0 by the scalar x1 (truncated
+ * to 32 bits), giving a 64-bit product.
+ * x1 is parenthesized so the uint32_t cast applies to the whole argument
+ * expression rather than only its first token (e.g. `a >> 3`). */
+#define Lib_IntVector_Intrinsics_vec128_smul64(x0, x1) \
+ (vreinterpretq_u32_u64(vmull_n_u32(vmovn_u64(vreinterpretq_u64_u32(x0)), (uint32_t)(x1))))
+
+/* Lane-wise 32-bit addition. */
+#define Lib_IntVector_Intrinsics_vec128_add32(x0, x1) \
+ (vaddq_u32((x0), (x1)))
+
+/* Lane-wise 32-bit subtraction. */
+#define Lib_IntVector_Intrinsics_vec128_sub32(x0, x1) \
+ (vsubq_u32((x0), (x1)))
+
+/* Lane-wise 32-bit multiplication keeping the low 32 bits of each product.
+ * The previous body used vmulq_lane_u32, which multiplies by one selected lane
+ * of a uint32x2_t and needs a third (lane index) argument, so it could not
+ * compile with two uint32x4_t operands. vmulq_u32 is the element-wise form. */
+#define Lib_IntVector_Intrinsics_vec128_mul32(x0, x1) \
+ (vmulq_u32((x0), (x1)))
+
+/* Multiply every 32-bit lane of x0 by the scalar x1, keeping the low 32 bits.
+ * The previous body passed a broadcast uint32x4_t to vmulq_lane_u32, which
+ * expects a uint32x2_t plus a lane index and so could not compile. The scalar
+ * is still broadcast with vdupq_n_u32, then multiplied element-wise. */
+#define Lib_IntVector_Intrinsics_vec128_smul32(x0, x1) \
+ (vmulq_u32((x0), vdupq_n_u32(x1)))
+
+/* Cast a 128-bit value x to the vector type. */
+#define Lib_IntVector_Intrinsics_vec128_load128(x) \
+ ((uint32x4_t)(x))
+
+/* Replicate the 64-bit scalar x into both 64-bit lanes. */
+#define Lib_IntVector_Intrinsics_vec128_load64(x) \
+ (vreinterpretq_u32_u64(vdupq_n_u64(x)))
+
+/* Replicate the 32-bit scalar x into all four 32-bit lanes. */
+#define Lib_IntVector_Intrinsics_vec128_load32(x) \
+ (vdupq_n_u32(x))
+
+/* Build a vector from two 64-bit scalars: x1 goes to lane 0 and x2 to lane 1. */
+static inline Lib_IntVector_Intrinsics_vec128
+Lib_IntVector_Intrinsics_vec128_load64s(uint64_t x1, uint64_t x2)
+{
+ const uint64_t lanes[2] = { x1, x2 };
+ const uint64x2_t packed = vld1q_u64(lanes);
+
+ return vreinterpretq_u32_u64(packed);
+}
+
+/* Build a vector from four 32-bit scalars: x1 goes to lane 0 through x4 in lane 3. */
+static inline Lib_IntVector_Intrinsics_vec128
+Lib_IntVector_Intrinsics_vec128_load32s(uint32_t x1, uint32_t x2, uint32_t x3, uint32_t x4)
+{
+ const uint32_t lanes[4] = { x1, x2, x3, x4 };
+ const uint32x4_t packed = vld1q_u32(lanes);
+
+ return packed;
+}
+
+/* Interleave the 32-bit lanes of x1 and x2: zip1 takes the low halves, zip2 the high halves. */
+#define Lib_IntVector_Intrinsics_vec128_interleave_low32(x1, x2) \
+ (vzip1q_u32((x1), (x2)))
+
+#define Lib_IntVector_Intrinsics_vec128_interleave_high32(x1, x2) \
+ (vzip2q_u32((x1), (x2)))
+
+/* Interleave the 64-bit lanes of x1 and x2: zip1 yields { x1[0], x2[0] }, zip2 yields { x1[1], x2[1] }. */
+#define Lib_IntVector_Intrinsics_vec128_interleave_low64(x1, x2) \
+ (vreinterpretq_u32_u64(vzip1q_u64(vreinterpretq_u64_u32(x1), vreinterpretq_u64_u32(x2))))
+
+#define Lib_IntVector_Intrinsics_vec128_interleave_high64(x1, x2) \
+ (vreinterpretq_u32_u64(vzip2q_u64(vreinterpretq_u64_u32(x1), vreinterpretq_u64_u32(x2))))
+
+#endif
+#endif