diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 19:33:14 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 19:33:14 +0000 |
commit | 36d22d82aa202bb199967e9512281e9a53db42c9 (patch) | |
tree | 105e8c98ddea1c1e4784a60a5a6410fa416be2de /security/nss/lib/freebl/verified | |
parent | Initial commit. (diff) | |
download | firefox-esr-36d22d82aa202bb199967e9512281e9a53db42c9.tar.xz firefox-esr-36d22d82aa202bb199967e9512281e9a53db42c9.zip |
Adding upstream version 115.7.0esr. (tag: upstream/115.7.0esr)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'security/nss/lib/freebl/verified')
58 files changed, 16935 insertions, 0 deletions
diff --git a/security/nss/lib/freebl/verified/Hacl_Bignum25519_51.h b/security/nss/lib/freebl/verified/Hacl_Bignum25519_51.h new file mode 100644 index 0000000000..d53e43c21d --- /dev/null +++ b/security/nss/lib/freebl/verified/Hacl_Bignum25519_51.h @@ -0,0 +1,678 @@ +/* MIT License + * + * Copyright (c) 2016-2020 INRIA, CMU and Microsoft Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#ifndef __Hacl_Bignum25519_51_H +#define __Hacl_Bignum25519_51_H + +#if defined(__cplusplus) +extern "C" { +#endif + +#include <string.h> +#include "krml/internal/types.h" +#include "krml/lowstar_endianness.h" +#include "krml/internal/target.h" + +#include "Hacl_Krmllib.h" + +static inline void +Hacl_Impl_Curve25519_Field51_fadd(uint64_t *out, uint64_t *f1, uint64_t *f2) +{ + uint64_t f10 = f1[0U]; + uint64_t f20 = f2[0U]; + uint64_t f11 = f1[1U]; + uint64_t f21 = f2[1U]; + uint64_t f12 = f1[2U]; + uint64_t f22 = f2[2U]; + uint64_t f13 = f1[3U]; + uint64_t f23 = f2[3U]; + uint64_t f14 = f1[4U]; + uint64_t f24 = f2[4U]; + out[0U] = f10 + f20; + out[1U] = f11 + f21; + out[2U] = f12 + f22; + out[3U] = f13 + f23; + out[4U] = f14 + f24; +} + +static inline void +Hacl_Impl_Curve25519_Field51_fsub(uint64_t *out, uint64_t *f1, uint64_t *f2) +{ + uint64_t f10 = f1[0U]; + uint64_t f20 = f2[0U]; + uint64_t f11 = f1[1U]; + uint64_t f21 = f2[1U]; + uint64_t f12 = f1[2U]; + uint64_t f22 = f2[2U]; + uint64_t f13 = f1[3U]; + uint64_t f23 = f2[3U]; + uint64_t f14 = f1[4U]; + uint64_t f24 = f2[4U]; + out[0U] = f10 + (uint64_t)0x3fffffffffff68U - f20; + out[1U] = f11 + (uint64_t)0x3ffffffffffff8U - f21; + out[2U] = f12 + (uint64_t)0x3ffffffffffff8U - f22; + out[3U] = f13 + (uint64_t)0x3ffffffffffff8U - f23; + out[4U] = f14 + (uint64_t)0x3ffffffffffff8U - f24; +} + +static inline void +Hacl_Impl_Curve25519_Field51_fmul( + uint64_t *out, + uint64_t *f1, + uint64_t *f2, + FStar_UInt128_uint128 *uu___) +{ + uint64_t f10 = f1[0U]; + uint64_t f11 = f1[1U]; + uint64_t f12 = f1[2U]; + uint64_t f13 = f1[3U]; + uint64_t f14 = f1[4U]; + uint64_t f20 = f2[0U]; + uint64_t f21 = f2[1U]; + uint64_t f22 = f2[2U]; + uint64_t f23 = f2[3U]; + uint64_t f24 = f2[4U]; + uint64_t tmp1 = f21 * (uint64_t)19U; + uint64_t tmp2 = f22 * (uint64_t)19U; + uint64_t tmp3 = f23 * (uint64_t)19U; + uint64_t tmp4 = f24 * (uint64_t)19U; + FStar_UInt128_uint128 o00 = FStar_UInt128_mul_wide(f10, f20); + 
FStar_UInt128_uint128 o10 = FStar_UInt128_mul_wide(f10, f21); + FStar_UInt128_uint128 o20 = FStar_UInt128_mul_wide(f10, f22); + FStar_UInt128_uint128 o30 = FStar_UInt128_mul_wide(f10, f23); + FStar_UInt128_uint128 o40 = FStar_UInt128_mul_wide(f10, f24); + FStar_UInt128_uint128 o01 = FStar_UInt128_add(o00, FStar_UInt128_mul_wide(f11, tmp4)); + FStar_UInt128_uint128 o11 = FStar_UInt128_add(o10, FStar_UInt128_mul_wide(f11, f20)); + FStar_UInt128_uint128 o21 = FStar_UInt128_add(o20, FStar_UInt128_mul_wide(f11, f21)); + FStar_UInt128_uint128 o31 = FStar_UInt128_add(o30, FStar_UInt128_mul_wide(f11, f22)); + FStar_UInt128_uint128 o41 = FStar_UInt128_add(o40, FStar_UInt128_mul_wide(f11, f23)); + FStar_UInt128_uint128 o02 = FStar_UInt128_add(o01, FStar_UInt128_mul_wide(f12, tmp3)); + FStar_UInt128_uint128 o12 = FStar_UInt128_add(o11, FStar_UInt128_mul_wide(f12, tmp4)); + FStar_UInt128_uint128 o22 = FStar_UInt128_add(o21, FStar_UInt128_mul_wide(f12, f20)); + FStar_UInt128_uint128 o32 = FStar_UInt128_add(o31, FStar_UInt128_mul_wide(f12, f21)); + FStar_UInt128_uint128 o42 = FStar_UInt128_add(o41, FStar_UInt128_mul_wide(f12, f22)); + FStar_UInt128_uint128 o03 = FStar_UInt128_add(o02, FStar_UInt128_mul_wide(f13, tmp2)); + FStar_UInt128_uint128 o13 = FStar_UInt128_add(o12, FStar_UInt128_mul_wide(f13, tmp3)); + FStar_UInt128_uint128 o23 = FStar_UInt128_add(o22, FStar_UInt128_mul_wide(f13, tmp4)); + FStar_UInt128_uint128 o33 = FStar_UInt128_add(o32, FStar_UInt128_mul_wide(f13, f20)); + FStar_UInt128_uint128 o43 = FStar_UInt128_add(o42, FStar_UInt128_mul_wide(f13, f21)); + FStar_UInt128_uint128 o04 = FStar_UInt128_add(o03, FStar_UInt128_mul_wide(f14, tmp1)); + FStar_UInt128_uint128 o14 = FStar_UInt128_add(o13, FStar_UInt128_mul_wide(f14, tmp2)); + FStar_UInt128_uint128 o24 = FStar_UInt128_add(o23, FStar_UInt128_mul_wide(f14, tmp3)); + FStar_UInt128_uint128 o34 = FStar_UInt128_add(o33, FStar_UInt128_mul_wide(f14, tmp4)); + FStar_UInt128_uint128 o44 = FStar_UInt128_add(o43, 
FStar_UInt128_mul_wide(f14, f20)); + FStar_UInt128_uint128 tmp_w0 = o04; + FStar_UInt128_uint128 tmp_w1 = o14; + FStar_UInt128_uint128 tmp_w2 = o24; + FStar_UInt128_uint128 tmp_w3 = o34; + FStar_UInt128_uint128 tmp_w4 = o44; + FStar_UInt128_uint128 + l_ = FStar_UInt128_add(tmp_w0, FStar_UInt128_uint64_to_uint128((uint64_t)0U)); + uint64_t tmp01 = FStar_UInt128_uint128_to_uint64(l_) & (uint64_t)0x7ffffffffffffU; + uint64_t c0 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_, (uint32_t)51U)); + FStar_UInt128_uint128 l_0 = FStar_UInt128_add(tmp_w1, FStar_UInt128_uint64_to_uint128(c0)); + uint64_t tmp11 = FStar_UInt128_uint128_to_uint64(l_0) & (uint64_t)0x7ffffffffffffU; + uint64_t c1 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_0, (uint32_t)51U)); + FStar_UInt128_uint128 l_1 = FStar_UInt128_add(tmp_w2, FStar_UInt128_uint64_to_uint128(c1)); + uint64_t tmp21 = FStar_UInt128_uint128_to_uint64(l_1) & (uint64_t)0x7ffffffffffffU; + uint64_t c2 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_1, (uint32_t)51U)); + FStar_UInt128_uint128 l_2 = FStar_UInt128_add(tmp_w3, FStar_UInt128_uint64_to_uint128(c2)); + uint64_t tmp31 = FStar_UInt128_uint128_to_uint64(l_2) & (uint64_t)0x7ffffffffffffU; + uint64_t c3 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_2, (uint32_t)51U)); + FStar_UInt128_uint128 l_3 = FStar_UInt128_add(tmp_w4, FStar_UInt128_uint64_to_uint128(c3)); + uint64_t tmp41 = FStar_UInt128_uint128_to_uint64(l_3) & (uint64_t)0x7ffffffffffffU; + uint64_t c4 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_3, (uint32_t)51U)); + uint64_t l_4 = tmp01 + c4 * (uint64_t)19U; + uint64_t tmp0_ = l_4 & (uint64_t)0x7ffffffffffffU; + uint64_t c5 = l_4 >> (uint32_t)51U; + uint64_t o0 = tmp0_; + uint64_t o1 = tmp11 + c5; + uint64_t o2 = tmp21; + uint64_t o3 = tmp31; + uint64_t o4 = tmp41; + out[0U] = o0; + out[1U] = o1; + out[2U] = o2; + out[3U] = o3; + out[4U] = o4; +} + +static inline void 
+Hacl_Impl_Curve25519_Field51_fmul2( + uint64_t *out, + uint64_t *f1, + uint64_t *f2, + FStar_UInt128_uint128 *uu___) +{ + uint64_t f10 = f1[0U]; + uint64_t f11 = f1[1U]; + uint64_t f12 = f1[2U]; + uint64_t f13 = f1[3U]; + uint64_t f14 = f1[4U]; + uint64_t f20 = f2[0U]; + uint64_t f21 = f2[1U]; + uint64_t f22 = f2[2U]; + uint64_t f23 = f2[3U]; + uint64_t f24 = f2[4U]; + uint64_t f30 = f1[5U]; + uint64_t f31 = f1[6U]; + uint64_t f32 = f1[7U]; + uint64_t f33 = f1[8U]; + uint64_t f34 = f1[9U]; + uint64_t f40 = f2[5U]; + uint64_t f41 = f2[6U]; + uint64_t f42 = f2[7U]; + uint64_t f43 = f2[8U]; + uint64_t f44 = f2[9U]; + uint64_t tmp11 = f21 * (uint64_t)19U; + uint64_t tmp12 = f22 * (uint64_t)19U; + uint64_t tmp13 = f23 * (uint64_t)19U; + uint64_t tmp14 = f24 * (uint64_t)19U; + uint64_t tmp21 = f41 * (uint64_t)19U; + uint64_t tmp22 = f42 * (uint64_t)19U; + uint64_t tmp23 = f43 * (uint64_t)19U; + uint64_t tmp24 = f44 * (uint64_t)19U; + FStar_UInt128_uint128 o00 = FStar_UInt128_mul_wide(f10, f20); + FStar_UInt128_uint128 o15 = FStar_UInt128_mul_wide(f10, f21); + FStar_UInt128_uint128 o25 = FStar_UInt128_mul_wide(f10, f22); + FStar_UInt128_uint128 o30 = FStar_UInt128_mul_wide(f10, f23); + FStar_UInt128_uint128 o40 = FStar_UInt128_mul_wide(f10, f24); + FStar_UInt128_uint128 o010 = FStar_UInt128_add(o00, FStar_UInt128_mul_wide(f11, tmp14)); + FStar_UInt128_uint128 o110 = FStar_UInt128_add(o15, FStar_UInt128_mul_wide(f11, f20)); + FStar_UInt128_uint128 o210 = FStar_UInt128_add(o25, FStar_UInt128_mul_wide(f11, f21)); + FStar_UInt128_uint128 o310 = FStar_UInt128_add(o30, FStar_UInt128_mul_wide(f11, f22)); + FStar_UInt128_uint128 o410 = FStar_UInt128_add(o40, FStar_UInt128_mul_wide(f11, f23)); + FStar_UInt128_uint128 o020 = FStar_UInt128_add(o010, FStar_UInt128_mul_wide(f12, tmp13)); + FStar_UInt128_uint128 o120 = FStar_UInt128_add(o110, FStar_UInt128_mul_wide(f12, tmp14)); + FStar_UInt128_uint128 o220 = FStar_UInt128_add(o210, FStar_UInt128_mul_wide(f12, f20)); + 
FStar_UInt128_uint128 o320 = FStar_UInt128_add(o310, FStar_UInt128_mul_wide(f12, f21)); + FStar_UInt128_uint128 o420 = FStar_UInt128_add(o410, FStar_UInt128_mul_wide(f12, f22)); + FStar_UInt128_uint128 o030 = FStar_UInt128_add(o020, FStar_UInt128_mul_wide(f13, tmp12)); + FStar_UInt128_uint128 o130 = FStar_UInt128_add(o120, FStar_UInt128_mul_wide(f13, tmp13)); + FStar_UInt128_uint128 o230 = FStar_UInt128_add(o220, FStar_UInt128_mul_wide(f13, tmp14)); + FStar_UInt128_uint128 o330 = FStar_UInt128_add(o320, FStar_UInt128_mul_wide(f13, f20)); + FStar_UInt128_uint128 o430 = FStar_UInt128_add(o420, FStar_UInt128_mul_wide(f13, f21)); + FStar_UInt128_uint128 o040 = FStar_UInt128_add(o030, FStar_UInt128_mul_wide(f14, tmp11)); + FStar_UInt128_uint128 o140 = FStar_UInt128_add(o130, FStar_UInt128_mul_wide(f14, tmp12)); + FStar_UInt128_uint128 o240 = FStar_UInt128_add(o230, FStar_UInt128_mul_wide(f14, tmp13)); + FStar_UInt128_uint128 o340 = FStar_UInt128_add(o330, FStar_UInt128_mul_wide(f14, tmp14)); + FStar_UInt128_uint128 o440 = FStar_UInt128_add(o430, FStar_UInt128_mul_wide(f14, f20)); + FStar_UInt128_uint128 tmp_w10 = o040; + FStar_UInt128_uint128 tmp_w11 = o140; + FStar_UInt128_uint128 tmp_w12 = o240; + FStar_UInt128_uint128 tmp_w13 = o340; + FStar_UInt128_uint128 tmp_w14 = o440; + FStar_UInt128_uint128 o0 = FStar_UInt128_mul_wide(f30, f40); + FStar_UInt128_uint128 o1 = FStar_UInt128_mul_wide(f30, f41); + FStar_UInt128_uint128 o2 = FStar_UInt128_mul_wide(f30, f42); + FStar_UInt128_uint128 o3 = FStar_UInt128_mul_wide(f30, f43); + FStar_UInt128_uint128 o4 = FStar_UInt128_mul_wide(f30, f44); + FStar_UInt128_uint128 o01 = FStar_UInt128_add(o0, FStar_UInt128_mul_wide(f31, tmp24)); + FStar_UInt128_uint128 o111 = FStar_UInt128_add(o1, FStar_UInt128_mul_wide(f31, f40)); + FStar_UInt128_uint128 o211 = FStar_UInt128_add(o2, FStar_UInt128_mul_wide(f31, f41)); + FStar_UInt128_uint128 o31 = FStar_UInt128_add(o3, FStar_UInt128_mul_wide(f31, f42)); + FStar_UInt128_uint128 o41 = 
FStar_UInt128_add(o4, FStar_UInt128_mul_wide(f31, f43)); + FStar_UInt128_uint128 o02 = FStar_UInt128_add(o01, FStar_UInt128_mul_wide(f32, tmp23)); + FStar_UInt128_uint128 o121 = FStar_UInt128_add(o111, FStar_UInt128_mul_wide(f32, tmp24)); + FStar_UInt128_uint128 o221 = FStar_UInt128_add(o211, FStar_UInt128_mul_wide(f32, f40)); + FStar_UInt128_uint128 o32 = FStar_UInt128_add(o31, FStar_UInt128_mul_wide(f32, f41)); + FStar_UInt128_uint128 o42 = FStar_UInt128_add(o41, FStar_UInt128_mul_wide(f32, f42)); + FStar_UInt128_uint128 o03 = FStar_UInt128_add(o02, FStar_UInt128_mul_wide(f33, tmp22)); + FStar_UInt128_uint128 o131 = FStar_UInt128_add(o121, FStar_UInt128_mul_wide(f33, tmp23)); + FStar_UInt128_uint128 o231 = FStar_UInt128_add(o221, FStar_UInt128_mul_wide(f33, tmp24)); + FStar_UInt128_uint128 o33 = FStar_UInt128_add(o32, FStar_UInt128_mul_wide(f33, f40)); + FStar_UInt128_uint128 o43 = FStar_UInt128_add(o42, FStar_UInt128_mul_wide(f33, f41)); + FStar_UInt128_uint128 o04 = FStar_UInt128_add(o03, FStar_UInt128_mul_wide(f34, tmp21)); + FStar_UInt128_uint128 o141 = FStar_UInt128_add(o131, FStar_UInt128_mul_wide(f34, tmp22)); + FStar_UInt128_uint128 o241 = FStar_UInt128_add(o231, FStar_UInt128_mul_wide(f34, tmp23)); + FStar_UInt128_uint128 o34 = FStar_UInt128_add(o33, FStar_UInt128_mul_wide(f34, tmp24)); + FStar_UInt128_uint128 o44 = FStar_UInt128_add(o43, FStar_UInt128_mul_wide(f34, f40)); + FStar_UInt128_uint128 tmp_w20 = o04; + FStar_UInt128_uint128 tmp_w21 = o141; + FStar_UInt128_uint128 tmp_w22 = o241; + FStar_UInt128_uint128 tmp_w23 = o34; + FStar_UInt128_uint128 tmp_w24 = o44; + FStar_UInt128_uint128 + l_ = FStar_UInt128_add(tmp_w10, FStar_UInt128_uint64_to_uint128((uint64_t)0U)); + uint64_t tmp00 = FStar_UInt128_uint128_to_uint64(l_) & (uint64_t)0x7ffffffffffffU; + uint64_t c00 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_, (uint32_t)51U)); + FStar_UInt128_uint128 l_0 = FStar_UInt128_add(tmp_w11, FStar_UInt128_uint64_to_uint128(c00)); + uint64_t 
tmp10 = FStar_UInt128_uint128_to_uint64(l_0) & (uint64_t)0x7ffffffffffffU; + uint64_t c10 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_0, (uint32_t)51U)); + FStar_UInt128_uint128 l_1 = FStar_UInt128_add(tmp_w12, FStar_UInt128_uint64_to_uint128(c10)); + uint64_t tmp20 = FStar_UInt128_uint128_to_uint64(l_1) & (uint64_t)0x7ffffffffffffU; + uint64_t c20 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_1, (uint32_t)51U)); + FStar_UInt128_uint128 l_2 = FStar_UInt128_add(tmp_w13, FStar_UInt128_uint64_to_uint128(c20)); + uint64_t tmp30 = FStar_UInt128_uint128_to_uint64(l_2) & (uint64_t)0x7ffffffffffffU; + uint64_t c30 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_2, (uint32_t)51U)); + FStar_UInt128_uint128 l_3 = FStar_UInt128_add(tmp_w14, FStar_UInt128_uint64_to_uint128(c30)); + uint64_t tmp40 = FStar_UInt128_uint128_to_uint64(l_3) & (uint64_t)0x7ffffffffffffU; + uint64_t c40 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_3, (uint32_t)51U)); + uint64_t l_4 = tmp00 + c40 * (uint64_t)19U; + uint64_t tmp0_ = l_4 & (uint64_t)0x7ffffffffffffU; + uint64_t c50 = l_4 >> (uint32_t)51U; + uint64_t o100 = tmp0_; + uint64_t o112 = tmp10 + c50; + uint64_t o122 = tmp20; + uint64_t o132 = tmp30; + uint64_t o142 = tmp40; + FStar_UInt128_uint128 + l_5 = FStar_UInt128_add(tmp_w20, FStar_UInt128_uint64_to_uint128((uint64_t)0U)); + uint64_t tmp0 = FStar_UInt128_uint128_to_uint64(l_5) & (uint64_t)0x7ffffffffffffU; + uint64_t c0 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_5, (uint32_t)51U)); + FStar_UInt128_uint128 l_6 = FStar_UInt128_add(tmp_w21, FStar_UInt128_uint64_to_uint128(c0)); + uint64_t tmp1 = FStar_UInt128_uint128_to_uint64(l_6) & (uint64_t)0x7ffffffffffffU; + uint64_t c1 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_6, (uint32_t)51U)); + FStar_UInt128_uint128 l_7 = FStar_UInt128_add(tmp_w22, FStar_UInt128_uint64_to_uint128(c1)); + uint64_t tmp2 = FStar_UInt128_uint128_to_uint64(l_7) & 
(uint64_t)0x7ffffffffffffU; + uint64_t c2 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_7, (uint32_t)51U)); + FStar_UInt128_uint128 l_8 = FStar_UInt128_add(tmp_w23, FStar_UInt128_uint64_to_uint128(c2)); + uint64_t tmp3 = FStar_UInt128_uint128_to_uint64(l_8) & (uint64_t)0x7ffffffffffffU; + uint64_t c3 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_8, (uint32_t)51U)); + FStar_UInt128_uint128 l_9 = FStar_UInt128_add(tmp_w24, FStar_UInt128_uint64_to_uint128(c3)); + uint64_t tmp4 = FStar_UInt128_uint128_to_uint64(l_9) & (uint64_t)0x7ffffffffffffU; + uint64_t c4 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_9, (uint32_t)51U)); + uint64_t l_10 = tmp0 + c4 * (uint64_t)19U; + uint64_t tmp0_0 = l_10 & (uint64_t)0x7ffffffffffffU; + uint64_t c5 = l_10 >> (uint32_t)51U; + uint64_t o200 = tmp0_0; + uint64_t o212 = tmp1 + c5; + uint64_t o222 = tmp2; + uint64_t o232 = tmp3; + uint64_t o242 = tmp4; + uint64_t o10 = o100; + uint64_t o11 = o112; + uint64_t o12 = o122; + uint64_t o13 = o132; + uint64_t o14 = o142; + uint64_t o20 = o200; + uint64_t o21 = o212; + uint64_t o22 = o222; + uint64_t o23 = o232; + uint64_t o24 = o242; + out[0U] = o10; + out[1U] = o11; + out[2U] = o12; + out[3U] = o13; + out[4U] = o14; + out[5U] = o20; + out[6U] = o21; + out[7U] = o22; + out[8U] = o23; + out[9U] = o24; +} + +static inline void +Hacl_Impl_Curve25519_Field51_fmul1(uint64_t *out, uint64_t *f1, uint64_t f2) +{ + uint64_t f10 = f1[0U]; + uint64_t f11 = f1[1U]; + uint64_t f12 = f1[2U]; + uint64_t f13 = f1[3U]; + uint64_t f14 = f1[4U]; + FStar_UInt128_uint128 tmp_w0 = FStar_UInt128_mul_wide(f2, f10); + FStar_UInt128_uint128 tmp_w1 = FStar_UInt128_mul_wide(f2, f11); + FStar_UInt128_uint128 tmp_w2 = FStar_UInt128_mul_wide(f2, f12); + FStar_UInt128_uint128 tmp_w3 = FStar_UInt128_mul_wide(f2, f13); + FStar_UInt128_uint128 tmp_w4 = FStar_UInt128_mul_wide(f2, f14); + FStar_UInt128_uint128 + l_ = FStar_UInt128_add(tmp_w0, 
FStar_UInt128_uint64_to_uint128((uint64_t)0U)); + uint64_t tmp0 = FStar_UInt128_uint128_to_uint64(l_) & (uint64_t)0x7ffffffffffffU; + uint64_t c0 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_, (uint32_t)51U)); + FStar_UInt128_uint128 l_0 = FStar_UInt128_add(tmp_w1, FStar_UInt128_uint64_to_uint128(c0)); + uint64_t tmp1 = FStar_UInt128_uint128_to_uint64(l_0) & (uint64_t)0x7ffffffffffffU; + uint64_t c1 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_0, (uint32_t)51U)); + FStar_UInt128_uint128 l_1 = FStar_UInt128_add(tmp_w2, FStar_UInt128_uint64_to_uint128(c1)); + uint64_t tmp2 = FStar_UInt128_uint128_to_uint64(l_1) & (uint64_t)0x7ffffffffffffU; + uint64_t c2 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_1, (uint32_t)51U)); + FStar_UInt128_uint128 l_2 = FStar_UInt128_add(tmp_w3, FStar_UInt128_uint64_to_uint128(c2)); + uint64_t tmp3 = FStar_UInt128_uint128_to_uint64(l_2) & (uint64_t)0x7ffffffffffffU; + uint64_t c3 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_2, (uint32_t)51U)); + FStar_UInt128_uint128 l_3 = FStar_UInt128_add(tmp_w4, FStar_UInt128_uint64_to_uint128(c3)); + uint64_t tmp4 = FStar_UInt128_uint128_to_uint64(l_3) & (uint64_t)0x7ffffffffffffU; + uint64_t c4 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_3, (uint32_t)51U)); + uint64_t l_4 = tmp0 + c4 * (uint64_t)19U; + uint64_t tmp0_ = l_4 & (uint64_t)0x7ffffffffffffU; + uint64_t c5 = l_4 >> (uint32_t)51U; + uint64_t o0 = tmp0_; + uint64_t o1 = tmp1 + c5; + uint64_t o2 = tmp2; + uint64_t o3 = tmp3; + uint64_t o4 = tmp4; + out[0U] = o0; + out[1U] = o1; + out[2U] = o2; + out[3U] = o3; + out[4U] = o4; +} + +static inline void +Hacl_Impl_Curve25519_Field51_fsqr(uint64_t *out, uint64_t *f, FStar_UInt128_uint128 *uu___) +{ + uint64_t f0 = f[0U]; + uint64_t f1 = f[1U]; + uint64_t f2 = f[2U]; + uint64_t f3 = f[3U]; + uint64_t f4 = f[4U]; + uint64_t d0 = (uint64_t)2U * f0; + uint64_t d1 = (uint64_t)2U * f1; + uint64_t d2 = (uint64_t)38U * f2; 
+ uint64_t d3 = (uint64_t)19U * f3; + uint64_t d419 = (uint64_t)19U * f4; + uint64_t d4 = (uint64_t)2U * d419; + FStar_UInt128_uint128 + s0 = + FStar_UInt128_add(FStar_UInt128_add(FStar_UInt128_mul_wide(f0, f0), + FStar_UInt128_mul_wide(d4, f1)), + FStar_UInt128_mul_wide(d2, f3)); + FStar_UInt128_uint128 + s1 = + FStar_UInt128_add(FStar_UInt128_add(FStar_UInt128_mul_wide(d0, f1), + FStar_UInt128_mul_wide(d4, f2)), + FStar_UInt128_mul_wide(d3, f3)); + FStar_UInt128_uint128 + s2 = + FStar_UInt128_add(FStar_UInt128_add(FStar_UInt128_mul_wide(d0, f2), + FStar_UInt128_mul_wide(f1, f1)), + FStar_UInt128_mul_wide(d4, f3)); + FStar_UInt128_uint128 + s3 = + FStar_UInt128_add(FStar_UInt128_add(FStar_UInt128_mul_wide(d0, f3), + FStar_UInt128_mul_wide(d1, f2)), + FStar_UInt128_mul_wide(f4, d419)); + FStar_UInt128_uint128 + s4 = + FStar_UInt128_add(FStar_UInt128_add(FStar_UInt128_mul_wide(d0, f4), + FStar_UInt128_mul_wide(d1, f3)), + FStar_UInt128_mul_wide(f2, f2)); + FStar_UInt128_uint128 o00 = s0; + FStar_UInt128_uint128 o10 = s1; + FStar_UInt128_uint128 o20 = s2; + FStar_UInt128_uint128 o30 = s3; + FStar_UInt128_uint128 o40 = s4; + FStar_UInt128_uint128 + l_ = FStar_UInt128_add(o00, FStar_UInt128_uint64_to_uint128((uint64_t)0U)); + uint64_t tmp0 = FStar_UInt128_uint128_to_uint64(l_) & (uint64_t)0x7ffffffffffffU; + uint64_t c0 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_, (uint32_t)51U)); + FStar_UInt128_uint128 l_0 = FStar_UInt128_add(o10, FStar_UInt128_uint64_to_uint128(c0)); + uint64_t tmp1 = FStar_UInt128_uint128_to_uint64(l_0) & (uint64_t)0x7ffffffffffffU; + uint64_t c1 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_0, (uint32_t)51U)); + FStar_UInt128_uint128 l_1 = FStar_UInt128_add(o20, FStar_UInt128_uint64_to_uint128(c1)); + uint64_t tmp2 = FStar_UInt128_uint128_to_uint64(l_1) & (uint64_t)0x7ffffffffffffU; + uint64_t c2 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_1, (uint32_t)51U)); + FStar_UInt128_uint128 l_2 = 
FStar_UInt128_add(o30, FStar_UInt128_uint64_to_uint128(c2)); + uint64_t tmp3 = FStar_UInt128_uint128_to_uint64(l_2) & (uint64_t)0x7ffffffffffffU; + uint64_t c3 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_2, (uint32_t)51U)); + FStar_UInt128_uint128 l_3 = FStar_UInt128_add(o40, FStar_UInt128_uint64_to_uint128(c3)); + uint64_t tmp4 = FStar_UInt128_uint128_to_uint64(l_3) & (uint64_t)0x7ffffffffffffU; + uint64_t c4 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_3, (uint32_t)51U)); + uint64_t l_4 = tmp0 + c4 * (uint64_t)19U; + uint64_t tmp0_ = l_4 & (uint64_t)0x7ffffffffffffU; + uint64_t c5 = l_4 >> (uint32_t)51U; + uint64_t o0 = tmp0_; + uint64_t o1 = tmp1 + c5; + uint64_t o2 = tmp2; + uint64_t o3 = tmp3; + uint64_t o4 = tmp4; + out[0U] = o0; + out[1U] = o1; + out[2U] = o2; + out[3U] = o3; + out[4U] = o4; +} + +static inline void +Hacl_Impl_Curve25519_Field51_fsqr2(uint64_t *out, uint64_t *f, FStar_UInt128_uint128 *uu___) +{ + uint64_t f10 = f[0U]; + uint64_t f11 = f[1U]; + uint64_t f12 = f[2U]; + uint64_t f13 = f[3U]; + uint64_t f14 = f[4U]; + uint64_t f20 = f[5U]; + uint64_t f21 = f[6U]; + uint64_t f22 = f[7U]; + uint64_t f23 = f[8U]; + uint64_t f24 = f[9U]; + uint64_t d00 = (uint64_t)2U * f10; + uint64_t d10 = (uint64_t)2U * f11; + uint64_t d20 = (uint64_t)38U * f12; + uint64_t d30 = (uint64_t)19U * f13; + uint64_t d4190 = (uint64_t)19U * f14; + uint64_t d40 = (uint64_t)2U * d4190; + FStar_UInt128_uint128 + s00 = + FStar_UInt128_add(FStar_UInt128_add(FStar_UInt128_mul_wide(f10, f10), + FStar_UInt128_mul_wide(d40, f11)), + FStar_UInt128_mul_wide(d20, f13)); + FStar_UInt128_uint128 + s10 = + FStar_UInt128_add(FStar_UInt128_add(FStar_UInt128_mul_wide(d00, f11), + FStar_UInt128_mul_wide(d40, f12)), + FStar_UInt128_mul_wide(d30, f13)); + FStar_UInt128_uint128 + s20 = + FStar_UInt128_add(FStar_UInt128_add(FStar_UInt128_mul_wide(d00, f12), + FStar_UInt128_mul_wide(f11, f11)), + FStar_UInt128_mul_wide(d40, f13)); + FStar_UInt128_uint128 + s30 
= + FStar_UInt128_add(FStar_UInt128_add(FStar_UInt128_mul_wide(d00, f13), + FStar_UInt128_mul_wide(d10, f12)), + FStar_UInt128_mul_wide(f14, d4190)); + FStar_UInt128_uint128 + s40 = + FStar_UInt128_add(FStar_UInt128_add(FStar_UInt128_mul_wide(d00, f14), + FStar_UInt128_mul_wide(d10, f13)), + FStar_UInt128_mul_wide(f12, f12)); + FStar_UInt128_uint128 o100 = s00; + FStar_UInt128_uint128 o110 = s10; + FStar_UInt128_uint128 o120 = s20; + FStar_UInt128_uint128 o130 = s30; + FStar_UInt128_uint128 o140 = s40; + uint64_t d0 = (uint64_t)2U * f20; + uint64_t d1 = (uint64_t)2U * f21; + uint64_t d2 = (uint64_t)38U * f22; + uint64_t d3 = (uint64_t)19U * f23; + uint64_t d419 = (uint64_t)19U * f24; + uint64_t d4 = (uint64_t)2U * d419; + FStar_UInt128_uint128 + s0 = + FStar_UInt128_add(FStar_UInt128_add(FStar_UInt128_mul_wide(f20, f20), + FStar_UInt128_mul_wide(d4, f21)), + FStar_UInt128_mul_wide(d2, f23)); + FStar_UInt128_uint128 + s1 = + FStar_UInt128_add(FStar_UInt128_add(FStar_UInt128_mul_wide(d0, f21), + FStar_UInt128_mul_wide(d4, f22)), + FStar_UInt128_mul_wide(d3, f23)); + FStar_UInt128_uint128 + s2 = + FStar_UInt128_add(FStar_UInt128_add(FStar_UInt128_mul_wide(d0, f22), + FStar_UInt128_mul_wide(f21, f21)), + FStar_UInt128_mul_wide(d4, f23)); + FStar_UInt128_uint128 + s3 = + FStar_UInt128_add(FStar_UInt128_add(FStar_UInt128_mul_wide(d0, f23), + FStar_UInt128_mul_wide(d1, f22)), + FStar_UInt128_mul_wide(f24, d419)); + FStar_UInt128_uint128 + s4 = + FStar_UInt128_add(FStar_UInt128_add(FStar_UInt128_mul_wide(d0, f24), + FStar_UInt128_mul_wide(d1, f23)), + FStar_UInt128_mul_wide(f22, f22)); + FStar_UInt128_uint128 o200 = s0; + FStar_UInt128_uint128 o210 = s1; + FStar_UInt128_uint128 o220 = s2; + FStar_UInt128_uint128 o230 = s3; + FStar_UInt128_uint128 o240 = s4; + FStar_UInt128_uint128 + l_ = FStar_UInt128_add(o100, FStar_UInt128_uint64_to_uint128((uint64_t)0U)); + uint64_t tmp00 = FStar_UInt128_uint128_to_uint64(l_) & (uint64_t)0x7ffffffffffffU; + uint64_t c00 = 
FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_, (uint32_t)51U)); + FStar_UInt128_uint128 l_0 = FStar_UInt128_add(o110, FStar_UInt128_uint64_to_uint128(c00)); + uint64_t tmp10 = FStar_UInt128_uint128_to_uint64(l_0) & (uint64_t)0x7ffffffffffffU; + uint64_t c10 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_0, (uint32_t)51U)); + FStar_UInt128_uint128 l_1 = FStar_UInt128_add(o120, FStar_UInt128_uint64_to_uint128(c10)); + uint64_t tmp20 = FStar_UInt128_uint128_to_uint64(l_1) & (uint64_t)0x7ffffffffffffU; + uint64_t c20 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_1, (uint32_t)51U)); + FStar_UInt128_uint128 l_2 = FStar_UInt128_add(o130, FStar_UInt128_uint64_to_uint128(c20)); + uint64_t tmp30 = FStar_UInt128_uint128_to_uint64(l_2) & (uint64_t)0x7ffffffffffffU; + uint64_t c30 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_2, (uint32_t)51U)); + FStar_UInt128_uint128 l_3 = FStar_UInt128_add(o140, FStar_UInt128_uint64_to_uint128(c30)); + uint64_t tmp40 = FStar_UInt128_uint128_to_uint64(l_3) & (uint64_t)0x7ffffffffffffU; + uint64_t c40 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_3, (uint32_t)51U)); + uint64_t l_4 = tmp00 + c40 * (uint64_t)19U; + uint64_t tmp0_ = l_4 & (uint64_t)0x7ffffffffffffU; + uint64_t c50 = l_4 >> (uint32_t)51U; + uint64_t o101 = tmp0_; + uint64_t o111 = tmp10 + c50; + uint64_t o121 = tmp20; + uint64_t o131 = tmp30; + uint64_t o141 = tmp40; + FStar_UInt128_uint128 + l_5 = FStar_UInt128_add(o200, FStar_UInt128_uint64_to_uint128((uint64_t)0U)); + uint64_t tmp0 = FStar_UInt128_uint128_to_uint64(l_5) & (uint64_t)0x7ffffffffffffU; + uint64_t c0 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_5, (uint32_t)51U)); + FStar_UInt128_uint128 l_6 = FStar_UInt128_add(o210, FStar_UInt128_uint64_to_uint128(c0)); + uint64_t tmp1 = FStar_UInt128_uint128_to_uint64(l_6) & (uint64_t)0x7ffffffffffffU; + uint64_t c1 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_6, 
(uint32_t)51U)); + FStar_UInt128_uint128 l_7 = FStar_UInt128_add(o220, FStar_UInt128_uint64_to_uint128(c1)); + uint64_t tmp2 = FStar_UInt128_uint128_to_uint64(l_7) & (uint64_t)0x7ffffffffffffU; + uint64_t c2 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_7, (uint32_t)51U)); + FStar_UInt128_uint128 l_8 = FStar_UInt128_add(o230, FStar_UInt128_uint64_to_uint128(c2)); + uint64_t tmp3 = FStar_UInt128_uint128_to_uint64(l_8) & (uint64_t)0x7ffffffffffffU; + uint64_t c3 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_8, (uint32_t)51U)); + FStar_UInt128_uint128 l_9 = FStar_UInt128_add(o240, FStar_UInt128_uint64_to_uint128(c3)); + uint64_t tmp4 = FStar_UInt128_uint128_to_uint64(l_9) & (uint64_t)0x7ffffffffffffU; + uint64_t c4 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(l_9, (uint32_t)51U)); + uint64_t l_10 = tmp0 + c4 * (uint64_t)19U; + uint64_t tmp0_0 = l_10 & (uint64_t)0x7ffffffffffffU; + uint64_t c5 = l_10 >> (uint32_t)51U; + uint64_t o201 = tmp0_0; + uint64_t o211 = tmp1 + c5; + uint64_t o221 = tmp2; + uint64_t o231 = tmp3; + uint64_t o241 = tmp4; + uint64_t o10 = o101; + uint64_t o11 = o111; + uint64_t o12 = o121; + uint64_t o13 = o131; + uint64_t o14 = o141; + uint64_t o20 = o201; + uint64_t o21 = o211; + uint64_t o22 = o221; + uint64_t o23 = o231; + uint64_t o24 = o241; + out[0U] = o10; + out[1U] = o11; + out[2U] = o12; + out[3U] = o13; + out[4U] = o14; + out[5U] = o20; + out[6U] = o21; + out[7U] = o22; + out[8U] = o23; + out[9U] = o24; +} + +static inline void +Hacl_Impl_Curve25519_Field51_store_felem(uint64_t *u64s, uint64_t *f) +{ + uint64_t f0 = f[0U]; + uint64_t f1 = f[1U]; + uint64_t f2 = f[2U]; + uint64_t f3 = f[3U]; + uint64_t f4 = f[4U]; + uint64_t l_ = f0 + (uint64_t)0U; + uint64_t tmp0 = l_ & (uint64_t)0x7ffffffffffffU; + uint64_t c0 = l_ >> (uint32_t)51U; + uint64_t l_0 = f1 + c0; + uint64_t tmp1 = l_0 & (uint64_t)0x7ffffffffffffU; + uint64_t c1 = l_0 >> (uint32_t)51U; + uint64_t l_1 = f2 + c1; + uint64_t tmp2 = 
l_1 & (uint64_t)0x7ffffffffffffU; + uint64_t c2 = l_1 >> (uint32_t)51U; + uint64_t l_2 = f3 + c2; + uint64_t tmp3 = l_2 & (uint64_t)0x7ffffffffffffU; + uint64_t c3 = l_2 >> (uint32_t)51U; + uint64_t l_3 = f4 + c3; + uint64_t tmp4 = l_3 & (uint64_t)0x7ffffffffffffU; + uint64_t c4 = l_3 >> (uint32_t)51U; + uint64_t l_4 = tmp0 + c4 * (uint64_t)19U; + uint64_t tmp0_ = l_4 & (uint64_t)0x7ffffffffffffU; + uint64_t c5 = l_4 >> (uint32_t)51U; + uint64_t f01 = tmp0_; + uint64_t f11 = tmp1 + c5; + uint64_t f21 = tmp2; + uint64_t f31 = tmp3; + uint64_t f41 = tmp4; + uint64_t m0 = FStar_UInt64_gte_mask(f01, (uint64_t)0x7ffffffffffedU); + uint64_t m1 = FStar_UInt64_eq_mask(f11, (uint64_t)0x7ffffffffffffU); + uint64_t m2 = FStar_UInt64_eq_mask(f21, (uint64_t)0x7ffffffffffffU); + uint64_t m3 = FStar_UInt64_eq_mask(f31, (uint64_t)0x7ffffffffffffU); + uint64_t m4 = FStar_UInt64_eq_mask(f41, (uint64_t)0x7ffffffffffffU); + uint64_t mask = (((m0 & m1) & m2) & m3) & m4; + uint64_t f0_ = f01 - (mask & (uint64_t)0x7ffffffffffedU); + uint64_t f1_ = f11 - (mask & (uint64_t)0x7ffffffffffffU); + uint64_t f2_ = f21 - (mask & (uint64_t)0x7ffffffffffffU); + uint64_t f3_ = f31 - (mask & (uint64_t)0x7ffffffffffffU); + uint64_t f4_ = f41 - (mask & (uint64_t)0x7ffffffffffffU); + uint64_t f02 = f0_; + uint64_t f12 = f1_; + uint64_t f22 = f2_; + uint64_t f32 = f3_; + uint64_t f42 = f4_; + uint64_t o00 = f02 | f12 << (uint32_t)51U; + uint64_t o10 = f12 >> (uint32_t)13U | f22 << (uint32_t)38U; + uint64_t o20 = f22 >> (uint32_t)26U | f32 << (uint32_t)25U; + uint64_t o30 = f32 >> (uint32_t)39U | f42 << (uint32_t)12U; + uint64_t o0 = o00; + uint64_t o1 = o10; + uint64_t o2 = o20; + uint64_t o3 = o30; + u64s[0U] = o0; + u64s[1U] = o1; + u64s[2U] = o2; + u64s[3U] = o3; +} + +/* Branch-free conditional swap of two 10-word arrays: every word of p1 and p2 is exchanged when bit is 1 and both arrays are left as they were when bit is 0. Assumes bit is exactly 0 or 1 (any other value would yield a partial mask) - TODO confirm against callers. KRML_MAYBE_FOR10 is defined outside this chunk; presumably it runs the body for i = 0, 1, ..., 9. */ static inline void +Hacl_Impl_Curve25519_Field51_cswap2(uint64_t bit, uint64_t *p1, uint64_t *p2) +{ + /* 0 - bit is all-ones for bit == 1 and zero for bit == 0; XORing both words with mask & (p1[i] ^ p2[i]) therefore either swaps them or changes nothing */ uint64_t mask = (uint64_t)0U - bit; + KRML_MAYBE_FOR10(i, + (uint32_t)0U, + (uint32_t)10U, + (uint32_t)1U, + uint64_t dummy = mask & 
(p1[i] ^ p2[i]); + p1[i] = p1[i] ^ dummy; + p2[i] = p2[i] ^ dummy;); +} + +#if defined(__cplusplus) +} +#endif + +#define __Hacl_Bignum25519_51_H_DEFINED +#endif diff --git a/security/nss/lib/freebl/verified/Hacl_Chacha20.c b/security/nss/lib/freebl/verified/Hacl_Chacha20.c new file mode 100644 index 0000000000..d8827b3bc4 --- /dev/null +++ b/security/nss/lib/freebl/verified/Hacl_Chacha20.c @@ -0,0 +1,226 @@ +/* MIT License + * + * Copyright (c) 2016-2020 INRIA, CMU and Microsoft Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "internal/Hacl_Chacha20.h" + +/* The four constant state words; read as little-endian bytes they spell the ASCII text: expand 32-byte k. This exported array is not referenced in the visible part of this file (Hacl_Impl_Chacha20_chacha20_init reads the static copy chacha20_constants instead). */ const uint32_t + Hacl_Impl_Chacha20_Vec_chacha20_constants[4U] = { (uint32_t)0x61707865U, (uint32_t)0x3320646eU, (uint32_t)0x79622d32U, (uint32_t)0x6b206574U }; + +/* One quarter round on the state words st[a], st[b], st[c], st[d], done as four add/xor/rotate steps: a += b, d ^= a, d <<<= 16; c += d, b ^= c, b <<<= 12; a += b, d ^= a, d <<<= 8; c += d, b ^= c, b <<<= 7 (<<< is a 32-bit left rotation, written below as a pair of shifts). The local names do not track the roles: in the second and fourth steps the sta* locals hold st[c], the stb* locals hold st[d] and the std* locals hold st[b]. */ static inline void +quarter_round(uint32_t *st, uint32_t a, uint32_t b, uint32_t c, uint32_t d) +{ + uint32_t sta = st[a]; + uint32_t stb0 = st[b]; + uint32_t std0 = st[d]; + uint32_t sta10 = sta + stb0; + uint32_t std10 = std0 ^ sta10; + uint32_t std2 = std10 << (uint32_t)16U | std10 >> (uint32_t)16U; + st[a] = sta10; + st[d] = std2; + uint32_t sta0 = st[c]; + uint32_t stb1 = st[d]; + uint32_t std3 = st[b]; + uint32_t sta11 = sta0 + stb1; + uint32_t std11 = std3 ^ sta11; + uint32_t std20 = std11 << (uint32_t)12U | std11 >> (uint32_t)20U; + st[c] = sta11; + st[b] = std20; + uint32_t sta2 = st[a]; + uint32_t stb2 = st[b]; + uint32_t std4 = st[d]; + uint32_t sta12 = sta2 + stb2; + uint32_t std12 = std4 ^ sta12; + uint32_t std21 = std12 << (uint32_t)8U | std12 >> (uint32_t)24U; + st[a] = sta12; + st[d] = std21; + uint32_t sta3 = st[c]; + uint32_t stb = st[d]; + uint32_t std = st[b]; + uint32_t sta1 = sta3 + stb; + uint32_t std1 = std ^ sta1; + uint32_t std22 = std1 << (uint32_t)7U | std1 >> (uint32_t)25U; + st[c] = sta1; + st[b] = std22; +} + +/* One double round: quarter_round over the index sets (0,4,8,12), (1,5,9,13), (2,6,10,14), (3,7,11,15) and then over (0,5,10,15), (1,6,11,12), (2,7,8,13), (3,4,9,14), i.e. the columns followed by the diagonals when the 16 words are laid out as a 4x4 row-major matrix. */ static inline void +double_round(uint32_t *st) +{ + quarter_round(st, (uint32_t)0U, (uint32_t)4U, (uint32_t)8U, (uint32_t)12U); + quarter_round(st, (uint32_t)1U, (uint32_t)5U, (uint32_t)9U, (uint32_t)13U); + quarter_round(st, (uint32_t)2U, (uint32_t)6U, (uint32_t)10U, (uint32_t)14U); + quarter_round(st, (uint32_t)3U, (uint32_t)7U, (uint32_t)11U, (uint32_t)15U); + quarter_round(st, (uint32_t)0U, (uint32_t)5U, (uint32_t)10U, (uint32_t)15U); + quarter_round(st, (uint32_t)1U, (uint32_t)6U, (uint32_t)11U, (uint32_t)12U); + quarter_round(st, (uint32_t)2U, (uint32_t)7U, (uint32_t)8U, (uint32_t)13U); + quarter_round(st, (uint32_t)3U, (uint32_t)4U, (uint32_t)9U, (uint32_t)14U); +} + +/* Applies double_round to st ten times in a row, which is 80 quarter rounds in total. */ static inline void +rounds(uint32_t *st) +{ + 
double_round(st); + double_round(st); + double_round(st); + double_round(st); + double_round(st); + double_round(st); + double_round(st); + double_round(st); + double_round(st); + double_round(st); +} + +/* Writes one 16-word block to k: k starts as a copy of ctx with ctr added to word 12, is passed through rounds(), and then has ctx added back word by word. ctx is only read. The KRML_MAYBE_FORn macros used in this file are defined outside this chunk; presumably each runs its body for i from the first bound up to, but not including, the second bound. */ static inline void +chacha20_core(uint32_t *k, uint32_t *ctx, uint32_t ctr) +{ + memcpy(k, ctx, (uint32_t)16U * sizeof(uint32_t)); + uint32_t ctr_u32 = ctr; + k[12U] = k[12U] + ctr_u32; + rounds(k); + KRML_MAYBE_FOR16(i, + (uint32_t)0U, + (uint32_t)16U, + (uint32_t)1U, + uint32_t *os = k; + uint32_t x = k[i] + ctx[i]; + os[i] = x;); + /* the loop above added ctx[12], which does not include ctr, so ctr is added once more to match the word 12 that went into rounds() */ k[12U] = k[12U] + ctr_u32; +} + +/* File-local copy of the same four constant words, read by Hacl_Impl_Chacha20_chacha20_init. */ static const uint32_t + chacha20_constants[4U] = { (uint32_t)0x61707865U, (uint32_t)0x3320646eU, (uint32_t)0x79622d32U, (uint32_t)0x6b206574U }; + +/* Fills the 16-word state ctx: words 0-3 receive chacha20_constants, words 4-11 receive eight 32-bit words read from k (32 bytes), word 12 receives ctr, and words 13-15 receive three 32-bit words read from n (12 bytes). load32_le is declared elsewhere; presumably it is a little-endian 32-bit load. */ void +Hacl_Impl_Chacha20_chacha20_init(uint32_t *ctx, uint8_t *k, uint8_t *n, uint32_t ctr) +{ + KRML_MAYBE_FOR4(i, + (uint32_t)0U, + (uint32_t)4U, + (uint32_t)1U, + uint32_t *os = ctx; + uint32_t x = chacha20_constants[i]; + os[i] = x;); + KRML_MAYBE_FOR8(i, + (uint32_t)0U, + (uint32_t)8U, + (uint32_t)1U, + uint32_t *os = ctx + (uint32_t)4U; + uint8_t *bj = k + i * (uint32_t)4U; + uint32_t u = load32_le(bj); + uint32_t r = u; + uint32_t x = r; + os[i] = x;); + ctx[12U] = ctr; + KRML_MAYBE_FOR3(i, + (uint32_t)0U, + (uint32_t)3U, + (uint32_t)1U, + uint32_t *os = ctx + (uint32_t)13U; + uint8_t *bj = n + i * (uint32_t)4U; + uint32_t u = load32_le(bj); + uint32_t r = u; + uint32_t x = r; + os[i] = x;); +} + +/* Processes one 64-byte block: obtains a 16-word block k from chacha20_core with counter offset incr, XORs it with the 16 words read from text, and stores the 16 result words to out. All of text is read into bl before anything is stored to out, and chacha20_encrypt_last calls this with text and out pointing at the same buffer. */ static void +chacha20_encrypt_block(uint32_t *ctx, uint8_t *out, uint32_t incr, uint8_t *text) +{ + uint32_t k[16U] = { 0U }; + chacha20_core(k, ctx, incr); + uint32_t bl[16U] = { 0U }; + KRML_MAYBE_FOR16(i, + (uint32_t)0U, + (uint32_t)16U, + (uint32_t)1U, + uint32_t *os = bl; + uint8_t *bj = text + i * (uint32_t)4U; + uint32_t u = load32_le(bj); + uint32_t r = u; + uint32_t x = r; + os[i] = x;); + KRML_MAYBE_FOR16(i, + (uint32_t)0U, + (uint32_t)16U, + (uint32_t)1U, + uint32_t *os = bl; + uint32_t x = bl[i] ^ k[i]; + os[i] = x;); + KRML_MAYBE_FOR16(i, + 
(uint32_t)0U, + (uint32_t)16U, + (uint32_t)1U, + store32_le(out + i * (uint32_t)4U, bl[i]);); +} + +/* Handles a trailing partial block of len bytes: copies it into a zeroed 64-byte stack buffer, runs chacha20_encrypt_block on that buffer in place, and copies the first len bytes to out. len is not range-checked here; the only caller in this file passes len % 64. */ static inline void +chacha20_encrypt_last(uint32_t *ctx, uint32_t len, uint8_t *out, uint32_t incr, uint8_t *text) +{ + uint8_t plain[64U] = { 0U }; + memcpy(plain, text, len * sizeof(uint8_t)); + chacha20_encrypt_block(ctx, plain, incr, plain); + memcpy(out, plain, len * sizeof(uint8_t)); +} + +/* Processes len bytes from text into out: each of the len / 64 full blocks uses its block index i as the counter offset, and a trailing partial block, if there is one, uses offset len / 64. rem and rem1 hold the same value, len % 64. ctx is only read. */ void +Hacl_Impl_Chacha20_chacha20_update(uint32_t *ctx, uint32_t len, uint8_t *out, uint8_t *text) +{ + uint32_t rem = len % (uint32_t)64U; + uint32_t nb = len / (uint32_t)64U; + uint32_t rem1 = len % (uint32_t)64U; + for (uint32_t i = (uint32_t)0U; i < nb; i++) { + chacha20_encrypt_block(ctx, out + i * (uint32_t)64U, i, text + i * (uint32_t)64U); + } + if (rem1 > (uint32_t)0U) { + chacha20_encrypt_last(ctx, rem, out + nb * (uint32_t)64U, nb, text + nb * (uint32_t)64U); + } +} + +/* Encrypts len bytes of text into out, using a 16-word state built on the stack from key, n and the starting counter ctr (Hacl_Impl_Chacha20_chacha20_init reads 32 bytes of key and 12 bytes of n). */ void +Hacl_Chacha20_chacha20_encrypt( + uint32_t len, + uint8_t *out, + uint8_t *text, + uint8_t *key, + uint8_t *n, + uint32_t ctr) +{ + uint32_t ctx[16U] = { 0U }; + Hacl_Impl_Chacha20_chacha20_init(ctx, key, n, ctr); + Hacl_Impl_Chacha20_chacha20_update(ctx, len, out, text); +} + +/* Performs the same steps as Hacl_Chacha20_chacha20_encrypt with cipher as the input buffer; the data path is a XOR with the generated blocks, so both directions are the same operation. */ void +Hacl_Chacha20_chacha20_decrypt( + uint32_t len, + uint8_t *out, + uint8_t *cipher, + uint8_t *key, + uint8_t *n, + uint32_t ctr) +{ + uint32_t ctx[16U] = { 0U }; + Hacl_Impl_Chacha20_chacha20_init(ctx, key, n, ctr); + Hacl_Impl_Chacha20_chacha20_update(ctx, len, out, cipher); +} diff --git a/security/nss/lib/freebl/verified/Hacl_Chacha20.h b/security/nss/lib/freebl/verified/Hacl_Chacha20.h new file mode 100644 index 0000000000..56f2ae0640 --- /dev/null +++ b/security/nss/lib/freebl/verified/Hacl_Chacha20.h @@ -0,0 +1,61 @@ +/* MIT License + * + * Copyright (c) 2016-2020 INRIA, CMU and Microsoft Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, 
including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef __Hacl_Chacha20_H +#define __Hacl_Chacha20_H + +#if defined(__cplusplus) +extern "C" { +#endif + +#include <string.h> +#include "krml/internal/types.h" +#include "krml/lowstar_endianness.h" +#include "krml/internal/target.h" + +#include "Hacl_Krmllib.h" + +/* Processes len bytes from text into out. The definition in Hacl_Chacha20.c builds a 16-word state from key (32 bytes are read), n (12 bytes are read) and the starting block counter ctr, then XORs the input with the blocks derived from that state. */ void +Hacl_Chacha20_chacha20_encrypt( + uint32_t len, + uint8_t *out, + uint8_t *text, + uint8_t *key, + uint8_t *n, + uint32_t ctr); + +/* Same operation with cipher as the input buffer; the definition in Hacl_Chacha20.c has the same body as the encrypt function. */ void +Hacl_Chacha20_chacha20_decrypt( + uint32_t len, + uint8_t *out, + uint8_t *cipher, + uint8_t *key, + uint8_t *n, + uint32_t ctr); + +#if defined(__cplusplus) +} +#endif + +#define __Hacl_Chacha20_H_DEFINED +#endif diff --git a/security/nss/lib/freebl/verified/Hacl_Chacha20Poly1305_128.c b/security/nss/lib/freebl/verified/Hacl_Chacha20Poly1305_128.c new file mode 100644 index 0000000000..d7ee9647ac --- /dev/null +++ b/security/nss/lib/freebl/verified/Hacl_Chacha20Poly1305_128.c @@ -0,0 +1,1177 @@ +/* MIT License + * + * Copyright (c) 2016-2020 INRIA, CMU and Microsoft Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and 
associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "Hacl_Chacha20Poly1305_128.h" + +#include "internal/Hacl_Poly1305_128.h" +#include "internal/Hacl_Krmllib.h" +#include "libintvector.h" +static inline void +poly1305_padded_128(Lib_IntVector_Intrinsics_vec128 *ctx, uint32_t len, uint8_t *text) +{ + uint32_t n = len / (uint32_t)16U; + uint32_t r = len % (uint32_t)16U; + uint8_t *blocks = text; + uint8_t *rem = text + n * (uint32_t)16U; + Lib_IntVector_Intrinsics_vec128 *pre0 = ctx + (uint32_t)5U; + Lib_IntVector_Intrinsics_vec128 *acc0 = ctx; + uint32_t sz_block = (uint32_t)32U; + uint32_t len0 = n * (uint32_t)16U / sz_block * sz_block; + uint8_t *t00 = blocks; + if (len0 > (uint32_t)0U) { + uint32_t bs = (uint32_t)32U; + uint8_t *text0 = t00; + Hacl_Impl_Poly1305_Field32xN_128_load_acc2(acc0, text0); + uint32_t len1 = len0 - bs; + uint8_t *text1 = t00 + bs; + uint32_t nb = len1 / bs; + for (uint32_t i = (uint32_t)0U; i < nb; i++) { + uint8_t *block = text1 + i * bs; + KRML_PRE_ALIGN(16) + Lib_IntVector_Intrinsics_vec128 e[5U] 
KRML_POST_ALIGN(16) = { 0U }; + Lib_IntVector_Intrinsics_vec128 b1 = Lib_IntVector_Intrinsics_vec128_load64_le(block); + Lib_IntVector_Intrinsics_vec128 + b2 = Lib_IntVector_Intrinsics_vec128_load64_le(block + (uint32_t)16U); + Lib_IntVector_Intrinsics_vec128 lo = Lib_IntVector_Intrinsics_vec128_interleave_low64(b1, b2); + Lib_IntVector_Intrinsics_vec128 + hi = Lib_IntVector_Intrinsics_vec128_interleave_high64(b1, b2); + Lib_IntVector_Intrinsics_vec128 + f00 = + Lib_IntVector_Intrinsics_vec128_and(lo, + Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec128 + f15 = + Lib_IntVector_Intrinsics_vec128_and(Lib_IntVector_Intrinsics_vec128_shift_right64(lo, + (uint32_t)26U), + Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec128 + f25 = + Lib_IntVector_Intrinsics_vec128_or(Lib_IntVector_Intrinsics_vec128_shift_right64(lo, + (uint32_t)52U), + Lib_IntVector_Intrinsics_vec128_shift_left64(Lib_IntVector_Intrinsics_vec128_and(hi, + Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3fffU)), + (uint32_t)12U)); + Lib_IntVector_Intrinsics_vec128 + f30 = + Lib_IntVector_Intrinsics_vec128_and(Lib_IntVector_Intrinsics_vec128_shift_right64(hi, + (uint32_t)14U), + Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec128 + f40 = Lib_IntVector_Intrinsics_vec128_shift_right64(hi, (uint32_t)40U); + Lib_IntVector_Intrinsics_vec128 f0 = f00; + Lib_IntVector_Intrinsics_vec128 f1 = f15; + Lib_IntVector_Intrinsics_vec128 f2 = f25; + Lib_IntVector_Intrinsics_vec128 f3 = f30; + Lib_IntVector_Intrinsics_vec128 f41 = f40; + e[0U] = f0; + e[1U] = f1; + e[2U] = f2; + e[3U] = f3; + e[4U] = f41; + uint64_t b = (uint64_t)0x1000000U; + Lib_IntVector_Intrinsics_vec128 mask = Lib_IntVector_Intrinsics_vec128_load64(b); + Lib_IntVector_Intrinsics_vec128 f4 = e[4U]; + e[4U] = Lib_IntVector_Intrinsics_vec128_or(f4, mask); + Lib_IntVector_Intrinsics_vec128 *rn = pre0 + (uint32_t)10U; + 
Lib_IntVector_Intrinsics_vec128 *rn5 = pre0 + (uint32_t)15U; + Lib_IntVector_Intrinsics_vec128 r0 = rn[0U]; + Lib_IntVector_Intrinsics_vec128 r1 = rn[1U]; + Lib_IntVector_Intrinsics_vec128 r2 = rn[2U]; + Lib_IntVector_Intrinsics_vec128 r3 = rn[3U]; + Lib_IntVector_Intrinsics_vec128 r4 = rn[4U]; + Lib_IntVector_Intrinsics_vec128 r51 = rn5[1U]; + Lib_IntVector_Intrinsics_vec128 r52 = rn5[2U]; + Lib_IntVector_Intrinsics_vec128 r53 = rn5[3U]; + Lib_IntVector_Intrinsics_vec128 r54 = rn5[4U]; + Lib_IntVector_Intrinsics_vec128 f10 = acc0[0U]; + Lib_IntVector_Intrinsics_vec128 f110 = acc0[1U]; + Lib_IntVector_Intrinsics_vec128 f120 = acc0[2U]; + Lib_IntVector_Intrinsics_vec128 f130 = acc0[3U]; + Lib_IntVector_Intrinsics_vec128 f140 = acc0[4U]; + Lib_IntVector_Intrinsics_vec128 a0 = Lib_IntVector_Intrinsics_vec128_mul64(r0, f10); + Lib_IntVector_Intrinsics_vec128 a1 = Lib_IntVector_Intrinsics_vec128_mul64(r1, f10); + Lib_IntVector_Intrinsics_vec128 a2 = Lib_IntVector_Intrinsics_vec128_mul64(r2, f10); + Lib_IntVector_Intrinsics_vec128 a3 = Lib_IntVector_Intrinsics_vec128_mul64(r3, f10); + Lib_IntVector_Intrinsics_vec128 a4 = Lib_IntVector_Intrinsics_vec128_mul64(r4, f10); + Lib_IntVector_Intrinsics_vec128 + a01 = + Lib_IntVector_Intrinsics_vec128_add64(a0, + Lib_IntVector_Intrinsics_vec128_mul64(r54, f110)); + Lib_IntVector_Intrinsics_vec128 + a11 = + Lib_IntVector_Intrinsics_vec128_add64(a1, + Lib_IntVector_Intrinsics_vec128_mul64(r0, f110)); + Lib_IntVector_Intrinsics_vec128 + a21 = + Lib_IntVector_Intrinsics_vec128_add64(a2, + Lib_IntVector_Intrinsics_vec128_mul64(r1, f110)); + Lib_IntVector_Intrinsics_vec128 + a31 = + Lib_IntVector_Intrinsics_vec128_add64(a3, + Lib_IntVector_Intrinsics_vec128_mul64(r2, f110)); + Lib_IntVector_Intrinsics_vec128 + a41 = + Lib_IntVector_Intrinsics_vec128_add64(a4, + Lib_IntVector_Intrinsics_vec128_mul64(r3, f110)); + Lib_IntVector_Intrinsics_vec128 + a02 = + Lib_IntVector_Intrinsics_vec128_add64(a01, + 
Lib_IntVector_Intrinsics_vec128_mul64(r53, f120)); + Lib_IntVector_Intrinsics_vec128 + a12 = + Lib_IntVector_Intrinsics_vec128_add64(a11, + Lib_IntVector_Intrinsics_vec128_mul64(r54, f120)); + Lib_IntVector_Intrinsics_vec128 + a22 = + Lib_IntVector_Intrinsics_vec128_add64(a21, + Lib_IntVector_Intrinsics_vec128_mul64(r0, f120)); + Lib_IntVector_Intrinsics_vec128 + a32 = + Lib_IntVector_Intrinsics_vec128_add64(a31, + Lib_IntVector_Intrinsics_vec128_mul64(r1, f120)); + Lib_IntVector_Intrinsics_vec128 + a42 = + Lib_IntVector_Intrinsics_vec128_add64(a41, + Lib_IntVector_Intrinsics_vec128_mul64(r2, f120)); + Lib_IntVector_Intrinsics_vec128 + a03 = + Lib_IntVector_Intrinsics_vec128_add64(a02, + Lib_IntVector_Intrinsics_vec128_mul64(r52, f130)); + Lib_IntVector_Intrinsics_vec128 + a13 = + Lib_IntVector_Intrinsics_vec128_add64(a12, + Lib_IntVector_Intrinsics_vec128_mul64(r53, f130)); + Lib_IntVector_Intrinsics_vec128 + a23 = + Lib_IntVector_Intrinsics_vec128_add64(a22, + Lib_IntVector_Intrinsics_vec128_mul64(r54, f130)); + Lib_IntVector_Intrinsics_vec128 + a33 = + Lib_IntVector_Intrinsics_vec128_add64(a32, + Lib_IntVector_Intrinsics_vec128_mul64(r0, f130)); + Lib_IntVector_Intrinsics_vec128 + a43 = + Lib_IntVector_Intrinsics_vec128_add64(a42, + Lib_IntVector_Intrinsics_vec128_mul64(r1, f130)); + Lib_IntVector_Intrinsics_vec128 + a04 = + Lib_IntVector_Intrinsics_vec128_add64(a03, + Lib_IntVector_Intrinsics_vec128_mul64(r51, f140)); + Lib_IntVector_Intrinsics_vec128 + a14 = + Lib_IntVector_Intrinsics_vec128_add64(a13, + Lib_IntVector_Intrinsics_vec128_mul64(r52, f140)); + Lib_IntVector_Intrinsics_vec128 + a24 = + Lib_IntVector_Intrinsics_vec128_add64(a23, + Lib_IntVector_Intrinsics_vec128_mul64(r53, f140)); + Lib_IntVector_Intrinsics_vec128 + a34 = + Lib_IntVector_Intrinsics_vec128_add64(a33, + Lib_IntVector_Intrinsics_vec128_mul64(r54, f140)); + Lib_IntVector_Intrinsics_vec128 + a44 = + Lib_IntVector_Intrinsics_vec128_add64(a43, + Lib_IntVector_Intrinsics_vec128_mul64(r0, 
f140)); + Lib_IntVector_Intrinsics_vec128 t01 = a04; + Lib_IntVector_Intrinsics_vec128 t1 = a14; + Lib_IntVector_Intrinsics_vec128 t2 = a24; + Lib_IntVector_Intrinsics_vec128 t3 = a34; + Lib_IntVector_Intrinsics_vec128 t4 = a44; + Lib_IntVector_Intrinsics_vec128 + mask26 = Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU); + Lib_IntVector_Intrinsics_vec128 + z0 = Lib_IntVector_Intrinsics_vec128_shift_right64(t01, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec128 + z1 = Lib_IntVector_Intrinsics_vec128_shift_right64(t3, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec128 x0 = Lib_IntVector_Intrinsics_vec128_and(t01, mask26); + Lib_IntVector_Intrinsics_vec128 x3 = Lib_IntVector_Intrinsics_vec128_and(t3, mask26); + Lib_IntVector_Intrinsics_vec128 x1 = Lib_IntVector_Intrinsics_vec128_add64(t1, z0); + Lib_IntVector_Intrinsics_vec128 x4 = Lib_IntVector_Intrinsics_vec128_add64(t4, z1); + Lib_IntVector_Intrinsics_vec128 + z01 = Lib_IntVector_Intrinsics_vec128_shift_right64(x1, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec128 + z11 = Lib_IntVector_Intrinsics_vec128_shift_right64(x4, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec128 + t = Lib_IntVector_Intrinsics_vec128_shift_left64(z11, (uint32_t)2U); + Lib_IntVector_Intrinsics_vec128 z12 = Lib_IntVector_Intrinsics_vec128_add64(z11, t); + Lib_IntVector_Intrinsics_vec128 x11 = Lib_IntVector_Intrinsics_vec128_and(x1, mask26); + Lib_IntVector_Intrinsics_vec128 x41 = Lib_IntVector_Intrinsics_vec128_and(x4, mask26); + Lib_IntVector_Intrinsics_vec128 x2 = Lib_IntVector_Intrinsics_vec128_add64(t2, z01); + Lib_IntVector_Intrinsics_vec128 x01 = Lib_IntVector_Intrinsics_vec128_add64(x0, z12); + Lib_IntVector_Intrinsics_vec128 + z02 = Lib_IntVector_Intrinsics_vec128_shift_right64(x2, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec128 + z13 = Lib_IntVector_Intrinsics_vec128_shift_right64(x01, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec128 x21 = Lib_IntVector_Intrinsics_vec128_and(x2, mask26); + 
Lib_IntVector_Intrinsics_vec128 x02 = Lib_IntVector_Intrinsics_vec128_and(x01, mask26); + Lib_IntVector_Intrinsics_vec128 x31 = Lib_IntVector_Intrinsics_vec128_add64(x3, z02); + Lib_IntVector_Intrinsics_vec128 x12 = Lib_IntVector_Intrinsics_vec128_add64(x11, z13); + Lib_IntVector_Intrinsics_vec128 + z03 = Lib_IntVector_Intrinsics_vec128_shift_right64(x31, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec128 x32 = Lib_IntVector_Intrinsics_vec128_and(x31, mask26); + Lib_IntVector_Intrinsics_vec128 x42 = Lib_IntVector_Intrinsics_vec128_add64(x41, z03); + Lib_IntVector_Intrinsics_vec128 o00 = x02; + Lib_IntVector_Intrinsics_vec128 o10 = x12; + Lib_IntVector_Intrinsics_vec128 o20 = x21; + Lib_IntVector_Intrinsics_vec128 o30 = x32; + Lib_IntVector_Intrinsics_vec128 o40 = x42; + acc0[0U] = o00; + acc0[1U] = o10; + acc0[2U] = o20; + acc0[3U] = o30; + acc0[4U] = o40; + Lib_IntVector_Intrinsics_vec128 f100 = acc0[0U]; + Lib_IntVector_Intrinsics_vec128 f11 = acc0[1U]; + Lib_IntVector_Intrinsics_vec128 f12 = acc0[2U]; + Lib_IntVector_Intrinsics_vec128 f13 = acc0[3U]; + Lib_IntVector_Intrinsics_vec128 f14 = acc0[4U]; + Lib_IntVector_Intrinsics_vec128 f20 = e[0U]; + Lib_IntVector_Intrinsics_vec128 f21 = e[1U]; + Lib_IntVector_Intrinsics_vec128 f22 = e[2U]; + Lib_IntVector_Intrinsics_vec128 f23 = e[3U]; + Lib_IntVector_Intrinsics_vec128 f24 = e[4U]; + Lib_IntVector_Intrinsics_vec128 o0 = Lib_IntVector_Intrinsics_vec128_add64(f100, f20); + Lib_IntVector_Intrinsics_vec128 o1 = Lib_IntVector_Intrinsics_vec128_add64(f11, f21); + Lib_IntVector_Intrinsics_vec128 o2 = Lib_IntVector_Intrinsics_vec128_add64(f12, f22); + Lib_IntVector_Intrinsics_vec128 o3 = Lib_IntVector_Intrinsics_vec128_add64(f13, f23); + Lib_IntVector_Intrinsics_vec128 o4 = Lib_IntVector_Intrinsics_vec128_add64(f14, f24); + acc0[0U] = o0; + acc0[1U] = o1; + acc0[2U] = o2; + acc0[3U] = o3; + acc0[4U] = o4; + } + Hacl_Impl_Poly1305_Field32xN_128_fmul_r2_normalize(acc0, pre0); + } + uint32_t len1 = n * (uint32_t)16U - len0; + 
uint8_t *t10 = blocks + len0; + uint32_t nb = len1 / (uint32_t)16U; + uint32_t rem1 = len1 % (uint32_t)16U; + for (uint32_t i = (uint32_t)0U; i < nb; i++) { + uint8_t *block = t10 + i * (uint32_t)16U; + KRML_PRE_ALIGN(16) + Lib_IntVector_Intrinsics_vec128 e[5U] KRML_POST_ALIGN(16) = { 0U }; + uint64_t u0 = load64_le(block); + uint64_t lo = u0; + uint64_t u = load64_le(block + (uint32_t)8U); + uint64_t hi = u; + Lib_IntVector_Intrinsics_vec128 f0 = Lib_IntVector_Intrinsics_vec128_load64(lo); + Lib_IntVector_Intrinsics_vec128 f1 = Lib_IntVector_Intrinsics_vec128_load64(hi); + Lib_IntVector_Intrinsics_vec128 + f010 = + Lib_IntVector_Intrinsics_vec128_and(f0, + Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec128 + f110 = + Lib_IntVector_Intrinsics_vec128_and(Lib_IntVector_Intrinsics_vec128_shift_right64(f0, + (uint32_t)26U), + Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec128 + f20 = + Lib_IntVector_Intrinsics_vec128_or(Lib_IntVector_Intrinsics_vec128_shift_right64(f0, + (uint32_t)52U), + Lib_IntVector_Intrinsics_vec128_shift_left64(Lib_IntVector_Intrinsics_vec128_and(f1, + Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3fffU)), + (uint32_t)12U)); + Lib_IntVector_Intrinsics_vec128 + f30 = + Lib_IntVector_Intrinsics_vec128_and(Lib_IntVector_Intrinsics_vec128_shift_right64(f1, + (uint32_t)14U), + Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec128 + f40 = Lib_IntVector_Intrinsics_vec128_shift_right64(f1, (uint32_t)40U); + Lib_IntVector_Intrinsics_vec128 f01 = f010; + Lib_IntVector_Intrinsics_vec128 f111 = f110; + Lib_IntVector_Intrinsics_vec128 f2 = f20; + Lib_IntVector_Intrinsics_vec128 f3 = f30; + Lib_IntVector_Intrinsics_vec128 f41 = f40; + e[0U] = f01; + e[1U] = f111; + e[2U] = f2; + e[3U] = f3; + e[4U] = f41; + uint64_t b = (uint64_t)0x1000000U; + Lib_IntVector_Intrinsics_vec128 mask = Lib_IntVector_Intrinsics_vec128_load64(b); 
+ Lib_IntVector_Intrinsics_vec128 f4 = e[4U]; + e[4U] = Lib_IntVector_Intrinsics_vec128_or(f4, mask); + Lib_IntVector_Intrinsics_vec128 *r1 = pre0; + Lib_IntVector_Intrinsics_vec128 *r5 = pre0 + (uint32_t)5U; + Lib_IntVector_Intrinsics_vec128 r0 = r1[0U]; + Lib_IntVector_Intrinsics_vec128 r11 = r1[1U]; + Lib_IntVector_Intrinsics_vec128 r2 = r1[2U]; + Lib_IntVector_Intrinsics_vec128 r3 = r1[3U]; + Lib_IntVector_Intrinsics_vec128 r4 = r1[4U]; + Lib_IntVector_Intrinsics_vec128 r51 = r5[1U]; + Lib_IntVector_Intrinsics_vec128 r52 = r5[2U]; + Lib_IntVector_Intrinsics_vec128 r53 = r5[3U]; + Lib_IntVector_Intrinsics_vec128 r54 = r5[4U]; + Lib_IntVector_Intrinsics_vec128 f10 = e[0U]; + Lib_IntVector_Intrinsics_vec128 f11 = e[1U]; + Lib_IntVector_Intrinsics_vec128 f12 = e[2U]; + Lib_IntVector_Intrinsics_vec128 f13 = e[3U]; + Lib_IntVector_Intrinsics_vec128 f14 = e[4U]; + Lib_IntVector_Intrinsics_vec128 a0 = acc0[0U]; + Lib_IntVector_Intrinsics_vec128 a1 = acc0[1U]; + Lib_IntVector_Intrinsics_vec128 a2 = acc0[2U]; + Lib_IntVector_Intrinsics_vec128 a3 = acc0[3U]; + Lib_IntVector_Intrinsics_vec128 a4 = acc0[4U]; + Lib_IntVector_Intrinsics_vec128 a01 = Lib_IntVector_Intrinsics_vec128_add64(a0, f10); + Lib_IntVector_Intrinsics_vec128 a11 = Lib_IntVector_Intrinsics_vec128_add64(a1, f11); + Lib_IntVector_Intrinsics_vec128 a21 = Lib_IntVector_Intrinsics_vec128_add64(a2, f12); + Lib_IntVector_Intrinsics_vec128 a31 = Lib_IntVector_Intrinsics_vec128_add64(a3, f13); + Lib_IntVector_Intrinsics_vec128 a41 = Lib_IntVector_Intrinsics_vec128_add64(a4, f14); + Lib_IntVector_Intrinsics_vec128 a02 = Lib_IntVector_Intrinsics_vec128_mul64(r0, a01); + Lib_IntVector_Intrinsics_vec128 a12 = Lib_IntVector_Intrinsics_vec128_mul64(r11, a01); + Lib_IntVector_Intrinsics_vec128 a22 = Lib_IntVector_Intrinsics_vec128_mul64(r2, a01); + Lib_IntVector_Intrinsics_vec128 a32 = Lib_IntVector_Intrinsics_vec128_mul64(r3, a01); + Lib_IntVector_Intrinsics_vec128 a42 = Lib_IntVector_Intrinsics_vec128_mul64(r4, a01); + 
Lib_IntVector_Intrinsics_vec128 + a03 = + Lib_IntVector_Intrinsics_vec128_add64(a02, + Lib_IntVector_Intrinsics_vec128_mul64(r54, a11)); + Lib_IntVector_Intrinsics_vec128 + a13 = + Lib_IntVector_Intrinsics_vec128_add64(a12, + Lib_IntVector_Intrinsics_vec128_mul64(r0, a11)); + Lib_IntVector_Intrinsics_vec128 + a23 = + Lib_IntVector_Intrinsics_vec128_add64(a22, + Lib_IntVector_Intrinsics_vec128_mul64(r11, a11)); + Lib_IntVector_Intrinsics_vec128 + a33 = + Lib_IntVector_Intrinsics_vec128_add64(a32, + Lib_IntVector_Intrinsics_vec128_mul64(r2, a11)); + Lib_IntVector_Intrinsics_vec128 + a43 = + Lib_IntVector_Intrinsics_vec128_add64(a42, + Lib_IntVector_Intrinsics_vec128_mul64(r3, a11)); + Lib_IntVector_Intrinsics_vec128 + a04 = + Lib_IntVector_Intrinsics_vec128_add64(a03, + Lib_IntVector_Intrinsics_vec128_mul64(r53, a21)); + Lib_IntVector_Intrinsics_vec128 + a14 = + Lib_IntVector_Intrinsics_vec128_add64(a13, + Lib_IntVector_Intrinsics_vec128_mul64(r54, a21)); + Lib_IntVector_Intrinsics_vec128 + a24 = + Lib_IntVector_Intrinsics_vec128_add64(a23, + Lib_IntVector_Intrinsics_vec128_mul64(r0, a21)); + Lib_IntVector_Intrinsics_vec128 + a34 = + Lib_IntVector_Intrinsics_vec128_add64(a33, + Lib_IntVector_Intrinsics_vec128_mul64(r11, a21)); + Lib_IntVector_Intrinsics_vec128 + a44 = + Lib_IntVector_Intrinsics_vec128_add64(a43, + Lib_IntVector_Intrinsics_vec128_mul64(r2, a21)); + Lib_IntVector_Intrinsics_vec128 + a05 = + Lib_IntVector_Intrinsics_vec128_add64(a04, + Lib_IntVector_Intrinsics_vec128_mul64(r52, a31)); + Lib_IntVector_Intrinsics_vec128 + a15 = + Lib_IntVector_Intrinsics_vec128_add64(a14, + Lib_IntVector_Intrinsics_vec128_mul64(r53, a31)); + Lib_IntVector_Intrinsics_vec128 + a25 = + Lib_IntVector_Intrinsics_vec128_add64(a24, + Lib_IntVector_Intrinsics_vec128_mul64(r54, a31)); + Lib_IntVector_Intrinsics_vec128 + a35 = + Lib_IntVector_Intrinsics_vec128_add64(a34, + Lib_IntVector_Intrinsics_vec128_mul64(r0, a31)); + Lib_IntVector_Intrinsics_vec128 + a45 = + 
Lib_IntVector_Intrinsics_vec128_add64(a44, + Lib_IntVector_Intrinsics_vec128_mul64(r11, a31)); + Lib_IntVector_Intrinsics_vec128 + a06 = + Lib_IntVector_Intrinsics_vec128_add64(a05, + Lib_IntVector_Intrinsics_vec128_mul64(r51, a41)); + Lib_IntVector_Intrinsics_vec128 + a16 = + Lib_IntVector_Intrinsics_vec128_add64(a15, + Lib_IntVector_Intrinsics_vec128_mul64(r52, a41)); + Lib_IntVector_Intrinsics_vec128 + a26 = + Lib_IntVector_Intrinsics_vec128_add64(a25, + Lib_IntVector_Intrinsics_vec128_mul64(r53, a41)); + Lib_IntVector_Intrinsics_vec128 + a36 = + Lib_IntVector_Intrinsics_vec128_add64(a35, + Lib_IntVector_Intrinsics_vec128_mul64(r54, a41)); + Lib_IntVector_Intrinsics_vec128 + a46 = + Lib_IntVector_Intrinsics_vec128_add64(a45, + Lib_IntVector_Intrinsics_vec128_mul64(r0, a41)); + Lib_IntVector_Intrinsics_vec128 t01 = a06; + Lib_IntVector_Intrinsics_vec128 t11 = a16; + Lib_IntVector_Intrinsics_vec128 t2 = a26; + Lib_IntVector_Intrinsics_vec128 t3 = a36; + Lib_IntVector_Intrinsics_vec128 t4 = a46; + Lib_IntVector_Intrinsics_vec128 + mask26 = Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU); + Lib_IntVector_Intrinsics_vec128 + z0 = Lib_IntVector_Intrinsics_vec128_shift_right64(t01, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec128 + z1 = Lib_IntVector_Intrinsics_vec128_shift_right64(t3, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec128 x0 = Lib_IntVector_Intrinsics_vec128_and(t01, mask26); + Lib_IntVector_Intrinsics_vec128 x3 = Lib_IntVector_Intrinsics_vec128_and(t3, mask26); + Lib_IntVector_Intrinsics_vec128 x1 = Lib_IntVector_Intrinsics_vec128_add64(t11, z0); + Lib_IntVector_Intrinsics_vec128 x4 = Lib_IntVector_Intrinsics_vec128_add64(t4, z1); + Lib_IntVector_Intrinsics_vec128 + z01 = Lib_IntVector_Intrinsics_vec128_shift_right64(x1, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec128 + z11 = Lib_IntVector_Intrinsics_vec128_shift_right64(x4, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec128 + t = Lib_IntVector_Intrinsics_vec128_shift_left64(z11, 
(uint32_t)2U); + Lib_IntVector_Intrinsics_vec128 z12 = Lib_IntVector_Intrinsics_vec128_add64(z11, t); + Lib_IntVector_Intrinsics_vec128 x11 = Lib_IntVector_Intrinsics_vec128_and(x1, mask26); + Lib_IntVector_Intrinsics_vec128 x41 = Lib_IntVector_Intrinsics_vec128_and(x4, mask26); + Lib_IntVector_Intrinsics_vec128 x2 = Lib_IntVector_Intrinsics_vec128_add64(t2, z01); + Lib_IntVector_Intrinsics_vec128 x01 = Lib_IntVector_Intrinsics_vec128_add64(x0, z12); + Lib_IntVector_Intrinsics_vec128 + z02 = Lib_IntVector_Intrinsics_vec128_shift_right64(x2, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec128 + z13 = Lib_IntVector_Intrinsics_vec128_shift_right64(x01, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec128 x21 = Lib_IntVector_Intrinsics_vec128_and(x2, mask26); + Lib_IntVector_Intrinsics_vec128 x02 = Lib_IntVector_Intrinsics_vec128_and(x01, mask26); + Lib_IntVector_Intrinsics_vec128 x31 = Lib_IntVector_Intrinsics_vec128_add64(x3, z02); + Lib_IntVector_Intrinsics_vec128 x12 = Lib_IntVector_Intrinsics_vec128_add64(x11, z13); + Lib_IntVector_Intrinsics_vec128 + z03 = Lib_IntVector_Intrinsics_vec128_shift_right64(x31, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec128 x32 = Lib_IntVector_Intrinsics_vec128_and(x31, mask26); + Lib_IntVector_Intrinsics_vec128 x42 = Lib_IntVector_Intrinsics_vec128_add64(x41, z03); + Lib_IntVector_Intrinsics_vec128 o0 = x02; + Lib_IntVector_Intrinsics_vec128 o1 = x12; + Lib_IntVector_Intrinsics_vec128 o2 = x21; + Lib_IntVector_Intrinsics_vec128 o3 = x32; + Lib_IntVector_Intrinsics_vec128 o4 = x42; + acc0[0U] = o0; + acc0[1U] = o1; + acc0[2U] = o2; + acc0[3U] = o3; + acc0[4U] = o4; + } + if (rem1 > (uint32_t)0U) { + uint8_t *last = t10 + nb * (uint32_t)16U; + KRML_PRE_ALIGN(16) + Lib_IntVector_Intrinsics_vec128 e[5U] KRML_POST_ALIGN(16) = { 0U }; + uint8_t tmp[16U] = { 0U }; + memcpy(tmp, last, rem1 * sizeof(uint8_t)); + uint64_t u0 = load64_le(tmp); + uint64_t lo = u0; + uint64_t u = load64_le(tmp + (uint32_t)8U); + uint64_t hi = u; + 
Lib_IntVector_Intrinsics_vec128 f0 = Lib_IntVector_Intrinsics_vec128_load64(lo); + Lib_IntVector_Intrinsics_vec128 f1 = Lib_IntVector_Intrinsics_vec128_load64(hi); + Lib_IntVector_Intrinsics_vec128 + f010 = + Lib_IntVector_Intrinsics_vec128_and(f0, + Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec128 + f110 = + Lib_IntVector_Intrinsics_vec128_and(Lib_IntVector_Intrinsics_vec128_shift_right64(f0, + (uint32_t)26U), + Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec128 + f20 = + Lib_IntVector_Intrinsics_vec128_or(Lib_IntVector_Intrinsics_vec128_shift_right64(f0, + (uint32_t)52U), + Lib_IntVector_Intrinsics_vec128_shift_left64(Lib_IntVector_Intrinsics_vec128_and(f1, + Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3fffU)), + (uint32_t)12U)); + Lib_IntVector_Intrinsics_vec128 + f30 = + Lib_IntVector_Intrinsics_vec128_and(Lib_IntVector_Intrinsics_vec128_shift_right64(f1, + (uint32_t)14U), + Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec128 + f40 = Lib_IntVector_Intrinsics_vec128_shift_right64(f1, (uint32_t)40U); + Lib_IntVector_Intrinsics_vec128 f01 = f010; + Lib_IntVector_Intrinsics_vec128 f111 = f110; + Lib_IntVector_Intrinsics_vec128 f2 = f20; + Lib_IntVector_Intrinsics_vec128 f3 = f30; + Lib_IntVector_Intrinsics_vec128 f4 = f40; + e[0U] = f01; + e[1U] = f111; + e[2U] = f2; + e[3U] = f3; + e[4U] = f4; + uint64_t b = (uint64_t)1U << rem1 * (uint32_t)8U % (uint32_t)26U; + Lib_IntVector_Intrinsics_vec128 mask = Lib_IntVector_Intrinsics_vec128_load64(b); + Lib_IntVector_Intrinsics_vec128 fi = e[rem1 * (uint32_t)8U / (uint32_t)26U]; + e[rem1 * (uint32_t)8U / (uint32_t)26U] = Lib_IntVector_Intrinsics_vec128_or(fi, mask); + Lib_IntVector_Intrinsics_vec128 *r1 = pre0; + Lib_IntVector_Intrinsics_vec128 *r5 = pre0 + (uint32_t)5U; + Lib_IntVector_Intrinsics_vec128 r0 = r1[0U]; + Lib_IntVector_Intrinsics_vec128 r11 = r1[1U]; + 
Lib_IntVector_Intrinsics_vec128 r2 = r1[2U]; + Lib_IntVector_Intrinsics_vec128 r3 = r1[3U]; + Lib_IntVector_Intrinsics_vec128 r4 = r1[4U]; + Lib_IntVector_Intrinsics_vec128 r51 = r5[1U]; + Lib_IntVector_Intrinsics_vec128 r52 = r5[2U]; + Lib_IntVector_Intrinsics_vec128 r53 = r5[3U]; + Lib_IntVector_Intrinsics_vec128 r54 = r5[4U]; + Lib_IntVector_Intrinsics_vec128 f10 = e[0U]; + Lib_IntVector_Intrinsics_vec128 f11 = e[1U]; + Lib_IntVector_Intrinsics_vec128 f12 = e[2U]; + Lib_IntVector_Intrinsics_vec128 f13 = e[3U]; + Lib_IntVector_Intrinsics_vec128 f14 = e[4U]; + Lib_IntVector_Intrinsics_vec128 a0 = acc0[0U]; + Lib_IntVector_Intrinsics_vec128 a1 = acc0[1U]; + Lib_IntVector_Intrinsics_vec128 a2 = acc0[2U]; + Lib_IntVector_Intrinsics_vec128 a3 = acc0[3U]; + Lib_IntVector_Intrinsics_vec128 a4 = acc0[4U]; + Lib_IntVector_Intrinsics_vec128 a01 = Lib_IntVector_Intrinsics_vec128_add64(a0, f10); + Lib_IntVector_Intrinsics_vec128 a11 = Lib_IntVector_Intrinsics_vec128_add64(a1, f11); + Lib_IntVector_Intrinsics_vec128 a21 = Lib_IntVector_Intrinsics_vec128_add64(a2, f12); + Lib_IntVector_Intrinsics_vec128 a31 = Lib_IntVector_Intrinsics_vec128_add64(a3, f13); + Lib_IntVector_Intrinsics_vec128 a41 = Lib_IntVector_Intrinsics_vec128_add64(a4, f14); + Lib_IntVector_Intrinsics_vec128 a02 = Lib_IntVector_Intrinsics_vec128_mul64(r0, a01); + Lib_IntVector_Intrinsics_vec128 a12 = Lib_IntVector_Intrinsics_vec128_mul64(r11, a01); + Lib_IntVector_Intrinsics_vec128 a22 = Lib_IntVector_Intrinsics_vec128_mul64(r2, a01); + Lib_IntVector_Intrinsics_vec128 a32 = Lib_IntVector_Intrinsics_vec128_mul64(r3, a01); + Lib_IntVector_Intrinsics_vec128 a42 = Lib_IntVector_Intrinsics_vec128_mul64(r4, a01); + Lib_IntVector_Intrinsics_vec128 + a03 = + Lib_IntVector_Intrinsics_vec128_add64(a02, + Lib_IntVector_Intrinsics_vec128_mul64(r54, a11)); + Lib_IntVector_Intrinsics_vec128 + a13 = + Lib_IntVector_Intrinsics_vec128_add64(a12, + Lib_IntVector_Intrinsics_vec128_mul64(r0, a11)); + 
Lib_IntVector_Intrinsics_vec128 + a23 = + Lib_IntVector_Intrinsics_vec128_add64(a22, + Lib_IntVector_Intrinsics_vec128_mul64(r11, a11)); + Lib_IntVector_Intrinsics_vec128 + a33 = + Lib_IntVector_Intrinsics_vec128_add64(a32, + Lib_IntVector_Intrinsics_vec128_mul64(r2, a11)); + Lib_IntVector_Intrinsics_vec128 + a43 = + Lib_IntVector_Intrinsics_vec128_add64(a42, + Lib_IntVector_Intrinsics_vec128_mul64(r3, a11)); + Lib_IntVector_Intrinsics_vec128 + a04 = + Lib_IntVector_Intrinsics_vec128_add64(a03, + Lib_IntVector_Intrinsics_vec128_mul64(r53, a21)); + Lib_IntVector_Intrinsics_vec128 + a14 = + Lib_IntVector_Intrinsics_vec128_add64(a13, + Lib_IntVector_Intrinsics_vec128_mul64(r54, a21)); + Lib_IntVector_Intrinsics_vec128 + a24 = + Lib_IntVector_Intrinsics_vec128_add64(a23, + Lib_IntVector_Intrinsics_vec128_mul64(r0, a21)); + Lib_IntVector_Intrinsics_vec128 + a34 = + Lib_IntVector_Intrinsics_vec128_add64(a33, + Lib_IntVector_Intrinsics_vec128_mul64(r11, a21)); + Lib_IntVector_Intrinsics_vec128 + a44 = + Lib_IntVector_Intrinsics_vec128_add64(a43, + Lib_IntVector_Intrinsics_vec128_mul64(r2, a21)); + Lib_IntVector_Intrinsics_vec128 + a05 = + Lib_IntVector_Intrinsics_vec128_add64(a04, + Lib_IntVector_Intrinsics_vec128_mul64(r52, a31)); + Lib_IntVector_Intrinsics_vec128 + a15 = + Lib_IntVector_Intrinsics_vec128_add64(a14, + Lib_IntVector_Intrinsics_vec128_mul64(r53, a31)); + Lib_IntVector_Intrinsics_vec128 + a25 = + Lib_IntVector_Intrinsics_vec128_add64(a24, + Lib_IntVector_Intrinsics_vec128_mul64(r54, a31)); + Lib_IntVector_Intrinsics_vec128 + a35 = + Lib_IntVector_Intrinsics_vec128_add64(a34, + Lib_IntVector_Intrinsics_vec128_mul64(r0, a31)); + Lib_IntVector_Intrinsics_vec128 + a45 = + Lib_IntVector_Intrinsics_vec128_add64(a44, + Lib_IntVector_Intrinsics_vec128_mul64(r11, a31)); + Lib_IntVector_Intrinsics_vec128 + a06 = + Lib_IntVector_Intrinsics_vec128_add64(a05, + Lib_IntVector_Intrinsics_vec128_mul64(r51, a41)); + Lib_IntVector_Intrinsics_vec128 + a16 = + 
Lib_IntVector_Intrinsics_vec128_add64(a15, + Lib_IntVector_Intrinsics_vec128_mul64(r52, a41)); + Lib_IntVector_Intrinsics_vec128 + a26 = + Lib_IntVector_Intrinsics_vec128_add64(a25, + Lib_IntVector_Intrinsics_vec128_mul64(r53, a41)); + Lib_IntVector_Intrinsics_vec128 + a36 = + Lib_IntVector_Intrinsics_vec128_add64(a35, + Lib_IntVector_Intrinsics_vec128_mul64(r54, a41)); + Lib_IntVector_Intrinsics_vec128 + a46 = + Lib_IntVector_Intrinsics_vec128_add64(a45, + Lib_IntVector_Intrinsics_vec128_mul64(r0, a41)); + Lib_IntVector_Intrinsics_vec128 t01 = a06; + Lib_IntVector_Intrinsics_vec128 t11 = a16; + Lib_IntVector_Intrinsics_vec128 t2 = a26; + Lib_IntVector_Intrinsics_vec128 t3 = a36; + Lib_IntVector_Intrinsics_vec128 t4 = a46; + Lib_IntVector_Intrinsics_vec128 + mask26 = Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU); + Lib_IntVector_Intrinsics_vec128 + z0 = Lib_IntVector_Intrinsics_vec128_shift_right64(t01, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec128 + z1 = Lib_IntVector_Intrinsics_vec128_shift_right64(t3, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec128 x0 = Lib_IntVector_Intrinsics_vec128_and(t01, mask26); + Lib_IntVector_Intrinsics_vec128 x3 = Lib_IntVector_Intrinsics_vec128_and(t3, mask26); + Lib_IntVector_Intrinsics_vec128 x1 = Lib_IntVector_Intrinsics_vec128_add64(t11, z0); + Lib_IntVector_Intrinsics_vec128 x4 = Lib_IntVector_Intrinsics_vec128_add64(t4, z1); + Lib_IntVector_Intrinsics_vec128 + z01 = Lib_IntVector_Intrinsics_vec128_shift_right64(x1, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec128 + z11 = Lib_IntVector_Intrinsics_vec128_shift_right64(x4, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec128 + t = Lib_IntVector_Intrinsics_vec128_shift_left64(z11, (uint32_t)2U); + Lib_IntVector_Intrinsics_vec128 z12 = Lib_IntVector_Intrinsics_vec128_add64(z11, t); + Lib_IntVector_Intrinsics_vec128 x11 = Lib_IntVector_Intrinsics_vec128_and(x1, mask26); + Lib_IntVector_Intrinsics_vec128 x41 = Lib_IntVector_Intrinsics_vec128_and(x4, mask26); + 
Lib_IntVector_Intrinsics_vec128 x2 = Lib_IntVector_Intrinsics_vec128_add64(t2, z01); + Lib_IntVector_Intrinsics_vec128 x01 = Lib_IntVector_Intrinsics_vec128_add64(x0, z12); + Lib_IntVector_Intrinsics_vec128 + z02 = Lib_IntVector_Intrinsics_vec128_shift_right64(x2, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec128 + z13 = Lib_IntVector_Intrinsics_vec128_shift_right64(x01, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec128 x21 = Lib_IntVector_Intrinsics_vec128_and(x2, mask26); + Lib_IntVector_Intrinsics_vec128 x02 = Lib_IntVector_Intrinsics_vec128_and(x01, mask26); + Lib_IntVector_Intrinsics_vec128 x31 = Lib_IntVector_Intrinsics_vec128_add64(x3, z02); + Lib_IntVector_Intrinsics_vec128 x12 = Lib_IntVector_Intrinsics_vec128_add64(x11, z13); + Lib_IntVector_Intrinsics_vec128 + z03 = Lib_IntVector_Intrinsics_vec128_shift_right64(x31, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec128 x32 = Lib_IntVector_Intrinsics_vec128_and(x31, mask26); + Lib_IntVector_Intrinsics_vec128 x42 = Lib_IntVector_Intrinsics_vec128_add64(x41, z03); + Lib_IntVector_Intrinsics_vec128 o0 = x02; + Lib_IntVector_Intrinsics_vec128 o1 = x12; + Lib_IntVector_Intrinsics_vec128 o2 = x21; + Lib_IntVector_Intrinsics_vec128 o3 = x32; + Lib_IntVector_Intrinsics_vec128 o4 = x42; + acc0[0U] = o0; + acc0[1U] = o1; + acc0[2U] = o2; + acc0[3U] = o3; + acc0[4U] = o4; + } + uint8_t tmp[16U] = { 0U }; + memcpy(tmp, rem, r * sizeof(uint8_t)); + if (r > (uint32_t)0U) { + Lib_IntVector_Intrinsics_vec128 *pre = ctx + (uint32_t)5U; + Lib_IntVector_Intrinsics_vec128 *acc = ctx; + KRML_PRE_ALIGN(16) + Lib_IntVector_Intrinsics_vec128 e[5U] KRML_POST_ALIGN(16) = { 0U }; + uint64_t u0 = load64_le(tmp); + uint64_t lo = u0; + uint64_t u = load64_le(tmp + (uint32_t)8U); + uint64_t hi = u; + Lib_IntVector_Intrinsics_vec128 f0 = Lib_IntVector_Intrinsics_vec128_load64(lo); + Lib_IntVector_Intrinsics_vec128 f1 = Lib_IntVector_Intrinsics_vec128_load64(hi); + Lib_IntVector_Intrinsics_vec128 + f010 = + 
Lib_IntVector_Intrinsics_vec128_and(f0, + Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec128 + f110 = + Lib_IntVector_Intrinsics_vec128_and(Lib_IntVector_Intrinsics_vec128_shift_right64(f0, + (uint32_t)26U), + Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec128 + f20 = + Lib_IntVector_Intrinsics_vec128_or(Lib_IntVector_Intrinsics_vec128_shift_right64(f0, + (uint32_t)52U), + Lib_IntVector_Intrinsics_vec128_shift_left64(Lib_IntVector_Intrinsics_vec128_and(f1, + Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3fffU)), + (uint32_t)12U)); + Lib_IntVector_Intrinsics_vec128 + f30 = + Lib_IntVector_Intrinsics_vec128_and(Lib_IntVector_Intrinsics_vec128_shift_right64(f1, + (uint32_t)14U), + Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec128 + f40 = Lib_IntVector_Intrinsics_vec128_shift_right64(f1, (uint32_t)40U); + Lib_IntVector_Intrinsics_vec128 f01 = f010; + Lib_IntVector_Intrinsics_vec128 f111 = f110; + Lib_IntVector_Intrinsics_vec128 f2 = f20; + Lib_IntVector_Intrinsics_vec128 f3 = f30; + Lib_IntVector_Intrinsics_vec128 f41 = f40; + e[0U] = f01; + e[1U] = f111; + e[2U] = f2; + e[3U] = f3; + e[4U] = f41; + uint64_t b = (uint64_t)0x1000000U; + Lib_IntVector_Intrinsics_vec128 mask = Lib_IntVector_Intrinsics_vec128_load64(b); + Lib_IntVector_Intrinsics_vec128 f4 = e[4U]; + e[4U] = Lib_IntVector_Intrinsics_vec128_or(f4, mask); + Lib_IntVector_Intrinsics_vec128 *r1 = pre; + Lib_IntVector_Intrinsics_vec128 *r5 = pre + (uint32_t)5U; + Lib_IntVector_Intrinsics_vec128 r0 = r1[0U]; + Lib_IntVector_Intrinsics_vec128 r11 = r1[1U]; + Lib_IntVector_Intrinsics_vec128 r2 = r1[2U]; + Lib_IntVector_Intrinsics_vec128 r3 = r1[3U]; + Lib_IntVector_Intrinsics_vec128 r4 = r1[4U]; + Lib_IntVector_Intrinsics_vec128 r51 = r5[1U]; + Lib_IntVector_Intrinsics_vec128 r52 = r5[2U]; + Lib_IntVector_Intrinsics_vec128 r53 = r5[3U]; + Lib_IntVector_Intrinsics_vec128 
r54 = r5[4U]; + Lib_IntVector_Intrinsics_vec128 f10 = e[0U]; + Lib_IntVector_Intrinsics_vec128 f11 = e[1U]; + Lib_IntVector_Intrinsics_vec128 f12 = e[2U]; + Lib_IntVector_Intrinsics_vec128 f13 = e[3U]; + Lib_IntVector_Intrinsics_vec128 f14 = e[4U]; + Lib_IntVector_Intrinsics_vec128 a0 = acc[0U]; + Lib_IntVector_Intrinsics_vec128 a1 = acc[1U]; + Lib_IntVector_Intrinsics_vec128 a2 = acc[2U]; + Lib_IntVector_Intrinsics_vec128 a3 = acc[3U]; + Lib_IntVector_Intrinsics_vec128 a4 = acc[4U]; + Lib_IntVector_Intrinsics_vec128 a01 = Lib_IntVector_Intrinsics_vec128_add64(a0, f10); + Lib_IntVector_Intrinsics_vec128 a11 = Lib_IntVector_Intrinsics_vec128_add64(a1, f11); + Lib_IntVector_Intrinsics_vec128 a21 = Lib_IntVector_Intrinsics_vec128_add64(a2, f12); + Lib_IntVector_Intrinsics_vec128 a31 = Lib_IntVector_Intrinsics_vec128_add64(a3, f13); + Lib_IntVector_Intrinsics_vec128 a41 = Lib_IntVector_Intrinsics_vec128_add64(a4, f14); + Lib_IntVector_Intrinsics_vec128 a02 = Lib_IntVector_Intrinsics_vec128_mul64(r0, a01); + Lib_IntVector_Intrinsics_vec128 a12 = Lib_IntVector_Intrinsics_vec128_mul64(r11, a01); + Lib_IntVector_Intrinsics_vec128 a22 = Lib_IntVector_Intrinsics_vec128_mul64(r2, a01); + Lib_IntVector_Intrinsics_vec128 a32 = Lib_IntVector_Intrinsics_vec128_mul64(r3, a01); + Lib_IntVector_Intrinsics_vec128 a42 = Lib_IntVector_Intrinsics_vec128_mul64(r4, a01); + Lib_IntVector_Intrinsics_vec128 + a03 = + Lib_IntVector_Intrinsics_vec128_add64(a02, + Lib_IntVector_Intrinsics_vec128_mul64(r54, a11)); + Lib_IntVector_Intrinsics_vec128 + a13 = + Lib_IntVector_Intrinsics_vec128_add64(a12, + Lib_IntVector_Intrinsics_vec128_mul64(r0, a11)); + Lib_IntVector_Intrinsics_vec128 + a23 = + Lib_IntVector_Intrinsics_vec128_add64(a22, + Lib_IntVector_Intrinsics_vec128_mul64(r11, a11)); + Lib_IntVector_Intrinsics_vec128 + a33 = + Lib_IntVector_Intrinsics_vec128_add64(a32, + Lib_IntVector_Intrinsics_vec128_mul64(r2, a11)); + Lib_IntVector_Intrinsics_vec128 + a43 = + 
Lib_IntVector_Intrinsics_vec128_add64(a42, + Lib_IntVector_Intrinsics_vec128_mul64(r3, a11)); + Lib_IntVector_Intrinsics_vec128 + a04 = + Lib_IntVector_Intrinsics_vec128_add64(a03, + Lib_IntVector_Intrinsics_vec128_mul64(r53, a21)); + Lib_IntVector_Intrinsics_vec128 + a14 = + Lib_IntVector_Intrinsics_vec128_add64(a13, + Lib_IntVector_Intrinsics_vec128_mul64(r54, a21)); + Lib_IntVector_Intrinsics_vec128 + a24 = + Lib_IntVector_Intrinsics_vec128_add64(a23, + Lib_IntVector_Intrinsics_vec128_mul64(r0, a21)); + Lib_IntVector_Intrinsics_vec128 + a34 = + Lib_IntVector_Intrinsics_vec128_add64(a33, + Lib_IntVector_Intrinsics_vec128_mul64(r11, a21)); + Lib_IntVector_Intrinsics_vec128 + a44 = + Lib_IntVector_Intrinsics_vec128_add64(a43, + Lib_IntVector_Intrinsics_vec128_mul64(r2, a21)); + Lib_IntVector_Intrinsics_vec128 + a05 = + Lib_IntVector_Intrinsics_vec128_add64(a04, + Lib_IntVector_Intrinsics_vec128_mul64(r52, a31)); + Lib_IntVector_Intrinsics_vec128 + a15 = + Lib_IntVector_Intrinsics_vec128_add64(a14, + Lib_IntVector_Intrinsics_vec128_mul64(r53, a31)); + Lib_IntVector_Intrinsics_vec128 + a25 = + Lib_IntVector_Intrinsics_vec128_add64(a24, + Lib_IntVector_Intrinsics_vec128_mul64(r54, a31)); + Lib_IntVector_Intrinsics_vec128 + a35 = + Lib_IntVector_Intrinsics_vec128_add64(a34, + Lib_IntVector_Intrinsics_vec128_mul64(r0, a31)); + Lib_IntVector_Intrinsics_vec128 + a45 = + Lib_IntVector_Intrinsics_vec128_add64(a44, + Lib_IntVector_Intrinsics_vec128_mul64(r11, a31)); + Lib_IntVector_Intrinsics_vec128 + a06 = + Lib_IntVector_Intrinsics_vec128_add64(a05, + Lib_IntVector_Intrinsics_vec128_mul64(r51, a41)); + Lib_IntVector_Intrinsics_vec128 + a16 = + Lib_IntVector_Intrinsics_vec128_add64(a15, + Lib_IntVector_Intrinsics_vec128_mul64(r52, a41)); + Lib_IntVector_Intrinsics_vec128 + a26 = + Lib_IntVector_Intrinsics_vec128_add64(a25, + Lib_IntVector_Intrinsics_vec128_mul64(r53, a41)); + Lib_IntVector_Intrinsics_vec128 + a36 = + Lib_IntVector_Intrinsics_vec128_add64(a35, + 
Lib_IntVector_Intrinsics_vec128_mul64(r54, a41)); + Lib_IntVector_Intrinsics_vec128 + a46 = + Lib_IntVector_Intrinsics_vec128_add64(a45, + Lib_IntVector_Intrinsics_vec128_mul64(r0, a41)); + Lib_IntVector_Intrinsics_vec128 t0 = a06; + Lib_IntVector_Intrinsics_vec128 t1 = a16; + Lib_IntVector_Intrinsics_vec128 t2 = a26; + Lib_IntVector_Intrinsics_vec128 t3 = a36; + Lib_IntVector_Intrinsics_vec128 t4 = a46; + Lib_IntVector_Intrinsics_vec128 + mask26 = Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU); + Lib_IntVector_Intrinsics_vec128 + z0 = Lib_IntVector_Intrinsics_vec128_shift_right64(t0, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec128 + z1 = Lib_IntVector_Intrinsics_vec128_shift_right64(t3, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec128 x0 = Lib_IntVector_Intrinsics_vec128_and(t0, mask26); + Lib_IntVector_Intrinsics_vec128 x3 = Lib_IntVector_Intrinsics_vec128_and(t3, mask26); + Lib_IntVector_Intrinsics_vec128 x1 = Lib_IntVector_Intrinsics_vec128_add64(t1, z0); + Lib_IntVector_Intrinsics_vec128 x4 = Lib_IntVector_Intrinsics_vec128_add64(t4, z1); + Lib_IntVector_Intrinsics_vec128 + z01 = Lib_IntVector_Intrinsics_vec128_shift_right64(x1, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec128 + z11 = Lib_IntVector_Intrinsics_vec128_shift_right64(x4, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec128 + t = Lib_IntVector_Intrinsics_vec128_shift_left64(z11, (uint32_t)2U); + Lib_IntVector_Intrinsics_vec128 z12 = Lib_IntVector_Intrinsics_vec128_add64(z11, t); + Lib_IntVector_Intrinsics_vec128 x11 = Lib_IntVector_Intrinsics_vec128_and(x1, mask26); + Lib_IntVector_Intrinsics_vec128 x41 = Lib_IntVector_Intrinsics_vec128_and(x4, mask26); + Lib_IntVector_Intrinsics_vec128 x2 = Lib_IntVector_Intrinsics_vec128_add64(t2, z01); + Lib_IntVector_Intrinsics_vec128 x01 = Lib_IntVector_Intrinsics_vec128_add64(x0, z12); + Lib_IntVector_Intrinsics_vec128 + z02 = Lib_IntVector_Intrinsics_vec128_shift_right64(x2, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec128 + z13 = 
Lib_IntVector_Intrinsics_vec128_shift_right64(x01, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec128 x21 = Lib_IntVector_Intrinsics_vec128_and(x2, mask26); + Lib_IntVector_Intrinsics_vec128 x02 = Lib_IntVector_Intrinsics_vec128_and(x01, mask26); + Lib_IntVector_Intrinsics_vec128 x31 = Lib_IntVector_Intrinsics_vec128_add64(x3, z02); + Lib_IntVector_Intrinsics_vec128 x12 = Lib_IntVector_Intrinsics_vec128_add64(x11, z13); + Lib_IntVector_Intrinsics_vec128 + z03 = Lib_IntVector_Intrinsics_vec128_shift_right64(x31, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec128 x32 = Lib_IntVector_Intrinsics_vec128_and(x31, mask26); + Lib_IntVector_Intrinsics_vec128 x42 = Lib_IntVector_Intrinsics_vec128_add64(x41, z03); + Lib_IntVector_Intrinsics_vec128 o0 = x02; + Lib_IntVector_Intrinsics_vec128 o1 = x12; + Lib_IntVector_Intrinsics_vec128 o2 = x21; + Lib_IntVector_Intrinsics_vec128 o3 = x32; + Lib_IntVector_Intrinsics_vec128 o4 = x42; + acc[0U] = o0; + acc[1U] = o1; + acc[2U] = o2; + acc[3U] = o3; + acc[4U] = o4; + return; + } +} + /* poly1305_do_128: builds a Poly1305 state in a local 25-vector ctx, initialised from k by Hacl_Poly1305_128_poly1305_init; feeds aad and then m through poly1305_padded_128 (each call is skipped when its length is 0); absorbs one final 16-byte block that holds aadlen and mlen as two little-endian 64-bit values; and finally passes out, k and ctx to Hacl_Poly1305_128_poly1305_finish. The final block is loaded as two 64-bit words, split into five limbs with 26-bit masks and shifts, has 0x1000000 ORed into limb 4, is added to the accumulator, multiplied by the precomputed values at ctx + 5, carry-reduced with mask26, and written back to the accumulator. Parameters: k - key bytes handed to poly1305_init and poly1305_finish; aadlen, aad - length and bytes of the first input; mlen, m - length and bytes of the second input; out - destination handed to poly1305_finish (presumably the 16-byte tag - confirm against Hacl_Poly1305_128.h). No buffer sizes are checked here. */ +static inline void +poly1305_do_128( + uint8_t *k, + uint32_t aadlen, + uint8_t *aad, + uint32_t mlen, + uint8_t *m, + uint8_t *out) +{ + KRML_PRE_ALIGN(16) + Lib_IntVector_Intrinsics_vec128 ctx[25U] KRML_POST_ALIGN(16) = { 0U }; + uint8_t block[16U] = { 0U }; + Hacl_Poly1305_128_poly1305_init(ctx, k); /* ctx is zero-filled above and then set up from k */ + if (aadlen != (uint32_t)0U) { + poly1305_padded_128(ctx, aadlen, aad); + } + if (mlen != (uint32_t)0U) { + poly1305_padded_128(ctx, mlen, m); + } + store64_le(block, (uint64_t)aadlen); + store64_le(block + (uint32_t)8U, (uint64_t)mlen); /* block bytes 0..7 hold aadlen and bytes 8..15 hold mlen, both widened to 64 bits */ + Lib_IntVector_Intrinsics_vec128 *pre = ctx + (uint32_t)5U; + Lib_IntVector_Intrinsics_vec128 *acc = ctx; /* acc aliases the first five vectors of ctx; pre starts right after them */ + KRML_PRE_ALIGN(16) + Lib_IntVector_Intrinsics_vec128 e[5U] KRML_POST_ALIGN(16) = { 0U }; + uint64_t u0 = load64_le(block); + uint64_t lo = u0; + uint64_t u = load64_le(block + (uint32_t)8U); + uint64_t hi = u; /* lo and hi are the two halves of the length block read back with load64_le */ + Lib_IntVector_Intrinsics_vec128 f0 = Lib_IntVector_Intrinsics_vec128_load64(lo); + 
Lib_IntVector_Intrinsics_vec128 f1 = Lib_IntVector_Intrinsics_vec128_load64(hi); + Lib_IntVector_Intrinsics_vec128 + f010 = + Lib_IntVector_Intrinsics_vec128_and(f0, + Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec128 + f110 = + Lib_IntVector_Intrinsics_vec128_and(Lib_IntVector_Intrinsics_vec128_shift_right64(f0, + (uint32_t)26U), + Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec128 + f20 = + Lib_IntVector_Intrinsics_vec128_or(Lib_IntVector_Intrinsics_vec128_shift_right64(f0, + (uint32_t)52U), + Lib_IntVector_Intrinsics_vec128_shift_left64(Lib_IntVector_Intrinsics_vec128_and(f1, + Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3fffU)), + (uint32_t)12U)); + Lib_IntVector_Intrinsics_vec128 + f30 = + Lib_IntVector_Intrinsics_vec128_and(Lib_IntVector_Intrinsics_vec128_shift_right64(f1, + (uint32_t)14U), + Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec128 + f40 = Lib_IntVector_Intrinsics_vec128_shift_right64(f1, (uint32_t)40U); + Lib_IntVector_Intrinsics_vec128 f01 = f010; + Lib_IntVector_Intrinsics_vec128 f111 = f110; + Lib_IntVector_Intrinsics_vec128 f2 = f20; + Lib_IntVector_Intrinsics_vec128 f3 = f30; + Lib_IntVector_Intrinsics_vec128 f41 = f40; + e[0U] = f01; + e[1U] = f111; + e[2U] = f2; + e[3U] = f3; + e[4U] = f41; + uint64_t b = (uint64_t)0x1000000U; + Lib_IntVector_Intrinsics_vec128 mask = Lib_IntVector_Intrinsics_vec128_load64(b); + Lib_IntVector_Intrinsics_vec128 f4 = e[4U]; + e[4U] = Lib_IntVector_Intrinsics_vec128_or(f4, mask); + Lib_IntVector_Intrinsics_vec128 *r = pre; + Lib_IntVector_Intrinsics_vec128 *r5 = pre + (uint32_t)5U; + Lib_IntVector_Intrinsics_vec128 r0 = r[0U]; + Lib_IntVector_Intrinsics_vec128 r1 = r[1U]; + Lib_IntVector_Intrinsics_vec128 r2 = r[2U]; + Lib_IntVector_Intrinsics_vec128 r3 = r[3U]; + Lib_IntVector_Intrinsics_vec128 r4 = r[4U]; + Lib_IntVector_Intrinsics_vec128 r51 = r5[1U]; + 
Lib_IntVector_Intrinsics_vec128 r52 = r5[2U]; + Lib_IntVector_Intrinsics_vec128 r53 = r5[3U]; + Lib_IntVector_Intrinsics_vec128 r54 = r5[4U]; + Lib_IntVector_Intrinsics_vec128 f10 = e[0U]; + Lib_IntVector_Intrinsics_vec128 f11 = e[1U]; + Lib_IntVector_Intrinsics_vec128 f12 = e[2U]; + Lib_IntVector_Intrinsics_vec128 f13 = e[3U]; + Lib_IntVector_Intrinsics_vec128 f14 = e[4U]; + Lib_IntVector_Intrinsics_vec128 a0 = acc[0U]; + Lib_IntVector_Intrinsics_vec128 a1 = acc[1U]; + Lib_IntVector_Intrinsics_vec128 a2 = acc[2U]; + Lib_IntVector_Intrinsics_vec128 a3 = acc[3U]; + Lib_IntVector_Intrinsics_vec128 a4 = acc[4U]; + Lib_IntVector_Intrinsics_vec128 a01 = Lib_IntVector_Intrinsics_vec128_add64(a0, f10); + Lib_IntVector_Intrinsics_vec128 a11 = Lib_IntVector_Intrinsics_vec128_add64(a1, f11); + Lib_IntVector_Intrinsics_vec128 a21 = Lib_IntVector_Intrinsics_vec128_add64(a2, f12); + Lib_IntVector_Intrinsics_vec128 a31 = Lib_IntVector_Intrinsics_vec128_add64(a3, f13); + Lib_IntVector_Intrinsics_vec128 a41 = Lib_IntVector_Intrinsics_vec128_add64(a4, f14); + Lib_IntVector_Intrinsics_vec128 a02 = Lib_IntVector_Intrinsics_vec128_mul64(r0, a01); + Lib_IntVector_Intrinsics_vec128 a12 = Lib_IntVector_Intrinsics_vec128_mul64(r1, a01); + Lib_IntVector_Intrinsics_vec128 a22 = Lib_IntVector_Intrinsics_vec128_mul64(r2, a01); + Lib_IntVector_Intrinsics_vec128 a32 = Lib_IntVector_Intrinsics_vec128_mul64(r3, a01); + Lib_IntVector_Intrinsics_vec128 a42 = Lib_IntVector_Intrinsics_vec128_mul64(r4, a01); + Lib_IntVector_Intrinsics_vec128 + a03 = + Lib_IntVector_Intrinsics_vec128_add64(a02, + Lib_IntVector_Intrinsics_vec128_mul64(r54, a11)); + Lib_IntVector_Intrinsics_vec128 + a13 = + Lib_IntVector_Intrinsics_vec128_add64(a12, + Lib_IntVector_Intrinsics_vec128_mul64(r0, a11)); + Lib_IntVector_Intrinsics_vec128 + a23 = + Lib_IntVector_Intrinsics_vec128_add64(a22, + Lib_IntVector_Intrinsics_vec128_mul64(r1, a11)); + Lib_IntVector_Intrinsics_vec128 + a33 = + Lib_IntVector_Intrinsics_vec128_add64(a32, 
+ Lib_IntVector_Intrinsics_vec128_mul64(r2, a11)); + Lib_IntVector_Intrinsics_vec128 + a43 = + Lib_IntVector_Intrinsics_vec128_add64(a42, + Lib_IntVector_Intrinsics_vec128_mul64(r3, a11)); + Lib_IntVector_Intrinsics_vec128 + a04 = + Lib_IntVector_Intrinsics_vec128_add64(a03, + Lib_IntVector_Intrinsics_vec128_mul64(r53, a21)); + Lib_IntVector_Intrinsics_vec128 + a14 = + Lib_IntVector_Intrinsics_vec128_add64(a13, + Lib_IntVector_Intrinsics_vec128_mul64(r54, a21)); + Lib_IntVector_Intrinsics_vec128 + a24 = + Lib_IntVector_Intrinsics_vec128_add64(a23, + Lib_IntVector_Intrinsics_vec128_mul64(r0, a21)); + Lib_IntVector_Intrinsics_vec128 + a34 = + Lib_IntVector_Intrinsics_vec128_add64(a33, + Lib_IntVector_Intrinsics_vec128_mul64(r1, a21)); + Lib_IntVector_Intrinsics_vec128 + a44 = + Lib_IntVector_Intrinsics_vec128_add64(a43, + Lib_IntVector_Intrinsics_vec128_mul64(r2, a21)); + Lib_IntVector_Intrinsics_vec128 + a05 = + Lib_IntVector_Intrinsics_vec128_add64(a04, + Lib_IntVector_Intrinsics_vec128_mul64(r52, a31)); + Lib_IntVector_Intrinsics_vec128 + a15 = + Lib_IntVector_Intrinsics_vec128_add64(a14, + Lib_IntVector_Intrinsics_vec128_mul64(r53, a31)); + Lib_IntVector_Intrinsics_vec128 + a25 = + Lib_IntVector_Intrinsics_vec128_add64(a24, + Lib_IntVector_Intrinsics_vec128_mul64(r54, a31)); + Lib_IntVector_Intrinsics_vec128 + a35 = + Lib_IntVector_Intrinsics_vec128_add64(a34, + Lib_IntVector_Intrinsics_vec128_mul64(r0, a31)); + Lib_IntVector_Intrinsics_vec128 + a45 = + Lib_IntVector_Intrinsics_vec128_add64(a44, + Lib_IntVector_Intrinsics_vec128_mul64(r1, a31)); + Lib_IntVector_Intrinsics_vec128 + a06 = + Lib_IntVector_Intrinsics_vec128_add64(a05, + Lib_IntVector_Intrinsics_vec128_mul64(r51, a41)); + Lib_IntVector_Intrinsics_vec128 + a16 = + Lib_IntVector_Intrinsics_vec128_add64(a15, + Lib_IntVector_Intrinsics_vec128_mul64(r52, a41)); + Lib_IntVector_Intrinsics_vec128 + a26 = + Lib_IntVector_Intrinsics_vec128_add64(a25, + Lib_IntVector_Intrinsics_vec128_mul64(r53, a41)); + 
Lib_IntVector_Intrinsics_vec128 + a36 = + Lib_IntVector_Intrinsics_vec128_add64(a35, + Lib_IntVector_Intrinsics_vec128_mul64(r54, a41)); + Lib_IntVector_Intrinsics_vec128 + a46 = + Lib_IntVector_Intrinsics_vec128_add64(a45, + Lib_IntVector_Intrinsics_vec128_mul64(r0, a41)); + Lib_IntVector_Intrinsics_vec128 t0 = a06; + Lib_IntVector_Intrinsics_vec128 t1 = a16; + Lib_IntVector_Intrinsics_vec128 t2 = a26; + Lib_IntVector_Intrinsics_vec128 t3 = a36; + Lib_IntVector_Intrinsics_vec128 t4 = a46; + Lib_IntVector_Intrinsics_vec128 + mask26 = Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU); + Lib_IntVector_Intrinsics_vec128 + z0 = Lib_IntVector_Intrinsics_vec128_shift_right64(t0, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec128 + z1 = Lib_IntVector_Intrinsics_vec128_shift_right64(t3, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec128 x0 = Lib_IntVector_Intrinsics_vec128_and(t0, mask26); + Lib_IntVector_Intrinsics_vec128 x3 = Lib_IntVector_Intrinsics_vec128_and(t3, mask26); + Lib_IntVector_Intrinsics_vec128 x1 = Lib_IntVector_Intrinsics_vec128_add64(t1, z0); + Lib_IntVector_Intrinsics_vec128 x4 = Lib_IntVector_Intrinsics_vec128_add64(t4, z1); + Lib_IntVector_Intrinsics_vec128 + z01 = Lib_IntVector_Intrinsics_vec128_shift_right64(x1, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec128 + z11 = Lib_IntVector_Intrinsics_vec128_shift_right64(x4, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec128 + t = Lib_IntVector_Intrinsics_vec128_shift_left64(z11, (uint32_t)2U); + Lib_IntVector_Intrinsics_vec128 z12 = Lib_IntVector_Intrinsics_vec128_add64(z11, t); + Lib_IntVector_Intrinsics_vec128 x11 = Lib_IntVector_Intrinsics_vec128_and(x1, mask26); + Lib_IntVector_Intrinsics_vec128 x41 = Lib_IntVector_Intrinsics_vec128_and(x4, mask26); + Lib_IntVector_Intrinsics_vec128 x2 = Lib_IntVector_Intrinsics_vec128_add64(t2, z01); + Lib_IntVector_Intrinsics_vec128 x01 = Lib_IntVector_Intrinsics_vec128_add64(x0, z12); + Lib_IntVector_Intrinsics_vec128 + z02 = 
Lib_IntVector_Intrinsics_vec128_shift_right64(x2, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec128 + z13 = Lib_IntVector_Intrinsics_vec128_shift_right64(x01, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec128 x21 = Lib_IntVector_Intrinsics_vec128_and(x2, mask26); + Lib_IntVector_Intrinsics_vec128 x02 = Lib_IntVector_Intrinsics_vec128_and(x01, mask26); + Lib_IntVector_Intrinsics_vec128 x31 = Lib_IntVector_Intrinsics_vec128_add64(x3, z02); + Lib_IntVector_Intrinsics_vec128 x12 = Lib_IntVector_Intrinsics_vec128_add64(x11, z13); + Lib_IntVector_Intrinsics_vec128 + z03 = Lib_IntVector_Intrinsics_vec128_shift_right64(x31, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec128 x32 = Lib_IntVector_Intrinsics_vec128_and(x31, mask26); + Lib_IntVector_Intrinsics_vec128 x42 = Lib_IntVector_Intrinsics_vec128_add64(x41, z03); + Lib_IntVector_Intrinsics_vec128 o0 = x02; + Lib_IntVector_Intrinsics_vec128 o1 = x12; + Lib_IntVector_Intrinsics_vec128 o2 = x21; + Lib_IntVector_Intrinsics_vec128 o3 = x32; + Lib_IntVector_Intrinsics_vec128 o4 = x42; + acc[0U] = o0; + acc[1U] = o1; + acc[2U] = o2; + acc[3U] = o3; + acc[4U] = o4; /* the five carry-reduced limbs replace the accumulator before the state is handed to poly1305_finish */ + Hacl_Poly1305_128_poly1305_finish(out, k, ctx); +} + /* Hacl_Chacha20Poly1305_128_aead_encrypt: calls Hacl_Chacha20_Vec128_chacha20_encrypt_128 with (mlen, cipher, m, k, n, 1), then builds a 64-byte buffer by running the same routine in place over 64 zero bytes with last argument 0, and passes that buffer as the key to poly1305_do_128 together with aad and cipher, with mac as the output. Parameters: k, n - passed unchanged to chacha20_encrypt_128 (presumably the ChaCha20 key and nonce - confirm against Hacl_Chacha20_Vec128.h); aadlen, aad - length and bytes of the associated data; mlen, m - length and bytes of the input message; cipher - buffer given to chacha20_encrypt_128 as its second argument and then authenticated; mac - output of poly1305_do_128. No lengths or pointers are validated here. */ +void +Hacl_Chacha20Poly1305_128_aead_encrypt( + uint8_t *k, + uint8_t *n, + uint32_t aadlen, + uint8_t *aad, + uint32_t mlen, + uint8_t *m, + uint8_t *cipher, + uint8_t *mac) +{ + Hacl_Chacha20_Vec128_chacha20_encrypt_128(mlen, cipher, m, k, n, (uint32_t)1U); + uint8_t tmp[64U] = { 0U }; + Hacl_Chacha20_Vec128_chacha20_encrypt_128((uint32_t)64U, tmp, tmp, k, n, (uint32_t)0U); /* tmp is transformed in place, starting from 64 zero bytes, with last argument 0 instead of 1 */ + uint8_t *key = tmp; /* NOTE(review): tmp is used as the Poly1305 key and is not wiped before this function returns - confirm whether that is acceptable for callers */ + poly1305_do_128(key, aadlen, aad, mlen, cipher, mac); +} + /* Hacl_Chacha20Poly1305_128_aead_decrypt: derives the 64-byte tmp buffer exactly as the encrypt routine does, has poly1305_do_128 compute a 16-byte value over aad and cipher into computed_mac, compares it byte by byte with mac, and only when all 16 bytes match calls chacha20_encrypt_128 with (mlen, m, cipher, k, n, 1). Returns 0 on a match and 1 otherwise; on the mismatch path this function does not write to m. Parameters mirror the encrypt routine, except that cipher and mac are inputs and m is the output. */ +uint32_t +Hacl_Chacha20Poly1305_128_aead_decrypt( + uint8_t *k, + uint8_t *n, + uint32_t aadlen, + uint8_t *aad, + uint32_t mlen, + uint8_t *m, + uint8_t *cipher, + uint8_t *mac) +{ + uint8_t computed_mac[16U] = { 0U }; + uint8_t tmp[64U] = { 0U }; + Hacl_Chacha20_Vec128_chacha20_encrypt_128((uint32_t)64U, tmp, tmp, k, n, (uint32_t)0U); + uint8_t *key = tmp; /* NOTE(review): as in the encrypt routine, tmp is not wiped before return */ 
+ poly1305_do_128(key, aadlen, aad, mlen, cipher, computed_mac); + uint8_t res = (uint8_t)255U; + KRML_MAYBE_FOR16(i, + (uint32_t)0U, + (uint32_t)16U, + (uint32_t)1U, + uint8_t uu____0 = FStar_UInt8_eq_mask(computed_mac[i], mac[i]); + res = uu____0 & res;); /* res starts at 255 and is ANDed with one FStar_UInt8_eq_mask result per byte pair, so it is still 255 only if every mask was 255; presumably eq_mask is a branch-free equality test - confirm against the FStar_UInt8 definition */ + uint8_t z = res; + if (z == (uint8_t)255U) { + Hacl_Chacha20_Vec128_chacha20_encrypt_128(mlen, m, cipher, k, n, (uint32_t)1U); /* reached only when the comparison above succeeded; same call as in encrypt with m and cipher swapped */ + return (uint32_t)0U; + } + return (uint32_t)1U; +} diff --git a/security/nss/lib/freebl/verified/Hacl_Chacha20Poly1305_128.h b/security/nss/lib/freebl/verified/Hacl_Chacha20Poly1305_128.h new file mode 100644 index 0000000000..01e2a4f517 --- /dev/null +++ b/security/nss/lib/freebl/verified/Hacl_Chacha20Poly1305_128.h @@ -0,0 +1,67 @@ +/* MIT License + * + * Copyright (c) 2016-2020 INRIA, CMU and Microsoft Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#ifndef __Hacl_Chacha20Poly1305_128_H +#define __Hacl_Chacha20Poly1305_128_H + +#if defined(__cplusplus) +extern "C" { +#endif + +#include <string.h> +#include "krml/internal/types.h" +#include "krml/lowstar_endianness.h" +#include "krml/internal/target.h" + +#include "Hacl_Poly1305_128.h" +#include "Hacl_Krmllib.h" +#include "Hacl_Chacha20_Vec128.h" + +void +Hacl_Chacha20Poly1305_128_aead_encrypt( + uint8_t *k, + uint8_t *n, + uint32_t aadlen, + uint8_t *aad, + uint32_t mlen, + uint8_t *m, + uint8_t *cipher, + uint8_t *mac); + +uint32_t +Hacl_Chacha20Poly1305_128_aead_decrypt( + uint8_t *k, + uint8_t *n, + uint32_t aadlen, + uint8_t *aad, + uint32_t mlen, + uint8_t *m, + uint8_t *cipher, + uint8_t *mac); + +#if defined(__cplusplus) +} +#endif + +#define __Hacl_Chacha20Poly1305_128_H_DEFINED +#endif diff --git a/security/nss/lib/freebl/verified/Hacl_Chacha20Poly1305_256.c b/security/nss/lib/freebl/verified/Hacl_Chacha20Poly1305_256.c new file mode 100644 index 0000000000..a4e54f1e27 --- /dev/null +++ b/security/nss/lib/freebl/verified/Hacl_Chacha20Poly1305_256.c @@ -0,0 +1,1179 @@ +/* MIT License + * + * Copyright (c) 2016-2020 INRIA, CMU and Microsoft Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "Hacl_Chacha20Poly1305_256.h" + +#include "internal/Hacl_Poly1305_256.h" +#include "internal/Hacl_Krmllib.h" +#include "libintvector.h" +static inline void +poly1305_padded_256(Lib_IntVector_Intrinsics_vec256 *ctx, uint32_t len, uint8_t *text) +{ + uint32_t n = len / (uint32_t)16U; + uint32_t r = len % (uint32_t)16U; + uint8_t *blocks = text; + uint8_t *rem = text + n * (uint32_t)16U; + Lib_IntVector_Intrinsics_vec256 *pre0 = ctx + (uint32_t)5U; + Lib_IntVector_Intrinsics_vec256 *acc0 = ctx; + uint32_t sz_block = (uint32_t)64U; + uint32_t len0 = n * (uint32_t)16U / sz_block * sz_block; + uint8_t *t00 = blocks; + if (len0 > (uint32_t)0U) { + uint32_t bs = (uint32_t)64U; + uint8_t *text0 = t00; + Hacl_Impl_Poly1305_Field32xN_256_load_acc4(acc0, text0); + uint32_t len1 = len0 - bs; + uint8_t *text1 = t00 + bs; + uint32_t nb = len1 / bs; + for (uint32_t i = (uint32_t)0U; i < nb; i++) { + uint8_t *block = text1 + i * bs; + KRML_PRE_ALIGN(32) + Lib_IntVector_Intrinsics_vec256 e[5U] KRML_POST_ALIGN(32) = { 0U }; + Lib_IntVector_Intrinsics_vec256 lo = Lib_IntVector_Intrinsics_vec256_load64_le(block); + Lib_IntVector_Intrinsics_vec256 + hi = Lib_IntVector_Intrinsics_vec256_load64_le(block + (uint32_t)32U); + Lib_IntVector_Intrinsics_vec256 + mask260 = Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU); + Lib_IntVector_Intrinsics_vec256 + m0 = Lib_IntVector_Intrinsics_vec256_interleave_low128(lo, hi); + Lib_IntVector_Intrinsics_vec256 + m1 = 
Lib_IntVector_Intrinsics_vec256_interleave_high128(lo, hi); + Lib_IntVector_Intrinsics_vec256 + m2 = Lib_IntVector_Intrinsics_vec256_shift_right(m0, (uint32_t)48U); + Lib_IntVector_Intrinsics_vec256 + m3 = Lib_IntVector_Intrinsics_vec256_shift_right(m1, (uint32_t)48U); + Lib_IntVector_Intrinsics_vec256 + m4 = Lib_IntVector_Intrinsics_vec256_interleave_high64(m0, m1); + Lib_IntVector_Intrinsics_vec256 + t010 = Lib_IntVector_Intrinsics_vec256_interleave_low64(m0, m1); + Lib_IntVector_Intrinsics_vec256 + t30 = Lib_IntVector_Intrinsics_vec256_interleave_low64(m2, m3); + Lib_IntVector_Intrinsics_vec256 + t20 = Lib_IntVector_Intrinsics_vec256_shift_right64(t30, (uint32_t)4U); + Lib_IntVector_Intrinsics_vec256 o20 = Lib_IntVector_Intrinsics_vec256_and(t20, mask260); + Lib_IntVector_Intrinsics_vec256 + t10 = Lib_IntVector_Intrinsics_vec256_shift_right64(t010, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec256 o10 = Lib_IntVector_Intrinsics_vec256_and(t10, mask260); + Lib_IntVector_Intrinsics_vec256 o5 = Lib_IntVector_Intrinsics_vec256_and(t010, mask260); + Lib_IntVector_Intrinsics_vec256 + t31 = Lib_IntVector_Intrinsics_vec256_shift_right64(t30, (uint32_t)30U); + Lib_IntVector_Intrinsics_vec256 o30 = Lib_IntVector_Intrinsics_vec256_and(t31, mask260); + Lib_IntVector_Intrinsics_vec256 + o40 = Lib_IntVector_Intrinsics_vec256_shift_right64(m4, (uint32_t)40U); + Lib_IntVector_Intrinsics_vec256 o00 = o5; + Lib_IntVector_Intrinsics_vec256 o11 = o10; + Lib_IntVector_Intrinsics_vec256 o21 = o20; + Lib_IntVector_Intrinsics_vec256 o31 = o30; + Lib_IntVector_Intrinsics_vec256 o41 = o40; + e[0U] = o00; + e[1U] = o11; + e[2U] = o21; + e[3U] = o31; + e[4U] = o41; + uint64_t b = (uint64_t)0x1000000U; + Lib_IntVector_Intrinsics_vec256 mask = Lib_IntVector_Intrinsics_vec256_load64(b); + Lib_IntVector_Intrinsics_vec256 f4 = e[4U]; + e[4U] = Lib_IntVector_Intrinsics_vec256_or(f4, mask); + Lib_IntVector_Intrinsics_vec256 *rn = pre0 + (uint32_t)10U; + Lib_IntVector_Intrinsics_vec256 *rn5 = pre0 
+ (uint32_t)15U; + Lib_IntVector_Intrinsics_vec256 r0 = rn[0U]; + Lib_IntVector_Intrinsics_vec256 r1 = rn[1U]; + Lib_IntVector_Intrinsics_vec256 r2 = rn[2U]; + Lib_IntVector_Intrinsics_vec256 r3 = rn[3U]; + Lib_IntVector_Intrinsics_vec256 r4 = rn[4U]; + Lib_IntVector_Intrinsics_vec256 r51 = rn5[1U]; + Lib_IntVector_Intrinsics_vec256 r52 = rn5[2U]; + Lib_IntVector_Intrinsics_vec256 r53 = rn5[3U]; + Lib_IntVector_Intrinsics_vec256 r54 = rn5[4U]; + Lib_IntVector_Intrinsics_vec256 f10 = acc0[0U]; + Lib_IntVector_Intrinsics_vec256 f110 = acc0[1U]; + Lib_IntVector_Intrinsics_vec256 f120 = acc0[2U]; + Lib_IntVector_Intrinsics_vec256 f130 = acc0[3U]; + Lib_IntVector_Intrinsics_vec256 f140 = acc0[4U]; + Lib_IntVector_Intrinsics_vec256 a0 = Lib_IntVector_Intrinsics_vec256_mul64(r0, f10); + Lib_IntVector_Intrinsics_vec256 a1 = Lib_IntVector_Intrinsics_vec256_mul64(r1, f10); + Lib_IntVector_Intrinsics_vec256 a2 = Lib_IntVector_Intrinsics_vec256_mul64(r2, f10); + Lib_IntVector_Intrinsics_vec256 a3 = Lib_IntVector_Intrinsics_vec256_mul64(r3, f10); + Lib_IntVector_Intrinsics_vec256 a4 = Lib_IntVector_Intrinsics_vec256_mul64(r4, f10); + Lib_IntVector_Intrinsics_vec256 + a01 = + Lib_IntVector_Intrinsics_vec256_add64(a0, + Lib_IntVector_Intrinsics_vec256_mul64(r54, f110)); + Lib_IntVector_Intrinsics_vec256 + a11 = + Lib_IntVector_Intrinsics_vec256_add64(a1, + Lib_IntVector_Intrinsics_vec256_mul64(r0, f110)); + Lib_IntVector_Intrinsics_vec256 + a21 = + Lib_IntVector_Intrinsics_vec256_add64(a2, + Lib_IntVector_Intrinsics_vec256_mul64(r1, f110)); + Lib_IntVector_Intrinsics_vec256 + a31 = + Lib_IntVector_Intrinsics_vec256_add64(a3, + Lib_IntVector_Intrinsics_vec256_mul64(r2, f110)); + Lib_IntVector_Intrinsics_vec256 + a41 = + Lib_IntVector_Intrinsics_vec256_add64(a4, + Lib_IntVector_Intrinsics_vec256_mul64(r3, f110)); + Lib_IntVector_Intrinsics_vec256 + a02 = + Lib_IntVector_Intrinsics_vec256_add64(a01, + Lib_IntVector_Intrinsics_vec256_mul64(r53, f120)); + 
Lib_IntVector_Intrinsics_vec256 + a12 = + Lib_IntVector_Intrinsics_vec256_add64(a11, + Lib_IntVector_Intrinsics_vec256_mul64(r54, f120)); + Lib_IntVector_Intrinsics_vec256 + a22 = + Lib_IntVector_Intrinsics_vec256_add64(a21, + Lib_IntVector_Intrinsics_vec256_mul64(r0, f120)); + Lib_IntVector_Intrinsics_vec256 + a32 = + Lib_IntVector_Intrinsics_vec256_add64(a31, + Lib_IntVector_Intrinsics_vec256_mul64(r1, f120)); + Lib_IntVector_Intrinsics_vec256 + a42 = + Lib_IntVector_Intrinsics_vec256_add64(a41, + Lib_IntVector_Intrinsics_vec256_mul64(r2, f120)); + Lib_IntVector_Intrinsics_vec256 + a03 = + Lib_IntVector_Intrinsics_vec256_add64(a02, + Lib_IntVector_Intrinsics_vec256_mul64(r52, f130)); + Lib_IntVector_Intrinsics_vec256 + a13 = + Lib_IntVector_Intrinsics_vec256_add64(a12, + Lib_IntVector_Intrinsics_vec256_mul64(r53, f130)); + Lib_IntVector_Intrinsics_vec256 + a23 = + Lib_IntVector_Intrinsics_vec256_add64(a22, + Lib_IntVector_Intrinsics_vec256_mul64(r54, f130)); + Lib_IntVector_Intrinsics_vec256 + a33 = + Lib_IntVector_Intrinsics_vec256_add64(a32, + Lib_IntVector_Intrinsics_vec256_mul64(r0, f130)); + Lib_IntVector_Intrinsics_vec256 + a43 = + Lib_IntVector_Intrinsics_vec256_add64(a42, + Lib_IntVector_Intrinsics_vec256_mul64(r1, f130)); + Lib_IntVector_Intrinsics_vec256 + a04 = + Lib_IntVector_Intrinsics_vec256_add64(a03, + Lib_IntVector_Intrinsics_vec256_mul64(r51, f140)); + Lib_IntVector_Intrinsics_vec256 + a14 = + Lib_IntVector_Intrinsics_vec256_add64(a13, + Lib_IntVector_Intrinsics_vec256_mul64(r52, f140)); + Lib_IntVector_Intrinsics_vec256 + a24 = + Lib_IntVector_Intrinsics_vec256_add64(a23, + Lib_IntVector_Intrinsics_vec256_mul64(r53, f140)); + Lib_IntVector_Intrinsics_vec256 + a34 = + Lib_IntVector_Intrinsics_vec256_add64(a33, + Lib_IntVector_Intrinsics_vec256_mul64(r54, f140)); + Lib_IntVector_Intrinsics_vec256 + a44 = + Lib_IntVector_Intrinsics_vec256_add64(a43, + Lib_IntVector_Intrinsics_vec256_mul64(r0, f140)); + Lib_IntVector_Intrinsics_vec256 t01 = a04; + 
Lib_IntVector_Intrinsics_vec256 t1 = a14; + Lib_IntVector_Intrinsics_vec256 t2 = a24; + Lib_IntVector_Intrinsics_vec256 t3 = a34; + Lib_IntVector_Intrinsics_vec256 t4 = a44; + Lib_IntVector_Intrinsics_vec256 + mask26 = Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU); + Lib_IntVector_Intrinsics_vec256 + z0 = Lib_IntVector_Intrinsics_vec256_shift_right64(t01, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec256 + z1 = Lib_IntVector_Intrinsics_vec256_shift_right64(t3, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec256 x0 = Lib_IntVector_Intrinsics_vec256_and(t01, mask26); + Lib_IntVector_Intrinsics_vec256 x3 = Lib_IntVector_Intrinsics_vec256_and(t3, mask26); + Lib_IntVector_Intrinsics_vec256 x1 = Lib_IntVector_Intrinsics_vec256_add64(t1, z0); + Lib_IntVector_Intrinsics_vec256 x4 = Lib_IntVector_Intrinsics_vec256_add64(t4, z1); + Lib_IntVector_Intrinsics_vec256 + z01 = Lib_IntVector_Intrinsics_vec256_shift_right64(x1, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec256 + z11 = Lib_IntVector_Intrinsics_vec256_shift_right64(x4, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec256 + t = Lib_IntVector_Intrinsics_vec256_shift_left64(z11, (uint32_t)2U); + Lib_IntVector_Intrinsics_vec256 z12 = Lib_IntVector_Intrinsics_vec256_add64(z11, t); + Lib_IntVector_Intrinsics_vec256 x11 = Lib_IntVector_Intrinsics_vec256_and(x1, mask26); + Lib_IntVector_Intrinsics_vec256 x41 = Lib_IntVector_Intrinsics_vec256_and(x4, mask26); + Lib_IntVector_Intrinsics_vec256 x2 = Lib_IntVector_Intrinsics_vec256_add64(t2, z01); + Lib_IntVector_Intrinsics_vec256 x01 = Lib_IntVector_Intrinsics_vec256_add64(x0, z12); + Lib_IntVector_Intrinsics_vec256 + z02 = Lib_IntVector_Intrinsics_vec256_shift_right64(x2, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec256 + z13 = Lib_IntVector_Intrinsics_vec256_shift_right64(x01, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec256 x21 = Lib_IntVector_Intrinsics_vec256_and(x2, mask26); + Lib_IntVector_Intrinsics_vec256 x02 = Lib_IntVector_Intrinsics_vec256_and(x01, 
mask26); + Lib_IntVector_Intrinsics_vec256 x31 = Lib_IntVector_Intrinsics_vec256_add64(x3, z02); + Lib_IntVector_Intrinsics_vec256 x12 = Lib_IntVector_Intrinsics_vec256_add64(x11, z13); + Lib_IntVector_Intrinsics_vec256 + z03 = Lib_IntVector_Intrinsics_vec256_shift_right64(x31, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec256 x32 = Lib_IntVector_Intrinsics_vec256_and(x31, mask26); + Lib_IntVector_Intrinsics_vec256 x42 = Lib_IntVector_Intrinsics_vec256_add64(x41, z03); + Lib_IntVector_Intrinsics_vec256 o01 = x02; + Lib_IntVector_Intrinsics_vec256 o12 = x12; + Lib_IntVector_Intrinsics_vec256 o22 = x21; + Lib_IntVector_Intrinsics_vec256 o32 = x32; + Lib_IntVector_Intrinsics_vec256 o42 = x42; + acc0[0U] = o01; + acc0[1U] = o12; + acc0[2U] = o22; + acc0[3U] = o32; + acc0[4U] = o42; + Lib_IntVector_Intrinsics_vec256 f100 = acc0[0U]; + Lib_IntVector_Intrinsics_vec256 f11 = acc0[1U]; + Lib_IntVector_Intrinsics_vec256 f12 = acc0[2U]; + Lib_IntVector_Intrinsics_vec256 f13 = acc0[3U]; + Lib_IntVector_Intrinsics_vec256 f14 = acc0[4U]; + Lib_IntVector_Intrinsics_vec256 f20 = e[0U]; + Lib_IntVector_Intrinsics_vec256 f21 = e[1U]; + Lib_IntVector_Intrinsics_vec256 f22 = e[2U]; + Lib_IntVector_Intrinsics_vec256 f23 = e[3U]; + Lib_IntVector_Intrinsics_vec256 f24 = e[4U]; + Lib_IntVector_Intrinsics_vec256 o0 = Lib_IntVector_Intrinsics_vec256_add64(f100, f20); + Lib_IntVector_Intrinsics_vec256 o1 = Lib_IntVector_Intrinsics_vec256_add64(f11, f21); + Lib_IntVector_Intrinsics_vec256 o2 = Lib_IntVector_Intrinsics_vec256_add64(f12, f22); + Lib_IntVector_Intrinsics_vec256 o3 = Lib_IntVector_Intrinsics_vec256_add64(f13, f23); + Lib_IntVector_Intrinsics_vec256 o4 = Lib_IntVector_Intrinsics_vec256_add64(f14, f24); + acc0[0U] = o0; + acc0[1U] = o1; + acc0[2U] = o2; + acc0[3U] = o3; + acc0[4U] = o4; + } + Hacl_Impl_Poly1305_Field32xN_256_fmul_r4_normalize(acc0, pre0); + } + uint32_t len1 = n * (uint32_t)16U - len0; + uint8_t *t10 = blocks + len0; + uint32_t nb = len1 / (uint32_t)16U; + uint32_t 
rem1 = len1 % (uint32_t)16U; + for (uint32_t i = (uint32_t)0U; i < nb; i++) { + uint8_t *block = t10 + i * (uint32_t)16U; + KRML_PRE_ALIGN(32) + Lib_IntVector_Intrinsics_vec256 e[5U] KRML_POST_ALIGN(32) = { 0U }; + uint64_t u0 = load64_le(block); + uint64_t lo = u0; + uint64_t u = load64_le(block + (uint32_t)8U); + uint64_t hi = u; + Lib_IntVector_Intrinsics_vec256 f0 = Lib_IntVector_Intrinsics_vec256_load64(lo); + Lib_IntVector_Intrinsics_vec256 f1 = Lib_IntVector_Intrinsics_vec256_load64(hi); + Lib_IntVector_Intrinsics_vec256 + f010 = + Lib_IntVector_Intrinsics_vec256_and(f0, + Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec256 + f110 = + Lib_IntVector_Intrinsics_vec256_and(Lib_IntVector_Intrinsics_vec256_shift_right64(f0, + (uint32_t)26U), + Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec256 + f20 = + Lib_IntVector_Intrinsics_vec256_or(Lib_IntVector_Intrinsics_vec256_shift_right64(f0, + (uint32_t)52U), + Lib_IntVector_Intrinsics_vec256_shift_left64(Lib_IntVector_Intrinsics_vec256_and(f1, + Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3fffU)), + (uint32_t)12U)); + Lib_IntVector_Intrinsics_vec256 + f30 = + Lib_IntVector_Intrinsics_vec256_and(Lib_IntVector_Intrinsics_vec256_shift_right64(f1, + (uint32_t)14U), + Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec256 + f40 = Lib_IntVector_Intrinsics_vec256_shift_right64(f1, (uint32_t)40U); + Lib_IntVector_Intrinsics_vec256 f01 = f010; + Lib_IntVector_Intrinsics_vec256 f111 = f110; + Lib_IntVector_Intrinsics_vec256 f2 = f20; + Lib_IntVector_Intrinsics_vec256 f3 = f30; + Lib_IntVector_Intrinsics_vec256 f41 = f40; + e[0U] = f01; + e[1U] = f111; + e[2U] = f2; + e[3U] = f3; + e[4U] = f41; + uint64_t b = (uint64_t)0x1000000U; + Lib_IntVector_Intrinsics_vec256 mask = Lib_IntVector_Intrinsics_vec256_load64(b); + Lib_IntVector_Intrinsics_vec256 f4 = e[4U]; + e[4U] = 
Lib_IntVector_Intrinsics_vec256_or(f4, mask); + Lib_IntVector_Intrinsics_vec256 *r1 = pre0; + Lib_IntVector_Intrinsics_vec256 *r5 = pre0 + (uint32_t)5U; + Lib_IntVector_Intrinsics_vec256 r0 = r1[0U]; + Lib_IntVector_Intrinsics_vec256 r11 = r1[1U]; + Lib_IntVector_Intrinsics_vec256 r2 = r1[2U]; + Lib_IntVector_Intrinsics_vec256 r3 = r1[3U]; + Lib_IntVector_Intrinsics_vec256 r4 = r1[4U]; + Lib_IntVector_Intrinsics_vec256 r51 = r5[1U]; + Lib_IntVector_Intrinsics_vec256 r52 = r5[2U]; + Lib_IntVector_Intrinsics_vec256 r53 = r5[3U]; + Lib_IntVector_Intrinsics_vec256 r54 = r5[4U]; + Lib_IntVector_Intrinsics_vec256 f10 = e[0U]; + Lib_IntVector_Intrinsics_vec256 f11 = e[1U]; + Lib_IntVector_Intrinsics_vec256 f12 = e[2U]; + Lib_IntVector_Intrinsics_vec256 f13 = e[3U]; + Lib_IntVector_Intrinsics_vec256 f14 = e[4U]; + Lib_IntVector_Intrinsics_vec256 a0 = acc0[0U]; + Lib_IntVector_Intrinsics_vec256 a1 = acc0[1U]; + Lib_IntVector_Intrinsics_vec256 a2 = acc0[2U]; + Lib_IntVector_Intrinsics_vec256 a3 = acc0[3U]; + Lib_IntVector_Intrinsics_vec256 a4 = acc0[4U]; + Lib_IntVector_Intrinsics_vec256 a01 = Lib_IntVector_Intrinsics_vec256_add64(a0, f10); + Lib_IntVector_Intrinsics_vec256 a11 = Lib_IntVector_Intrinsics_vec256_add64(a1, f11); + Lib_IntVector_Intrinsics_vec256 a21 = Lib_IntVector_Intrinsics_vec256_add64(a2, f12); + Lib_IntVector_Intrinsics_vec256 a31 = Lib_IntVector_Intrinsics_vec256_add64(a3, f13); + Lib_IntVector_Intrinsics_vec256 a41 = Lib_IntVector_Intrinsics_vec256_add64(a4, f14); + Lib_IntVector_Intrinsics_vec256 a02 = Lib_IntVector_Intrinsics_vec256_mul64(r0, a01); + Lib_IntVector_Intrinsics_vec256 a12 = Lib_IntVector_Intrinsics_vec256_mul64(r11, a01); + Lib_IntVector_Intrinsics_vec256 a22 = Lib_IntVector_Intrinsics_vec256_mul64(r2, a01); + Lib_IntVector_Intrinsics_vec256 a32 = Lib_IntVector_Intrinsics_vec256_mul64(r3, a01); + Lib_IntVector_Intrinsics_vec256 a42 = Lib_IntVector_Intrinsics_vec256_mul64(r4, a01); + Lib_IntVector_Intrinsics_vec256 + a03 = + 
Lib_IntVector_Intrinsics_vec256_add64(a02, + Lib_IntVector_Intrinsics_vec256_mul64(r54, a11)); + Lib_IntVector_Intrinsics_vec256 + a13 = + Lib_IntVector_Intrinsics_vec256_add64(a12, + Lib_IntVector_Intrinsics_vec256_mul64(r0, a11)); + Lib_IntVector_Intrinsics_vec256 + a23 = + Lib_IntVector_Intrinsics_vec256_add64(a22, + Lib_IntVector_Intrinsics_vec256_mul64(r11, a11)); + Lib_IntVector_Intrinsics_vec256 + a33 = + Lib_IntVector_Intrinsics_vec256_add64(a32, + Lib_IntVector_Intrinsics_vec256_mul64(r2, a11)); + Lib_IntVector_Intrinsics_vec256 + a43 = + Lib_IntVector_Intrinsics_vec256_add64(a42, + Lib_IntVector_Intrinsics_vec256_mul64(r3, a11)); + Lib_IntVector_Intrinsics_vec256 + a04 = + Lib_IntVector_Intrinsics_vec256_add64(a03, + Lib_IntVector_Intrinsics_vec256_mul64(r53, a21)); + Lib_IntVector_Intrinsics_vec256 + a14 = + Lib_IntVector_Intrinsics_vec256_add64(a13, + Lib_IntVector_Intrinsics_vec256_mul64(r54, a21)); + Lib_IntVector_Intrinsics_vec256 + a24 = + Lib_IntVector_Intrinsics_vec256_add64(a23, + Lib_IntVector_Intrinsics_vec256_mul64(r0, a21)); + Lib_IntVector_Intrinsics_vec256 + a34 = + Lib_IntVector_Intrinsics_vec256_add64(a33, + Lib_IntVector_Intrinsics_vec256_mul64(r11, a21)); + Lib_IntVector_Intrinsics_vec256 + a44 = + Lib_IntVector_Intrinsics_vec256_add64(a43, + Lib_IntVector_Intrinsics_vec256_mul64(r2, a21)); + Lib_IntVector_Intrinsics_vec256 + a05 = + Lib_IntVector_Intrinsics_vec256_add64(a04, + Lib_IntVector_Intrinsics_vec256_mul64(r52, a31)); + Lib_IntVector_Intrinsics_vec256 + a15 = + Lib_IntVector_Intrinsics_vec256_add64(a14, + Lib_IntVector_Intrinsics_vec256_mul64(r53, a31)); + Lib_IntVector_Intrinsics_vec256 + a25 = + Lib_IntVector_Intrinsics_vec256_add64(a24, + Lib_IntVector_Intrinsics_vec256_mul64(r54, a31)); + Lib_IntVector_Intrinsics_vec256 + a35 = + Lib_IntVector_Intrinsics_vec256_add64(a34, + Lib_IntVector_Intrinsics_vec256_mul64(r0, a31)); + Lib_IntVector_Intrinsics_vec256 + a45 = + Lib_IntVector_Intrinsics_vec256_add64(a44, + 
Lib_IntVector_Intrinsics_vec256_mul64(r11, a31)); + Lib_IntVector_Intrinsics_vec256 + a06 = + Lib_IntVector_Intrinsics_vec256_add64(a05, + Lib_IntVector_Intrinsics_vec256_mul64(r51, a41)); + Lib_IntVector_Intrinsics_vec256 + a16 = + Lib_IntVector_Intrinsics_vec256_add64(a15, + Lib_IntVector_Intrinsics_vec256_mul64(r52, a41)); + Lib_IntVector_Intrinsics_vec256 + a26 = + Lib_IntVector_Intrinsics_vec256_add64(a25, + Lib_IntVector_Intrinsics_vec256_mul64(r53, a41)); + Lib_IntVector_Intrinsics_vec256 + a36 = + Lib_IntVector_Intrinsics_vec256_add64(a35, + Lib_IntVector_Intrinsics_vec256_mul64(r54, a41)); + Lib_IntVector_Intrinsics_vec256 + a46 = + Lib_IntVector_Intrinsics_vec256_add64(a45, + Lib_IntVector_Intrinsics_vec256_mul64(r0, a41)); + Lib_IntVector_Intrinsics_vec256 t01 = a06; + Lib_IntVector_Intrinsics_vec256 t11 = a16; + Lib_IntVector_Intrinsics_vec256 t2 = a26; + Lib_IntVector_Intrinsics_vec256 t3 = a36; + Lib_IntVector_Intrinsics_vec256 t4 = a46; + Lib_IntVector_Intrinsics_vec256 + mask26 = Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU); + Lib_IntVector_Intrinsics_vec256 + z0 = Lib_IntVector_Intrinsics_vec256_shift_right64(t01, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec256 + z1 = Lib_IntVector_Intrinsics_vec256_shift_right64(t3, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec256 x0 = Lib_IntVector_Intrinsics_vec256_and(t01, mask26); + Lib_IntVector_Intrinsics_vec256 x3 = Lib_IntVector_Intrinsics_vec256_and(t3, mask26); + Lib_IntVector_Intrinsics_vec256 x1 = Lib_IntVector_Intrinsics_vec256_add64(t11, z0); + Lib_IntVector_Intrinsics_vec256 x4 = Lib_IntVector_Intrinsics_vec256_add64(t4, z1); + Lib_IntVector_Intrinsics_vec256 + z01 = Lib_IntVector_Intrinsics_vec256_shift_right64(x1, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec256 + z11 = Lib_IntVector_Intrinsics_vec256_shift_right64(x4, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec256 + t = Lib_IntVector_Intrinsics_vec256_shift_left64(z11, (uint32_t)2U); + Lib_IntVector_Intrinsics_vec256 z12 = 
Lib_IntVector_Intrinsics_vec256_add64(z11, t); + Lib_IntVector_Intrinsics_vec256 x11 = Lib_IntVector_Intrinsics_vec256_and(x1, mask26); + Lib_IntVector_Intrinsics_vec256 x41 = Lib_IntVector_Intrinsics_vec256_and(x4, mask26); + Lib_IntVector_Intrinsics_vec256 x2 = Lib_IntVector_Intrinsics_vec256_add64(t2, z01); + Lib_IntVector_Intrinsics_vec256 x01 = Lib_IntVector_Intrinsics_vec256_add64(x0, z12); + Lib_IntVector_Intrinsics_vec256 + z02 = Lib_IntVector_Intrinsics_vec256_shift_right64(x2, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec256 + z13 = Lib_IntVector_Intrinsics_vec256_shift_right64(x01, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec256 x21 = Lib_IntVector_Intrinsics_vec256_and(x2, mask26); + Lib_IntVector_Intrinsics_vec256 x02 = Lib_IntVector_Intrinsics_vec256_and(x01, mask26); + Lib_IntVector_Intrinsics_vec256 x31 = Lib_IntVector_Intrinsics_vec256_add64(x3, z02); + Lib_IntVector_Intrinsics_vec256 x12 = Lib_IntVector_Intrinsics_vec256_add64(x11, z13); + Lib_IntVector_Intrinsics_vec256 + z03 = Lib_IntVector_Intrinsics_vec256_shift_right64(x31, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec256 x32 = Lib_IntVector_Intrinsics_vec256_and(x31, mask26); + Lib_IntVector_Intrinsics_vec256 x42 = Lib_IntVector_Intrinsics_vec256_add64(x41, z03); + Lib_IntVector_Intrinsics_vec256 o0 = x02; + Lib_IntVector_Intrinsics_vec256 o1 = x12; + Lib_IntVector_Intrinsics_vec256 o2 = x21; + Lib_IntVector_Intrinsics_vec256 o3 = x32; + Lib_IntVector_Intrinsics_vec256 o4 = x42; + acc0[0U] = o0; + acc0[1U] = o1; + acc0[2U] = o2; + acc0[3U] = o3; + acc0[4U] = o4; + } + if (rem1 > (uint32_t)0U) { + uint8_t *last = t10 + nb * (uint32_t)16U; + KRML_PRE_ALIGN(32) + Lib_IntVector_Intrinsics_vec256 e[5U] KRML_POST_ALIGN(32) = { 0U }; + uint8_t tmp[16U] = { 0U }; + memcpy(tmp, last, rem1 * sizeof(uint8_t)); + uint64_t u0 = load64_le(tmp); + uint64_t lo = u0; + uint64_t u = load64_le(tmp + (uint32_t)8U); + uint64_t hi = u; + Lib_IntVector_Intrinsics_vec256 f0 = 
Lib_IntVector_Intrinsics_vec256_load64(lo); + Lib_IntVector_Intrinsics_vec256 f1 = Lib_IntVector_Intrinsics_vec256_load64(hi); + Lib_IntVector_Intrinsics_vec256 + f010 = + Lib_IntVector_Intrinsics_vec256_and(f0, + Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec256 + f110 = + Lib_IntVector_Intrinsics_vec256_and(Lib_IntVector_Intrinsics_vec256_shift_right64(f0, + (uint32_t)26U), + Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec256 + f20 = + Lib_IntVector_Intrinsics_vec256_or(Lib_IntVector_Intrinsics_vec256_shift_right64(f0, + (uint32_t)52U), + Lib_IntVector_Intrinsics_vec256_shift_left64(Lib_IntVector_Intrinsics_vec256_and(f1, + Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3fffU)), + (uint32_t)12U)); + Lib_IntVector_Intrinsics_vec256 + f30 = + Lib_IntVector_Intrinsics_vec256_and(Lib_IntVector_Intrinsics_vec256_shift_right64(f1, + (uint32_t)14U), + Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec256 + f40 = Lib_IntVector_Intrinsics_vec256_shift_right64(f1, (uint32_t)40U); + Lib_IntVector_Intrinsics_vec256 f01 = f010; + Lib_IntVector_Intrinsics_vec256 f111 = f110; + Lib_IntVector_Intrinsics_vec256 f2 = f20; + Lib_IntVector_Intrinsics_vec256 f3 = f30; + Lib_IntVector_Intrinsics_vec256 f4 = f40; + e[0U] = f01; + e[1U] = f111; + e[2U] = f2; + e[3U] = f3; + e[4U] = f4; + uint64_t b = (uint64_t)1U << rem1 * (uint32_t)8U % (uint32_t)26U; + Lib_IntVector_Intrinsics_vec256 mask = Lib_IntVector_Intrinsics_vec256_load64(b); + Lib_IntVector_Intrinsics_vec256 fi = e[rem1 * (uint32_t)8U / (uint32_t)26U]; + e[rem1 * (uint32_t)8U / (uint32_t)26U] = Lib_IntVector_Intrinsics_vec256_or(fi, mask); + Lib_IntVector_Intrinsics_vec256 *r1 = pre0; + Lib_IntVector_Intrinsics_vec256 *r5 = pre0 + (uint32_t)5U; + Lib_IntVector_Intrinsics_vec256 r0 = r1[0U]; + Lib_IntVector_Intrinsics_vec256 r11 = r1[1U]; + Lib_IntVector_Intrinsics_vec256 r2 = r1[2U]; + 
Lib_IntVector_Intrinsics_vec256 r3 = r1[3U]; + Lib_IntVector_Intrinsics_vec256 r4 = r1[4U]; + Lib_IntVector_Intrinsics_vec256 r51 = r5[1U]; + Lib_IntVector_Intrinsics_vec256 r52 = r5[2U]; + Lib_IntVector_Intrinsics_vec256 r53 = r5[3U]; + Lib_IntVector_Intrinsics_vec256 r54 = r5[4U]; + Lib_IntVector_Intrinsics_vec256 f10 = e[0U]; + Lib_IntVector_Intrinsics_vec256 f11 = e[1U]; + Lib_IntVector_Intrinsics_vec256 f12 = e[2U]; + Lib_IntVector_Intrinsics_vec256 f13 = e[3U]; + Lib_IntVector_Intrinsics_vec256 f14 = e[4U]; + Lib_IntVector_Intrinsics_vec256 a0 = acc0[0U]; + Lib_IntVector_Intrinsics_vec256 a1 = acc0[1U]; + Lib_IntVector_Intrinsics_vec256 a2 = acc0[2U]; + Lib_IntVector_Intrinsics_vec256 a3 = acc0[3U]; + Lib_IntVector_Intrinsics_vec256 a4 = acc0[4U]; + Lib_IntVector_Intrinsics_vec256 a01 = Lib_IntVector_Intrinsics_vec256_add64(a0, f10); + Lib_IntVector_Intrinsics_vec256 a11 = Lib_IntVector_Intrinsics_vec256_add64(a1, f11); + Lib_IntVector_Intrinsics_vec256 a21 = Lib_IntVector_Intrinsics_vec256_add64(a2, f12); + Lib_IntVector_Intrinsics_vec256 a31 = Lib_IntVector_Intrinsics_vec256_add64(a3, f13); + Lib_IntVector_Intrinsics_vec256 a41 = Lib_IntVector_Intrinsics_vec256_add64(a4, f14); + Lib_IntVector_Intrinsics_vec256 a02 = Lib_IntVector_Intrinsics_vec256_mul64(r0, a01); + Lib_IntVector_Intrinsics_vec256 a12 = Lib_IntVector_Intrinsics_vec256_mul64(r11, a01); + Lib_IntVector_Intrinsics_vec256 a22 = Lib_IntVector_Intrinsics_vec256_mul64(r2, a01); + Lib_IntVector_Intrinsics_vec256 a32 = Lib_IntVector_Intrinsics_vec256_mul64(r3, a01); + Lib_IntVector_Intrinsics_vec256 a42 = Lib_IntVector_Intrinsics_vec256_mul64(r4, a01); + Lib_IntVector_Intrinsics_vec256 + a03 = + Lib_IntVector_Intrinsics_vec256_add64(a02, + Lib_IntVector_Intrinsics_vec256_mul64(r54, a11)); + Lib_IntVector_Intrinsics_vec256 + a13 = + Lib_IntVector_Intrinsics_vec256_add64(a12, + Lib_IntVector_Intrinsics_vec256_mul64(r0, a11)); + Lib_IntVector_Intrinsics_vec256 + a23 = + 
Lib_IntVector_Intrinsics_vec256_add64(a22, + Lib_IntVector_Intrinsics_vec256_mul64(r11, a11)); + Lib_IntVector_Intrinsics_vec256 + a33 = + Lib_IntVector_Intrinsics_vec256_add64(a32, + Lib_IntVector_Intrinsics_vec256_mul64(r2, a11)); + Lib_IntVector_Intrinsics_vec256 + a43 = + Lib_IntVector_Intrinsics_vec256_add64(a42, + Lib_IntVector_Intrinsics_vec256_mul64(r3, a11)); + Lib_IntVector_Intrinsics_vec256 + a04 = + Lib_IntVector_Intrinsics_vec256_add64(a03, + Lib_IntVector_Intrinsics_vec256_mul64(r53, a21)); + Lib_IntVector_Intrinsics_vec256 + a14 = + Lib_IntVector_Intrinsics_vec256_add64(a13, + Lib_IntVector_Intrinsics_vec256_mul64(r54, a21)); + Lib_IntVector_Intrinsics_vec256 + a24 = + Lib_IntVector_Intrinsics_vec256_add64(a23, + Lib_IntVector_Intrinsics_vec256_mul64(r0, a21)); + Lib_IntVector_Intrinsics_vec256 + a34 = + Lib_IntVector_Intrinsics_vec256_add64(a33, + Lib_IntVector_Intrinsics_vec256_mul64(r11, a21)); + Lib_IntVector_Intrinsics_vec256 + a44 = + Lib_IntVector_Intrinsics_vec256_add64(a43, + Lib_IntVector_Intrinsics_vec256_mul64(r2, a21)); + Lib_IntVector_Intrinsics_vec256 + a05 = + Lib_IntVector_Intrinsics_vec256_add64(a04, + Lib_IntVector_Intrinsics_vec256_mul64(r52, a31)); + Lib_IntVector_Intrinsics_vec256 + a15 = + Lib_IntVector_Intrinsics_vec256_add64(a14, + Lib_IntVector_Intrinsics_vec256_mul64(r53, a31)); + Lib_IntVector_Intrinsics_vec256 + a25 = + Lib_IntVector_Intrinsics_vec256_add64(a24, + Lib_IntVector_Intrinsics_vec256_mul64(r54, a31)); + Lib_IntVector_Intrinsics_vec256 + a35 = + Lib_IntVector_Intrinsics_vec256_add64(a34, + Lib_IntVector_Intrinsics_vec256_mul64(r0, a31)); + Lib_IntVector_Intrinsics_vec256 + a45 = + Lib_IntVector_Intrinsics_vec256_add64(a44, + Lib_IntVector_Intrinsics_vec256_mul64(r11, a31)); + Lib_IntVector_Intrinsics_vec256 + a06 = + Lib_IntVector_Intrinsics_vec256_add64(a05, + Lib_IntVector_Intrinsics_vec256_mul64(r51, a41)); + Lib_IntVector_Intrinsics_vec256 + a16 = + Lib_IntVector_Intrinsics_vec256_add64(a15, + 
Lib_IntVector_Intrinsics_vec256_mul64(r52, a41)); + Lib_IntVector_Intrinsics_vec256 + a26 = + Lib_IntVector_Intrinsics_vec256_add64(a25, + Lib_IntVector_Intrinsics_vec256_mul64(r53, a41)); + Lib_IntVector_Intrinsics_vec256 + a36 = + Lib_IntVector_Intrinsics_vec256_add64(a35, + Lib_IntVector_Intrinsics_vec256_mul64(r54, a41)); + Lib_IntVector_Intrinsics_vec256 + a46 = + Lib_IntVector_Intrinsics_vec256_add64(a45, + Lib_IntVector_Intrinsics_vec256_mul64(r0, a41)); + Lib_IntVector_Intrinsics_vec256 t01 = a06; + Lib_IntVector_Intrinsics_vec256 t11 = a16; + Lib_IntVector_Intrinsics_vec256 t2 = a26; + Lib_IntVector_Intrinsics_vec256 t3 = a36; + Lib_IntVector_Intrinsics_vec256 t4 = a46; + Lib_IntVector_Intrinsics_vec256 + mask26 = Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU); + Lib_IntVector_Intrinsics_vec256 + z0 = Lib_IntVector_Intrinsics_vec256_shift_right64(t01, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec256 + z1 = Lib_IntVector_Intrinsics_vec256_shift_right64(t3, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec256 x0 = Lib_IntVector_Intrinsics_vec256_and(t01, mask26); + Lib_IntVector_Intrinsics_vec256 x3 = Lib_IntVector_Intrinsics_vec256_and(t3, mask26); + Lib_IntVector_Intrinsics_vec256 x1 = Lib_IntVector_Intrinsics_vec256_add64(t11, z0); + Lib_IntVector_Intrinsics_vec256 x4 = Lib_IntVector_Intrinsics_vec256_add64(t4, z1); + Lib_IntVector_Intrinsics_vec256 + z01 = Lib_IntVector_Intrinsics_vec256_shift_right64(x1, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec256 + z11 = Lib_IntVector_Intrinsics_vec256_shift_right64(x4, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec256 + t = Lib_IntVector_Intrinsics_vec256_shift_left64(z11, (uint32_t)2U); + Lib_IntVector_Intrinsics_vec256 z12 = Lib_IntVector_Intrinsics_vec256_add64(z11, t); + Lib_IntVector_Intrinsics_vec256 x11 = Lib_IntVector_Intrinsics_vec256_and(x1, mask26); + Lib_IntVector_Intrinsics_vec256 x41 = Lib_IntVector_Intrinsics_vec256_and(x4, mask26); + Lib_IntVector_Intrinsics_vec256 x2 = 
Lib_IntVector_Intrinsics_vec256_add64(t2, z01); + Lib_IntVector_Intrinsics_vec256 x01 = Lib_IntVector_Intrinsics_vec256_add64(x0, z12); + Lib_IntVector_Intrinsics_vec256 + z02 = Lib_IntVector_Intrinsics_vec256_shift_right64(x2, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec256 + z13 = Lib_IntVector_Intrinsics_vec256_shift_right64(x01, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec256 x21 = Lib_IntVector_Intrinsics_vec256_and(x2, mask26); + Lib_IntVector_Intrinsics_vec256 x02 = Lib_IntVector_Intrinsics_vec256_and(x01, mask26); + Lib_IntVector_Intrinsics_vec256 x31 = Lib_IntVector_Intrinsics_vec256_add64(x3, z02); + Lib_IntVector_Intrinsics_vec256 x12 = Lib_IntVector_Intrinsics_vec256_add64(x11, z13); + Lib_IntVector_Intrinsics_vec256 + z03 = Lib_IntVector_Intrinsics_vec256_shift_right64(x31, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec256 x32 = Lib_IntVector_Intrinsics_vec256_and(x31, mask26); + Lib_IntVector_Intrinsics_vec256 x42 = Lib_IntVector_Intrinsics_vec256_add64(x41, z03); + Lib_IntVector_Intrinsics_vec256 o0 = x02; + Lib_IntVector_Intrinsics_vec256 o1 = x12; + Lib_IntVector_Intrinsics_vec256 o2 = x21; + Lib_IntVector_Intrinsics_vec256 o3 = x32; + Lib_IntVector_Intrinsics_vec256 o4 = x42; + acc0[0U] = o0; + acc0[1U] = o1; + acc0[2U] = o2; + acc0[3U] = o3; + acc0[4U] = o4; + } + uint8_t tmp[16U] = { 0U }; + memcpy(tmp, rem, r * sizeof(uint8_t)); + if (r > (uint32_t)0U) { + Lib_IntVector_Intrinsics_vec256 *pre = ctx + (uint32_t)5U; + Lib_IntVector_Intrinsics_vec256 *acc = ctx; + KRML_PRE_ALIGN(32) + Lib_IntVector_Intrinsics_vec256 e[5U] KRML_POST_ALIGN(32) = { 0U }; + uint64_t u0 = load64_le(tmp); + uint64_t lo = u0; + uint64_t u = load64_le(tmp + (uint32_t)8U); + uint64_t hi = u; + Lib_IntVector_Intrinsics_vec256 f0 = Lib_IntVector_Intrinsics_vec256_load64(lo); + Lib_IntVector_Intrinsics_vec256 f1 = Lib_IntVector_Intrinsics_vec256_load64(hi); + Lib_IntVector_Intrinsics_vec256 + f010 = + Lib_IntVector_Intrinsics_vec256_and(f0, + 
Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec256 + f110 = + Lib_IntVector_Intrinsics_vec256_and(Lib_IntVector_Intrinsics_vec256_shift_right64(f0, + (uint32_t)26U), + Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec256 + f20 = + Lib_IntVector_Intrinsics_vec256_or(Lib_IntVector_Intrinsics_vec256_shift_right64(f0, + (uint32_t)52U), + Lib_IntVector_Intrinsics_vec256_shift_left64(Lib_IntVector_Intrinsics_vec256_and(f1, + Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3fffU)), + (uint32_t)12U)); + Lib_IntVector_Intrinsics_vec256 + f30 = + Lib_IntVector_Intrinsics_vec256_and(Lib_IntVector_Intrinsics_vec256_shift_right64(f1, + (uint32_t)14U), + Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec256 + f40 = Lib_IntVector_Intrinsics_vec256_shift_right64(f1, (uint32_t)40U); + Lib_IntVector_Intrinsics_vec256 f01 = f010; + Lib_IntVector_Intrinsics_vec256 f111 = f110; + Lib_IntVector_Intrinsics_vec256 f2 = f20; + Lib_IntVector_Intrinsics_vec256 f3 = f30; + Lib_IntVector_Intrinsics_vec256 f41 = f40; + e[0U] = f01; + e[1U] = f111; + e[2U] = f2; + e[3U] = f3; + e[4U] = f41; + uint64_t b = (uint64_t)0x1000000U; + Lib_IntVector_Intrinsics_vec256 mask = Lib_IntVector_Intrinsics_vec256_load64(b); + Lib_IntVector_Intrinsics_vec256 f4 = e[4U]; + e[4U] = Lib_IntVector_Intrinsics_vec256_or(f4, mask); + Lib_IntVector_Intrinsics_vec256 *r1 = pre; + Lib_IntVector_Intrinsics_vec256 *r5 = pre + (uint32_t)5U; + Lib_IntVector_Intrinsics_vec256 r0 = r1[0U]; + Lib_IntVector_Intrinsics_vec256 r11 = r1[1U]; + Lib_IntVector_Intrinsics_vec256 r2 = r1[2U]; + Lib_IntVector_Intrinsics_vec256 r3 = r1[3U]; + Lib_IntVector_Intrinsics_vec256 r4 = r1[4U]; + Lib_IntVector_Intrinsics_vec256 r51 = r5[1U]; + Lib_IntVector_Intrinsics_vec256 r52 = r5[2U]; + Lib_IntVector_Intrinsics_vec256 r53 = r5[3U]; + Lib_IntVector_Intrinsics_vec256 r54 = r5[4U]; + 
Lib_IntVector_Intrinsics_vec256 f10 = e[0U]; + Lib_IntVector_Intrinsics_vec256 f11 = e[1U]; + Lib_IntVector_Intrinsics_vec256 f12 = e[2U]; + Lib_IntVector_Intrinsics_vec256 f13 = e[3U]; + Lib_IntVector_Intrinsics_vec256 f14 = e[4U]; + Lib_IntVector_Intrinsics_vec256 a0 = acc[0U]; + Lib_IntVector_Intrinsics_vec256 a1 = acc[1U]; + Lib_IntVector_Intrinsics_vec256 a2 = acc[2U]; + Lib_IntVector_Intrinsics_vec256 a3 = acc[3U]; + Lib_IntVector_Intrinsics_vec256 a4 = acc[4U]; + Lib_IntVector_Intrinsics_vec256 a01 = Lib_IntVector_Intrinsics_vec256_add64(a0, f10); + Lib_IntVector_Intrinsics_vec256 a11 = Lib_IntVector_Intrinsics_vec256_add64(a1, f11); + Lib_IntVector_Intrinsics_vec256 a21 = Lib_IntVector_Intrinsics_vec256_add64(a2, f12); + Lib_IntVector_Intrinsics_vec256 a31 = Lib_IntVector_Intrinsics_vec256_add64(a3, f13); + Lib_IntVector_Intrinsics_vec256 a41 = Lib_IntVector_Intrinsics_vec256_add64(a4, f14); + Lib_IntVector_Intrinsics_vec256 a02 = Lib_IntVector_Intrinsics_vec256_mul64(r0, a01); + Lib_IntVector_Intrinsics_vec256 a12 = Lib_IntVector_Intrinsics_vec256_mul64(r11, a01); + Lib_IntVector_Intrinsics_vec256 a22 = Lib_IntVector_Intrinsics_vec256_mul64(r2, a01); + Lib_IntVector_Intrinsics_vec256 a32 = Lib_IntVector_Intrinsics_vec256_mul64(r3, a01); + Lib_IntVector_Intrinsics_vec256 a42 = Lib_IntVector_Intrinsics_vec256_mul64(r4, a01); + Lib_IntVector_Intrinsics_vec256 + a03 = + Lib_IntVector_Intrinsics_vec256_add64(a02, + Lib_IntVector_Intrinsics_vec256_mul64(r54, a11)); + Lib_IntVector_Intrinsics_vec256 + a13 = + Lib_IntVector_Intrinsics_vec256_add64(a12, + Lib_IntVector_Intrinsics_vec256_mul64(r0, a11)); + Lib_IntVector_Intrinsics_vec256 + a23 = + Lib_IntVector_Intrinsics_vec256_add64(a22, + Lib_IntVector_Intrinsics_vec256_mul64(r11, a11)); + Lib_IntVector_Intrinsics_vec256 + a33 = + Lib_IntVector_Intrinsics_vec256_add64(a32, + Lib_IntVector_Intrinsics_vec256_mul64(r2, a11)); + Lib_IntVector_Intrinsics_vec256 + a43 = + Lib_IntVector_Intrinsics_vec256_add64(a42, + 
Lib_IntVector_Intrinsics_vec256_mul64(r3, a11)); + Lib_IntVector_Intrinsics_vec256 + a04 = + Lib_IntVector_Intrinsics_vec256_add64(a03, + Lib_IntVector_Intrinsics_vec256_mul64(r53, a21)); + Lib_IntVector_Intrinsics_vec256 + a14 = + Lib_IntVector_Intrinsics_vec256_add64(a13, + Lib_IntVector_Intrinsics_vec256_mul64(r54, a21)); + Lib_IntVector_Intrinsics_vec256 + a24 = + Lib_IntVector_Intrinsics_vec256_add64(a23, + Lib_IntVector_Intrinsics_vec256_mul64(r0, a21)); + Lib_IntVector_Intrinsics_vec256 + a34 = + Lib_IntVector_Intrinsics_vec256_add64(a33, + Lib_IntVector_Intrinsics_vec256_mul64(r11, a21)); + Lib_IntVector_Intrinsics_vec256 + a44 = + Lib_IntVector_Intrinsics_vec256_add64(a43, + Lib_IntVector_Intrinsics_vec256_mul64(r2, a21)); + Lib_IntVector_Intrinsics_vec256 + a05 = + Lib_IntVector_Intrinsics_vec256_add64(a04, + Lib_IntVector_Intrinsics_vec256_mul64(r52, a31)); + Lib_IntVector_Intrinsics_vec256 + a15 = + Lib_IntVector_Intrinsics_vec256_add64(a14, + Lib_IntVector_Intrinsics_vec256_mul64(r53, a31)); + Lib_IntVector_Intrinsics_vec256 + a25 = + Lib_IntVector_Intrinsics_vec256_add64(a24, + Lib_IntVector_Intrinsics_vec256_mul64(r54, a31)); + Lib_IntVector_Intrinsics_vec256 + a35 = + Lib_IntVector_Intrinsics_vec256_add64(a34, + Lib_IntVector_Intrinsics_vec256_mul64(r0, a31)); + Lib_IntVector_Intrinsics_vec256 + a45 = + Lib_IntVector_Intrinsics_vec256_add64(a44, + Lib_IntVector_Intrinsics_vec256_mul64(r11, a31)); + Lib_IntVector_Intrinsics_vec256 + a06 = + Lib_IntVector_Intrinsics_vec256_add64(a05, + Lib_IntVector_Intrinsics_vec256_mul64(r51, a41)); + Lib_IntVector_Intrinsics_vec256 + a16 = + Lib_IntVector_Intrinsics_vec256_add64(a15, + Lib_IntVector_Intrinsics_vec256_mul64(r52, a41)); + Lib_IntVector_Intrinsics_vec256 + a26 = + Lib_IntVector_Intrinsics_vec256_add64(a25, + Lib_IntVector_Intrinsics_vec256_mul64(r53, a41)); + Lib_IntVector_Intrinsics_vec256 + a36 = + Lib_IntVector_Intrinsics_vec256_add64(a35, + Lib_IntVector_Intrinsics_vec256_mul64(r54, a41)); + 
Lib_IntVector_Intrinsics_vec256 + a46 = + Lib_IntVector_Intrinsics_vec256_add64(a45, + Lib_IntVector_Intrinsics_vec256_mul64(r0, a41)); + Lib_IntVector_Intrinsics_vec256 t0 = a06; + Lib_IntVector_Intrinsics_vec256 t1 = a16; + Lib_IntVector_Intrinsics_vec256 t2 = a26; + Lib_IntVector_Intrinsics_vec256 t3 = a36; + Lib_IntVector_Intrinsics_vec256 t4 = a46; + Lib_IntVector_Intrinsics_vec256 + mask26 = Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU); + Lib_IntVector_Intrinsics_vec256 + z0 = Lib_IntVector_Intrinsics_vec256_shift_right64(t0, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec256 + z1 = Lib_IntVector_Intrinsics_vec256_shift_right64(t3, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec256 x0 = Lib_IntVector_Intrinsics_vec256_and(t0, mask26); + Lib_IntVector_Intrinsics_vec256 x3 = Lib_IntVector_Intrinsics_vec256_and(t3, mask26); + Lib_IntVector_Intrinsics_vec256 x1 = Lib_IntVector_Intrinsics_vec256_add64(t1, z0); + Lib_IntVector_Intrinsics_vec256 x4 = Lib_IntVector_Intrinsics_vec256_add64(t4, z1); + Lib_IntVector_Intrinsics_vec256 + z01 = Lib_IntVector_Intrinsics_vec256_shift_right64(x1, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec256 + z11 = Lib_IntVector_Intrinsics_vec256_shift_right64(x4, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec256 + t = Lib_IntVector_Intrinsics_vec256_shift_left64(z11, (uint32_t)2U); + Lib_IntVector_Intrinsics_vec256 z12 = Lib_IntVector_Intrinsics_vec256_add64(z11, t); + Lib_IntVector_Intrinsics_vec256 x11 = Lib_IntVector_Intrinsics_vec256_and(x1, mask26); + Lib_IntVector_Intrinsics_vec256 x41 = Lib_IntVector_Intrinsics_vec256_and(x4, mask26); + Lib_IntVector_Intrinsics_vec256 x2 = Lib_IntVector_Intrinsics_vec256_add64(t2, z01); + Lib_IntVector_Intrinsics_vec256 x01 = Lib_IntVector_Intrinsics_vec256_add64(x0, z12); + Lib_IntVector_Intrinsics_vec256 + z02 = Lib_IntVector_Intrinsics_vec256_shift_right64(x2, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec256 + z13 = Lib_IntVector_Intrinsics_vec256_shift_right64(x01, 
(uint32_t)26U); + Lib_IntVector_Intrinsics_vec256 x21 = Lib_IntVector_Intrinsics_vec256_and(x2, mask26); + Lib_IntVector_Intrinsics_vec256 x02 = Lib_IntVector_Intrinsics_vec256_and(x01, mask26); + Lib_IntVector_Intrinsics_vec256 x31 = Lib_IntVector_Intrinsics_vec256_add64(x3, z02); + Lib_IntVector_Intrinsics_vec256 x12 = Lib_IntVector_Intrinsics_vec256_add64(x11, z13); + Lib_IntVector_Intrinsics_vec256 + z03 = Lib_IntVector_Intrinsics_vec256_shift_right64(x31, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec256 x32 = Lib_IntVector_Intrinsics_vec256_and(x31, mask26); + Lib_IntVector_Intrinsics_vec256 x42 = Lib_IntVector_Intrinsics_vec256_add64(x41, z03); + Lib_IntVector_Intrinsics_vec256 o0 = x02; + Lib_IntVector_Intrinsics_vec256 o1 = x12; + Lib_IntVector_Intrinsics_vec256 o2 = x21; + Lib_IntVector_Intrinsics_vec256 o3 = x32; + Lib_IntVector_Intrinsics_vec256 o4 = x42; + acc[0U] = o0; + acc[1U] = o1; + acc[2U] = o2; + acc[3U] = o3; + acc[4U] = o4; + return; + } +} + +static inline void +poly1305_do_256( + uint8_t *k, + uint32_t aadlen, + uint8_t *aad, + uint32_t mlen, + uint8_t *m, + uint8_t *out) +{ + KRML_PRE_ALIGN(32) + Lib_IntVector_Intrinsics_vec256 ctx[25U] KRML_POST_ALIGN(32) = { 0U }; + uint8_t block[16U] = { 0U }; + Hacl_Poly1305_256_poly1305_init(ctx, k); + if (aadlen != (uint32_t)0U) { + poly1305_padded_256(ctx, aadlen, aad); + } + if (mlen != (uint32_t)0U) { + poly1305_padded_256(ctx, mlen, m); + } + store64_le(block, (uint64_t)aadlen); + store64_le(block + (uint32_t)8U, (uint64_t)mlen); + Lib_IntVector_Intrinsics_vec256 *pre = ctx + (uint32_t)5U; + Lib_IntVector_Intrinsics_vec256 *acc = ctx; + KRML_PRE_ALIGN(32) + Lib_IntVector_Intrinsics_vec256 e[5U] KRML_POST_ALIGN(32) = { 0U }; + uint64_t u0 = load64_le(block); + uint64_t lo = u0; + uint64_t u = load64_le(block + (uint32_t)8U); + uint64_t hi = u; + Lib_IntVector_Intrinsics_vec256 f0 = Lib_IntVector_Intrinsics_vec256_load64(lo); + Lib_IntVector_Intrinsics_vec256 f1 = 
Lib_IntVector_Intrinsics_vec256_load64(hi); + Lib_IntVector_Intrinsics_vec256 + f010 = + Lib_IntVector_Intrinsics_vec256_and(f0, + Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec256 + f110 = + Lib_IntVector_Intrinsics_vec256_and(Lib_IntVector_Intrinsics_vec256_shift_right64(f0, + (uint32_t)26U), + Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec256 + f20 = + Lib_IntVector_Intrinsics_vec256_or(Lib_IntVector_Intrinsics_vec256_shift_right64(f0, + (uint32_t)52U), + Lib_IntVector_Intrinsics_vec256_shift_left64(Lib_IntVector_Intrinsics_vec256_and(f1, + Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3fffU)), + (uint32_t)12U)); + Lib_IntVector_Intrinsics_vec256 + f30 = + Lib_IntVector_Intrinsics_vec256_and(Lib_IntVector_Intrinsics_vec256_shift_right64(f1, + (uint32_t)14U), + Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec256 + f40 = Lib_IntVector_Intrinsics_vec256_shift_right64(f1, (uint32_t)40U); + Lib_IntVector_Intrinsics_vec256 f01 = f010; + Lib_IntVector_Intrinsics_vec256 f111 = f110; + Lib_IntVector_Intrinsics_vec256 f2 = f20; + Lib_IntVector_Intrinsics_vec256 f3 = f30; + Lib_IntVector_Intrinsics_vec256 f41 = f40; + e[0U] = f01; + e[1U] = f111; + e[2U] = f2; + e[3U] = f3; + e[4U] = f41; + uint64_t b = (uint64_t)0x1000000U; + Lib_IntVector_Intrinsics_vec256 mask = Lib_IntVector_Intrinsics_vec256_load64(b); + Lib_IntVector_Intrinsics_vec256 f4 = e[4U]; + e[4U] = Lib_IntVector_Intrinsics_vec256_or(f4, mask); + Lib_IntVector_Intrinsics_vec256 *r = pre; + Lib_IntVector_Intrinsics_vec256 *r5 = pre + (uint32_t)5U; + Lib_IntVector_Intrinsics_vec256 r0 = r[0U]; + Lib_IntVector_Intrinsics_vec256 r1 = r[1U]; + Lib_IntVector_Intrinsics_vec256 r2 = r[2U]; + Lib_IntVector_Intrinsics_vec256 r3 = r[3U]; + Lib_IntVector_Intrinsics_vec256 r4 = r[4U]; + Lib_IntVector_Intrinsics_vec256 r51 = r5[1U]; + Lib_IntVector_Intrinsics_vec256 r52 = r5[2U]; + 
Lib_IntVector_Intrinsics_vec256 r53 = r5[3U]; + Lib_IntVector_Intrinsics_vec256 r54 = r5[4U]; + Lib_IntVector_Intrinsics_vec256 f10 = e[0U]; + Lib_IntVector_Intrinsics_vec256 f11 = e[1U]; + Lib_IntVector_Intrinsics_vec256 f12 = e[2U]; + Lib_IntVector_Intrinsics_vec256 f13 = e[3U]; + Lib_IntVector_Intrinsics_vec256 f14 = e[4U]; + Lib_IntVector_Intrinsics_vec256 a0 = acc[0U]; + Lib_IntVector_Intrinsics_vec256 a1 = acc[1U]; + Lib_IntVector_Intrinsics_vec256 a2 = acc[2U]; + Lib_IntVector_Intrinsics_vec256 a3 = acc[3U]; + Lib_IntVector_Intrinsics_vec256 a4 = acc[4U]; + Lib_IntVector_Intrinsics_vec256 a01 = Lib_IntVector_Intrinsics_vec256_add64(a0, f10); + Lib_IntVector_Intrinsics_vec256 a11 = Lib_IntVector_Intrinsics_vec256_add64(a1, f11); + Lib_IntVector_Intrinsics_vec256 a21 = Lib_IntVector_Intrinsics_vec256_add64(a2, f12); + Lib_IntVector_Intrinsics_vec256 a31 = Lib_IntVector_Intrinsics_vec256_add64(a3, f13); + Lib_IntVector_Intrinsics_vec256 a41 = Lib_IntVector_Intrinsics_vec256_add64(a4, f14); + Lib_IntVector_Intrinsics_vec256 a02 = Lib_IntVector_Intrinsics_vec256_mul64(r0, a01); + Lib_IntVector_Intrinsics_vec256 a12 = Lib_IntVector_Intrinsics_vec256_mul64(r1, a01); + Lib_IntVector_Intrinsics_vec256 a22 = Lib_IntVector_Intrinsics_vec256_mul64(r2, a01); + Lib_IntVector_Intrinsics_vec256 a32 = Lib_IntVector_Intrinsics_vec256_mul64(r3, a01); + Lib_IntVector_Intrinsics_vec256 a42 = Lib_IntVector_Intrinsics_vec256_mul64(r4, a01); + Lib_IntVector_Intrinsics_vec256 + a03 = + Lib_IntVector_Intrinsics_vec256_add64(a02, + Lib_IntVector_Intrinsics_vec256_mul64(r54, a11)); + Lib_IntVector_Intrinsics_vec256 + a13 = + Lib_IntVector_Intrinsics_vec256_add64(a12, + Lib_IntVector_Intrinsics_vec256_mul64(r0, a11)); + Lib_IntVector_Intrinsics_vec256 + a23 = + Lib_IntVector_Intrinsics_vec256_add64(a22, + Lib_IntVector_Intrinsics_vec256_mul64(r1, a11)); + Lib_IntVector_Intrinsics_vec256 + a33 = + Lib_IntVector_Intrinsics_vec256_add64(a32, + Lib_IntVector_Intrinsics_vec256_mul64(r2, 
a11)); + Lib_IntVector_Intrinsics_vec256 + a43 = + Lib_IntVector_Intrinsics_vec256_add64(a42, + Lib_IntVector_Intrinsics_vec256_mul64(r3, a11)); + Lib_IntVector_Intrinsics_vec256 + a04 = + Lib_IntVector_Intrinsics_vec256_add64(a03, + Lib_IntVector_Intrinsics_vec256_mul64(r53, a21)); + Lib_IntVector_Intrinsics_vec256 + a14 = + Lib_IntVector_Intrinsics_vec256_add64(a13, + Lib_IntVector_Intrinsics_vec256_mul64(r54, a21)); + Lib_IntVector_Intrinsics_vec256 + a24 = + Lib_IntVector_Intrinsics_vec256_add64(a23, + Lib_IntVector_Intrinsics_vec256_mul64(r0, a21)); + Lib_IntVector_Intrinsics_vec256 + a34 = + Lib_IntVector_Intrinsics_vec256_add64(a33, + Lib_IntVector_Intrinsics_vec256_mul64(r1, a21)); + Lib_IntVector_Intrinsics_vec256 + a44 = + Lib_IntVector_Intrinsics_vec256_add64(a43, + Lib_IntVector_Intrinsics_vec256_mul64(r2, a21)); + Lib_IntVector_Intrinsics_vec256 + a05 = + Lib_IntVector_Intrinsics_vec256_add64(a04, + Lib_IntVector_Intrinsics_vec256_mul64(r52, a31)); + Lib_IntVector_Intrinsics_vec256 + a15 = + Lib_IntVector_Intrinsics_vec256_add64(a14, + Lib_IntVector_Intrinsics_vec256_mul64(r53, a31)); + Lib_IntVector_Intrinsics_vec256 + a25 = + Lib_IntVector_Intrinsics_vec256_add64(a24, + Lib_IntVector_Intrinsics_vec256_mul64(r54, a31)); + Lib_IntVector_Intrinsics_vec256 + a35 = + Lib_IntVector_Intrinsics_vec256_add64(a34, + Lib_IntVector_Intrinsics_vec256_mul64(r0, a31)); + Lib_IntVector_Intrinsics_vec256 + a45 = + Lib_IntVector_Intrinsics_vec256_add64(a44, + Lib_IntVector_Intrinsics_vec256_mul64(r1, a31)); + Lib_IntVector_Intrinsics_vec256 + a06 = + Lib_IntVector_Intrinsics_vec256_add64(a05, + Lib_IntVector_Intrinsics_vec256_mul64(r51, a41)); + Lib_IntVector_Intrinsics_vec256 + a16 = + Lib_IntVector_Intrinsics_vec256_add64(a15, + Lib_IntVector_Intrinsics_vec256_mul64(r52, a41)); + Lib_IntVector_Intrinsics_vec256 + a26 = + Lib_IntVector_Intrinsics_vec256_add64(a25, + Lib_IntVector_Intrinsics_vec256_mul64(r53, a41)); + Lib_IntVector_Intrinsics_vec256 + a36 = + 
Lib_IntVector_Intrinsics_vec256_add64(a35, + Lib_IntVector_Intrinsics_vec256_mul64(r54, a41)); + Lib_IntVector_Intrinsics_vec256 + a46 = + Lib_IntVector_Intrinsics_vec256_add64(a45, + Lib_IntVector_Intrinsics_vec256_mul64(r0, a41)); + Lib_IntVector_Intrinsics_vec256 t0 = a06; + Lib_IntVector_Intrinsics_vec256 t1 = a16; + Lib_IntVector_Intrinsics_vec256 t2 = a26; + Lib_IntVector_Intrinsics_vec256 t3 = a36; + Lib_IntVector_Intrinsics_vec256 t4 = a46; + Lib_IntVector_Intrinsics_vec256 + mask26 = Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU); + Lib_IntVector_Intrinsics_vec256 + z0 = Lib_IntVector_Intrinsics_vec256_shift_right64(t0, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec256 + z1 = Lib_IntVector_Intrinsics_vec256_shift_right64(t3, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec256 x0 = Lib_IntVector_Intrinsics_vec256_and(t0, mask26); + Lib_IntVector_Intrinsics_vec256 x3 = Lib_IntVector_Intrinsics_vec256_and(t3, mask26); + Lib_IntVector_Intrinsics_vec256 x1 = Lib_IntVector_Intrinsics_vec256_add64(t1, z0); + Lib_IntVector_Intrinsics_vec256 x4 = Lib_IntVector_Intrinsics_vec256_add64(t4, z1); + Lib_IntVector_Intrinsics_vec256 + z01 = Lib_IntVector_Intrinsics_vec256_shift_right64(x1, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec256 + z11 = Lib_IntVector_Intrinsics_vec256_shift_right64(x4, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec256 + t = Lib_IntVector_Intrinsics_vec256_shift_left64(z11, (uint32_t)2U); + Lib_IntVector_Intrinsics_vec256 z12 = Lib_IntVector_Intrinsics_vec256_add64(z11, t); + Lib_IntVector_Intrinsics_vec256 x11 = Lib_IntVector_Intrinsics_vec256_and(x1, mask26); + Lib_IntVector_Intrinsics_vec256 x41 = Lib_IntVector_Intrinsics_vec256_and(x4, mask26); + Lib_IntVector_Intrinsics_vec256 x2 = Lib_IntVector_Intrinsics_vec256_add64(t2, z01); + Lib_IntVector_Intrinsics_vec256 x01 = Lib_IntVector_Intrinsics_vec256_add64(x0, z12); + Lib_IntVector_Intrinsics_vec256 + z02 = Lib_IntVector_Intrinsics_vec256_shift_right64(x2, (uint32_t)26U); + 
Lib_IntVector_Intrinsics_vec256 + z13 = Lib_IntVector_Intrinsics_vec256_shift_right64(x01, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec256 x21 = Lib_IntVector_Intrinsics_vec256_and(x2, mask26); + Lib_IntVector_Intrinsics_vec256 x02 = Lib_IntVector_Intrinsics_vec256_and(x01, mask26); + Lib_IntVector_Intrinsics_vec256 x31 = Lib_IntVector_Intrinsics_vec256_add64(x3, z02); + Lib_IntVector_Intrinsics_vec256 x12 = Lib_IntVector_Intrinsics_vec256_add64(x11, z13); + Lib_IntVector_Intrinsics_vec256 + z03 = Lib_IntVector_Intrinsics_vec256_shift_right64(x31, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec256 x32 = Lib_IntVector_Intrinsics_vec256_and(x31, mask26); + Lib_IntVector_Intrinsics_vec256 x42 = Lib_IntVector_Intrinsics_vec256_add64(x41, z03); + Lib_IntVector_Intrinsics_vec256 o0 = x02; + Lib_IntVector_Intrinsics_vec256 o1 = x12; + Lib_IntVector_Intrinsics_vec256 o2 = x21; + Lib_IntVector_Intrinsics_vec256 o3 = x32; + Lib_IntVector_Intrinsics_vec256 o4 = x42; + acc[0U] = o0; + acc[1U] = o1; + acc[2U] = o2; + acc[3U] = o3; + acc[4U] = o4; + Hacl_Poly1305_256_poly1305_finish(out, k, ctx); +} + +void +Hacl_Chacha20Poly1305_256_aead_encrypt( + uint8_t *k, + uint8_t *n, + uint32_t aadlen, + uint8_t *aad, + uint32_t mlen, + uint8_t *m, + uint8_t *cipher, + uint8_t *mac) +{ + Hacl_Chacha20_Vec256_chacha20_encrypt_256(mlen, cipher, m, k, n, (uint32_t)1U); + uint8_t tmp[64U] = { 0U }; + Hacl_Chacha20_Vec256_chacha20_encrypt_256((uint32_t)64U, tmp, tmp, k, n, (uint32_t)0U); + uint8_t *key = tmp; + poly1305_do_256(key, aadlen, aad, mlen, cipher, mac); +} + +uint32_t +Hacl_Chacha20Poly1305_256_aead_decrypt( + uint8_t *k, + uint8_t *n, + uint32_t aadlen, + uint8_t *aad, + uint32_t mlen, + uint8_t *m, + uint8_t *cipher, + uint8_t *mac) +{ + uint8_t computed_mac[16U] = { 0U }; + uint8_t tmp[64U] = { 0U }; + Hacl_Chacha20_Vec256_chacha20_encrypt_256((uint32_t)64U, tmp, tmp, k, n, (uint32_t)0U); + uint8_t *key = tmp; + poly1305_do_256(key, aadlen, aad, mlen, cipher, computed_mac); + 
uint8_t res = (uint8_t)255U; + KRML_MAYBE_FOR16(i, + (uint32_t)0U, + (uint32_t)16U, + (uint32_t)1U, + uint8_t uu____0 = FStar_UInt8_eq_mask(computed_mac[i], mac[i]); + res = uu____0 & res;); + uint8_t z = res; + if (z == (uint8_t)255U) { + Hacl_Chacha20_Vec256_chacha20_encrypt_256(mlen, m, cipher, k, n, (uint32_t)1U); + return (uint32_t)0U; + } + return (uint32_t)1U; +} diff --git a/security/nss/lib/freebl/verified/Hacl_Chacha20Poly1305_256.h b/security/nss/lib/freebl/verified/Hacl_Chacha20Poly1305_256.h new file mode 100644 index 0000000000..9a81e01f57 --- /dev/null +++ b/security/nss/lib/freebl/verified/Hacl_Chacha20Poly1305_256.h @@ -0,0 +1,67 @@ +/* MIT License + * + * Copyright (c) 2016-2020 INRIA, CMU and Microsoft Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#ifndef __Hacl_Chacha20Poly1305_256_H +#define __Hacl_Chacha20Poly1305_256_H + +#if defined(__cplusplus) +extern "C" { +#endif + +#include <string.h> +#include "krml/internal/types.h" +#include "krml/lowstar_endianness.h" +#include "krml/internal/target.h" + +#include "Hacl_Poly1305_256.h" +#include "Hacl_Krmllib.h" +#include "Hacl_Chacha20_Vec256.h" + +void +Hacl_Chacha20Poly1305_256_aead_encrypt( + uint8_t *k, + uint8_t *n, + uint32_t aadlen, + uint8_t *aad, + uint32_t mlen, + uint8_t *m, + uint8_t *cipher, + uint8_t *mac); + +uint32_t +Hacl_Chacha20Poly1305_256_aead_decrypt( + uint8_t *k, + uint8_t *n, + uint32_t aadlen, + uint8_t *aad, + uint32_t mlen, + uint8_t *m, + uint8_t *cipher, + uint8_t *mac); + +#if defined(__cplusplus) +} +#endif + +#define __Hacl_Chacha20Poly1305_256_H_DEFINED +#endif diff --git a/security/nss/lib/freebl/verified/Hacl_Chacha20Poly1305_32.c b/security/nss/lib/freebl/verified/Hacl_Chacha20Poly1305_32.c new file mode 100644 index 0000000000..f8efb00377 --- /dev/null +++ b/security/nss/lib/freebl/verified/Hacl_Chacha20Poly1305_32.c @@ -0,0 +1,592 @@ +/* MIT License + * + * Copyright (c) 2016-2020 INRIA, CMU and Microsoft Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "Hacl_Chacha20Poly1305_32.h" + +#include "internal/Hacl_Krmllib.h" + +static inline void +poly1305_padded_32(uint64_t *ctx, uint32_t len, uint8_t *text) +{ + uint32_t n = len / (uint32_t)16U; + uint32_t r = len % (uint32_t)16U; + uint8_t *blocks = text; + uint8_t *rem = text + n * (uint32_t)16U; + uint64_t *pre0 = ctx + (uint32_t)5U; + uint64_t *acc0 = ctx; + uint32_t nb = n * (uint32_t)16U / (uint32_t)16U; + uint32_t rem1 = n * (uint32_t)16U % (uint32_t)16U; + for (uint32_t i = (uint32_t)0U; i < nb; i++) { + uint8_t *block = blocks + i * (uint32_t)16U; + uint64_t e[5U] = { 0U }; + uint64_t u0 = load64_le(block); + uint64_t lo = u0; + uint64_t u = load64_le(block + (uint32_t)8U); + uint64_t hi = u; + uint64_t f0 = lo; + uint64_t f1 = hi; + uint64_t f010 = f0 & (uint64_t)0x3ffffffU; + uint64_t f110 = f0 >> (uint32_t)26U & (uint64_t)0x3ffffffU; + uint64_t f20 = f0 >> (uint32_t)52U | (f1 & (uint64_t)0x3fffU) << (uint32_t)12U; + uint64_t f30 = f1 >> (uint32_t)14U & (uint64_t)0x3ffffffU; + uint64_t f40 = f1 >> (uint32_t)40U; + uint64_t f01 = f010; + uint64_t f111 = f110; + uint64_t f2 = f20; + uint64_t f3 = f30; + uint64_t f41 = f40; + e[0U] = f01; + e[1U] = f111; + e[2U] = f2; + e[3U] = f3; + e[4U] = f41; + uint64_t b = (uint64_t)0x1000000U; + uint64_t mask = b; + uint64_t f4 = e[4U]; + e[4U] = f4 | mask; + uint64_t *r1 = pre0; + uint64_t *r5 = pre0 + (uint32_t)5U; + uint64_t r0 = r1[0U]; + uint64_t r11 = r1[1U]; + uint64_t r2 = r1[2U]; + uint64_t r3 = r1[3U]; + 
uint64_t r4 = r1[4U]; + uint64_t r51 = r5[1U]; + uint64_t r52 = r5[2U]; + uint64_t r53 = r5[3U]; + uint64_t r54 = r5[4U]; + uint64_t f10 = e[0U]; + uint64_t f11 = e[1U]; + uint64_t f12 = e[2U]; + uint64_t f13 = e[3U]; + uint64_t f14 = e[4U]; + uint64_t a0 = acc0[0U]; + uint64_t a1 = acc0[1U]; + uint64_t a2 = acc0[2U]; + uint64_t a3 = acc0[3U]; + uint64_t a4 = acc0[4U]; + uint64_t a01 = a0 + f10; + uint64_t a11 = a1 + f11; + uint64_t a21 = a2 + f12; + uint64_t a31 = a3 + f13; + uint64_t a41 = a4 + f14; + uint64_t a02 = r0 * a01; + uint64_t a12 = r11 * a01; + uint64_t a22 = r2 * a01; + uint64_t a32 = r3 * a01; + uint64_t a42 = r4 * a01; + uint64_t a03 = a02 + r54 * a11; + uint64_t a13 = a12 + r0 * a11; + uint64_t a23 = a22 + r11 * a11; + uint64_t a33 = a32 + r2 * a11; + uint64_t a43 = a42 + r3 * a11; + uint64_t a04 = a03 + r53 * a21; + uint64_t a14 = a13 + r54 * a21; + uint64_t a24 = a23 + r0 * a21; + uint64_t a34 = a33 + r11 * a21; + uint64_t a44 = a43 + r2 * a21; + uint64_t a05 = a04 + r52 * a31; + uint64_t a15 = a14 + r53 * a31; + uint64_t a25 = a24 + r54 * a31; + uint64_t a35 = a34 + r0 * a31; + uint64_t a45 = a44 + r11 * a31; + uint64_t a06 = a05 + r51 * a41; + uint64_t a16 = a15 + r52 * a41; + uint64_t a26 = a25 + r53 * a41; + uint64_t a36 = a35 + r54 * a41; + uint64_t a46 = a45 + r0 * a41; + uint64_t t0 = a06; + uint64_t t1 = a16; + uint64_t t2 = a26; + uint64_t t3 = a36; + uint64_t t4 = a46; + uint64_t mask26 = (uint64_t)0x3ffffffU; + uint64_t z0 = t0 >> (uint32_t)26U; + uint64_t z1 = t3 >> (uint32_t)26U; + uint64_t x0 = t0 & mask26; + uint64_t x3 = t3 & mask26; + uint64_t x1 = t1 + z0; + uint64_t x4 = t4 + z1; + uint64_t z01 = x1 >> (uint32_t)26U; + uint64_t z11 = x4 >> (uint32_t)26U; + uint64_t t = z11 << (uint32_t)2U; + uint64_t z12 = z11 + t; + uint64_t x11 = x1 & mask26; + uint64_t x41 = x4 & mask26; + uint64_t x2 = t2 + z01; + uint64_t x01 = x0 + z12; + uint64_t z02 = x2 >> (uint32_t)26U; + uint64_t z13 = x01 >> (uint32_t)26U; + uint64_t x21 = x2 & 
mask26; + uint64_t x02 = x01 & mask26; + uint64_t x31 = x3 + z02; + uint64_t x12 = x11 + z13; + uint64_t z03 = x31 >> (uint32_t)26U; + uint64_t x32 = x31 & mask26; + uint64_t x42 = x41 + z03; + uint64_t o0 = x02; + uint64_t o1 = x12; + uint64_t o2 = x21; + uint64_t o3 = x32; + uint64_t o4 = x42; + acc0[0U] = o0; + acc0[1U] = o1; + acc0[2U] = o2; + acc0[3U] = o3; + acc0[4U] = o4; + } + if (rem1 > (uint32_t)0U) { + uint8_t *last = blocks + nb * (uint32_t)16U; + uint64_t e[5U] = { 0U }; + uint8_t tmp[16U] = { 0U }; + memcpy(tmp, last, rem1 * sizeof(uint8_t)); + uint64_t u0 = load64_le(tmp); + uint64_t lo = u0; + uint64_t u = load64_le(tmp + (uint32_t)8U); + uint64_t hi = u; + uint64_t f0 = lo; + uint64_t f1 = hi; + uint64_t f010 = f0 & (uint64_t)0x3ffffffU; + uint64_t f110 = f0 >> (uint32_t)26U & (uint64_t)0x3ffffffU; + uint64_t f20 = f0 >> (uint32_t)52U | (f1 & (uint64_t)0x3fffU) << (uint32_t)12U; + uint64_t f30 = f1 >> (uint32_t)14U & (uint64_t)0x3ffffffU; + uint64_t f40 = f1 >> (uint32_t)40U; + uint64_t f01 = f010; + uint64_t f111 = f110; + uint64_t f2 = f20; + uint64_t f3 = f30; + uint64_t f4 = f40; + e[0U] = f01; + e[1U] = f111; + e[2U] = f2; + e[3U] = f3; + e[4U] = f4; + uint64_t b = (uint64_t)1U << rem1 * (uint32_t)8U % (uint32_t)26U; + uint64_t mask = b; + uint64_t fi = e[rem1 * (uint32_t)8U / (uint32_t)26U]; + e[rem1 * (uint32_t)8U / (uint32_t)26U] = fi | mask; + uint64_t *r1 = pre0; + uint64_t *r5 = pre0 + (uint32_t)5U; + uint64_t r0 = r1[0U]; + uint64_t r11 = r1[1U]; + uint64_t r2 = r1[2U]; + uint64_t r3 = r1[3U]; + uint64_t r4 = r1[4U]; + uint64_t r51 = r5[1U]; + uint64_t r52 = r5[2U]; + uint64_t r53 = r5[3U]; + uint64_t r54 = r5[4U]; + uint64_t f10 = e[0U]; + uint64_t f11 = e[1U]; + uint64_t f12 = e[2U]; + uint64_t f13 = e[3U]; + uint64_t f14 = e[4U]; + uint64_t a0 = acc0[0U]; + uint64_t a1 = acc0[1U]; + uint64_t a2 = acc0[2U]; + uint64_t a3 = acc0[3U]; + uint64_t a4 = acc0[4U]; + uint64_t a01 = a0 + f10; + uint64_t a11 = a1 + f11; + uint64_t a21 = a2 + 
f12; + uint64_t a31 = a3 + f13; + uint64_t a41 = a4 + f14; + uint64_t a02 = r0 * a01; + uint64_t a12 = r11 * a01; + uint64_t a22 = r2 * a01; + uint64_t a32 = r3 * a01; + uint64_t a42 = r4 * a01; + uint64_t a03 = a02 + r54 * a11; + uint64_t a13 = a12 + r0 * a11; + uint64_t a23 = a22 + r11 * a11; + uint64_t a33 = a32 + r2 * a11; + uint64_t a43 = a42 + r3 * a11; + uint64_t a04 = a03 + r53 * a21; + uint64_t a14 = a13 + r54 * a21; + uint64_t a24 = a23 + r0 * a21; + uint64_t a34 = a33 + r11 * a21; + uint64_t a44 = a43 + r2 * a21; + uint64_t a05 = a04 + r52 * a31; + uint64_t a15 = a14 + r53 * a31; + uint64_t a25 = a24 + r54 * a31; + uint64_t a35 = a34 + r0 * a31; + uint64_t a45 = a44 + r11 * a31; + uint64_t a06 = a05 + r51 * a41; + uint64_t a16 = a15 + r52 * a41; + uint64_t a26 = a25 + r53 * a41; + uint64_t a36 = a35 + r54 * a41; + uint64_t a46 = a45 + r0 * a41; + uint64_t t0 = a06; + uint64_t t1 = a16; + uint64_t t2 = a26; + uint64_t t3 = a36; + uint64_t t4 = a46; + uint64_t mask26 = (uint64_t)0x3ffffffU; + uint64_t z0 = t0 >> (uint32_t)26U; + uint64_t z1 = t3 >> (uint32_t)26U; + uint64_t x0 = t0 & mask26; + uint64_t x3 = t3 & mask26; + uint64_t x1 = t1 + z0; + uint64_t x4 = t4 + z1; + uint64_t z01 = x1 >> (uint32_t)26U; + uint64_t z11 = x4 >> (uint32_t)26U; + uint64_t t = z11 << (uint32_t)2U; + uint64_t z12 = z11 + t; + uint64_t x11 = x1 & mask26; + uint64_t x41 = x4 & mask26; + uint64_t x2 = t2 + z01; + uint64_t x01 = x0 + z12; + uint64_t z02 = x2 >> (uint32_t)26U; + uint64_t z13 = x01 >> (uint32_t)26U; + uint64_t x21 = x2 & mask26; + uint64_t x02 = x01 & mask26; + uint64_t x31 = x3 + z02; + uint64_t x12 = x11 + z13; + uint64_t z03 = x31 >> (uint32_t)26U; + uint64_t x32 = x31 & mask26; + uint64_t x42 = x41 + z03; + uint64_t o0 = x02; + uint64_t o1 = x12; + uint64_t o2 = x21; + uint64_t o3 = x32; + uint64_t o4 = x42; + acc0[0U] = o0; + acc0[1U] = o1; + acc0[2U] = o2; + acc0[3U] = o3; + acc0[4U] = o4; + } + uint8_t tmp[16U] = { 0U }; + memcpy(tmp, rem, r * 
sizeof(uint8_t)); + if (r > (uint32_t)0U) { + uint64_t *pre = ctx + (uint32_t)5U; + uint64_t *acc = ctx; + uint64_t e[5U] = { 0U }; + uint64_t u0 = load64_le(tmp); + uint64_t lo = u0; + uint64_t u = load64_le(tmp + (uint32_t)8U); + uint64_t hi = u; + uint64_t f0 = lo; + uint64_t f1 = hi; + uint64_t f010 = f0 & (uint64_t)0x3ffffffU; + uint64_t f110 = f0 >> (uint32_t)26U & (uint64_t)0x3ffffffU; + uint64_t f20 = f0 >> (uint32_t)52U | (f1 & (uint64_t)0x3fffU) << (uint32_t)12U; + uint64_t f30 = f1 >> (uint32_t)14U & (uint64_t)0x3ffffffU; + uint64_t f40 = f1 >> (uint32_t)40U; + uint64_t f01 = f010; + uint64_t f111 = f110; + uint64_t f2 = f20; + uint64_t f3 = f30; + uint64_t f41 = f40; + e[0U] = f01; + e[1U] = f111; + e[2U] = f2; + e[3U] = f3; + e[4U] = f41; + uint64_t b = (uint64_t)0x1000000U; + uint64_t mask = b; + uint64_t f4 = e[4U]; + e[4U] = f4 | mask; + uint64_t *r1 = pre; + uint64_t *r5 = pre + (uint32_t)5U; + uint64_t r0 = r1[0U]; + uint64_t r11 = r1[1U]; + uint64_t r2 = r1[2U]; + uint64_t r3 = r1[3U]; + uint64_t r4 = r1[4U]; + uint64_t r51 = r5[1U]; + uint64_t r52 = r5[2U]; + uint64_t r53 = r5[3U]; + uint64_t r54 = r5[4U]; + uint64_t f10 = e[0U]; + uint64_t f11 = e[1U]; + uint64_t f12 = e[2U]; + uint64_t f13 = e[3U]; + uint64_t f14 = e[4U]; + uint64_t a0 = acc[0U]; + uint64_t a1 = acc[1U]; + uint64_t a2 = acc[2U]; + uint64_t a3 = acc[3U]; + uint64_t a4 = acc[4U]; + uint64_t a01 = a0 + f10; + uint64_t a11 = a1 + f11; + uint64_t a21 = a2 + f12; + uint64_t a31 = a3 + f13; + uint64_t a41 = a4 + f14; + uint64_t a02 = r0 * a01; + uint64_t a12 = r11 * a01; + uint64_t a22 = r2 * a01; + uint64_t a32 = r3 * a01; + uint64_t a42 = r4 * a01; + uint64_t a03 = a02 + r54 * a11; + uint64_t a13 = a12 + r0 * a11; + uint64_t a23 = a22 + r11 * a11; + uint64_t a33 = a32 + r2 * a11; + uint64_t a43 = a42 + r3 * a11; + uint64_t a04 = a03 + r53 * a21; + uint64_t a14 = a13 + r54 * a21; + uint64_t a24 = a23 + r0 * a21; + uint64_t a34 = a33 + r11 * a21; + uint64_t a44 = a43 + r2 * a21; + 
uint64_t a05 = a04 + r52 * a31; + uint64_t a15 = a14 + r53 * a31; + uint64_t a25 = a24 + r54 * a31; + uint64_t a35 = a34 + r0 * a31; + uint64_t a45 = a44 + r11 * a31; + uint64_t a06 = a05 + r51 * a41; + uint64_t a16 = a15 + r52 * a41; + uint64_t a26 = a25 + r53 * a41; + uint64_t a36 = a35 + r54 * a41; + uint64_t a46 = a45 + r0 * a41; + uint64_t t0 = a06; + uint64_t t1 = a16; + uint64_t t2 = a26; + uint64_t t3 = a36; + uint64_t t4 = a46; + uint64_t mask26 = (uint64_t)0x3ffffffU; + uint64_t z0 = t0 >> (uint32_t)26U; + uint64_t z1 = t3 >> (uint32_t)26U; + uint64_t x0 = t0 & mask26; + uint64_t x3 = t3 & mask26; + uint64_t x1 = t1 + z0; + uint64_t x4 = t4 + z1; + uint64_t z01 = x1 >> (uint32_t)26U; + uint64_t z11 = x4 >> (uint32_t)26U; + uint64_t t = z11 << (uint32_t)2U; + uint64_t z12 = z11 + t; + uint64_t x11 = x1 & mask26; + uint64_t x41 = x4 & mask26; + uint64_t x2 = t2 + z01; + uint64_t x01 = x0 + z12; + uint64_t z02 = x2 >> (uint32_t)26U; + uint64_t z13 = x01 >> (uint32_t)26U; + uint64_t x21 = x2 & mask26; + uint64_t x02 = x01 & mask26; + uint64_t x31 = x3 + z02; + uint64_t x12 = x11 + z13; + uint64_t z03 = x31 >> (uint32_t)26U; + uint64_t x32 = x31 & mask26; + uint64_t x42 = x41 + z03; + uint64_t o0 = x02; + uint64_t o1 = x12; + uint64_t o2 = x21; + uint64_t o3 = x32; + uint64_t o4 = x42; + acc[0U] = o0; + acc[1U] = o1; + acc[2U] = o2; + acc[3U] = o3; + acc[4U] = o4; + return; + } +} + +static inline void +poly1305_do_32( + uint8_t *k, + uint32_t aadlen, + uint8_t *aad, + uint32_t mlen, + uint8_t *m, + uint8_t *out) +{ + uint64_t ctx[25U] = { 0U }; + uint8_t block[16U] = { 0U }; + Hacl_Poly1305_32_poly1305_init(ctx, k); + if (aadlen != (uint32_t)0U) { + poly1305_padded_32(ctx, aadlen, aad); + } + if (mlen != (uint32_t)0U) { + poly1305_padded_32(ctx, mlen, m); + } + store64_le(block, (uint64_t)aadlen); + store64_le(block + (uint32_t)8U, (uint64_t)mlen); + uint64_t *pre = ctx + (uint32_t)5U; + uint64_t *acc = ctx; + uint64_t e[5U] = { 0U }; + uint64_t u0 = 
load64_le(block); + uint64_t lo = u0; + uint64_t u = load64_le(block + (uint32_t)8U); + uint64_t hi = u; + uint64_t f0 = lo; + uint64_t f1 = hi; + uint64_t f010 = f0 & (uint64_t)0x3ffffffU; + uint64_t f110 = f0 >> (uint32_t)26U & (uint64_t)0x3ffffffU; + uint64_t f20 = f0 >> (uint32_t)52U | (f1 & (uint64_t)0x3fffU) << (uint32_t)12U; + uint64_t f30 = f1 >> (uint32_t)14U & (uint64_t)0x3ffffffU; + uint64_t f40 = f1 >> (uint32_t)40U; + uint64_t f01 = f010; + uint64_t f111 = f110; + uint64_t f2 = f20; + uint64_t f3 = f30; + uint64_t f41 = f40; + e[0U] = f01; + e[1U] = f111; + e[2U] = f2; + e[3U] = f3; + e[4U] = f41; + uint64_t b = (uint64_t)0x1000000U; + uint64_t mask = b; + uint64_t f4 = e[4U]; + e[4U] = f4 | mask; + uint64_t *r = pre; + uint64_t *r5 = pre + (uint32_t)5U; + uint64_t r0 = r[0U]; + uint64_t r1 = r[1U]; + uint64_t r2 = r[2U]; + uint64_t r3 = r[3U]; + uint64_t r4 = r[4U]; + uint64_t r51 = r5[1U]; + uint64_t r52 = r5[2U]; + uint64_t r53 = r5[3U]; + uint64_t r54 = r5[4U]; + uint64_t f10 = e[0U]; + uint64_t f11 = e[1U]; + uint64_t f12 = e[2U]; + uint64_t f13 = e[3U]; + uint64_t f14 = e[4U]; + uint64_t a0 = acc[0U]; + uint64_t a1 = acc[1U]; + uint64_t a2 = acc[2U]; + uint64_t a3 = acc[3U]; + uint64_t a4 = acc[4U]; + uint64_t a01 = a0 + f10; + uint64_t a11 = a1 + f11; + uint64_t a21 = a2 + f12; + uint64_t a31 = a3 + f13; + uint64_t a41 = a4 + f14; + uint64_t a02 = r0 * a01; + uint64_t a12 = r1 * a01; + uint64_t a22 = r2 * a01; + uint64_t a32 = r3 * a01; + uint64_t a42 = r4 * a01; + uint64_t a03 = a02 + r54 * a11; + uint64_t a13 = a12 + r0 * a11; + uint64_t a23 = a22 + r1 * a11; + uint64_t a33 = a32 + r2 * a11; + uint64_t a43 = a42 + r3 * a11; + uint64_t a04 = a03 + r53 * a21; + uint64_t a14 = a13 + r54 * a21; + uint64_t a24 = a23 + r0 * a21; + uint64_t a34 = a33 + r1 * a21; + uint64_t a44 = a43 + r2 * a21; + uint64_t a05 = a04 + r52 * a31; + uint64_t a15 = a14 + r53 * a31; + uint64_t a25 = a24 + r54 * a31; + uint64_t a35 = a34 + r0 * a31; + uint64_t a45 = a44 + 
r1 * a31; + uint64_t a06 = a05 + r51 * a41; + uint64_t a16 = a15 + r52 * a41; + uint64_t a26 = a25 + r53 * a41; + uint64_t a36 = a35 + r54 * a41; + uint64_t a46 = a45 + r0 * a41; + uint64_t t0 = a06; + uint64_t t1 = a16; + uint64_t t2 = a26; + uint64_t t3 = a36; + uint64_t t4 = a46; + uint64_t mask26 = (uint64_t)0x3ffffffU; + uint64_t z0 = t0 >> (uint32_t)26U; + uint64_t z1 = t3 >> (uint32_t)26U; + uint64_t x0 = t0 & mask26; + uint64_t x3 = t3 & mask26; + uint64_t x1 = t1 + z0; + uint64_t x4 = t4 + z1; + uint64_t z01 = x1 >> (uint32_t)26U; + uint64_t z11 = x4 >> (uint32_t)26U; + uint64_t t = z11 << (uint32_t)2U; + uint64_t z12 = z11 + t; + uint64_t x11 = x1 & mask26; + uint64_t x41 = x4 & mask26; + uint64_t x2 = t2 + z01; + uint64_t x01 = x0 + z12; + uint64_t z02 = x2 >> (uint32_t)26U; + uint64_t z13 = x01 >> (uint32_t)26U; + uint64_t x21 = x2 & mask26; + uint64_t x02 = x01 & mask26; + uint64_t x31 = x3 + z02; + uint64_t x12 = x11 + z13; + uint64_t z03 = x31 >> (uint32_t)26U; + uint64_t x32 = x31 & mask26; + uint64_t x42 = x41 + z03; + uint64_t o0 = x02; + uint64_t o1 = x12; + uint64_t o2 = x21; + uint64_t o3 = x32; + uint64_t o4 = x42; + acc[0U] = o0; + acc[1U] = o1; + acc[2U] = o2; + acc[3U] = o3; + acc[4U] = o4; + Hacl_Poly1305_32_poly1305_finish(out, k, ctx); +} + /* AEAD encryption entry point. + * + * Steps visible in the body: + * 1. Hacl_Chacha20_chacha20_encrypt turns the mlen bytes at m into cipher, + * using k, n and a final argument of 1. + * 2. The same routine is run over a zeroed 64-byte buffer (tmp) with a + * final argument of 0; the result is used as the key for poly1305_do_32. + * 3. poly1305_do_32 receives that key, aad/aadlen and cipher/mlen, with + * mac as its output pointer. + * + * NOTE(review): the final argument is presumably the initial ChaCha20 + * block counter, and the required sizes of k, n and mac are not visible + * here - confirm against Hacl_Chacha20.h and the callers. + */ +void +Hacl_Chacha20Poly1305_32_aead_encrypt( + uint8_t *k, + uint8_t *n, + uint32_t aadlen, + uint8_t *aad, + uint32_t mlen, + uint8_t *m, + uint8_t *cipher, + uint8_t *mac) +{ + Hacl_Chacha20_chacha20_encrypt(mlen, cipher, m, k, n, (uint32_t)1U); /* cipher receives the processed message */ + uint8_t tmp[64U] = { 0U }; + Hacl_Chacha20_chacha20_encrypt((uint32_t)64U, tmp, tmp, k, n, (uint32_t)0U); /* in place over 64 zero bytes */ + uint8_t *key = tmp; + poly1305_do_32(key, aadlen, aad, mlen, cipher, mac); /* NOTE(review): tmp still holds the derived key on return; no wipe is visible here */ +} + /* AEAD decryption entry point; the tag is checked before any output is + * produced. + * + * The tag is recomputed over aad and cipher, using a key derived exactly + * as in Hacl_Chacha20Poly1305_32_aead_encrypt, and compared with the 16 + * bytes at mac. + * Returns 0 after writing mlen decrypted bytes to m when all 16 bytes + * match; returns 1 and leaves m unwritten otherwise. + */ +uint32_t +Hacl_Chacha20Poly1305_32_aead_decrypt( + uint8_t *k, + uint8_t *n, + uint32_t aadlen, + uint8_t *aad, + uint32_t mlen, + uint8_t *m, + uint8_t *cipher, + uint8_t *mac) +{ + uint8_t computed_mac[16U] = { 0U }; + uint8_t tmp[64U] = { 0U }; +
Hacl_Chacha20_chacha20_encrypt((uint32_t)64U, tmp, tmp, k, n, (uint32_t)0U); /* tmp: 64 zero bytes run through the cipher, used below as the MAC key */ + uint8_t *key = tmp; + poly1305_do_32(key, aadlen, aad, mlen, cipher, computed_mac); /* expected tag */ + uint8_t res = (uint8_t)255U; /* AND-accumulator over the 16 per-byte comparison masks */ + KRML_MAYBE_FOR16(i, + (uint32_t)0U, + (uint32_t)16U, + (uint32_t)1U, + uint8_t uu____0 = FStar_UInt8_eq_mask(computed_mac[i], mac[i]); + res = uu____0 & res;); /* the loop body has no early exit; presumably eq_mask yields 255 on equality and 0 otherwise - confirm in Hacl_Krmllib.h */ + uint8_t z = res; + if (z == (uint8_t)255U) { + Hacl_Chacha20_chacha20_encrypt(mlen, m, cipher, k, n, (uint32_t)1U); + return (uint32_t)0U; + } + return (uint32_t)1U; /* tag mismatch: m has not been written */ +} diff --git a/security/nss/lib/freebl/verified/Hacl_Chacha20Poly1305_32.h b/security/nss/lib/freebl/verified/Hacl_Chacha20Poly1305_32.h new file mode 100644 index 0000000000..a3d23d6d3a --- /dev/null +++ b/security/nss/lib/freebl/verified/Hacl_Chacha20Poly1305_32.h @@ -0,0 +1,67 @@ +/* MIT License + * + * Copyright (c) 2016-2020 INRIA, CMU and Microsoft Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE.
+ */ + +#ifndef __Hacl_Chacha20Poly1305_32_H +#define __Hacl_Chacha20Poly1305_32_H + +#if defined(__cplusplus) +extern "C" { +#endif + +#include <string.h> +#include "krml/internal/types.h" +#include "krml/lowstar_endianness.h" +#include "krml/internal/target.h" + +#include "Hacl_Poly1305_32.h" +#include "Hacl_Krmllib.h" +#include "Hacl_Chacha20.h" + /* Encrypts the mlen bytes at m into cipher and writes the authentication + * tag to mac. aad/aadlen are only passed to the tag computation. + * k and n are the key and nonce handed to the ChaCha20 routine. + * NOTE(review): required buffer sizes for k, n and mac are not stated in + * this header - confirm against the callers. + */ +void +Hacl_Chacha20Poly1305_32_aead_encrypt( + uint8_t *k, + uint8_t *n, + uint32_t aadlen, + uint8_t *aad, + uint32_t mlen, + uint8_t *m, + uint8_t *cipher, + uint8_t *mac); + /* Recomputes the tag over aad and cipher and compares it with the 16 + * bytes at mac. Returns 0 and writes mlen bytes to m on a match; returns + * 1 and leaves m unwritten otherwise. + */ +uint32_t +Hacl_Chacha20Poly1305_32_aead_decrypt( + uint8_t *k, + uint8_t *n, + uint32_t aadlen, + uint8_t *aad, + uint32_t mlen, + uint8_t *m, + uint8_t *cipher, + uint8_t *mac); + +#if defined(__cplusplus) +} +#endif + +#define __Hacl_Chacha20Poly1305_32_H_DEFINED +#endif diff --git a/security/nss/lib/freebl/verified/Hacl_Chacha20_Vec128.c b/security/nss/lib/freebl/verified/Hacl_Chacha20_Vec128.c new file mode 100644 index 0000000000..697a36bb31 --- /dev/null +++ b/security/nss/lib/freebl/verified/Hacl_Chacha20_Vec128.c @@ -0,0 +1,819 @@ +/* MIT License + * + * Copyright (c) 2016-2020 INRIA, CMU and Microsoft Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "Hacl_Chacha20_Vec128.h" + +#include "internal/Hacl_Chacha20.h" +#include "libintvector.h" /* One ChaCha double round, applied in place to the 16 vectors at st. + * + * The body is eight unrolled quarter-rounds. A quarter-round on the + * index group (a, b, c, d) performs, with the add32 / xor / + * rotate_left32 vector intrinsics: + * a += b; d = rotl(d ^ a, 16); c += d; b = rotl(b ^ c, 12); + * a += b; d = rotl(d ^ a, 8); c += d; b = rotl(b ^ c, 7); + * Index groups, in order: (0,4,8,12) (1,5,9,13) (2,6,10,14) (3,7,11,15), + * then (0,5,10,15) (1,6,11,12) (2,7,8,13) (3,4,9,14). + */ +static inline void +double_round_128(Lib_IntVector_Intrinsics_vec128 *st) +{ + st[0U] = Lib_IntVector_Intrinsics_vec128_add32(st[0U], st[4U]); + Lib_IntVector_Intrinsics_vec128 std = Lib_IntVector_Intrinsics_vec128_xor(st[12U], st[0U]); + st[12U] = Lib_IntVector_Intrinsics_vec128_rotate_left32(std, (uint32_t)16U); + st[8U] = Lib_IntVector_Intrinsics_vec128_add32(st[8U], st[12U]); + Lib_IntVector_Intrinsics_vec128 std0 = Lib_IntVector_Intrinsics_vec128_xor(st[4U], st[8U]); + st[4U] = Lib_IntVector_Intrinsics_vec128_rotate_left32(std0, (uint32_t)12U); + st[0U] = Lib_IntVector_Intrinsics_vec128_add32(st[0U], st[4U]); + Lib_IntVector_Intrinsics_vec128 std1 = Lib_IntVector_Intrinsics_vec128_xor(st[12U], st[0U]); + st[12U] = Lib_IntVector_Intrinsics_vec128_rotate_left32(std1, (uint32_t)8U); + st[8U] = Lib_IntVector_Intrinsics_vec128_add32(st[8U], st[12U]); + Lib_IntVector_Intrinsics_vec128 std2 = Lib_IntVector_Intrinsics_vec128_xor(st[4U], st[8U]); + st[4U] = Lib_IntVector_Intrinsics_vec128_rotate_left32(std2, (uint32_t)7U); + st[1U] = Lib_IntVector_Intrinsics_vec128_add32(st[1U], st[5U]); + Lib_IntVector_Intrinsics_vec128 std3 = Lib_IntVector_Intrinsics_vec128_xor(st[13U], st[1U]); + st[13U] = Lib_IntVector_Intrinsics_vec128_rotate_left32(std3, (uint32_t)16U); + st[9U] = Lib_IntVector_Intrinsics_vec128_add32(st[9U], st[13U]); + Lib_IntVector_Intrinsics_vec128 std4 = Lib_IntVector_Intrinsics_vec128_xor(st[5U], st[9U]); + st[5U] = Lib_IntVector_Intrinsics_vec128_rotate_left32(std4, (uint32_t)12U); + st[1U] = Lib_IntVector_Intrinsics_vec128_add32(st[1U], st[5U]); + Lib_IntVector_Intrinsics_vec128
std5 = Lib_IntVector_Intrinsics_vec128_xor(st[13U], st[1U]); + st[13U] = Lib_IntVector_Intrinsics_vec128_rotate_left32(std5, (uint32_t)8U); + st[9U] = Lib_IntVector_Intrinsics_vec128_add32(st[9U], st[13U]); + Lib_IntVector_Intrinsics_vec128 std6 = Lib_IntVector_Intrinsics_vec128_xor(st[5U], st[9U]); + st[5U] = Lib_IntVector_Intrinsics_vec128_rotate_left32(std6, (uint32_t)7U); + st[2U] = Lib_IntVector_Intrinsics_vec128_add32(st[2U], st[6U]); + Lib_IntVector_Intrinsics_vec128 std7 = Lib_IntVector_Intrinsics_vec128_xor(st[14U], st[2U]); + st[14U] = Lib_IntVector_Intrinsics_vec128_rotate_left32(std7, (uint32_t)16U); + st[10U] = Lib_IntVector_Intrinsics_vec128_add32(st[10U], st[14U]); + Lib_IntVector_Intrinsics_vec128 std8 = Lib_IntVector_Intrinsics_vec128_xor(st[6U], st[10U]); + st[6U] = Lib_IntVector_Intrinsics_vec128_rotate_left32(std8, (uint32_t)12U); + st[2U] = Lib_IntVector_Intrinsics_vec128_add32(st[2U], st[6U]); + Lib_IntVector_Intrinsics_vec128 std9 = Lib_IntVector_Intrinsics_vec128_xor(st[14U], st[2U]); + st[14U] = Lib_IntVector_Intrinsics_vec128_rotate_left32(std9, (uint32_t)8U); + st[10U] = Lib_IntVector_Intrinsics_vec128_add32(st[10U], st[14U]); + Lib_IntVector_Intrinsics_vec128 std10 = Lib_IntVector_Intrinsics_vec128_xor(st[6U], st[10U]); + st[6U] = Lib_IntVector_Intrinsics_vec128_rotate_left32(std10, (uint32_t)7U); + st[3U] = Lib_IntVector_Intrinsics_vec128_add32(st[3U], st[7U]); + Lib_IntVector_Intrinsics_vec128 std11 = Lib_IntVector_Intrinsics_vec128_xor(st[15U], st[3U]); + st[15U] = Lib_IntVector_Intrinsics_vec128_rotate_left32(std11, (uint32_t)16U); + st[11U] = Lib_IntVector_Intrinsics_vec128_add32(st[11U], st[15U]); + Lib_IntVector_Intrinsics_vec128 std12 = Lib_IntVector_Intrinsics_vec128_xor(st[7U], st[11U]); + st[7U] = Lib_IntVector_Intrinsics_vec128_rotate_left32(std12, (uint32_t)12U); + st[3U] = Lib_IntVector_Intrinsics_vec128_add32(st[3U], st[7U]); + Lib_IntVector_Intrinsics_vec128 std13 = Lib_IntVector_Intrinsics_vec128_xor(st[15U], st[3U]); +
st[15U] = Lib_IntVector_Intrinsics_vec128_rotate_left32(std13, (uint32_t)8U); + st[11U] = Lib_IntVector_Intrinsics_vec128_add32(st[11U], st[15U]); + Lib_IntVector_Intrinsics_vec128 std14 = Lib_IntVector_Intrinsics_vec128_xor(st[7U], st[11U]); + st[7U] = Lib_IntVector_Intrinsics_vec128_rotate_left32(std14, (uint32_t)7U); /* the four groups with b = a + 4 are done; the remaining four quarter-rounds use (0,5,10,15) (1,6,11,12) (2,7,8,13) (3,4,9,14) */ + st[0U] = Lib_IntVector_Intrinsics_vec128_add32(st[0U], st[5U]); + Lib_IntVector_Intrinsics_vec128 std15 = Lib_IntVector_Intrinsics_vec128_xor(st[15U], st[0U]); + st[15U] = Lib_IntVector_Intrinsics_vec128_rotate_left32(std15, (uint32_t)16U); + st[10U] = Lib_IntVector_Intrinsics_vec128_add32(st[10U], st[15U]); + Lib_IntVector_Intrinsics_vec128 std16 = Lib_IntVector_Intrinsics_vec128_xor(st[5U], st[10U]); + st[5U] = Lib_IntVector_Intrinsics_vec128_rotate_left32(std16, (uint32_t)12U); + st[0U] = Lib_IntVector_Intrinsics_vec128_add32(st[0U], st[5U]); + Lib_IntVector_Intrinsics_vec128 std17 = Lib_IntVector_Intrinsics_vec128_xor(st[15U], st[0U]); + st[15U] = Lib_IntVector_Intrinsics_vec128_rotate_left32(std17, (uint32_t)8U); + st[10U] = Lib_IntVector_Intrinsics_vec128_add32(st[10U], st[15U]); + Lib_IntVector_Intrinsics_vec128 std18 = Lib_IntVector_Intrinsics_vec128_xor(st[5U], st[10U]); + st[5U] = Lib_IntVector_Intrinsics_vec128_rotate_left32(std18, (uint32_t)7U); + st[1U] = Lib_IntVector_Intrinsics_vec128_add32(st[1U], st[6U]); + Lib_IntVector_Intrinsics_vec128 std19 = Lib_IntVector_Intrinsics_vec128_xor(st[12U], st[1U]); + st[12U] = Lib_IntVector_Intrinsics_vec128_rotate_left32(std19, (uint32_t)16U); + st[11U] = Lib_IntVector_Intrinsics_vec128_add32(st[11U], st[12U]); + Lib_IntVector_Intrinsics_vec128 std20 = Lib_IntVector_Intrinsics_vec128_xor(st[6U], st[11U]); + st[6U] = Lib_IntVector_Intrinsics_vec128_rotate_left32(std20, (uint32_t)12U); + st[1U] = Lib_IntVector_Intrinsics_vec128_add32(st[1U], st[6U]); + Lib_IntVector_Intrinsics_vec128 std21 = Lib_IntVector_Intrinsics_vec128_xor(st[12U], st[1U]); + st[12U] =
Lib_IntVector_Intrinsics_vec128_rotate_left32(std21, (uint32_t)8U); + st[11U] = Lib_IntVector_Intrinsics_vec128_add32(st[11U], st[12U]); + Lib_IntVector_Intrinsics_vec128 std22 = Lib_IntVector_Intrinsics_vec128_xor(st[6U], st[11U]); + st[6U] = Lib_IntVector_Intrinsics_vec128_rotate_left32(std22, (uint32_t)7U); + st[2U] = Lib_IntVector_Intrinsics_vec128_add32(st[2U], st[7U]); + Lib_IntVector_Intrinsics_vec128 std23 = Lib_IntVector_Intrinsics_vec128_xor(st[13U], st[2U]); + st[13U] = Lib_IntVector_Intrinsics_vec128_rotate_left32(std23, (uint32_t)16U); + st[8U] = Lib_IntVector_Intrinsics_vec128_add32(st[8U], st[13U]); + Lib_IntVector_Intrinsics_vec128 std24 = Lib_IntVector_Intrinsics_vec128_xor(st[7U], st[8U]); + st[7U] = Lib_IntVector_Intrinsics_vec128_rotate_left32(std24, (uint32_t)12U); + st[2U] = Lib_IntVector_Intrinsics_vec128_add32(st[2U], st[7U]); + Lib_IntVector_Intrinsics_vec128 std25 = Lib_IntVector_Intrinsics_vec128_xor(st[13U], st[2U]); + st[13U] = Lib_IntVector_Intrinsics_vec128_rotate_left32(std25, (uint32_t)8U); + st[8U] = Lib_IntVector_Intrinsics_vec128_add32(st[8U], st[13U]); + Lib_IntVector_Intrinsics_vec128 std26 = Lib_IntVector_Intrinsics_vec128_xor(st[7U], st[8U]); + st[7U] = Lib_IntVector_Intrinsics_vec128_rotate_left32(std26, (uint32_t)7U); + st[3U] = Lib_IntVector_Intrinsics_vec128_add32(st[3U], st[4U]); + Lib_IntVector_Intrinsics_vec128 std27 = Lib_IntVector_Intrinsics_vec128_xor(st[14U], st[3U]); + st[14U] = Lib_IntVector_Intrinsics_vec128_rotate_left32(std27, (uint32_t)16U); + st[9U] = Lib_IntVector_Intrinsics_vec128_add32(st[9U], st[14U]); + Lib_IntVector_Intrinsics_vec128 std28 = Lib_IntVector_Intrinsics_vec128_xor(st[4U], st[9U]); + st[4U] = Lib_IntVector_Intrinsics_vec128_rotate_left32(std28, (uint32_t)12U); + st[3U] = Lib_IntVector_Intrinsics_vec128_add32(st[3U], st[4U]); + Lib_IntVector_Intrinsics_vec128 std29 = Lib_IntVector_Intrinsics_vec128_xor(st[14U], st[3U]); + st[14U] = Lib_IntVector_Intrinsics_vec128_rotate_left32(std29,
(uint32_t)8U); + st[9U] = Lib_IntVector_Intrinsics_vec128_add32(st[9U], st[14U]); + Lib_IntVector_Intrinsics_vec128 std30 = Lib_IntVector_Intrinsics_vec128_xor(st[4U], st[9U]); + st[4U] = Lib_IntVector_Intrinsics_vec128_rotate_left32(std30, (uint32_t)7U); +} + /* Derives the 16-vector output k from the state ctx and a block index ctr. + * + * k: 16-vector output buffer; overwritten with a copy of ctx first. + * ctx: 16-vector input state; only read here. + * ctr: multiplied by 4 and added to word 12 before the rounds. + * + * NOTE(review): the factor 4 presumably reflects one block per 32-bit + * lane (four blocks per call) - confirm against libintvector.h. + */ +static inline void +chacha20_core_128( + Lib_IntVector_Intrinsics_vec128 *k, + Lib_IntVector_Intrinsics_vec128 *ctx, + uint32_t ctr) +{ + memcpy(k, ctx, (uint32_t)16U * sizeof(Lib_IntVector_Intrinsics_vec128)); + uint32_t ctr_u32 = (uint32_t)4U * ctr; + Lib_IntVector_Intrinsics_vec128 cv = Lib_IntVector_Intrinsics_vec128_load32(ctr_u32); /* presumably ctr_u32 in every lane - confirm */ + k[12U] = Lib_IntVector_Intrinsics_vec128_add32(k[12U], cv); + double_round_128(k); + double_round_128(k); + double_round_128(k); + double_round_128(k); + double_round_128(k); + double_round_128(k); + double_round_128(k); + double_round_128(k); + double_round_128(k); + double_round_128(k); /* ten double rounds in total */ + KRML_MAYBE_FOR16(i, + (uint32_t)0U, + (uint32_t)16U, + (uint32_t)1U, + Lib_IntVector_Intrinsics_vec128 *os = k; + Lib_IntVector_Intrinsics_vec128 x = Lib_IntVector_Intrinsics_vec128_add32(k[i], ctx[i]); + os[i] = x;); + k[12U] = Lib_IntVector_Intrinsics_vec128_add32(k[12U], cv); /* ctx[12] was added above without cv, so cv is added once more here */ +} + /* Builds the initial 16-vector state ctx from key k, nonce n and counter + * ctr. + * + * A scalar 16-word state ctx1 is assembled first: + * words 0..3 from Hacl_Impl_Chacha20_Vec_chacha20_constants, + * words 4..11 from 32 bytes of k, read with load32_le, + * word 12 ctr, + * words 13..15 from 12 bytes of n, read with load32_le. + * Each word is then turned into a vector with vec128_load32, and word 12 + * gets the vector load32s(0, 1, 2, 3) added. + * + * NOTE(review): load32 presumably replicates its argument into all four + * lanes and load32s sets the lanes individually, so that lane j starts + * at counter ctr + j - confirm against libintvector.h. + */ +static inline void +chacha20_init_128(Lib_IntVector_Intrinsics_vec128 *ctx, uint8_t *k, uint8_t *n, uint32_t ctr) +{ + uint32_t ctx1[16U] = { 0U }; + KRML_MAYBE_FOR4(i, + (uint32_t)0U, + (uint32_t)4U, + (uint32_t)1U, + uint32_t *os = ctx1; + uint32_t x = Hacl_Impl_Chacha20_Vec_chacha20_constants[i]; + os[i] = x;); + KRML_MAYBE_FOR8(i, + (uint32_t)0U, + (uint32_t)8U, + (uint32_t)1U, + uint32_t *os = ctx1 + (uint32_t)4U; + uint8_t *bj = k + i * (uint32_t)4U; + uint32_t u = load32_le(bj); + uint32_t r = u; + uint32_t x = r; + os[i] = x;); + ctx1[12U] = ctr; + KRML_MAYBE_FOR3(i, + (uint32_t)0U, + (uint32_t)3U, + (uint32_t)1U, + uint32_t *os = ctx1 + (uint32_t)13U; + uint8_t *bj = n + i * (uint32_t)4U; + uint32_t u = load32_le(bj); + uint32_t r = u; + uint32_t x = r; + os[i] = x;); + KRML_MAYBE_FOR16(i, +
(uint32_t)0U, + (uint32_t)16U, + (uint32_t)1U, + Lib_IntVector_Intrinsics_vec128 *os = ctx; + uint32_t x = ctx1[i]; + Lib_IntVector_Intrinsics_vec128 x0 = Lib_IntVector_Intrinsics_vec128_load32(x); + os[i] = x0;); + Lib_IntVector_Intrinsics_vec128 + ctr1 = + Lib_IntVector_Intrinsics_vec128_load32s((uint32_t)0U, + (uint32_t)1U, + (uint32_t)2U, + (uint32_t)3U); + Lib_IntVector_Intrinsics_vec128 c12 = ctx[12U]; + ctx[12U] = Lib_IntVector_Intrinsics_vec128_add32(c12, ctr1); /* adds the load32s(0, 1, 2, 3) vector to word 12; presumably one offset per lane - confirm */ +} + /* Processes len bytes from text into out; key, n and ctr are passed to + * chacha20_init_128 to build the state. + * + * Input is handled in 256-byte chunks: nb = len / 256 full chunks, then + * a tail of rem = len % 256 bytes. For chunk index i, + * chacha20_core_128(k, ctx, i) produces 16 vectors, which are regrouped + * with the 32-bit and 64-bit interleave intrinsics and XORed against + * sixteen 16-byte pieces of the input. + * The tail is copied into a zeroed 256-byte stack buffer, processed the + * same way with chunk index nb, and only rem bytes are copied to out. + * + * NOTE(review): the interleave sequence is presumably a 4x4 transpose + * that makes each k[j] hold 16 consecutive keystream bytes - confirm + * against libintvector.h. + */ +void +Hacl_Chacha20_Vec128_chacha20_encrypt_128( + uint32_t len, + uint8_t *out, + uint8_t *text, + uint8_t *key, + uint8_t *n, + uint32_t ctr) +{ + KRML_PRE_ALIGN(16) + Lib_IntVector_Intrinsics_vec128 ctx[16U] KRML_POST_ALIGN(16) = { 0U }; + chacha20_init_128(ctx, key, n, ctr); + uint32_t rem = len % (uint32_t)256U; + uint32_t nb = len / (uint32_t)256U; + uint32_t rem1 = len % (uint32_t)256U; /* same value as rem */ + for (uint32_t i = (uint32_t)0U; i < nb; i++) { + uint8_t *uu____0 = out + i * (uint32_t)256U; + uint8_t *uu____1 = text + i * (uint32_t)256U; + KRML_PRE_ALIGN(16) + Lib_IntVector_Intrinsics_vec128 k[16U] KRML_POST_ALIGN(16) = { 0U }; + chacha20_core_128(k, ctx, i); + Lib_IntVector_Intrinsics_vec128 st0 = k[0U]; + Lib_IntVector_Intrinsics_vec128 st1 = k[1U]; + Lib_IntVector_Intrinsics_vec128 st2 = k[2U]; + Lib_IntVector_Intrinsics_vec128 st3 = k[3U]; + Lib_IntVector_Intrinsics_vec128 st4 = k[4U]; + Lib_IntVector_Intrinsics_vec128 st5 = k[5U]; + Lib_IntVector_Intrinsics_vec128 st6 = k[6U]; + Lib_IntVector_Intrinsics_vec128 st7 = k[7U]; + Lib_IntVector_Intrinsics_vec128 st8 = k[8U]; + Lib_IntVector_Intrinsics_vec128 st9 = k[9U]; + Lib_IntVector_Intrinsics_vec128 st10 = k[10U]; + Lib_IntVector_Intrinsics_vec128 st11 = k[11U]; + Lib_IntVector_Intrinsics_vec128 st12 = k[12U]; + Lib_IntVector_Intrinsics_vec128 st13 = k[13U]; + Lib_IntVector_Intrinsics_vec128 st14 = k[14U]; + Lib_IntVector_Intrinsics_vec128 st15 = k[15U]; + Lib_IntVector_Intrinsics_vec128 + v0_ =
Lib_IntVector_Intrinsics_vec128_interleave_low32(st0, st1); + Lib_IntVector_Intrinsics_vec128 + v1_ = Lib_IntVector_Intrinsics_vec128_interleave_high32(st0, st1); + Lib_IntVector_Intrinsics_vec128 + v2_ = Lib_IntVector_Intrinsics_vec128_interleave_low32(st2, st3); + Lib_IntVector_Intrinsics_vec128 + v3_ = Lib_IntVector_Intrinsics_vec128_interleave_high32(st2, st3); + Lib_IntVector_Intrinsics_vec128 + v0__ = Lib_IntVector_Intrinsics_vec128_interleave_low64(v0_, v2_); + Lib_IntVector_Intrinsics_vec128 + v1__ = Lib_IntVector_Intrinsics_vec128_interleave_high64(v0_, v2_); + Lib_IntVector_Intrinsics_vec128 + v2__ = Lib_IntVector_Intrinsics_vec128_interleave_low64(v1_, v3_); + Lib_IntVector_Intrinsics_vec128 + v3__ = Lib_IntVector_Intrinsics_vec128_interleave_high64(v1_, v3_); + Lib_IntVector_Intrinsics_vec128 v0__0 = v0__; + Lib_IntVector_Intrinsics_vec128 v2__0 = v2__; + Lib_IntVector_Intrinsics_vec128 v1__0 = v1__; + Lib_IntVector_Intrinsics_vec128 v3__0 = v3__; + Lib_IntVector_Intrinsics_vec128 v0 = v0__0; + Lib_IntVector_Intrinsics_vec128 v1 = v1__0; + Lib_IntVector_Intrinsics_vec128 v2 = v2__0; + Lib_IntVector_Intrinsics_vec128 v3 = v3__0; + Lib_IntVector_Intrinsics_vec128 + v0_0 = Lib_IntVector_Intrinsics_vec128_interleave_low32(st4, st5); + Lib_IntVector_Intrinsics_vec128 + v1_0 = Lib_IntVector_Intrinsics_vec128_interleave_high32(st4, st5); + Lib_IntVector_Intrinsics_vec128 + v2_0 = Lib_IntVector_Intrinsics_vec128_interleave_low32(st6, st7); + Lib_IntVector_Intrinsics_vec128 + v3_0 = Lib_IntVector_Intrinsics_vec128_interleave_high32(st6, st7); + Lib_IntVector_Intrinsics_vec128 + v0__1 = Lib_IntVector_Intrinsics_vec128_interleave_low64(v0_0, v2_0); + Lib_IntVector_Intrinsics_vec128 + v1__1 = Lib_IntVector_Intrinsics_vec128_interleave_high64(v0_0, v2_0); + Lib_IntVector_Intrinsics_vec128 + v2__1 = Lib_IntVector_Intrinsics_vec128_interleave_low64(v1_0, v3_0); + Lib_IntVector_Intrinsics_vec128 + v3__1 = Lib_IntVector_Intrinsics_vec128_interleave_high64(v1_0, v3_0); +
Lib_IntVector_Intrinsics_vec128 v0__2 = v0__1; + Lib_IntVector_Intrinsics_vec128 v2__2 = v2__1; + Lib_IntVector_Intrinsics_vec128 v1__2 = v1__1; + Lib_IntVector_Intrinsics_vec128 v3__2 = v3__1; + Lib_IntVector_Intrinsics_vec128 v4 = v0__2; + Lib_IntVector_Intrinsics_vec128 v5 = v1__2; + Lib_IntVector_Intrinsics_vec128 v6 = v2__2; + Lib_IntVector_Intrinsics_vec128 v7 = v3__2; + Lib_IntVector_Intrinsics_vec128 + v0_1 = Lib_IntVector_Intrinsics_vec128_interleave_low32(st8, st9); + Lib_IntVector_Intrinsics_vec128 + v1_1 = Lib_IntVector_Intrinsics_vec128_interleave_high32(st8, st9); + Lib_IntVector_Intrinsics_vec128 + v2_1 = Lib_IntVector_Intrinsics_vec128_interleave_low32(st10, st11); + Lib_IntVector_Intrinsics_vec128 + v3_1 = Lib_IntVector_Intrinsics_vec128_interleave_high32(st10, st11); + Lib_IntVector_Intrinsics_vec128 + v0__3 = Lib_IntVector_Intrinsics_vec128_interleave_low64(v0_1, v2_1); + Lib_IntVector_Intrinsics_vec128 + v1__3 = Lib_IntVector_Intrinsics_vec128_interleave_high64(v0_1, v2_1); + Lib_IntVector_Intrinsics_vec128 + v2__3 = Lib_IntVector_Intrinsics_vec128_interleave_low64(v1_1, v3_1); + Lib_IntVector_Intrinsics_vec128 + v3__3 = Lib_IntVector_Intrinsics_vec128_interleave_high64(v1_1, v3_1); + Lib_IntVector_Intrinsics_vec128 v0__4 = v0__3; + Lib_IntVector_Intrinsics_vec128 v2__4 = v2__3; + Lib_IntVector_Intrinsics_vec128 v1__4 = v1__3; + Lib_IntVector_Intrinsics_vec128 v3__4 = v3__3; + Lib_IntVector_Intrinsics_vec128 v8 = v0__4; + Lib_IntVector_Intrinsics_vec128 v9 = v1__4; + Lib_IntVector_Intrinsics_vec128 v10 = v2__4; + Lib_IntVector_Intrinsics_vec128 v11 = v3__4; + Lib_IntVector_Intrinsics_vec128 + v0_2 = Lib_IntVector_Intrinsics_vec128_interleave_low32(st12, st13); + Lib_IntVector_Intrinsics_vec128 + v1_2 = Lib_IntVector_Intrinsics_vec128_interleave_high32(st12, st13); + Lib_IntVector_Intrinsics_vec128 + v2_2 = Lib_IntVector_Intrinsics_vec128_interleave_low32(st14, st15); + Lib_IntVector_Intrinsics_vec128 + v3_2 =
Lib_IntVector_Intrinsics_vec128_interleave_high32(st14, st15); + Lib_IntVector_Intrinsics_vec128 + v0__5 = Lib_IntVector_Intrinsics_vec128_interleave_low64(v0_2, v2_2); + Lib_IntVector_Intrinsics_vec128 + v1__5 = Lib_IntVector_Intrinsics_vec128_interleave_high64(v0_2, v2_2); + Lib_IntVector_Intrinsics_vec128 + v2__5 = Lib_IntVector_Intrinsics_vec128_interleave_low64(v1_2, v3_2); + Lib_IntVector_Intrinsics_vec128 + v3__5 = Lib_IntVector_Intrinsics_vec128_interleave_high64(v1_2, v3_2); + Lib_IntVector_Intrinsics_vec128 v0__6 = v0__5; + Lib_IntVector_Intrinsics_vec128 v2__6 = v2__5; + Lib_IntVector_Intrinsics_vec128 v1__6 = v1__5; + Lib_IntVector_Intrinsics_vec128 v3__6 = v3__5; + Lib_IntVector_Intrinsics_vec128 v12 = v0__6; + Lib_IntVector_Intrinsics_vec128 v13 = v1__6; + Lib_IntVector_Intrinsics_vec128 v14 = v2__6; + Lib_IntVector_Intrinsics_vec128 v15 = v3__6; + k[0U] = v0; + k[1U] = v4; + k[2U] = v8; + k[3U] = v12; + k[4U] = v1; + k[5U] = v5; + k[6U] = v9; + k[7U] = v13; + k[8U] = v2; + k[9U] = v6; + k[10U] = v10; + k[11U] = v14; + k[12U] = v3; + k[13U] = v7; + k[14U] = v11; + k[15U] = v15; /* regrouped vectors stored in the order v0, v4, v8, v12, v1, v5, ... */ + KRML_MAYBE_FOR16(i0, + (uint32_t)0U, + (uint32_t)16U, + (uint32_t)1U, + Lib_IntVector_Intrinsics_vec128 + x = Lib_IntVector_Intrinsics_vec128_load32_le(uu____1 + i0 * (uint32_t)16U); + Lib_IntVector_Intrinsics_vec128 y = Lib_IntVector_Intrinsics_vec128_xor(x, k[i0]); + Lib_IntVector_Intrinsics_vec128_store32_le(uu____0 + i0 * (uint32_t)16U, y);); + } + if (rem1 > (uint32_t)0U) { + uint8_t *uu____2 = out + nb * (uint32_t)256U; + uint8_t *uu____3 = text + nb * (uint32_t)256U; + uint8_t plain[256U] = { 0U }; + memcpy(plain, uu____3, rem * sizeof(uint8_t)); /* tail bytes; the rest of plain stays zero */ + KRML_PRE_ALIGN(16) + Lib_IntVector_Intrinsics_vec128 k[16U] KRML_POST_ALIGN(16) = { 0U }; + chacha20_core_128(k, ctx, nb); + Lib_IntVector_Intrinsics_vec128 st0 = k[0U]; + Lib_IntVector_Intrinsics_vec128 st1 = k[1U]; + Lib_IntVector_Intrinsics_vec128 st2 = k[2U]; + Lib_IntVector_Intrinsics_vec128 st3 = k[3U]; +
Lib_IntVector_Intrinsics_vec128 st4 = k[4U]; + Lib_IntVector_Intrinsics_vec128 st5 = k[5U]; + Lib_IntVector_Intrinsics_vec128 st6 = k[6U]; + Lib_IntVector_Intrinsics_vec128 st7 = k[7U]; + Lib_IntVector_Intrinsics_vec128 st8 = k[8U]; + Lib_IntVector_Intrinsics_vec128 st9 = k[9U]; + Lib_IntVector_Intrinsics_vec128 st10 = k[10U]; + Lib_IntVector_Intrinsics_vec128 st11 = k[11U]; + Lib_IntVector_Intrinsics_vec128 st12 = k[12U]; + Lib_IntVector_Intrinsics_vec128 st13 = k[13U]; + Lib_IntVector_Intrinsics_vec128 st14 = k[14U]; + Lib_IntVector_Intrinsics_vec128 st15 = k[15U]; + Lib_IntVector_Intrinsics_vec128 + v0_ = Lib_IntVector_Intrinsics_vec128_interleave_low32(st0, st1); + Lib_IntVector_Intrinsics_vec128 + v1_ = Lib_IntVector_Intrinsics_vec128_interleave_high32(st0, st1); + Lib_IntVector_Intrinsics_vec128 + v2_ = Lib_IntVector_Intrinsics_vec128_interleave_low32(st2, st3); + Lib_IntVector_Intrinsics_vec128 + v3_ = Lib_IntVector_Intrinsics_vec128_interleave_high32(st2, st3); + Lib_IntVector_Intrinsics_vec128 + v0__ = Lib_IntVector_Intrinsics_vec128_interleave_low64(v0_, v2_); + Lib_IntVector_Intrinsics_vec128 + v1__ = Lib_IntVector_Intrinsics_vec128_interleave_high64(v0_, v2_); + Lib_IntVector_Intrinsics_vec128 + v2__ = Lib_IntVector_Intrinsics_vec128_interleave_low64(v1_, v3_); + Lib_IntVector_Intrinsics_vec128 + v3__ = Lib_IntVector_Intrinsics_vec128_interleave_high64(v1_, v3_); + Lib_IntVector_Intrinsics_vec128 v0__0 = v0__; + Lib_IntVector_Intrinsics_vec128 v2__0 = v2__; + Lib_IntVector_Intrinsics_vec128 v1__0 = v1__; + Lib_IntVector_Intrinsics_vec128 v3__0 = v3__; + Lib_IntVector_Intrinsics_vec128 v0 = v0__0; + Lib_IntVector_Intrinsics_vec128 v1 = v1__0; + Lib_IntVector_Intrinsics_vec128 v2 = v2__0; + Lib_IntVector_Intrinsics_vec128 v3 = v3__0; + Lib_IntVector_Intrinsics_vec128 + v0_0 = Lib_IntVector_Intrinsics_vec128_interleave_low32(st4, st5); + Lib_IntVector_Intrinsics_vec128 + v1_0 = Lib_IntVector_Intrinsics_vec128_interleave_high32(st4, st5); +
Lib_IntVector_Intrinsics_vec128 + v2_0 = Lib_IntVector_Intrinsics_vec128_interleave_low32(st6, st7); + Lib_IntVector_Intrinsics_vec128 + v3_0 = Lib_IntVector_Intrinsics_vec128_interleave_high32(st6, st7); + Lib_IntVector_Intrinsics_vec128 + v0__1 = Lib_IntVector_Intrinsics_vec128_interleave_low64(v0_0, v2_0); + Lib_IntVector_Intrinsics_vec128 + v1__1 = Lib_IntVector_Intrinsics_vec128_interleave_high64(v0_0, v2_0); + Lib_IntVector_Intrinsics_vec128 + v2__1 = Lib_IntVector_Intrinsics_vec128_interleave_low64(v1_0, v3_0); + Lib_IntVector_Intrinsics_vec128 + v3__1 = Lib_IntVector_Intrinsics_vec128_interleave_high64(v1_0, v3_0); + Lib_IntVector_Intrinsics_vec128 v0__2 = v0__1; + Lib_IntVector_Intrinsics_vec128 v2__2 = v2__1; + Lib_IntVector_Intrinsics_vec128 v1__2 = v1__1; + Lib_IntVector_Intrinsics_vec128 v3__2 = v3__1; + Lib_IntVector_Intrinsics_vec128 v4 = v0__2; + Lib_IntVector_Intrinsics_vec128 v5 = v1__2; + Lib_IntVector_Intrinsics_vec128 v6 = v2__2; + Lib_IntVector_Intrinsics_vec128 v7 = v3__2; + Lib_IntVector_Intrinsics_vec128 + v0_1 = Lib_IntVector_Intrinsics_vec128_interleave_low32(st8, st9); + Lib_IntVector_Intrinsics_vec128 + v1_1 = Lib_IntVector_Intrinsics_vec128_interleave_high32(st8, st9); + Lib_IntVector_Intrinsics_vec128 + v2_1 = Lib_IntVector_Intrinsics_vec128_interleave_low32(st10, st11); + Lib_IntVector_Intrinsics_vec128 + v3_1 = Lib_IntVector_Intrinsics_vec128_interleave_high32(st10, st11); + Lib_IntVector_Intrinsics_vec128 + v0__3 = Lib_IntVector_Intrinsics_vec128_interleave_low64(v0_1, v2_1); + Lib_IntVector_Intrinsics_vec128 + v1__3 = Lib_IntVector_Intrinsics_vec128_interleave_high64(v0_1, v2_1); + Lib_IntVector_Intrinsics_vec128 + v2__3 = Lib_IntVector_Intrinsics_vec128_interleave_low64(v1_1, v3_1); + Lib_IntVector_Intrinsics_vec128 + v3__3 = Lib_IntVector_Intrinsics_vec128_interleave_high64(v1_1, v3_1); + Lib_IntVector_Intrinsics_vec128 v0__4 = v0__3; + Lib_IntVector_Intrinsics_vec128 v2__4 = v2__3; + Lib_IntVector_Intrinsics_vec128 v1__4 =
v1__3; + Lib_IntVector_Intrinsics_vec128 v3__4 = v3__3; + Lib_IntVector_Intrinsics_vec128 v8 = v0__4; + Lib_IntVector_Intrinsics_vec128 v9 = v1__4; + Lib_IntVector_Intrinsics_vec128 v10 = v2__4; + Lib_IntVector_Intrinsics_vec128 v11 = v3__4; + Lib_IntVector_Intrinsics_vec128 + v0_2 = Lib_IntVector_Intrinsics_vec128_interleave_low32(st12, st13); + Lib_IntVector_Intrinsics_vec128 + v1_2 = Lib_IntVector_Intrinsics_vec128_interleave_high32(st12, st13); + Lib_IntVector_Intrinsics_vec128 + v2_2 = Lib_IntVector_Intrinsics_vec128_interleave_low32(st14, st15); + Lib_IntVector_Intrinsics_vec128 + v3_2 = Lib_IntVector_Intrinsics_vec128_interleave_high32(st14, st15); + Lib_IntVector_Intrinsics_vec128 + v0__5 = Lib_IntVector_Intrinsics_vec128_interleave_low64(v0_2, v2_2); + Lib_IntVector_Intrinsics_vec128 + v1__5 = Lib_IntVector_Intrinsics_vec128_interleave_high64(v0_2, v2_2); + Lib_IntVector_Intrinsics_vec128 + v2__5 = Lib_IntVector_Intrinsics_vec128_interleave_low64(v1_2, v3_2); + Lib_IntVector_Intrinsics_vec128 + v3__5 = Lib_IntVector_Intrinsics_vec128_interleave_high64(v1_2, v3_2); + Lib_IntVector_Intrinsics_vec128 v0__6 = v0__5; + Lib_IntVector_Intrinsics_vec128 v2__6 = v2__5; + Lib_IntVector_Intrinsics_vec128 v1__6 = v1__5; + Lib_IntVector_Intrinsics_vec128 v3__6 = v3__5; + Lib_IntVector_Intrinsics_vec128 v12 = v0__6; + Lib_IntVector_Intrinsics_vec128 v13 = v1__6; + Lib_IntVector_Intrinsics_vec128 v14 = v2__6; + Lib_IntVector_Intrinsics_vec128 v15 = v3__6; + k[0U] = v0; + k[1U] = v4; + k[2U] = v8; + k[3U] = v12; + k[4U] = v1; + k[5U] = v5; + k[6U] = v9; + k[7U] = v13; + k[8U] = v2; + k[9U] = v6; + k[10U] = v10; + k[11U] = v14; + k[12U] = v3; + k[13U] = v7; + k[14U] = v11; + k[15U] = v15; + KRML_MAYBE_FOR16(i, + (uint32_t)0U, + (uint32_t)16U, + (uint32_t)1U, + Lib_IntVector_Intrinsics_vec128 + x = Lib_IntVector_Intrinsics_vec128_load32_le(plain + i * (uint32_t)16U); + Lib_IntVector_Intrinsics_vec128 y = Lib_IntVector_Intrinsics_vec128_xor(x, k[i]); +
Lib_IntVector_Intrinsics_vec128_store32_le(plain + i * (uint32_t)16U, y);); + memcpy(uu____2, plain, rem * sizeof(uint8_t)); /* only the rem processed bytes reach out */ + } +} + +void +Hacl_Chacha20_Vec128_chacha20_decrypt_128( + uint32_t len, + uint8_t *out, + uint8_t *cipher, + uint8_t *key, + uint8_t *n, + uint32_t ctr) +{ + KRML_PRE_ALIGN(16) + Lib_IntVector_Intrinsics_vec128 ctx[16U] KRML_POST_ALIGN(16) = { 0U }; + chacha20_init_128(ctx, key, n, ctr); + uint32_t rem = len % (uint32_t)256U; + uint32_t nb = len / (uint32_t)256U; + uint32_t rem1 = len % (uint32_t)256U; + for (uint32_t i = (uint32_t)0U; i < nb; i++) { + uint8_t *uu____0 = out + i * (uint32_t)256U; + uint8_t *uu____1 = cipher + i * (uint32_t)256U; + KRML_PRE_ALIGN(16) + Lib_IntVector_Intrinsics_vec128 k[16U] KRML_POST_ALIGN(16) = { 0U }; + chacha20_core_128(k, ctx, i); + Lib_IntVector_Intrinsics_vec128 st0 = k[0U]; + Lib_IntVector_Intrinsics_vec128 st1 = k[1U]; + Lib_IntVector_Intrinsics_vec128 st2 = k[2U]; + Lib_IntVector_Intrinsics_vec128 st3 = k[3U]; + Lib_IntVector_Intrinsics_vec128 st4 = k[4U]; + Lib_IntVector_Intrinsics_vec128 st5 = k[5U]; + Lib_IntVector_Intrinsics_vec128 st6 = k[6U]; + Lib_IntVector_Intrinsics_vec128 st7 = k[7U]; + Lib_IntVector_Intrinsics_vec128 st8 = k[8U]; + Lib_IntVector_Intrinsics_vec128 st9 = k[9U]; + Lib_IntVector_Intrinsics_vec128 st10 = k[10U]; + Lib_IntVector_Intrinsics_vec128 st11 = k[11U]; + Lib_IntVector_Intrinsics_vec128 st12 = k[12U]; + Lib_IntVector_Intrinsics_vec128 st13 = k[13U]; + Lib_IntVector_Intrinsics_vec128 st14 = k[14U]; + Lib_IntVector_Intrinsics_vec128 st15 = k[15U]; + Lib_IntVector_Intrinsics_vec128 + v0_ = Lib_IntVector_Intrinsics_vec128_interleave_low32(st0, st1); + Lib_IntVector_Intrinsics_vec128 + v1_ = Lib_IntVector_Intrinsics_vec128_interleave_high32(st0, st1); + Lib_IntVector_Intrinsics_vec128 + v2_ = Lib_IntVector_Intrinsics_vec128_interleave_low32(st2, st3); + Lib_IntVector_Intrinsics_vec128 + v3_ = Lib_IntVector_Intrinsics_vec128_interleave_high32(st2, st3); +
Lib_IntVector_Intrinsics_vec128 + v0__ = Lib_IntVector_Intrinsics_vec128_interleave_low64(v0_, v2_); + Lib_IntVector_Intrinsics_vec128 + v1__ = Lib_IntVector_Intrinsics_vec128_interleave_high64(v0_, v2_); + Lib_IntVector_Intrinsics_vec128 + v2__ = Lib_IntVector_Intrinsics_vec128_interleave_low64(v1_, v3_); + Lib_IntVector_Intrinsics_vec128 + v3__ = Lib_IntVector_Intrinsics_vec128_interleave_high64(v1_, v3_); + Lib_IntVector_Intrinsics_vec128 v0__0 = v0__; + Lib_IntVector_Intrinsics_vec128 v2__0 = v2__; + Lib_IntVector_Intrinsics_vec128 v1__0 = v1__; + Lib_IntVector_Intrinsics_vec128 v3__0 = v3__; + Lib_IntVector_Intrinsics_vec128 v0 = v0__0; + Lib_IntVector_Intrinsics_vec128 v1 = v1__0; + Lib_IntVector_Intrinsics_vec128 v2 = v2__0; + Lib_IntVector_Intrinsics_vec128 v3 = v3__0; + Lib_IntVector_Intrinsics_vec128 + v0_0 = Lib_IntVector_Intrinsics_vec128_interleave_low32(st4, st5); + Lib_IntVector_Intrinsics_vec128 + v1_0 = Lib_IntVector_Intrinsics_vec128_interleave_high32(st4, st5); + Lib_IntVector_Intrinsics_vec128 + v2_0 = Lib_IntVector_Intrinsics_vec128_interleave_low32(st6, st7); + Lib_IntVector_Intrinsics_vec128 + v3_0 = Lib_IntVector_Intrinsics_vec128_interleave_high32(st6, st7); + Lib_IntVector_Intrinsics_vec128 + v0__1 = Lib_IntVector_Intrinsics_vec128_interleave_low64(v0_0, v2_0); + Lib_IntVector_Intrinsics_vec128 + v1__1 = Lib_IntVector_Intrinsics_vec128_interleave_high64(v0_0, v2_0); + Lib_IntVector_Intrinsics_vec128 + v2__1 = Lib_IntVector_Intrinsics_vec128_interleave_low64(v1_0, v3_0); + Lib_IntVector_Intrinsics_vec128 + v3__1 = Lib_IntVector_Intrinsics_vec128_interleave_high64(v1_0, v3_0); + Lib_IntVector_Intrinsics_vec128 v0__2 = v0__1; + Lib_IntVector_Intrinsics_vec128 v2__2 = v2__1; + Lib_IntVector_Intrinsics_vec128 v1__2 = v1__1; + Lib_IntVector_Intrinsics_vec128 v3__2 = v3__1; + Lib_IntVector_Intrinsics_vec128 v4 = v0__2; + Lib_IntVector_Intrinsics_vec128 v5 = v1__2; + Lib_IntVector_Intrinsics_vec128 v6 = v2__2; + Lib_IntVector_Intrinsics_vec128 v7 
= v3__2; + Lib_IntVector_Intrinsics_vec128 + v0_1 = Lib_IntVector_Intrinsics_vec128_interleave_low32(st8, st9); + Lib_IntVector_Intrinsics_vec128 + v1_1 = Lib_IntVector_Intrinsics_vec128_interleave_high32(st8, st9); + Lib_IntVector_Intrinsics_vec128 + v2_1 = Lib_IntVector_Intrinsics_vec128_interleave_low32(st10, st11); + Lib_IntVector_Intrinsics_vec128 + v3_1 = Lib_IntVector_Intrinsics_vec128_interleave_high32(st10, st11); + Lib_IntVector_Intrinsics_vec128 + v0__3 = Lib_IntVector_Intrinsics_vec128_interleave_low64(v0_1, v2_1); + Lib_IntVector_Intrinsics_vec128 + v1__3 = Lib_IntVector_Intrinsics_vec128_interleave_high64(v0_1, v2_1); + Lib_IntVector_Intrinsics_vec128 + v2__3 = Lib_IntVector_Intrinsics_vec128_interleave_low64(v1_1, v3_1); + Lib_IntVector_Intrinsics_vec128 + v3__3 = Lib_IntVector_Intrinsics_vec128_interleave_high64(v1_1, v3_1); + Lib_IntVector_Intrinsics_vec128 v0__4 = v0__3; + Lib_IntVector_Intrinsics_vec128 v2__4 = v2__3; + Lib_IntVector_Intrinsics_vec128 v1__4 = v1__3; + Lib_IntVector_Intrinsics_vec128 v3__4 = v3__3; + Lib_IntVector_Intrinsics_vec128 v8 = v0__4; + Lib_IntVector_Intrinsics_vec128 v9 = v1__4; + Lib_IntVector_Intrinsics_vec128 v10 = v2__4; + Lib_IntVector_Intrinsics_vec128 v11 = v3__4; + Lib_IntVector_Intrinsics_vec128 + v0_2 = Lib_IntVector_Intrinsics_vec128_interleave_low32(st12, st13); + Lib_IntVector_Intrinsics_vec128 + v1_2 = Lib_IntVector_Intrinsics_vec128_interleave_high32(st12, st13); + Lib_IntVector_Intrinsics_vec128 + v2_2 = Lib_IntVector_Intrinsics_vec128_interleave_low32(st14, st15); + Lib_IntVector_Intrinsics_vec128 + v3_2 = Lib_IntVector_Intrinsics_vec128_interleave_high32(st14, st15); + Lib_IntVector_Intrinsics_vec128 + v0__5 = Lib_IntVector_Intrinsics_vec128_interleave_low64(v0_2, v2_2); + Lib_IntVector_Intrinsics_vec128 + v1__5 = Lib_IntVector_Intrinsics_vec128_interleave_high64(v0_2, v2_2); + Lib_IntVector_Intrinsics_vec128 + v2__5 = Lib_IntVector_Intrinsics_vec128_interleave_low64(v1_2, v3_2); + 
Lib_IntVector_Intrinsics_vec128 + v3__5 = Lib_IntVector_Intrinsics_vec128_interleave_high64(v1_2, v3_2); + Lib_IntVector_Intrinsics_vec128 v0__6 = v0__5; + Lib_IntVector_Intrinsics_vec128 v2__6 = v2__5; + Lib_IntVector_Intrinsics_vec128 v1__6 = v1__5; + Lib_IntVector_Intrinsics_vec128 v3__6 = v3__5; + Lib_IntVector_Intrinsics_vec128 v12 = v0__6; + Lib_IntVector_Intrinsics_vec128 v13 = v1__6; + Lib_IntVector_Intrinsics_vec128 v14 = v2__6; + Lib_IntVector_Intrinsics_vec128 v15 = v3__6; + k[0U] = v0; + k[1U] = v4; + k[2U] = v8; + k[3U] = v12; + k[4U] = v1; + k[5U] = v5; + k[6U] = v9; + k[7U] = v13; + k[8U] = v2; + k[9U] = v6; + k[10U] = v10; + k[11U] = v14; + k[12U] = v3; + k[13U] = v7; + k[14U] = v11; + k[15U] = v15; + KRML_MAYBE_FOR16(i0, + (uint32_t)0U, + (uint32_t)16U, + (uint32_t)1U, + Lib_IntVector_Intrinsics_vec128 + x = Lib_IntVector_Intrinsics_vec128_load32_le(uu____1 + i0 * (uint32_t)16U); + Lib_IntVector_Intrinsics_vec128 y = Lib_IntVector_Intrinsics_vec128_xor(x, k[i0]); + Lib_IntVector_Intrinsics_vec128_store32_le(uu____0 + i0 * (uint32_t)16U, y);); + } + if (rem1 > (uint32_t)0U) { + uint8_t *uu____2 = out + nb * (uint32_t)256U; + uint8_t *uu____3 = cipher + nb * (uint32_t)256U; + uint8_t plain[256U] = { 0U }; + memcpy(plain, uu____3, rem * sizeof(uint8_t)); + KRML_PRE_ALIGN(16) + Lib_IntVector_Intrinsics_vec128 k[16U] KRML_POST_ALIGN(16) = { 0U }; + chacha20_core_128(k, ctx, nb); + Lib_IntVector_Intrinsics_vec128 st0 = k[0U]; + Lib_IntVector_Intrinsics_vec128 st1 = k[1U]; + Lib_IntVector_Intrinsics_vec128 st2 = k[2U]; + Lib_IntVector_Intrinsics_vec128 st3 = k[3U]; + Lib_IntVector_Intrinsics_vec128 st4 = k[4U]; + Lib_IntVector_Intrinsics_vec128 st5 = k[5U]; + Lib_IntVector_Intrinsics_vec128 st6 = k[6U]; + Lib_IntVector_Intrinsics_vec128 st7 = k[7U]; + Lib_IntVector_Intrinsics_vec128 st8 = k[8U]; + Lib_IntVector_Intrinsics_vec128 st9 = k[9U]; + Lib_IntVector_Intrinsics_vec128 st10 = k[10U]; + Lib_IntVector_Intrinsics_vec128 st11 = k[11U]; + 
Lib_IntVector_Intrinsics_vec128 st12 = k[12U]; + Lib_IntVector_Intrinsics_vec128 st13 = k[13U]; + Lib_IntVector_Intrinsics_vec128 st14 = k[14U]; + Lib_IntVector_Intrinsics_vec128 st15 = k[15U]; + Lib_IntVector_Intrinsics_vec128 + v0_ = Lib_IntVector_Intrinsics_vec128_interleave_low32(st0, st1); + Lib_IntVector_Intrinsics_vec128 + v1_ = Lib_IntVector_Intrinsics_vec128_interleave_high32(st0, st1); + Lib_IntVector_Intrinsics_vec128 + v2_ = Lib_IntVector_Intrinsics_vec128_interleave_low32(st2, st3); + Lib_IntVector_Intrinsics_vec128 + v3_ = Lib_IntVector_Intrinsics_vec128_interleave_high32(st2, st3); + Lib_IntVector_Intrinsics_vec128 + v0__ = Lib_IntVector_Intrinsics_vec128_interleave_low64(v0_, v2_); + Lib_IntVector_Intrinsics_vec128 + v1__ = Lib_IntVector_Intrinsics_vec128_interleave_high64(v0_, v2_); + Lib_IntVector_Intrinsics_vec128 + v2__ = Lib_IntVector_Intrinsics_vec128_interleave_low64(v1_, v3_); + Lib_IntVector_Intrinsics_vec128 + v3__ = Lib_IntVector_Intrinsics_vec128_interleave_high64(v1_, v3_); + Lib_IntVector_Intrinsics_vec128 v0__0 = v0__; + Lib_IntVector_Intrinsics_vec128 v2__0 = v2__; + Lib_IntVector_Intrinsics_vec128 v1__0 = v1__; + Lib_IntVector_Intrinsics_vec128 v3__0 = v3__; + Lib_IntVector_Intrinsics_vec128 v0 = v0__0; + Lib_IntVector_Intrinsics_vec128 v1 = v1__0; + Lib_IntVector_Intrinsics_vec128 v2 = v2__0; + Lib_IntVector_Intrinsics_vec128 v3 = v3__0; + Lib_IntVector_Intrinsics_vec128 + v0_0 = Lib_IntVector_Intrinsics_vec128_interleave_low32(st4, st5); + Lib_IntVector_Intrinsics_vec128 + v1_0 = Lib_IntVector_Intrinsics_vec128_interleave_high32(st4, st5); + Lib_IntVector_Intrinsics_vec128 + v2_0 = Lib_IntVector_Intrinsics_vec128_interleave_low32(st6, st7); + Lib_IntVector_Intrinsics_vec128 + v3_0 = Lib_IntVector_Intrinsics_vec128_interleave_high32(st6, st7); + Lib_IntVector_Intrinsics_vec128 + v0__1 = Lib_IntVector_Intrinsics_vec128_interleave_low64(v0_0, v2_0); + Lib_IntVector_Intrinsics_vec128 + v1__1 = 
Lib_IntVector_Intrinsics_vec128_interleave_high64(v0_0, v2_0); + Lib_IntVector_Intrinsics_vec128 + v2__1 = Lib_IntVector_Intrinsics_vec128_interleave_low64(v1_0, v3_0); + Lib_IntVector_Intrinsics_vec128 + v3__1 = Lib_IntVector_Intrinsics_vec128_interleave_high64(v1_0, v3_0); + Lib_IntVector_Intrinsics_vec128 v0__2 = v0__1; + Lib_IntVector_Intrinsics_vec128 v2__2 = v2__1; + Lib_IntVector_Intrinsics_vec128 v1__2 = v1__1; + Lib_IntVector_Intrinsics_vec128 v3__2 = v3__1; + Lib_IntVector_Intrinsics_vec128 v4 = v0__2; + Lib_IntVector_Intrinsics_vec128 v5 = v1__2; + Lib_IntVector_Intrinsics_vec128 v6 = v2__2; + Lib_IntVector_Intrinsics_vec128 v7 = v3__2; + Lib_IntVector_Intrinsics_vec128 + v0_1 = Lib_IntVector_Intrinsics_vec128_interleave_low32(st8, st9); + Lib_IntVector_Intrinsics_vec128 + v1_1 = Lib_IntVector_Intrinsics_vec128_interleave_high32(st8, st9); + Lib_IntVector_Intrinsics_vec128 + v2_1 = Lib_IntVector_Intrinsics_vec128_interleave_low32(st10, st11); + Lib_IntVector_Intrinsics_vec128 + v3_1 = Lib_IntVector_Intrinsics_vec128_interleave_high32(st10, st11); + Lib_IntVector_Intrinsics_vec128 + v0__3 = Lib_IntVector_Intrinsics_vec128_interleave_low64(v0_1, v2_1); + Lib_IntVector_Intrinsics_vec128 + v1__3 = Lib_IntVector_Intrinsics_vec128_interleave_high64(v0_1, v2_1); + Lib_IntVector_Intrinsics_vec128 + v2__3 = Lib_IntVector_Intrinsics_vec128_interleave_low64(v1_1, v3_1); + Lib_IntVector_Intrinsics_vec128 + v3__3 = Lib_IntVector_Intrinsics_vec128_interleave_high64(v1_1, v3_1); + Lib_IntVector_Intrinsics_vec128 v0__4 = v0__3; + Lib_IntVector_Intrinsics_vec128 v2__4 = v2__3; + Lib_IntVector_Intrinsics_vec128 v1__4 = v1__3; + Lib_IntVector_Intrinsics_vec128 v3__4 = v3__3; + Lib_IntVector_Intrinsics_vec128 v8 = v0__4; + Lib_IntVector_Intrinsics_vec128 v9 = v1__4; + Lib_IntVector_Intrinsics_vec128 v10 = v2__4; + Lib_IntVector_Intrinsics_vec128 v11 = v3__4; + Lib_IntVector_Intrinsics_vec128 + v0_2 = Lib_IntVector_Intrinsics_vec128_interleave_low32(st12, st13); + 
Lib_IntVector_Intrinsics_vec128 + v1_2 = Lib_IntVector_Intrinsics_vec128_interleave_high32(st12, st13); + Lib_IntVector_Intrinsics_vec128 + v2_2 = Lib_IntVector_Intrinsics_vec128_interleave_low32(st14, st15); + Lib_IntVector_Intrinsics_vec128 + v3_2 = Lib_IntVector_Intrinsics_vec128_interleave_high32(st14, st15); + Lib_IntVector_Intrinsics_vec128 + v0__5 = Lib_IntVector_Intrinsics_vec128_interleave_low64(v0_2, v2_2); + Lib_IntVector_Intrinsics_vec128 + v1__5 = Lib_IntVector_Intrinsics_vec128_interleave_high64(v0_2, v2_2); + Lib_IntVector_Intrinsics_vec128 + v2__5 = Lib_IntVector_Intrinsics_vec128_interleave_low64(v1_2, v3_2); + Lib_IntVector_Intrinsics_vec128 + v3__5 = Lib_IntVector_Intrinsics_vec128_interleave_high64(v1_2, v3_2); + Lib_IntVector_Intrinsics_vec128 v0__6 = v0__5; + Lib_IntVector_Intrinsics_vec128 v2__6 = v2__5; + Lib_IntVector_Intrinsics_vec128 v1__6 = v1__5; + Lib_IntVector_Intrinsics_vec128 v3__6 = v3__5; + Lib_IntVector_Intrinsics_vec128 v12 = v0__6; + Lib_IntVector_Intrinsics_vec128 v13 = v1__6; + Lib_IntVector_Intrinsics_vec128 v14 = v2__6; + Lib_IntVector_Intrinsics_vec128 v15 = v3__6; + k[0U] = v0; + k[1U] = v4; + k[2U] = v8; + k[3U] = v12; + k[4U] = v1; + k[5U] = v5; + k[6U] = v9; + k[7U] = v13; + k[8U] = v2; + k[9U] = v6; + k[10U] = v10; + k[11U] = v14; + k[12U] = v3; + k[13U] = v7; + k[14U] = v11; + k[15U] = v15; + KRML_MAYBE_FOR16(i, + (uint32_t)0U, + (uint32_t)16U, + (uint32_t)1U, + Lib_IntVector_Intrinsics_vec128 + x = Lib_IntVector_Intrinsics_vec128_load32_le(plain + i * (uint32_t)16U); + Lib_IntVector_Intrinsics_vec128 y = Lib_IntVector_Intrinsics_vec128_xor(x, k[i]); + Lib_IntVector_Intrinsics_vec128_store32_le(plain + i * (uint32_t)16U, y);); + memcpy(uu____2, plain, rem * sizeof(uint8_t)); + } +} diff --git a/security/nss/lib/freebl/verified/Hacl_Chacha20_Vec128.h b/security/nss/lib/freebl/verified/Hacl_Chacha20_Vec128.h new file mode 100644 index 0000000000..52b8d249f2 --- /dev/null +++ 
b/security/nss/lib/freebl/verified/Hacl_Chacha20_Vec128.h @@ -0,0 +1,61 @@ +/* MIT License + * + * Copyright (c) 2016-2020 INRIA, CMU and Microsoft Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#ifndef __Hacl_Chacha20_Vec128_H +#define __Hacl_Chacha20_Vec128_H + +#if defined(__cplusplus) +extern "C" { +#endif + +#include <string.h> +#include "krml/internal/types.h" +#include "krml/lowstar_endianness.h" +#include "krml/internal/target.h" + +#include "Hacl_Krmllib.h" + /* Hacl_Chacha20_Vec128_chacha20_encrypt_128: declaration of the 128-bit-vector ChaCha20 encryption entry point; arguments are len, out (output buffer), text (input buffer), key, n and ctr. NOTE(review): the definition is not part of this header -- presumably len is the byte length of text and out, key is 32 bytes, n is a 12-byte nonce and ctr is the initial block counter; confirm against Hacl_Chacha20_Vec128.c. */ +void +Hacl_Chacha20_Vec128_chacha20_encrypt_128( + uint32_t len, + uint8_t *out, + uint8_t *text, + uint8_t *key, + uint8_t *n, + uint32_t ctr); + /* Hacl_Chacha20_Vec128_chacha20_decrypt_128: declaration of the matching decryption entry point; same argument list, with cipher as the input buffer and out as the output buffer. NOTE(review): presumably applies the same keystream XOR as encryption -- confirm against the definition. */ +void +Hacl_Chacha20_Vec128_chacha20_decrypt_128( + uint32_t len, + uint8_t *out, + uint8_t *cipher, + uint8_t *key, + uint8_t *n, + uint32_t ctr); + +#if defined(__cplusplus) +} +#endif + +#define __Hacl_Chacha20_Vec128_H_DEFINED +#endif diff --git a/security/nss/lib/freebl/verified/Hacl_Chacha20_Vec256.c b/security/nss/lib/freebl/verified/Hacl_Chacha20_Vec256.c new file mode 100644 index 0000000000..6c3e8488eb --- /dev/null +++ b/security/nss/lib/freebl/verified/Hacl_Chacha20_Vec256.c @@ -0,0 +1,1207 @@ +/* MIT License + * + * Copyright (c) 2016-2020 INRIA, CMU and Microsoft Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "Hacl_Chacha20_Vec256.h" + +#include "internal/Hacl_Chacha20.h" +#include "libintvector.h" /* double_round_256: applies one ChaCha20 double round in place to the 16-vector state st. Four column quarter-rounds on indices (0,4,8,12), (1,5,9,13), (2,6,10,14), (3,7,11,15) are followed by four diagonal quarter-rounds on (0,5,10,15), (1,6,11,12), (2,7,8,13), (3,4,9,14). Each quarter-round is add32, xor, rotate_left32 with rotation amounts 16, 12, 8 and 7 in that order. */ +static inline void +double_round_256(Lib_IntVector_Intrinsics_vec256 *st) +{ /* column quarter-rounds */ + st[0U] = Lib_IntVector_Intrinsics_vec256_add32(st[0U], st[4U]); + Lib_IntVector_Intrinsics_vec256 std = Lib_IntVector_Intrinsics_vec256_xor(st[12U], st[0U]); + st[12U] = Lib_IntVector_Intrinsics_vec256_rotate_left32(std, (uint32_t)16U); + st[8U] = Lib_IntVector_Intrinsics_vec256_add32(st[8U], st[12U]); + Lib_IntVector_Intrinsics_vec256 std0 = Lib_IntVector_Intrinsics_vec256_xor(st[4U], st[8U]); + st[4U] = Lib_IntVector_Intrinsics_vec256_rotate_left32(std0, (uint32_t)12U); + st[0U] = Lib_IntVector_Intrinsics_vec256_add32(st[0U], st[4U]); + Lib_IntVector_Intrinsics_vec256 std1 = Lib_IntVector_Intrinsics_vec256_xor(st[12U], st[0U]); + st[12U] = Lib_IntVector_Intrinsics_vec256_rotate_left32(std1, (uint32_t)8U); + st[8U] = Lib_IntVector_Intrinsics_vec256_add32(st[8U], st[12U]); + Lib_IntVector_Intrinsics_vec256 std2 = Lib_IntVector_Intrinsics_vec256_xor(st[4U], st[8U]); + st[4U] = Lib_IntVector_Intrinsics_vec256_rotate_left32(std2, (uint32_t)7U); + st[1U] = Lib_IntVector_Intrinsics_vec256_add32(st[1U], st[5U]); + Lib_IntVector_Intrinsics_vec256 std3 = Lib_IntVector_Intrinsics_vec256_xor(st[13U], st[1U]); + st[13U] = Lib_IntVector_Intrinsics_vec256_rotate_left32(std3, (uint32_t)16U); + st[9U] = Lib_IntVector_Intrinsics_vec256_add32(st[9U], st[13U]); + Lib_IntVector_Intrinsics_vec256 std4 = Lib_IntVector_Intrinsics_vec256_xor(st[5U], st[9U]); + st[5U] = Lib_IntVector_Intrinsics_vec256_rotate_left32(std4, (uint32_t)12U); + st[1U] = Lib_IntVector_Intrinsics_vec256_add32(st[1U], st[5U]); + Lib_IntVector_Intrinsics_vec256 
std5 = Lib_IntVector_Intrinsics_vec256_xor(st[13U], st[1U]); + st[13U] = Lib_IntVector_Intrinsics_vec256_rotate_left32(std5, (uint32_t)8U); + st[9U] = Lib_IntVector_Intrinsics_vec256_add32(st[9U], st[13U]); + Lib_IntVector_Intrinsics_vec256 std6 = Lib_IntVector_Intrinsics_vec256_xor(st[5U], st[9U]); + st[5U] = Lib_IntVector_Intrinsics_vec256_rotate_left32(std6, (uint32_t)7U); + st[2U] = Lib_IntVector_Intrinsics_vec256_add32(st[2U], st[6U]); + Lib_IntVector_Intrinsics_vec256 std7 = Lib_IntVector_Intrinsics_vec256_xor(st[14U], st[2U]); + st[14U] = Lib_IntVector_Intrinsics_vec256_rotate_left32(std7, (uint32_t)16U); + st[10U] = Lib_IntVector_Intrinsics_vec256_add32(st[10U], st[14U]); + Lib_IntVector_Intrinsics_vec256 std8 = Lib_IntVector_Intrinsics_vec256_xor(st[6U], st[10U]); + st[6U] = Lib_IntVector_Intrinsics_vec256_rotate_left32(std8, (uint32_t)12U); + st[2U] = Lib_IntVector_Intrinsics_vec256_add32(st[2U], st[6U]); + Lib_IntVector_Intrinsics_vec256 std9 = Lib_IntVector_Intrinsics_vec256_xor(st[14U], st[2U]); + st[14U] = Lib_IntVector_Intrinsics_vec256_rotate_left32(std9, (uint32_t)8U); + st[10U] = Lib_IntVector_Intrinsics_vec256_add32(st[10U], st[14U]); + Lib_IntVector_Intrinsics_vec256 std10 = Lib_IntVector_Intrinsics_vec256_xor(st[6U], st[10U]); + st[6U] = Lib_IntVector_Intrinsics_vec256_rotate_left32(std10, (uint32_t)7U); + st[3U] = Lib_IntVector_Intrinsics_vec256_add32(st[3U], st[7U]); + Lib_IntVector_Intrinsics_vec256 std11 = Lib_IntVector_Intrinsics_vec256_xor(st[15U], st[3U]); + st[15U] = Lib_IntVector_Intrinsics_vec256_rotate_left32(std11, (uint32_t)16U); + st[11U] = Lib_IntVector_Intrinsics_vec256_add32(st[11U], st[15U]); + Lib_IntVector_Intrinsics_vec256 std12 = Lib_IntVector_Intrinsics_vec256_xor(st[7U], st[11U]); + st[7U] = Lib_IntVector_Intrinsics_vec256_rotate_left32(std12, (uint32_t)12U); + st[3U] = Lib_IntVector_Intrinsics_vec256_add32(st[3U], st[7U]); + Lib_IntVector_Intrinsics_vec256 std13 = Lib_IntVector_Intrinsics_vec256_xor(st[15U], st[3U]); + 
st[15U] = Lib_IntVector_Intrinsics_vec256_rotate_left32(std13, (uint32_t)8U); + st[11U] = Lib_IntVector_Intrinsics_vec256_add32(st[11U], st[15U]); + Lib_IntVector_Intrinsics_vec256 std14 = Lib_IntVector_Intrinsics_vec256_xor(st[7U], st[11U]); + st[7U] = Lib_IntVector_Intrinsics_vec256_rotate_left32(std14, (uint32_t)7U); /* diagonal quarter-rounds */ + st[0U] = Lib_IntVector_Intrinsics_vec256_add32(st[0U], st[5U]); + Lib_IntVector_Intrinsics_vec256 std15 = Lib_IntVector_Intrinsics_vec256_xor(st[15U], st[0U]); + st[15U] = Lib_IntVector_Intrinsics_vec256_rotate_left32(std15, (uint32_t)16U); + st[10U] = Lib_IntVector_Intrinsics_vec256_add32(st[10U], st[15U]); + Lib_IntVector_Intrinsics_vec256 std16 = Lib_IntVector_Intrinsics_vec256_xor(st[5U], st[10U]); + st[5U] = Lib_IntVector_Intrinsics_vec256_rotate_left32(std16, (uint32_t)12U); + st[0U] = Lib_IntVector_Intrinsics_vec256_add32(st[0U], st[5U]); + Lib_IntVector_Intrinsics_vec256 std17 = Lib_IntVector_Intrinsics_vec256_xor(st[15U], st[0U]); + st[15U] = Lib_IntVector_Intrinsics_vec256_rotate_left32(std17, (uint32_t)8U); + st[10U] = Lib_IntVector_Intrinsics_vec256_add32(st[10U], st[15U]); + Lib_IntVector_Intrinsics_vec256 std18 = Lib_IntVector_Intrinsics_vec256_xor(st[5U], st[10U]); + st[5U] = Lib_IntVector_Intrinsics_vec256_rotate_left32(std18, (uint32_t)7U); + st[1U] = Lib_IntVector_Intrinsics_vec256_add32(st[1U], st[6U]); + Lib_IntVector_Intrinsics_vec256 std19 = Lib_IntVector_Intrinsics_vec256_xor(st[12U], st[1U]); + st[12U] = Lib_IntVector_Intrinsics_vec256_rotate_left32(std19, (uint32_t)16U); + st[11U] = Lib_IntVector_Intrinsics_vec256_add32(st[11U], st[12U]); + Lib_IntVector_Intrinsics_vec256 std20 = Lib_IntVector_Intrinsics_vec256_xor(st[6U], st[11U]); + st[6U] = Lib_IntVector_Intrinsics_vec256_rotate_left32(std20, (uint32_t)12U); + st[1U] = Lib_IntVector_Intrinsics_vec256_add32(st[1U], st[6U]); + Lib_IntVector_Intrinsics_vec256 std21 = Lib_IntVector_Intrinsics_vec256_xor(st[12U], st[1U]); + st[12U] = 
Lib_IntVector_Intrinsics_vec256_rotate_left32(std21, (uint32_t)8U); + st[11U] = Lib_IntVector_Intrinsics_vec256_add32(st[11U], st[12U]); + Lib_IntVector_Intrinsics_vec256 std22 = Lib_IntVector_Intrinsics_vec256_xor(st[6U], st[11U]); + st[6U] = Lib_IntVector_Intrinsics_vec256_rotate_left32(std22, (uint32_t)7U); + st[2U] = Lib_IntVector_Intrinsics_vec256_add32(st[2U], st[7U]); + Lib_IntVector_Intrinsics_vec256 std23 = Lib_IntVector_Intrinsics_vec256_xor(st[13U], st[2U]); + st[13U] = Lib_IntVector_Intrinsics_vec256_rotate_left32(std23, (uint32_t)16U); + st[8U] = Lib_IntVector_Intrinsics_vec256_add32(st[8U], st[13U]); + Lib_IntVector_Intrinsics_vec256 std24 = Lib_IntVector_Intrinsics_vec256_xor(st[7U], st[8U]); + st[7U] = Lib_IntVector_Intrinsics_vec256_rotate_left32(std24, (uint32_t)12U); + st[2U] = Lib_IntVector_Intrinsics_vec256_add32(st[2U], st[7U]); + Lib_IntVector_Intrinsics_vec256 std25 = Lib_IntVector_Intrinsics_vec256_xor(st[13U], st[2U]); + st[13U] = Lib_IntVector_Intrinsics_vec256_rotate_left32(std25, (uint32_t)8U); + st[8U] = Lib_IntVector_Intrinsics_vec256_add32(st[8U], st[13U]); + Lib_IntVector_Intrinsics_vec256 std26 = Lib_IntVector_Intrinsics_vec256_xor(st[7U], st[8U]); + st[7U] = Lib_IntVector_Intrinsics_vec256_rotate_left32(std26, (uint32_t)7U); + st[3U] = Lib_IntVector_Intrinsics_vec256_add32(st[3U], st[4U]); + Lib_IntVector_Intrinsics_vec256 std27 = Lib_IntVector_Intrinsics_vec256_xor(st[14U], st[3U]); + st[14U] = Lib_IntVector_Intrinsics_vec256_rotate_left32(std27, (uint32_t)16U); + st[9U] = Lib_IntVector_Intrinsics_vec256_add32(st[9U], st[14U]); + Lib_IntVector_Intrinsics_vec256 std28 = Lib_IntVector_Intrinsics_vec256_xor(st[4U], st[9U]); + st[4U] = Lib_IntVector_Intrinsics_vec256_rotate_left32(std28, (uint32_t)12U); + st[3U] = Lib_IntVector_Intrinsics_vec256_add32(st[3U], st[4U]); + Lib_IntVector_Intrinsics_vec256 std29 = Lib_IntVector_Intrinsics_vec256_xor(st[14U], st[3U]); + st[14U] = Lib_IntVector_Intrinsics_vec256_rotate_left32(std29, 
(uint32_t)8U); + st[9U] = Lib_IntVector_Intrinsics_vec256_add32(st[9U], st[14U]); + Lib_IntVector_Intrinsics_vec256 std30 = Lib_IntVector_Intrinsics_vec256_xor(st[4U], st[9U]); + st[4U] = Lib_IntVector_Intrinsics_vec256_rotate_left32(std30, (uint32_t)7U); +} + /* chacha20_core_256: fills k with the post-round state for batch index ctr. k starts as a copy of the 16 vectors of ctx with the vector cv (built from 8 times ctr by vec256_load32) added to word 12; double_round_256 is applied ten times, then ctx is added back word by word. cv is added to k[12] once more at the end because ctx[12] itself does not contain it. NOTE(review): vec256_load32 presumably broadcasts its argument to every 32-bit lane -- confirm in libintvector.h. */ +static inline void +chacha20_core_256( + Lib_IntVector_Intrinsics_vec256 *k, + Lib_IntVector_Intrinsics_vec256 *ctx, + uint32_t ctr) +{ + memcpy(k, ctx, (uint32_t)16U * sizeof(Lib_IntVector_Intrinsics_vec256)); + uint32_t ctr_u32 = (uint32_t)8U * ctr; + Lib_IntVector_Intrinsics_vec256 cv = Lib_IntVector_Intrinsics_vec256_load32(ctr_u32); + k[12U] = Lib_IntVector_Intrinsics_vec256_add32(k[12U], cv); + double_round_256(k); + double_round_256(k); + double_round_256(k); + double_round_256(k); + double_round_256(k); + double_round_256(k); + double_round_256(k); + double_round_256(k); + double_round_256(k); + double_round_256(k); + KRML_MAYBE_FOR16(i, + (uint32_t)0U, + (uint32_t)16U, + (uint32_t)1U, + Lib_IntVector_Intrinsics_vec256 *os = k; + Lib_IntVector_Intrinsics_vec256 x = Lib_IntVector_Intrinsics_vec256_add32(k[i], ctx[i]); + os[i] = x;); + k[12U] = Lib_IntVector_Intrinsics_vec256_add32(k[12U], cv); +} + /* chacha20_init_256: builds the initial vector state ctx. A scalar state ctx1 is assembled from the four words of Hacl_Impl_Chacha20_Vec_chacha20_constants (words 0..3), eight little-endian 32-bit words read from k (words 4..11, 32 bytes), ctr (word 12) and three little-endian 32-bit words read from n (words 13..15, 12 bytes); each word is then turned into one vector of ctx with vec256_load32, and word 12 is finally offset by the vector built from 0..7. */ +static inline void +chacha20_init_256(Lib_IntVector_Intrinsics_vec256 *ctx, uint8_t *k, uint8_t *n, uint32_t ctr) +{ + uint32_t ctx1[16U] = { 0U }; + KRML_MAYBE_FOR4(i, + (uint32_t)0U, + (uint32_t)4U, + (uint32_t)1U, + uint32_t *os = ctx1; + uint32_t x = Hacl_Impl_Chacha20_Vec_chacha20_constants[i]; + os[i] = x;); + KRML_MAYBE_FOR8(i, + (uint32_t)0U, + (uint32_t)8U, + (uint32_t)1U, + uint32_t *os = ctx1 + (uint32_t)4U; + uint8_t *bj = k + i * (uint32_t)4U; + uint32_t u = load32_le(bj); + uint32_t r = u; + uint32_t x = r; + os[i] = x;); + ctx1[12U] = ctr; + KRML_MAYBE_FOR3(i, + (uint32_t)0U, + (uint32_t)3U, + (uint32_t)1U, + uint32_t *os = ctx1 + (uint32_t)13U; + uint8_t *bj = n + i * (uint32_t)4U; + uint32_t u = load32_le(bj); + uint32_t r = u; + uint32_t x = r; + os[i] = x;); + KRML_MAYBE_FOR16(i, + 
(uint32_t)0U, + (uint32_t)16U, + (uint32_t)1U, + Lib_IntVector_Intrinsics_vec256 *os = ctx; + uint32_t x = ctx1[i]; + Lib_IntVector_Intrinsics_vec256 x0 = Lib_IntVector_Intrinsics_vec256_load32(x); + os[i] = x0;); /* offset word 12 by the vector built from 0..7 -- presumably one distinct block counter per 32-bit lane; confirm the lane order of vec256_load32s */ + Lib_IntVector_Intrinsics_vec256 + ctr1 = + Lib_IntVector_Intrinsics_vec256_load32s((uint32_t)0U, + (uint32_t)1U, + (uint32_t)2U, + (uint32_t)3U, + (uint32_t)4U, + (uint32_t)5U, + (uint32_t)6U, + (uint32_t)7U); + Lib_IntVector_Intrinsics_vec256 c12 = ctx[12U]; + ctx[12U] = Lib_IntVector_Intrinsics_vec256_add32(c12, ctr1); +} + +void +Hacl_Chacha20_Vec256_chacha20_encrypt_256( + uint32_t len, + uint8_t *out, + uint8_t *text, + uint8_t *key, + uint8_t *n, + uint32_t ctr) +{ + KRML_PRE_ALIGN(32) + Lib_IntVector_Intrinsics_vec256 ctx[16U] KRML_POST_ALIGN(32) = { 0U }; + chacha20_init_256(ctx, key, n, ctr); + uint32_t rem = len % (uint32_t)512U; + uint32_t nb = len / (uint32_t)512U; + uint32_t rem1 = len % (uint32_t)512U; + for (uint32_t i = (uint32_t)0U; i < nb; i++) { + uint8_t *uu____0 = out + i * (uint32_t)512U; + uint8_t *uu____1 = text + i * (uint32_t)512U; + KRML_PRE_ALIGN(32) + Lib_IntVector_Intrinsics_vec256 k[16U] KRML_POST_ALIGN(32) = { 0U }; + chacha20_core_256(k, ctx, i); + Lib_IntVector_Intrinsics_vec256 st0 = k[0U]; + Lib_IntVector_Intrinsics_vec256 st1 = k[1U]; + Lib_IntVector_Intrinsics_vec256 st2 = k[2U]; + Lib_IntVector_Intrinsics_vec256 st3 = k[3U]; + Lib_IntVector_Intrinsics_vec256 st4 = k[4U]; + Lib_IntVector_Intrinsics_vec256 st5 = k[5U]; + Lib_IntVector_Intrinsics_vec256 st6 = k[6U]; + Lib_IntVector_Intrinsics_vec256 st7 = k[7U]; + Lib_IntVector_Intrinsics_vec256 st8 = k[8U]; + Lib_IntVector_Intrinsics_vec256 st9 = k[9U]; + Lib_IntVector_Intrinsics_vec256 st10 = k[10U]; + Lib_IntVector_Intrinsics_vec256 st11 = k[11U]; + Lib_IntVector_Intrinsics_vec256 st12 = k[12U]; + Lib_IntVector_Intrinsics_vec256 st13 = k[13U]; + Lib_IntVector_Intrinsics_vec256 st14 = k[14U]; + Lib_IntVector_Intrinsics_vec256 st15 = k[15U]; + 
Lib_IntVector_Intrinsics_vec256 v00 = st0; + Lib_IntVector_Intrinsics_vec256 v16 = st1; + Lib_IntVector_Intrinsics_vec256 v20 = st2; + Lib_IntVector_Intrinsics_vec256 v30 = st3; + Lib_IntVector_Intrinsics_vec256 v40 = st4; + Lib_IntVector_Intrinsics_vec256 v50 = st5; + Lib_IntVector_Intrinsics_vec256 v60 = st6; + Lib_IntVector_Intrinsics_vec256 v70 = st7; + Lib_IntVector_Intrinsics_vec256 + v0_ = Lib_IntVector_Intrinsics_vec256_interleave_low32(v00, v16); + Lib_IntVector_Intrinsics_vec256 + v1_ = Lib_IntVector_Intrinsics_vec256_interleave_high32(v00, v16); + Lib_IntVector_Intrinsics_vec256 + v2_ = Lib_IntVector_Intrinsics_vec256_interleave_low32(v20, v30); + Lib_IntVector_Intrinsics_vec256 + v3_ = Lib_IntVector_Intrinsics_vec256_interleave_high32(v20, v30); + Lib_IntVector_Intrinsics_vec256 + v4_ = Lib_IntVector_Intrinsics_vec256_interleave_low32(v40, v50); + Lib_IntVector_Intrinsics_vec256 + v5_ = Lib_IntVector_Intrinsics_vec256_interleave_high32(v40, v50); + Lib_IntVector_Intrinsics_vec256 + v6_ = Lib_IntVector_Intrinsics_vec256_interleave_low32(v60, v70); + Lib_IntVector_Intrinsics_vec256 + v7_ = Lib_IntVector_Intrinsics_vec256_interleave_high32(v60, v70); + Lib_IntVector_Intrinsics_vec256 v0_0 = v0_; + Lib_IntVector_Intrinsics_vec256 v1_0 = v1_; + Lib_IntVector_Intrinsics_vec256 v2_0 = v2_; + Lib_IntVector_Intrinsics_vec256 v3_0 = v3_; + Lib_IntVector_Intrinsics_vec256 v4_0 = v4_; + Lib_IntVector_Intrinsics_vec256 v5_0 = v5_; + Lib_IntVector_Intrinsics_vec256 v6_0 = v6_; + Lib_IntVector_Intrinsics_vec256 v7_0 = v7_; + Lib_IntVector_Intrinsics_vec256 + v0_1 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v0_0, v2_0); + Lib_IntVector_Intrinsics_vec256 + v2_1 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v0_0, v2_0); + Lib_IntVector_Intrinsics_vec256 + v1_1 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v1_0, v3_0); + Lib_IntVector_Intrinsics_vec256 + v3_1 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v1_0, v3_0); + 
Lib_IntVector_Intrinsics_vec256 + v4_1 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v4_0, v6_0); + Lib_IntVector_Intrinsics_vec256 + v6_1 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v4_0, v6_0); + Lib_IntVector_Intrinsics_vec256 + v5_1 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v5_0, v7_0); + Lib_IntVector_Intrinsics_vec256 + v7_1 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v5_0, v7_0); + Lib_IntVector_Intrinsics_vec256 v0_10 = v0_1; + Lib_IntVector_Intrinsics_vec256 v1_10 = v1_1; + Lib_IntVector_Intrinsics_vec256 v2_10 = v2_1; + Lib_IntVector_Intrinsics_vec256 v3_10 = v3_1; + Lib_IntVector_Intrinsics_vec256 v4_10 = v4_1; + Lib_IntVector_Intrinsics_vec256 v5_10 = v5_1; + Lib_IntVector_Intrinsics_vec256 v6_10 = v6_1; + Lib_IntVector_Intrinsics_vec256 v7_10 = v7_1; + Lib_IntVector_Intrinsics_vec256 + v0_2 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_10, v4_10); + Lib_IntVector_Intrinsics_vec256 + v4_2 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_10, v4_10); + Lib_IntVector_Intrinsics_vec256 + v1_2 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_10, v5_10); + Lib_IntVector_Intrinsics_vec256 + v5_2 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_10, v5_10); + Lib_IntVector_Intrinsics_vec256 + v2_2 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v2_10, v6_10); + Lib_IntVector_Intrinsics_vec256 + v6_2 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v2_10, v6_10); + Lib_IntVector_Intrinsics_vec256 + v3_2 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v3_10, v7_10); + Lib_IntVector_Intrinsics_vec256 + v7_2 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v3_10, v7_10); + Lib_IntVector_Intrinsics_vec256 v0_20 = v0_2; + Lib_IntVector_Intrinsics_vec256 v1_20 = v1_2; + Lib_IntVector_Intrinsics_vec256 v2_20 = v2_2; + Lib_IntVector_Intrinsics_vec256 v3_20 = v3_2; + Lib_IntVector_Intrinsics_vec256 v4_20 = v4_2; + Lib_IntVector_Intrinsics_vec256 v5_20 = v5_2; + Lib_IntVector_Intrinsics_vec256 v6_20 
= v6_2; + Lib_IntVector_Intrinsics_vec256 v7_20 = v7_2; + Lib_IntVector_Intrinsics_vec256 v0_3 = v0_20; + Lib_IntVector_Intrinsics_vec256 v1_3 = v1_20; + Lib_IntVector_Intrinsics_vec256 v2_3 = v2_20; + Lib_IntVector_Intrinsics_vec256 v3_3 = v3_20; + Lib_IntVector_Intrinsics_vec256 v4_3 = v4_20; + Lib_IntVector_Intrinsics_vec256 v5_3 = v5_20; + Lib_IntVector_Intrinsics_vec256 v6_3 = v6_20; + Lib_IntVector_Intrinsics_vec256 v7_3 = v7_20; + Lib_IntVector_Intrinsics_vec256 v0 = v0_3; + Lib_IntVector_Intrinsics_vec256 v1 = v2_3; + Lib_IntVector_Intrinsics_vec256 v2 = v1_3; + Lib_IntVector_Intrinsics_vec256 v3 = v3_3; + Lib_IntVector_Intrinsics_vec256 v4 = v4_3; + Lib_IntVector_Intrinsics_vec256 v5 = v6_3; + Lib_IntVector_Intrinsics_vec256 v6 = v5_3; + Lib_IntVector_Intrinsics_vec256 v7 = v7_3; + Lib_IntVector_Intrinsics_vec256 v01 = st8; + Lib_IntVector_Intrinsics_vec256 v110 = st9; + Lib_IntVector_Intrinsics_vec256 v21 = st10; + Lib_IntVector_Intrinsics_vec256 v31 = st11; + Lib_IntVector_Intrinsics_vec256 v41 = st12; + Lib_IntVector_Intrinsics_vec256 v51 = st13; + Lib_IntVector_Intrinsics_vec256 v61 = st14; + Lib_IntVector_Intrinsics_vec256 v71 = st15; + Lib_IntVector_Intrinsics_vec256 + v0_4 = Lib_IntVector_Intrinsics_vec256_interleave_low32(v01, v110); + Lib_IntVector_Intrinsics_vec256 + v1_4 = Lib_IntVector_Intrinsics_vec256_interleave_high32(v01, v110); + Lib_IntVector_Intrinsics_vec256 + v2_4 = Lib_IntVector_Intrinsics_vec256_interleave_low32(v21, v31); + Lib_IntVector_Intrinsics_vec256 + v3_4 = Lib_IntVector_Intrinsics_vec256_interleave_high32(v21, v31); + Lib_IntVector_Intrinsics_vec256 + v4_4 = Lib_IntVector_Intrinsics_vec256_interleave_low32(v41, v51); + Lib_IntVector_Intrinsics_vec256 + v5_4 = Lib_IntVector_Intrinsics_vec256_interleave_high32(v41, v51); + Lib_IntVector_Intrinsics_vec256 + v6_4 = Lib_IntVector_Intrinsics_vec256_interleave_low32(v61, v71); + Lib_IntVector_Intrinsics_vec256 + v7_4 = Lib_IntVector_Intrinsics_vec256_interleave_high32(v61, v71); + 
Lib_IntVector_Intrinsics_vec256 v0_5 = v0_4; + Lib_IntVector_Intrinsics_vec256 v1_5 = v1_4; + Lib_IntVector_Intrinsics_vec256 v2_5 = v2_4; + Lib_IntVector_Intrinsics_vec256 v3_5 = v3_4; + Lib_IntVector_Intrinsics_vec256 v4_5 = v4_4; + Lib_IntVector_Intrinsics_vec256 v5_5 = v5_4; + Lib_IntVector_Intrinsics_vec256 v6_5 = v6_4; + Lib_IntVector_Intrinsics_vec256 v7_5 = v7_4; + Lib_IntVector_Intrinsics_vec256 + v0_11 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v0_5, v2_5); + Lib_IntVector_Intrinsics_vec256 + v2_11 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v0_5, v2_5); + Lib_IntVector_Intrinsics_vec256 + v1_11 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v1_5, v3_5); + Lib_IntVector_Intrinsics_vec256 + v3_11 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v1_5, v3_5); + Lib_IntVector_Intrinsics_vec256 + v4_11 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v4_5, v6_5); + Lib_IntVector_Intrinsics_vec256 + v6_11 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v4_5, v6_5); + Lib_IntVector_Intrinsics_vec256 + v5_11 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v5_5, v7_5); + Lib_IntVector_Intrinsics_vec256 + v7_11 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v5_5, v7_5); + Lib_IntVector_Intrinsics_vec256 v0_12 = v0_11; + Lib_IntVector_Intrinsics_vec256 v1_12 = v1_11; + Lib_IntVector_Intrinsics_vec256 v2_12 = v2_11; + Lib_IntVector_Intrinsics_vec256 v3_12 = v3_11; + Lib_IntVector_Intrinsics_vec256 v4_12 = v4_11; + Lib_IntVector_Intrinsics_vec256 v5_12 = v5_11; + Lib_IntVector_Intrinsics_vec256 v6_12 = v6_11; + Lib_IntVector_Intrinsics_vec256 v7_12 = v7_11; + Lib_IntVector_Intrinsics_vec256 + v0_21 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_12, v4_12); + Lib_IntVector_Intrinsics_vec256 + v4_21 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_12, v4_12); + Lib_IntVector_Intrinsics_vec256 + v1_21 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_12, v5_12); + Lib_IntVector_Intrinsics_vec256 + v5_21 = 
Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_12, v5_12); + Lib_IntVector_Intrinsics_vec256 + v2_21 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v2_12, v6_12); + Lib_IntVector_Intrinsics_vec256 + v6_21 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v2_12, v6_12); + Lib_IntVector_Intrinsics_vec256 + v3_21 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v3_12, v7_12); + Lib_IntVector_Intrinsics_vec256 + v7_21 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v3_12, v7_12); + Lib_IntVector_Intrinsics_vec256 v0_22 = v0_21; + Lib_IntVector_Intrinsics_vec256 v1_22 = v1_21; + Lib_IntVector_Intrinsics_vec256 v2_22 = v2_21; + Lib_IntVector_Intrinsics_vec256 v3_22 = v3_21; + Lib_IntVector_Intrinsics_vec256 v4_22 = v4_21; + Lib_IntVector_Intrinsics_vec256 v5_22 = v5_21; + Lib_IntVector_Intrinsics_vec256 v6_22 = v6_21; + Lib_IntVector_Intrinsics_vec256 v7_22 = v7_21; + Lib_IntVector_Intrinsics_vec256 v0_6 = v0_22; + Lib_IntVector_Intrinsics_vec256 v1_6 = v1_22; + Lib_IntVector_Intrinsics_vec256 v2_6 = v2_22; + Lib_IntVector_Intrinsics_vec256 v3_6 = v3_22; + Lib_IntVector_Intrinsics_vec256 v4_6 = v4_22; + Lib_IntVector_Intrinsics_vec256 v5_6 = v5_22; + Lib_IntVector_Intrinsics_vec256 v6_6 = v6_22; + Lib_IntVector_Intrinsics_vec256 v7_6 = v7_22; + Lib_IntVector_Intrinsics_vec256 v8 = v0_6; + Lib_IntVector_Intrinsics_vec256 v9 = v2_6; + Lib_IntVector_Intrinsics_vec256 v10 = v1_6; + Lib_IntVector_Intrinsics_vec256 v11 = v3_6; + Lib_IntVector_Intrinsics_vec256 v12 = v4_6; + Lib_IntVector_Intrinsics_vec256 v13 = v6_6; + Lib_IntVector_Intrinsics_vec256 v14 = v5_6; + Lib_IntVector_Intrinsics_vec256 v15 = v7_6; + k[0U] = v0; + k[1U] = v8; + k[2U] = v1; + k[3U] = v9; + k[4U] = v2; + k[5U] = v10; + k[6U] = v3; + k[7U] = v11; + k[8U] = v4; + k[9U] = v12; + k[10U] = v5; + k[11U] = v13; + k[12U] = v6; + k[13U] = v14; + k[14U] = v7; + k[15U] = v15; + KRML_MAYBE_FOR16(i0, + (uint32_t)0U, + (uint32_t)16U, + (uint32_t)1U, + Lib_IntVector_Intrinsics_vec256 + x = 
Lib_IntVector_Intrinsics_vec256_load32_le(uu____1 + i0 * (uint32_t)32U); + Lib_IntVector_Intrinsics_vec256 y = Lib_IntVector_Intrinsics_vec256_xor(x, k[i0]); + Lib_IntVector_Intrinsics_vec256_store32_le(uu____0 + i0 * (uint32_t)32U, y);); + } + if (rem1 > (uint32_t)0U) { + uint8_t *uu____2 = out + nb * (uint32_t)512U; + uint8_t *uu____3 = text + nb * (uint32_t)512U; + uint8_t plain[512U] = { 0U }; + memcpy(plain, uu____3, rem * sizeof(uint8_t)); + KRML_PRE_ALIGN(32) + Lib_IntVector_Intrinsics_vec256 k[16U] KRML_POST_ALIGN(32) = { 0U }; + chacha20_core_256(k, ctx, nb); + Lib_IntVector_Intrinsics_vec256 st0 = k[0U]; + Lib_IntVector_Intrinsics_vec256 st1 = k[1U]; + Lib_IntVector_Intrinsics_vec256 st2 = k[2U]; + Lib_IntVector_Intrinsics_vec256 st3 = k[3U]; + Lib_IntVector_Intrinsics_vec256 st4 = k[4U]; + Lib_IntVector_Intrinsics_vec256 st5 = k[5U]; + Lib_IntVector_Intrinsics_vec256 st6 = k[6U]; + Lib_IntVector_Intrinsics_vec256 st7 = k[7U]; + Lib_IntVector_Intrinsics_vec256 st8 = k[8U]; + Lib_IntVector_Intrinsics_vec256 st9 = k[9U]; + Lib_IntVector_Intrinsics_vec256 st10 = k[10U]; + Lib_IntVector_Intrinsics_vec256 st11 = k[11U]; + Lib_IntVector_Intrinsics_vec256 st12 = k[12U]; + Lib_IntVector_Intrinsics_vec256 st13 = k[13U]; + Lib_IntVector_Intrinsics_vec256 st14 = k[14U]; + Lib_IntVector_Intrinsics_vec256 st15 = k[15U]; + Lib_IntVector_Intrinsics_vec256 v00 = st0; + Lib_IntVector_Intrinsics_vec256 v16 = st1; + Lib_IntVector_Intrinsics_vec256 v20 = st2; + Lib_IntVector_Intrinsics_vec256 v30 = st3; + Lib_IntVector_Intrinsics_vec256 v40 = st4; + Lib_IntVector_Intrinsics_vec256 v50 = st5; + Lib_IntVector_Intrinsics_vec256 v60 = st6; + Lib_IntVector_Intrinsics_vec256 v70 = st7; + Lib_IntVector_Intrinsics_vec256 + v0_ = Lib_IntVector_Intrinsics_vec256_interleave_low32(v00, v16); + Lib_IntVector_Intrinsics_vec256 + v1_ = Lib_IntVector_Intrinsics_vec256_interleave_high32(v00, v16); + Lib_IntVector_Intrinsics_vec256 + v2_ = Lib_IntVector_Intrinsics_vec256_interleave_low32(v20, 
v30); + Lib_IntVector_Intrinsics_vec256 + v3_ = Lib_IntVector_Intrinsics_vec256_interleave_high32(v20, v30); + Lib_IntVector_Intrinsics_vec256 + v4_ = Lib_IntVector_Intrinsics_vec256_interleave_low32(v40, v50); + Lib_IntVector_Intrinsics_vec256 + v5_ = Lib_IntVector_Intrinsics_vec256_interleave_high32(v40, v50); + Lib_IntVector_Intrinsics_vec256 + v6_ = Lib_IntVector_Intrinsics_vec256_interleave_low32(v60, v70); + Lib_IntVector_Intrinsics_vec256 + v7_ = Lib_IntVector_Intrinsics_vec256_interleave_high32(v60, v70); + Lib_IntVector_Intrinsics_vec256 v0_0 = v0_; + Lib_IntVector_Intrinsics_vec256 v1_0 = v1_; + Lib_IntVector_Intrinsics_vec256 v2_0 = v2_; + Lib_IntVector_Intrinsics_vec256 v3_0 = v3_; + Lib_IntVector_Intrinsics_vec256 v4_0 = v4_; + Lib_IntVector_Intrinsics_vec256 v5_0 = v5_; + Lib_IntVector_Intrinsics_vec256 v6_0 = v6_; + Lib_IntVector_Intrinsics_vec256 v7_0 = v7_; + Lib_IntVector_Intrinsics_vec256 + v0_1 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v0_0, v2_0); + Lib_IntVector_Intrinsics_vec256 + v2_1 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v0_0, v2_0); + Lib_IntVector_Intrinsics_vec256 + v1_1 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v1_0, v3_0); + Lib_IntVector_Intrinsics_vec256 + v3_1 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v1_0, v3_0); + Lib_IntVector_Intrinsics_vec256 + v4_1 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v4_0, v6_0); + Lib_IntVector_Intrinsics_vec256 + v6_1 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v4_0, v6_0); + Lib_IntVector_Intrinsics_vec256 + v5_1 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v5_0, v7_0); + Lib_IntVector_Intrinsics_vec256 + v7_1 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v5_0, v7_0); + Lib_IntVector_Intrinsics_vec256 v0_10 = v0_1; + Lib_IntVector_Intrinsics_vec256 v1_10 = v1_1; + Lib_IntVector_Intrinsics_vec256 v2_10 = v2_1; + Lib_IntVector_Intrinsics_vec256 v3_10 = v3_1; + Lib_IntVector_Intrinsics_vec256 v4_10 = v4_1; + 
Lib_IntVector_Intrinsics_vec256 v5_10 = v5_1; + Lib_IntVector_Intrinsics_vec256 v6_10 = v6_1; + Lib_IntVector_Intrinsics_vec256 v7_10 = v7_1; + Lib_IntVector_Intrinsics_vec256 + v0_2 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_10, v4_10); + Lib_IntVector_Intrinsics_vec256 + v4_2 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_10, v4_10); + Lib_IntVector_Intrinsics_vec256 + v1_2 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_10, v5_10); + Lib_IntVector_Intrinsics_vec256 + v5_2 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_10, v5_10); + Lib_IntVector_Intrinsics_vec256 + v2_2 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v2_10, v6_10); + Lib_IntVector_Intrinsics_vec256 + v6_2 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v2_10, v6_10); + Lib_IntVector_Intrinsics_vec256 + v3_2 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v3_10, v7_10); + Lib_IntVector_Intrinsics_vec256 + v7_2 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v3_10, v7_10); + Lib_IntVector_Intrinsics_vec256 v0_20 = v0_2; + Lib_IntVector_Intrinsics_vec256 v1_20 = v1_2; + Lib_IntVector_Intrinsics_vec256 v2_20 = v2_2; + Lib_IntVector_Intrinsics_vec256 v3_20 = v3_2; + Lib_IntVector_Intrinsics_vec256 v4_20 = v4_2; + Lib_IntVector_Intrinsics_vec256 v5_20 = v5_2; + Lib_IntVector_Intrinsics_vec256 v6_20 = v6_2; + Lib_IntVector_Intrinsics_vec256 v7_20 = v7_2; + Lib_IntVector_Intrinsics_vec256 v0_3 = v0_20; + Lib_IntVector_Intrinsics_vec256 v1_3 = v1_20; + Lib_IntVector_Intrinsics_vec256 v2_3 = v2_20; + Lib_IntVector_Intrinsics_vec256 v3_3 = v3_20; + Lib_IntVector_Intrinsics_vec256 v4_3 = v4_20; + Lib_IntVector_Intrinsics_vec256 v5_3 = v5_20; + Lib_IntVector_Intrinsics_vec256 v6_3 = v6_20; + Lib_IntVector_Intrinsics_vec256 v7_3 = v7_20; + Lib_IntVector_Intrinsics_vec256 v0 = v0_3; + Lib_IntVector_Intrinsics_vec256 v1 = v2_3; + Lib_IntVector_Intrinsics_vec256 v2 = v1_3; + Lib_IntVector_Intrinsics_vec256 v3 = v3_3; + Lib_IntVector_Intrinsics_vec256 v4 = 
v4_3; + Lib_IntVector_Intrinsics_vec256 v5 = v6_3; + Lib_IntVector_Intrinsics_vec256 v6 = v5_3; + Lib_IntVector_Intrinsics_vec256 v7 = v7_3; + Lib_IntVector_Intrinsics_vec256 v01 = st8; + Lib_IntVector_Intrinsics_vec256 v110 = st9; + Lib_IntVector_Intrinsics_vec256 v21 = st10; + Lib_IntVector_Intrinsics_vec256 v31 = st11; + Lib_IntVector_Intrinsics_vec256 v41 = st12; + Lib_IntVector_Intrinsics_vec256 v51 = st13; + Lib_IntVector_Intrinsics_vec256 v61 = st14; + Lib_IntVector_Intrinsics_vec256 v71 = st15; + Lib_IntVector_Intrinsics_vec256 + v0_4 = Lib_IntVector_Intrinsics_vec256_interleave_low32(v01, v110); + Lib_IntVector_Intrinsics_vec256 + v1_4 = Lib_IntVector_Intrinsics_vec256_interleave_high32(v01, v110); + Lib_IntVector_Intrinsics_vec256 + v2_4 = Lib_IntVector_Intrinsics_vec256_interleave_low32(v21, v31); + Lib_IntVector_Intrinsics_vec256 + v3_4 = Lib_IntVector_Intrinsics_vec256_interleave_high32(v21, v31); + Lib_IntVector_Intrinsics_vec256 + v4_4 = Lib_IntVector_Intrinsics_vec256_interleave_low32(v41, v51); + Lib_IntVector_Intrinsics_vec256 + v5_4 = Lib_IntVector_Intrinsics_vec256_interleave_high32(v41, v51); + Lib_IntVector_Intrinsics_vec256 + v6_4 = Lib_IntVector_Intrinsics_vec256_interleave_low32(v61, v71); + Lib_IntVector_Intrinsics_vec256 + v7_4 = Lib_IntVector_Intrinsics_vec256_interleave_high32(v61, v71); + Lib_IntVector_Intrinsics_vec256 v0_5 = v0_4; + Lib_IntVector_Intrinsics_vec256 v1_5 = v1_4; + Lib_IntVector_Intrinsics_vec256 v2_5 = v2_4; + Lib_IntVector_Intrinsics_vec256 v3_5 = v3_4; + Lib_IntVector_Intrinsics_vec256 v4_5 = v4_4; + Lib_IntVector_Intrinsics_vec256 v5_5 = v5_4; + Lib_IntVector_Intrinsics_vec256 v6_5 = v6_4; + Lib_IntVector_Intrinsics_vec256 v7_5 = v7_4; + Lib_IntVector_Intrinsics_vec256 + v0_11 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v0_5, v2_5); + Lib_IntVector_Intrinsics_vec256 + v2_11 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v0_5, v2_5); + Lib_IntVector_Intrinsics_vec256 + v1_11 = 
Lib_IntVector_Intrinsics_vec256_interleave_low64(v1_5, v3_5); + Lib_IntVector_Intrinsics_vec256 + v3_11 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v1_5, v3_5); + Lib_IntVector_Intrinsics_vec256 + v4_11 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v4_5, v6_5); + Lib_IntVector_Intrinsics_vec256 + v6_11 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v4_5, v6_5); + Lib_IntVector_Intrinsics_vec256 + v5_11 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v5_5, v7_5); + Lib_IntVector_Intrinsics_vec256 + v7_11 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v5_5, v7_5); + Lib_IntVector_Intrinsics_vec256 v0_12 = v0_11; + Lib_IntVector_Intrinsics_vec256 v1_12 = v1_11; + Lib_IntVector_Intrinsics_vec256 v2_12 = v2_11; + Lib_IntVector_Intrinsics_vec256 v3_12 = v3_11; + Lib_IntVector_Intrinsics_vec256 v4_12 = v4_11; + Lib_IntVector_Intrinsics_vec256 v5_12 = v5_11; + Lib_IntVector_Intrinsics_vec256 v6_12 = v6_11; + Lib_IntVector_Intrinsics_vec256 v7_12 = v7_11; + Lib_IntVector_Intrinsics_vec256 + v0_21 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_12, v4_12); + Lib_IntVector_Intrinsics_vec256 + v4_21 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_12, v4_12); + Lib_IntVector_Intrinsics_vec256 + v1_21 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_12, v5_12); + Lib_IntVector_Intrinsics_vec256 + v5_21 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_12, v5_12); + Lib_IntVector_Intrinsics_vec256 + v2_21 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v2_12, v6_12); + Lib_IntVector_Intrinsics_vec256 + v6_21 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v2_12, v6_12); + Lib_IntVector_Intrinsics_vec256 + v3_21 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v3_12, v7_12); + Lib_IntVector_Intrinsics_vec256 + v7_21 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v3_12, v7_12); + Lib_IntVector_Intrinsics_vec256 v0_22 = v0_21; + Lib_IntVector_Intrinsics_vec256 v1_22 = v1_21; + Lib_IntVector_Intrinsics_vec256 
v2_22 = v2_21; + Lib_IntVector_Intrinsics_vec256 v3_22 = v3_21; + Lib_IntVector_Intrinsics_vec256 v4_22 = v4_21; + Lib_IntVector_Intrinsics_vec256 v5_22 = v5_21; + Lib_IntVector_Intrinsics_vec256 v6_22 = v6_21; + Lib_IntVector_Intrinsics_vec256 v7_22 = v7_21; + Lib_IntVector_Intrinsics_vec256 v0_6 = v0_22; + Lib_IntVector_Intrinsics_vec256 v1_6 = v1_22; + Lib_IntVector_Intrinsics_vec256 v2_6 = v2_22; + Lib_IntVector_Intrinsics_vec256 v3_6 = v3_22; + Lib_IntVector_Intrinsics_vec256 v4_6 = v4_22; + Lib_IntVector_Intrinsics_vec256 v5_6 = v5_22; + Lib_IntVector_Intrinsics_vec256 v6_6 = v6_22; + Lib_IntVector_Intrinsics_vec256 v7_6 = v7_22; + Lib_IntVector_Intrinsics_vec256 v8 = v0_6; + Lib_IntVector_Intrinsics_vec256 v9 = v2_6; + Lib_IntVector_Intrinsics_vec256 v10 = v1_6; + Lib_IntVector_Intrinsics_vec256 v11 = v3_6; + Lib_IntVector_Intrinsics_vec256 v12 = v4_6; + Lib_IntVector_Intrinsics_vec256 v13 = v6_6; + Lib_IntVector_Intrinsics_vec256 v14 = v5_6; + Lib_IntVector_Intrinsics_vec256 v15 = v7_6; + k[0U] = v0; + k[1U] = v8; + k[2U] = v1; + k[3U] = v9; + k[4U] = v2; + k[5U] = v10; + k[6U] = v3; + k[7U] = v11; + k[8U] = v4; + k[9U] = v12; + k[10U] = v5; + k[11U] = v13; + k[12U] = v6; + k[13U] = v14; + k[14U] = v7; + k[15U] = v15; + KRML_MAYBE_FOR16(i, + (uint32_t)0U, + (uint32_t)16U, + (uint32_t)1U, + Lib_IntVector_Intrinsics_vec256 + x = Lib_IntVector_Intrinsics_vec256_load32_le(plain + i * (uint32_t)32U); + Lib_IntVector_Intrinsics_vec256 y = Lib_IntVector_Intrinsics_vec256_xor(x, k[i]); + Lib_IntVector_Intrinsics_vec256_store32_le(plain + i * (uint32_t)32U, y);); + memcpy(uu____2, plain, rem * sizeof(uint8_t)); + } +} +

/*
 * Runs eight vectors through three rounds of pairwise interleaves (at 32-,
 * 64- and 128-bit granularity) and writes the eight results to `dst` in the
 * fixed order 0, 2, 1, 3, 4, 6, 5, 7.
 *
 * All of `src` is read before anything is written to `dst`.
 *
 * NOTE(review): this has the shape of the usual 8x8 transpose of 32-bit
 * lanes -- confirm against the definitions of the interleave intrinsics.
 */
static inline void
chacha20_decrypt_256_shuffle8(
    Lib_IntVector_Intrinsics_vec256 *src,
    Lib_IntVector_Intrinsics_vec256 *dst)
{
    /* Round 1: neighbouring inputs, 32-bit interleave. */
    Lib_IntVector_Intrinsics_vec256
        a0 = Lib_IntVector_Intrinsics_vec256_interleave_low32(src[0U], src[1U]);
    Lib_IntVector_Intrinsics_vec256
        a1 = Lib_IntVector_Intrinsics_vec256_interleave_high32(src[0U], src[1U]);
    Lib_IntVector_Intrinsics_vec256
        a2 = Lib_IntVector_Intrinsics_vec256_interleave_low32(src[2U], src[3U]);
    Lib_IntVector_Intrinsics_vec256
        a3 = Lib_IntVector_Intrinsics_vec256_interleave_high32(src[2U], src[3U]);
    Lib_IntVector_Intrinsics_vec256
        a4 = Lib_IntVector_Intrinsics_vec256_interleave_low32(src[4U], src[5U]);
    Lib_IntVector_Intrinsics_vec256
        a5 = Lib_IntVector_Intrinsics_vec256_interleave_high32(src[4U], src[5U]);
    Lib_IntVector_Intrinsics_vec256
        a6 = Lib_IntVector_Intrinsics_vec256_interleave_low32(src[6U], src[7U]);
    Lib_IntVector_Intrinsics_vec256
        a7 = Lib_IntVector_Intrinsics_vec256_interleave_high32(src[6U], src[7U]);

    /* Round 2: operands two apart, 64-bit interleave. */
    Lib_IntVector_Intrinsics_vec256
        b0 = Lib_IntVector_Intrinsics_vec256_interleave_low64(a0, a2);
    Lib_IntVector_Intrinsics_vec256
        b2 = Lib_IntVector_Intrinsics_vec256_interleave_high64(a0, a2);
    Lib_IntVector_Intrinsics_vec256
        b1 = Lib_IntVector_Intrinsics_vec256_interleave_low64(a1, a3);
    Lib_IntVector_Intrinsics_vec256
        b3 = Lib_IntVector_Intrinsics_vec256_interleave_high64(a1, a3);
    Lib_IntVector_Intrinsics_vec256
        b4 = Lib_IntVector_Intrinsics_vec256_interleave_low64(a4, a6);
    Lib_IntVector_Intrinsics_vec256
        b6 = Lib_IntVector_Intrinsics_vec256_interleave_high64(a4, a6);
    Lib_IntVector_Intrinsics_vec256
        b5 = Lib_IntVector_Intrinsics_vec256_interleave_low64(a5, a7);
    Lib_IntVector_Intrinsics_vec256
        b7 = Lib_IntVector_Intrinsics_vec256_interleave_high64(a5, a7);

    /* Round 3: operands four apart, 128-bit interleave. */
    Lib_IntVector_Intrinsics_vec256
        c0 = Lib_IntVector_Intrinsics_vec256_interleave_low128(b0, b4);
    Lib_IntVector_Intrinsics_vec256
        c4 = Lib_IntVector_Intrinsics_vec256_interleave_high128(b0, b4);
    Lib_IntVector_Intrinsics_vec256
        c1 = Lib_IntVector_Intrinsics_vec256_interleave_low128(b1, b5);
    Lib_IntVector_Intrinsics_vec256
        c5 = Lib_IntVector_Intrinsics_vec256_interleave_high128(b1, b5);
    Lib_IntVector_Intrinsics_vec256
        c2 = Lib_IntVector_Intrinsics_vec256_interleave_low128(b2, b6);
    Lib_IntVector_Intrinsics_vec256
        c6 = Lib_IntVector_Intrinsics_vec256_interleave_high128(b2, b6);
    Lib_IntVector_Intrinsics_vec256
        c3 = Lib_IntVector_Intrinsics_vec256_interleave_low128(b3, b7);
    Lib_IntVector_Intrinsics_vec256
        c7 = Lib_IntVector_Intrinsics_vec256_interleave_high128(b3, b7);

    /* Fixed output order: positions 1/2 and 5/6 are swapped. */
    dst[0U] = c0;
    dst[1U] = c2;
    dst[2U] = c1;
    dst[3U] = c3;
    dst[4U] = c4;
    dst[5U] = c6;
    dst[6U] = c5;
    dst[7U] = c7;
}

/*
 * Rearranges the sixteen vectors of `k` in place: k[0..7] and k[8..15] are
 * each shuffled by chacha20_decrypt_256_shuffle8, then the two results are
 * interleaved so that the first half lands in the even slots of `k` and the
 * second half in the odd slots.
 */
static inline void
chacha20_decrypt_256_reorder(Lib_IntVector_Intrinsics_vec256 *k)
{
    KRML_PRE_ALIGN(32)
    Lib_IntVector_Intrinsics_vec256 even[8U] KRML_POST_ALIGN(32) = { 0U };
    KRML_PRE_ALIGN(32)
    Lib_IntVector_Intrinsics_vec256 odd[8U] KRML_POST_ALIGN(32) = { 0U };

    /* Both halves are fully computed before k is overwritten. */
    chacha20_decrypt_256_shuffle8(k, even);
    chacha20_decrypt_256_shuffle8(k + (uint32_t)8U, odd);

    for (uint32_t j = (uint32_t)0U; j < (uint32_t)8U; j++) {
        k[(uint32_t)2U * j] = even[j];
        k[(uint32_t)2U * j + (uint32_t)1U] = odd[j];
    }
}

/*
 * XORs 512 bytes at `src` with the sixteen vectors of `ks`, 32 bytes at a
 * time, and stores the result at `dst`. Each 32-byte piece is loaded from
 * `src` before the matching piece of `dst` is stored.
 */
static inline void
chacha20_decrypt_256_xor_block(
    uint8_t *dst,
    uint8_t *src,
    Lib_IntVector_Intrinsics_vec256 *ks)
{
    for (uint32_t j = (uint32_t)0U; j < (uint32_t)16U; j++) {
        Lib_IntVector_Intrinsics_vec256
            x = Lib_IntVector_Intrinsics_vec256_load32_le(src + j * (uint32_t)32U);
        Lib_IntVector_Intrinsics_vec256 y = Lib_IntVector_Intrinsics_vec256_xor(x, ks[j]);
        Lib_IntVector_Intrinsics_vec256_store32_le(dst + j * (uint32_t)32U, y);
    }
}

/*
 * Writes to `out` the XOR of `len` bytes of `cipher` with vectors derived
 * from a context initialised from `key`, `n` and `ctr`.
 *
 * The input is consumed in 512-byte chunks. For chunk index i,
 * chacha20_core_256 fills a 16-vector buffer from the context (presumably
 * the keystream for that chunk -- its definition is outside this view), the
 * buffer is reordered, and the chunk is XORed with it. A final partial chunk
 * is staged through a zero-padded 512-byte stack buffer so that the vector
 * loads and stores always cover a full chunk; only the leading `len % 512`
 * bytes are copied back to `out`.
 *
 * len:    number of bytes to process
 * out:    destination buffer, at least `len` bytes
 * cipher: source buffer, at least `len` bytes
 * key, n: forwarded unchanged to chacha20_init_256
 * ctr:    forwarded unchanged to chacha20_init_256
 */
void
Hacl_Chacha20_Vec256_chacha20_decrypt_256(
    uint32_t len,
    uint8_t *out,
    uint8_t *cipher,
    uint8_t *key,
    uint8_t *n,
    uint32_t ctr)
{
    KRML_PRE_ALIGN(32)
    Lib_IntVector_Intrinsics_vec256 ctx[16U] KRML_POST_ALIGN(32) = { 0U };
    chacha20_init_256(ctx, key, n, ctr);

    uint32_t full_chunks = len / (uint32_t)512U;
    uint32_t tail_len = len % (uint32_t)512U;

    for (uint32_t chunk = (uint32_t)0U; chunk < full_chunks; chunk++) {
        uint8_t *dst = out + chunk * (uint32_t)512U;
        uint8_t *src = cipher + chunk * (uint32_t)512U;
        KRML_PRE_ALIGN(32)
        Lib_IntVector_Intrinsics_vec256 ks[16U] KRML_POST_ALIGN(32) = { 0U };
        chacha20_core_256(ks, ctx, chunk);
        chacha20_decrypt_256_reorder(ks);
        chacha20_decrypt_256_xor_block(dst, src, ks);
    }

    if (tail_len > (uint32_t)0U) {
        uint8_t *dst = out + full_chunks * (uint32_t)512U;
        uint8_t *src = cipher + full_chunks * (uint32_t)512U;
        /* Zero-padded staging copy of the partial chunk. */
        uint8_t plain[512U] = { 0U };
        memcpy(plain, src, tail_len * sizeof(uint8_t));
        KRML_PRE_ALIGN(32)
        Lib_IntVector_Intrinsics_vec256 ks[16U] KRML_POST_ALIGN(32) = { 0U };
        chacha20_core_256(ks, ctx, full_chunks);
        chacha20_decrypt_256_reorder(ks);
        /* XOR the whole staging buffer in place, then keep only the real bytes. */
        chacha20_decrypt_256_xor_block(plain, plain, ks);
        memcpy(dst, plain, tail_len * sizeof(uint8_t));
    }
}

diff --git a/security/nss/lib/freebl/verified/Hacl_Chacha20_Vec256.h b/security/nss/lib/freebl/verified/Hacl_Chacha20_Vec256.h new file mode 100644 index 0000000000..5e9dccb9e2 --- /dev/null +++ b/security/nss/lib/freebl/verified/Hacl_Chacha20_Vec256.h @@ -0,0 +1,61 @@ +/* MIT License + * + * Copyright (c) 2016-2020 INRIA, CMU and Microsoft Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +/* Include guard for this header; a second macro, __Hacl_Chacha20_Vec256_H_DEFINED, is defined just before the closing #endif. */ #ifndef __Hacl_Chacha20_Vec256_H +#define __Hacl_Chacha20_Vec256_H + +#if defined(__cplusplus) +extern "C" { +#endif + +#include <string.h> +#include "krml/internal/types.h" +#include "krml/lowstar_endianness.h" +#include "krml/internal/target.h" + +#include "Hacl_Krmllib.h" + +/* Prototype only; the definition is not part of this header. NOTE(review): presumably writes len bytes to out from text, using key, nonce n and initial block counter ctr -- confirm against Hacl_Chacha20_Vec256.c. */ void +Hacl_Chacha20_Vec256_chacha20_encrypt_256( + uint32_t len, + uint8_t *out, + uint8_t *text, + uint8_t *key, + uint8_t *n, + uint32_t ctr); + +/* Prototype only; same parameter list as the encrypt entry point except that the input buffer is named cipher. NOTE(review): presumably the inverse operation -- confirm against Hacl_Chacha20_Vec256.c. */ void +Hacl_Chacha20_Vec256_chacha20_decrypt_256( + uint32_t len, + uint8_t *out, + uint8_t *cipher, + uint8_t *key, + uint8_t *n, + uint32_t ctr); + +#if defined(__cplusplus) +} +#endif + +#define __Hacl_Chacha20_Vec256_H_DEFINED +#endif diff --git a/security/nss/lib/freebl/verified/Hacl_Curve25519_51.c b/security/nss/lib/freebl/verified/Hacl_Curve25519_51.c new file mode 100644 index 0000000000..09bfc4be83 --- /dev/null +++ b/security/nss/lib/freebl/verified/Hacl_Curve25519_51.c @@ -0,0 +1,296 @@ +/* MIT License + * + * Copyright (c) 2016-2020 INRIA, CMU and Microsoft Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "internal/Hacl_Curve25519_51.h" + +#include "internal/Hacl_Krmllib.h" + +/* 32-byte constant: first byte 9, remaining 31 bytes zero-initialised. Copied into the local base point by Hacl_Curve25519_51_secret_to_public. */ static const uint8_t g25519[32U] = { (uint8_t)9U }; + +/* One combined ladder step on 5-limb field elements. q: its first 5 limbs are read as x1. p01_tmp1: 40 limbs laid out as nq (x2 at +0, z2 at +5), nq_p1 (x3 at +10, z3 at +15) and 20 limbs of scratch at +20; nq and nq_p1 are overwritten in place. tmp2: wide scratch handed to the fmul/fsqr helpers. NOTE(review): per the name, nq is doubled and nq_p1 receives the differential sum, and 121665 is presumably the curve constant (486662 - 2) / 4 -- confirm against the HACL* specification. */ static void +point_add_and_double(uint64_t *q, uint64_t *p01_tmp1, FStar_UInt128_uint128 *tmp2) +{ + uint64_t *nq = p01_tmp1; + uint64_t *nq_p1 = p01_tmp1 + (uint32_t)10U; + uint64_t *tmp1 = p01_tmp1 + (uint32_t)20U; + uint64_t *x1 = q; + uint64_t *x2 = nq; + uint64_t *z2 = nq + (uint32_t)5U; + uint64_t *z3 = nq_p1 + (uint32_t)5U; + uint64_t *a = tmp1; + uint64_t *b = tmp1 + (uint32_t)5U; + uint64_t *ab = tmp1; + uint64_t *dc = tmp1 + (uint32_t)10U; + Hacl_Impl_Curve25519_Field51_fadd(a, x2, z2); + Hacl_Impl_Curve25519_Field51_fsub(b, x2, z2); + uint64_t *x3 = nq_p1; + uint64_t *z31 = nq_p1 + (uint32_t)5U; + uint64_t *d0 = dc; + uint64_t *c0 = dc + (uint32_t)5U; + Hacl_Impl_Curve25519_Field51_fadd(c0, x3, z31); + Hacl_Impl_Curve25519_Field51_fsub(d0, x3, z31); + Hacl_Impl_Curve25519_Field51_fmul2(dc, dc, ab, tmp2); + Hacl_Impl_Curve25519_Field51_fadd(x3, d0, c0); + Hacl_Impl_Curve25519_Field51_fsub(z31, d0, c0); + uint64_t *a1 = tmp1; + uint64_t *b1 = tmp1 + (uint32_t)5U; + uint64_t *d = tmp1 + (uint32_t)10U; + uint64_t *c = tmp1 + (uint32_t)15U; + uint64_t *ab1 = tmp1; + uint64_t *dc1 = tmp1 + (uint32_t)10U; + Hacl_Impl_Curve25519_Field51_fsqr2(dc1, ab1, tmp2); + Hacl_Impl_Curve25519_Field51_fsqr2(nq_p1, nq_p1, tmp2); + /* keep a copy of c in a1: the fsub call that follows overwrites c */ a1[0U] = c[0U]; + a1[1U] = c[1U]; + a1[2U] = c[2U]; + a1[3U] = c[3U]; + a1[4U] = c[4U]; + 
Hacl_Impl_Curve25519_Field51_fsub(c, d, c); + Hacl_Impl_Curve25519_Field51_fmul1(b1, c, (uint64_t)121665U); + Hacl_Impl_Curve25519_Field51_fadd(b1, b1, d); + Hacl_Impl_Curve25519_Field51_fmul2(nq, dc1, ab1, tmp2); + Hacl_Impl_Curve25519_Field51_fmul(z3, z3, x1, tmp2); +} + +/* Doubling step: nq (x2 at +0, z2 at +5) is updated in place. tmp1 supplies 20 limbs of scratch split into a (+0), b (+5), d (+10) and c (+15); tmp2 is wide scratch for fsqr2/fmul2. */ static void +point_double(uint64_t *nq, uint64_t *tmp1, FStar_UInt128_uint128 *tmp2) +{ + uint64_t *x2 = nq; + uint64_t *z2 = nq + (uint32_t)5U; + uint64_t *a = tmp1; + uint64_t *b = tmp1 + (uint32_t)5U; + uint64_t *d = tmp1 + (uint32_t)10U; + uint64_t *c = tmp1 + (uint32_t)15U; + uint64_t *ab = tmp1; + uint64_t *dc = tmp1 + (uint32_t)10U; + Hacl_Impl_Curve25519_Field51_fadd(a, x2, z2); + Hacl_Impl_Curve25519_Field51_fsub(b, x2, z2); + Hacl_Impl_Curve25519_Field51_fsqr2(dc, ab, tmp2); + /* keep a copy of c in a: the fsub call that follows overwrites c */ a[0U] = c[0U]; + a[1U] = c[1U]; + a[2U] = c[2U]; + a[3U] = c[3U]; + a[4U] = c[4U]; + Hacl_Impl_Curve25519_Field51_fsub(c, d, c); + Hacl_Impl_Curve25519_Field51_fmul1(b, c, (uint64_t)121665U); + Hacl_Impl_Curve25519_Field51_fadd(b, b, d); + Hacl_Impl_Curve25519_Field51_fmul2(nq, dc, ab, tmp2); +} + +/* Runs the ladder over the bits of key, starting from the 10-limb point init, and copies the 10-limb result to out (out is written only by the final memcpy, so out may alias init as the caller in this file does). Local buffer p01_tmp1_swap (41 limbs): [0..9] accumulator initialised to x = 1, z = 0; [10..19] copy of init; [20..39] scratch; [40] the pending swap bit. */ static void +montgomery_ladder(uint64_t *out, uint8_t *key, uint64_t *init) +{ + FStar_UInt128_uint128 tmp2[10U]; + for (uint32_t _i = 0U; _i < (uint32_t)10U; ++_i) + tmp2[_i] = FStar_UInt128_uint64_to_uint128((uint64_t)0U); + uint64_t p01_tmp1_swap[41U] = { 0U }; + uint64_t *p0 = p01_tmp1_swap; + uint64_t *p01 = p01_tmp1_swap; + uint64_t *p03 = p01; + uint64_t *p11 = p01 + (uint32_t)10U; + memcpy(p11, init, (uint32_t)10U * sizeof(uint64_t)); + uint64_t *x0 = p03; + uint64_t *z0 = p03 + (uint32_t)5U; + x0[0U] = (uint64_t)1U; + x0[1U] = (uint64_t)0U; + x0[2U] = (uint64_t)0U; + x0[3U] = (uint64_t)0U; + x0[4U] = (uint64_t)0U; + z0[0U] = (uint64_t)0U; + z0[1U] = (uint64_t)0U; + z0[2U] = (uint64_t)0U; + z0[3U] = (uint64_t)0U; + z0[4U] = (uint64_t)0U; + uint64_t *p01_tmp1 = p01_tmp1_swap; + uint64_t *p01_tmp11 = p01_tmp1_swap; + uint64_t *nq1 = p01_tmp1_swap; + uint64_t *nq_p11 = p01_tmp1_swap + (uint32_t)10U; + uint64_t 
*swap = p01_tmp1_swap + (uint32_t)40U; + /* first step runs with a constant swap value of 1 and no key access; NOTE(review): presumably this stands in for bit 254 of a clamped scalar -- key is not clamped in this function, confirm */ Hacl_Impl_Curve25519_Field51_cswap2((uint64_t)1U, nq1, nq_p11); + point_add_and_double(init, p01_tmp11, tmp2); + swap[0U] = (uint64_t)1U; + /* 251 iterations consume key bits 253 down to 3 (bit index 253 - i) */ for (uint32_t i = (uint32_t)0U; i < (uint32_t)251U; i++) { + uint64_t *p01_tmp12 = p01_tmp1_swap; + uint64_t *swap1 = p01_tmp1_swap + (uint32_t)40U; + uint64_t *nq2 = p01_tmp12; + uint64_t *nq_p12 = p01_tmp12 + (uint32_t)10U; + uint64_t + bit = + (uint64_t)(key[((uint32_t)253U - i) / (uint32_t)8U] >> ((uint32_t)253U - i) % (uint32_t)8U & (uint8_t)1U); + uint64_t sw = swap1[0U] ^ bit; + Hacl_Impl_Curve25519_Field51_cswap2(sw, nq2, nq_p12); + point_add_and_double(init, p01_tmp12, tmp2); + swap1[0U] = bit; + } + uint64_t sw = swap[0U]; + Hacl_Impl_Curve25519_Field51_cswap2(sw, nq1, nq_p11); + uint64_t *nq10 = p01_tmp1; + uint64_t *tmp1 = p01_tmp1 + (uint32_t)20U; + /* three plain doublings follow the loop; key bits 2..0 are never read */ point_double(nq10, tmp1, tmp2); + point_double(nq10, tmp1, tmp2); + point_double(nq10, tmp1, tmp2); + memcpy(out, p0, (uint32_t)10U * sizeof(uint64_t)); +} + +/* Squares inp into o, then squares o in place n - 1 more times (n squarings in total). n must be at least 1: the loop bound n - 1 is unsigned, so n == 0 would wrap to 0xFFFFFFFF iterations. tmp is wide scratch for fsqr. */ void +Hacl_Curve25519_51_fsquare_times( + uint64_t *o, + uint64_t *inp, + FStar_UInt128_uint128 *tmp, + uint32_t n) +{ + Hacl_Impl_Curve25519_Field51_fsqr(o, inp, tmp); + for (uint32_t i = (uint32_t)0U; i < n - (uint32_t)1U; i++) { + Hacl_Impl_Curve25519_Field51_fsqr(o, o, tmp); + } +} + +/* Fixed square-and-multiply chain over i using the 20-limb local t1, split into a (+0), b (+5), c (+10) and t0 (+15); the result is written to o. Following the chain step by step, the total exponent is 2^255 - 21, assuming fsqr/fmul are field squaring/multiplication. */ void +Hacl_Curve25519_51_finv(uint64_t *o, uint64_t *i, FStar_UInt128_uint128 *tmp) +{ + uint64_t t1[20U] = { 0U }; + uint64_t *a1 = t1; + uint64_t *b1 = t1 + (uint32_t)5U; + uint64_t *t010 = t1 + (uint32_t)15U; + FStar_UInt128_uint128 *tmp10 = tmp; + Hacl_Curve25519_51_fsquare_times(a1, i, tmp10, (uint32_t)1U); + Hacl_Curve25519_51_fsquare_times(t010, a1, tmp10, (uint32_t)2U); + Hacl_Impl_Curve25519_Field51_fmul(b1, t010, i, tmp); + Hacl_Impl_Curve25519_Field51_fmul(a1, b1, a1, tmp); + Hacl_Curve25519_51_fsquare_times(t010, a1, tmp10, (uint32_t)1U); + Hacl_Impl_Curve25519_Field51_fmul(b1, t010, b1, tmp); + Hacl_Curve25519_51_fsquare_times(t010, b1, tmp10, (uint32_t)5U); + 
Hacl_Impl_Curve25519_Field51_fmul(b1, t010, b1, tmp); + uint64_t *b10 = t1 + (uint32_t)5U; + uint64_t *c10 = t1 + (uint32_t)10U; + uint64_t *t011 = t1 + (uint32_t)15U; + FStar_UInt128_uint128 *tmp11 = tmp; + Hacl_Curve25519_51_fsquare_times(t011, b10, tmp11, (uint32_t)10U); + Hacl_Impl_Curve25519_Field51_fmul(c10, t011, b10, tmp); + Hacl_Curve25519_51_fsquare_times(t011, c10, tmp11, (uint32_t)20U); + Hacl_Impl_Curve25519_Field51_fmul(t011, t011, c10, tmp); + Hacl_Curve25519_51_fsquare_times(t011, t011, tmp11, (uint32_t)10U); + Hacl_Impl_Curve25519_Field51_fmul(b10, t011, b10, tmp); + Hacl_Curve25519_51_fsquare_times(t011, b10, tmp11, (uint32_t)50U); + Hacl_Impl_Curve25519_Field51_fmul(c10, t011, b10, tmp); + uint64_t *b11 = t1 + (uint32_t)5U; + uint64_t *c1 = t1 + (uint32_t)10U; + uint64_t *t01 = t1 + (uint32_t)15U; + FStar_UInt128_uint128 *tmp1 = tmp; + Hacl_Curve25519_51_fsquare_times(t01, c1, tmp1, (uint32_t)100U); + Hacl_Impl_Curve25519_Field51_fmul(t01, t01, c1, tmp); + Hacl_Curve25519_51_fsquare_times(t01, t01, tmp1, (uint32_t)50U); + Hacl_Impl_Curve25519_Field51_fmul(t01, t01, b11, tmp); + Hacl_Curve25519_51_fsquare_times(t01, t01, tmp1, (uint32_t)5U); + uint64_t *a = t1; + uint64_t *t0 = t1 + (uint32_t)15U; + Hacl_Impl_Curve25519_Field51_fmul(o, t0, a, tmp); +} + +/* Converts the projective pair in i (x at +0, z at +5) to 32 output bytes: tmp = finv(z) * x, store_felem packs tmp into four 64-bit words, and each word is written to o with store64_le. */ static void +encode_point(uint8_t *o, uint64_t *i) +{ + uint64_t *x = i; + uint64_t *z = i + (uint32_t)5U; + uint64_t tmp[5U] = { 0U }; + uint64_t u64s[4U] = { 0U }; + FStar_UInt128_uint128 tmp_w[10U]; + for (uint32_t _i = 0U; _i < (uint32_t)10U; ++_i) + tmp_w[_i] = FStar_UInt128_uint64_to_uint128((uint64_t)0U); + Hacl_Curve25519_51_finv(tmp, z, tmp_w); + Hacl_Impl_Curve25519_Field51_fmul(tmp, tmp, x, tmp_w); + Hacl_Impl_Curve25519_Field51_store_felem(u64s, tmp); + KRML_MAYBE_FOR4(i0, + (uint32_t)0U, + (uint32_t)4U, + (uint32_t)1U, + store64_le(o + i0 * (uint32_t)8U, u64s[i0]);); +} + +/* Scalar multiplication entry point: out receives encode_point of the ladder result for scalar priv and point pub. pub is read as four 64-bit words via load64_le; the top bit of the last word is cleared, the remaining 255 bits are split into five 51-bit limbs as x, and z is set to 1. out, priv and pub are each accessed as 32 bytes. */ void +Hacl_Curve25519_51_scalarmult(uint8_t *out, uint8_t *priv, uint8_t *pub) +{ + uint64_t init[10U] = { 0U }; 
+ uint64_t tmp[4U] = { 0U }; + KRML_MAYBE_FOR4(i, + (uint32_t)0U, + (uint32_t)4U, + (uint32_t)1U, + uint64_t *os = tmp; + uint8_t *bj = pub + i * (uint32_t)8U; + uint64_t u = load64_le(bj); + uint64_t r = u; + uint64_t x = r; + os[i] = x;); + uint64_t tmp3 = tmp[3U]; + tmp[3U] = tmp3 & (uint64_t)0x7fffffffffffffffU; + uint64_t *x = init; + uint64_t *z = init + (uint32_t)5U; + z[0U] = (uint64_t)1U; + z[1U] = (uint64_t)0U; + z[2U] = (uint64_t)0U; + z[3U] = (uint64_t)0U; + z[4U] = (uint64_t)0U; + /* repack 4 x 64-bit words into 5 x 51-bit limbs: each limb joins the high bits of one word with the low bits of the next */ uint64_t f0l = tmp[0U] & (uint64_t)0x7ffffffffffffU; + uint64_t f0h = tmp[0U] >> (uint32_t)51U; + uint64_t f1l = (tmp[1U] & (uint64_t)0x3fffffffffU) << (uint32_t)13U; + uint64_t f1h = tmp[1U] >> (uint32_t)38U; + uint64_t f2l = (tmp[2U] & (uint64_t)0x1ffffffU) << (uint32_t)26U; + uint64_t f2h = tmp[2U] >> (uint32_t)25U; + uint64_t f3l = (tmp[3U] & (uint64_t)0xfffU) << (uint32_t)39U; + uint64_t f3h = tmp[3U] >> (uint32_t)12U; + x[0U] = f0l; + x[1U] = f0h | f1l; + x[2U] = f1h | f2l; + x[3U] = f2h | f3l; + x[4U] = f3h; + montgomery_ladder(init, priv, init); + encode_point(out, init); +} + +/* Copies g25519 into a local 32-byte base point and calls Hacl_Curve25519_51_scalarmult(pub, priv, basepoint). */ void +Hacl_Curve25519_51_secret_to_public(uint8_t *pub, uint8_t *priv) +{ + uint8_t basepoint[32U] = { 0U }; + for (uint32_t i = (uint32_t)0U; i < (uint32_t)32U; i++) { + uint8_t *os = basepoint; + uint8_t x = g25519[i]; + os[i] = x; + } + Hacl_Curve25519_51_scalarmult(pub, priv, basepoint); +} + +/* Runs Hacl_Curve25519_51_scalarmult into out, then ANDs FStar_UInt8_eq_mask(out[i], 0) over all 32 bytes with no early exit. Returns false when the accumulated mask equals 0xFF and true otherwise; out is written in both cases. NOTE(review): assumes eq_mask yields 0xFF for equal bytes, i.e. false means an all-zero result -- confirm. */ bool +Hacl_Curve25519_51_ecdh(uint8_t *out, uint8_t *priv, uint8_t *pub) +{ + uint8_t zeros[32U] = { 0U }; + Hacl_Curve25519_51_scalarmult(out, priv, pub); + uint8_t res = (uint8_t)255U; + for (uint32_t i = (uint32_t)0U; i < (uint32_t)32U; i++) { + uint8_t uu____0 = FStar_UInt8_eq_mask(out[i], zeros[i]); + res = uu____0 & res; + } + uint8_t z = res; + bool r = z == (uint8_t)255U; + return !r; +} diff --git a/security/nss/lib/freebl/verified/Hacl_Curve25519_51.h b/security/nss/lib/freebl/verified/Hacl_Curve25519_51.h new file mode 100644 index 0000000000..2a4494a7af --- /dev/null +++ 
b/security/nss/lib/freebl/verified/Hacl_Curve25519_51.h @@ -0,0 +1,50 @@ +/* MIT License + * + * Copyright (c) 2016-2020 INRIA, CMU and Microsoft Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#ifndef __Hacl_Curve25519_51_H +#define __Hacl_Curve25519_51_H + +#if defined(__cplusplus) +extern "C" { +#endif + +#include <string.h> +#include "krml/internal/types.h" +#include "krml/lowstar_endianness.h" +#include "krml/internal/target.h" + +#include "Hacl_Krmllib.h" +#include "Hacl_Bignum25519_51.h" + +/* Scalar multiplication: writes 32 bytes to out, computed from the 32-byte scalar priv and the 32-byte point encoding pub. */ void Hacl_Curve25519_51_scalarmult(uint8_t *out, uint8_t *priv, uint8_t *pub); + +/* Writes to pub the scalarmult result of priv with a fixed base point whose encoding is the byte 9 followed by 31 zero bytes. */ void Hacl_Curve25519_51_secret_to_public(uint8_t *pub, uint8_t *priv); + +/* Same computation as scalarmult, followed by a comparison of all 32 output bytes against zero; the boolean result reports that comparison and out is written regardless. NOTE(review): false presumably signals an all-zero shared secret -- confirm. */ bool Hacl_Curve25519_51_ecdh(uint8_t *out, uint8_t *priv, uint8_t *pub); + +#if defined(__cplusplus) +} +#endif + +#define __Hacl_Curve25519_51_H_DEFINED +#endif diff --git a/security/nss/lib/freebl/verified/Hacl_Curve25519_64.c b/security/nss/lib/freebl/verified/Hacl_Curve25519_64.c new file mode 100644 index 0000000000..40144d48dc --- /dev/null +++ b/security/nss/lib/freebl/verified/Hacl_Curve25519_64.c @@ -0,0 +1,400 @@ +/* MIT License + * + * Copyright (c) 2016-2020 INRIA, CMU and Microsoft Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "Hacl_Curve25519_64.h" + +#include "internal/Vale.h" +#include "internal/Hacl_Krmllib.h" +#include "config.h" +#include "curve25519-inline.h" +/* The nine *0 wrappers that follow pick an implementation at compile time: the inline-assembly routine when HACL_CAN_COMPILE_INLINE_ASM is non-zero, otherwise the corresponding *_e function, whose return value is stored in an unused local. Note that fmul_e, fmul2_e, fsqr_e and fsqr2_e take their arguments in a different order (tmp first, out after f1). */ static inline void +add_scalar0(uint64_t *out, uint64_t *f1, uint64_t f2) +{ +#if HACL_CAN_COMPILE_INLINE_ASM + add_scalar(out, f1, f2); +#else + uint64_t uu____0 = add_scalar_e(out, f1, f2); +#endif +} + +static inline void +fadd0(uint64_t *out, uint64_t *f1, uint64_t *f2) +{ +#if HACL_CAN_COMPILE_INLINE_ASM + fadd(out, f1, f2); +#else + uint64_t uu____0 = fadd_e(out, f1, f2); +#endif +} + +static inline void +fsub0(uint64_t *out, uint64_t *f1, uint64_t *f2) +{ +#if HACL_CAN_COMPILE_INLINE_ASM + fsub(out, f1, f2); +#else + uint64_t uu____0 = fsub_e(out, f1, f2); +#endif +} + +static inline void +fmul0(uint64_t *out, uint64_t *f1, uint64_t *f2, uint64_t *tmp) +{ +#if HACL_CAN_COMPILE_INLINE_ASM + fmul(out, f1, f2, tmp); +#else + uint64_t uu____0 = fmul_e(tmp, f1, out, f2); +#endif +} + +static inline void +fmul20(uint64_t *out, uint64_t *f1, uint64_t *f2, uint64_t *tmp) +{ +#if HACL_CAN_COMPILE_INLINE_ASM + fmul2(out, f1, f2, tmp); +#else + uint64_t uu____0 = fmul2_e(tmp, f1, out, f2); +#endif +} + +static inline void +fmul_scalar0(uint64_t *out, uint64_t *f1, uint64_t f2) +{ +#if HACL_CAN_COMPILE_INLINE_ASM + fmul_scalar(out, f1, f2); +#else + uint64_t uu____0 = fmul_scalar_e(out, f1, f2); +#endif +} + +static inline void +fsqr0(uint64_t *out, uint64_t *f1, uint64_t *tmp) +{ +#if HACL_CAN_COMPILE_INLINE_ASM + fsqr(out, f1, tmp); +#else + uint64_t uu____0 = fsqr_e(tmp, f1, out); +#endif +} + +static inline void +fsqr20(uint64_t *out, uint64_t *f, uint64_t *tmp) +{ +#if HACL_CAN_COMPILE_INLINE_ASM + fsqr2(out, f, tmp); +#else + uint64_t 
uu____0 = fsqr2_e(tmp, f, out); +#endif +} + +static inline void +cswap20(uint64_t bit, uint64_t *p1, uint64_t *p2) +{ +#if HACL_CAN_COMPILE_INLINE_ASM + cswap2(bit, p1, p2); +#else + uint64_t uu____0 = cswap2_e(bit, p1, p2); +#endif +} + +/* 32-byte constant: first byte 9, remaining 31 bytes zero-initialised. Copied into the local base point by Hacl_Curve25519_64_secret_to_public. */ static const uint8_t g25519[32U] = { (uint8_t)9U }; + +/* One combined ladder step on 4-limb field elements. q: its first 4 limbs are read as x1. p01_tmp1: 32 limbs laid out as nq (x2 at +0, z2 at +4), nq_p1 (x3 at +8, z3 at +12) and 16 limbs of scratch at +16; nq and nq_p1 are overwritten in place. tmp2: scratch handed to the fmul/fsqr wrappers. NOTE(review): per the name, nq is doubled and nq_p1 receives the differential sum, and 121665 is presumably the curve constant (486662 - 2) / 4 -- confirm against the HACL* specification. */ static void +point_add_and_double(uint64_t *q, uint64_t *p01_tmp1, uint64_t *tmp2) +{ + uint64_t *nq = p01_tmp1; + uint64_t *nq_p1 = p01_tmp1 + (uint32_t)8U; + uint64_t *tmp1 = p01_tmp1 + (uint32_t)16U; + uint64_t *x1 = q; + uint64_t *x2 = nq; + uint64_t *z2 = nq + (uint32_t)4U; + uint64_t *z3 = nq_p1 + (uint32_t)4U; + uint64_t *a = tmp1; + uint64_t *b = tmp1 + (uint32_t)4U; + uint64_t *ab = tmp1; + uint64_t *dc = tmp1 + (uint32_t)8U; + fadd0(a, x2, z2); + fsub0(b, x2, z2); + uint64_t *x3 = nq_p1; + uint64_t *z31 = nq_p1 + (uint32_t)4U; + uint64_t *d0 = dc; + uint64_t *c0 = dc + (uint32_t)4U; + fadd0(c0, x3, z31); + fsub0(d0, x3, z31); + fmul20(dc, dc, ab, tmp2); + fadd0(x3, d0, c0); + fsub0(z31, d0, c0); + uint64_t *a1 = tmp1; + uint64_t *b1 = tmp1 + (uint32_t)4U; + uint64_t *d = tmp1 + (uint32_t)8U; + uint64_t *c = tmp1 + (uint32_t)12U; + uint64_t *ab1 = tmp1; + uint64_t *dc1 = tmp1 + (uint32_t)8U; + fsqr20(dc1, ab1, tmp2); + fsqr20(nq_p1, nq_p1, tmp2); + /* keep a copy of c in a1: the fsub0 call that follows overwrites c */ a1[0U] = c[0U]; + a1[1U] = c[1U]; + a1[2U] = c[2U]; + a1[3U] = c[3U]; + fsub0(c, d, c); + fmul_scalar0(b1, c, (uint64_t)121665U); + fadd0(b1, b1, d); + fmul20(nq, dc1, ab1, tmp2); + fmul0(z3, z3, x1, tmp2); +} + +/* Doubling step: nq (x2 at +0, z2 at +4) is updated in place. tmp1 supplies 16 limbs of scratch split into a (+0), b (+4), d (+8) and c (+12); tmp2 is scratch for fsqr20/fmul20. */ static void +point_double(uint64_t *nq, uint64_t *tmp1, uint64_t *tmp2) +{ + uint64_t *x2 = nq; + uint64_t *z2 = nq + (uint32_t)4U; + uint64_t *a = tmp1; + uint64_t *b = tmp1 + (uint32_t)4U; + uint64_t *d = tmp1 + (uint32_t)8U; + uint64_t *c = tmp1 + (uint32_t)12U; + uint64_t *ab = tmp1; + uint64_t *dc = tmp1 + (uint32_t)8U; + fadd0(a, x2, z2); + fsub0(b, x2, z2); + fsqr20(dc, ab, tmp2); + /* keep a copy of c in a: the fsub0 call that follows overwrites c */ a[0U] = c[0U]; + a[1U] = c[1U]; + a[2U] = c[2U]; + a[3U] = c[3U]; + fsub0(c, d, c); + fmul_scalar0(b, c, (uint64_t)121665U); + 
fadd0(b, b, d); + fmul20(nq, dc, ab, tmp2); +} + +/* Runs the ladder over the bits of key, starting from the 8-limb point init, and copies the 8-limb result to out (out is written only by the final memcpy, so out may alias init as the caller in this file does). Local buffer p01_tmp1_swap (33 limbs): [0..7] accumulator initialised to x = 1, z = 0; [8..15] copy of init; [16..31] scratch; [32] the pending swap bit. */ static void +montgomery_ladder(uint64_t *out, uint8_t *key, uint64_t *init) +{ + uint64_t tmp2[16U] = { 0U }; + uint64_t p01_tmp1_swap[33U] = { 0U }; + uint64_t *p0 = p01_tmp1_swap; + uint64_t *p01 = p01_tmp1_swap; + uint64_t *p03 = p01; + uint64_t *p11 = p01 + (uint32_t)8U; + memcpy(p11, init, (uint32_t)8U * sizeof(uint64_t)); + uint64_t *x0 = p03; + uint64_t *z0 = p03 + (uint32_t)4U; + x0[0U] = (uint64_t)1U; + x0[1U] = (uint64_t)0U; + x0[2U] = (uint64_t)0U; + x0[3U] = (uint64_t)0U; + z0[0U] = (uint64_t)0U; + z0[1U] = (uint64_t)0U; + z0[2U] = (uint64_t)0U; + z0[3U] = (uint64_t)0U; + uint64_t *p01_tmp1 = p01_tmp1_swap; + uint64_t *p01_tmp11 = p01_tmp1_swap; + uint64_t *nq1 = p01_tmp1_swap; + uint64_t *nq_p11 = p01_tmp1_swap + (uint32_t)8U; + uint64_t *swap = p01_tmp1_swap + (uint32_t)32U; + /* first step runs with a constant swap value of 1 and no key access; NOTE(review): presumably this stands in for bit 254 of a clamped scalar -- key is not clamped in this function, confirm */ cswap20((uint64_t)1U, nq1, nq_p11); + point_add_and_double(init, p01_tmp11, tmp2); + swap[0U] = (uint64_t)1U; + /* 251 iterations consume key bits 253 down to 3 (bit index 253 - i) */ for (uint32_t i = (uint32_t)0U; i < (uint32_t)251U; i++) { + uint64_t *p01_tmp12 = p01_tmp1_swap; + uint64_t *swap1 = p01_tmp1_swap + (uint32_t)32U; + uint64_t *nq2 = p01_tmp12; + uint64_t *nq_p12 = p01_tmp12 + (uint32_t)8U; + uint64_t + bit = + (uint64_t)(key[((uint32_t)253U - i) / (uint32_t)8U] >> ((uint32_t)253U - i) % (uint32_t)8U & (uint8_t)1U); + uint64_t sw = swap1[0U] ^ bit; + cswap20(sw, nq2, nq_p12); + point_add_and_double(init, p01_tmp12, tmp2); + swap1[0U] = bit; + } + uint64_t sw = swap[0U]; + cswap20(sw, nq1, nq_p11); + uint64_t *nq10 = p01_tmp1; + uint64_t *tmp1 = p01_tmp1 + (uint32_t)16U; + /* three plain doublings follow the loop; key bits 2..0 are never read */ point_double(nq10, tmp1, tmp2); + point_double(nq10, tmp1, tmp2); + point_double(nq10, tmp1, tmp2); + memcpy(out, p0, (uint32_t)8U * sizeof(uint64_t)); +} + +/* Squares inp into o, then squares o in place n - 1 more times (n squarings in total). n must be at least 1: the loop bound n - 1 is unsigned, so n == 0 would wrap to 0xFFFFFFFF iterations. tmp is scratch for fsqr0. */ static void +fsquare_times(uint64_t *o, uint64_t *inp, uint64_t *tmp, uint32_t n) +{ + fsqr0(o, inp, tmp); + for (uint32_t i = (uint32_t)0U; i < n - (uint32_t)1U; i++) { + fsqr0(o, o, tmp); + } +} + +/* Fixed square-and-multiply chain over i using the 16-limb local t1, split into a (+0), b (+4), c (+8) and t0 (+12); the result is written to o. Following the chain step by step, the total exponent is 2^255 - 21, assuming fsqr0/fmul0 are field squaring/multiplication. */ static void +finv(uint64_t *o, uint64_t *i, uint64_t *tmp) +{ + 
uint64_t t1[16U] = { 0U }; + uint64_t *a1 = t1; + uint64_t *b1 = t1 + (uint32_t)4U; + uint64_t *t010 = t1 + (uint32_t)12U; + uint64_t *tmp10 = tmp; + fsquare_times(a1, i, tmp10, (uint32_t)1U); + fsquare_times(t010, a1, tmp10, (uint32_t)2U); + fmul0(b1, t010, i, tmp); + fmul0(a1, b1, a1, tmp); + fsquare_times(t010, a1, tmp10, (uint32_t)1U); + fmul0(b1, t010, b1, tmp); + fsquare_times(t010, b1, tmp10, (uint32_t)5U); + fmul0(b1, t010, b1, tmp); + uint64_t *b10 = t1 + (uint32_t)4U; + uint64_t *c10 = t1 + (uint32_t)8U; + uint64_t *t011 = t1 + (uint32_t)12U; + uint64_t *tmp11 = tmp; + fsquare_times(t011, b10, tmp11, (uint32_t)10U); + fmul0(c10, t011, b10, tmp); + fsquare_times(t011, c10, tmp11, (uint32_t)20U); + fmul0(t011, t011, c10, tmp); + fsquare_times(t011, t011, tmp11, (uint32_t)10U); + fmul0(b10, t011, b10, tmp); + fsquare_times(t011, b10, tmp11, (uint32_t)50U); + fmul0(c10, t011, b10, tmp); + uint64_t *b11 = t1 + (uint32_t)4U; + uint64_t *c1 = t1 + (uint32_t)8U; + uint64_t *t01 = t1 + (uint32_t)12U; + uint64_t *tmp1 = tmp; + fsquare_times(t01, c1, tmp1, (uint32_t)100U); + fmul0(t01, t01, c1, tmp); + fsquare_times(t01, t01, tmp1, (uint32_t)50U); + fmul0(t01, t01, b11, tmp); + fsquare_times(t01, t01, tmp1, (uint32_t)5U); + uint64_t *a = t1; + uint64_t *t0 = t1 + (uint32_t)12U; + fmul0(o, t0, a, tmp); +} + +/* Writes the four limbs of f to b after reduction; f itself is modified. Twice in a row, the top bit of f[3] is cleared and 19 * top_bit is added back through add_scalar0. Then the limbs 0xffffffffffffffed, all-ones, all-ones, 0x7fffffffffffffff (that is, 2^255 - 19) are subtracted under a mask built from the gte/eq mask helpers, with no branch. NOTE(review): assumes the mask helpers return all-ones when their condition holds -- confirm. */ static void +store_felem(uint64_t *b, uint64_t *f) +{ + uint64_t f30 = f[3U]; + uint64_t top_bit0 = f30 >> (uint32_t)63U; + f[3U] = f30 & (uint64_t)0x7fffffffffffffffU; + add_scalar0(f, f, (uint64_t)19U * top_bit0); + uint64_t f31 = f[3U]; + uint64_t top_bit = f31 >> (uint32_t)63U; + f[3U] = f31 & (uint64_t)0x7fffffffffffffffU; + add_scalar0(f, f, (uint64_t)19U * top_bit); + uint64_t f0 = f[0U]; + uint64_t f1 = f[1U]; + uint64_t f2 = f[2U]; + uint64_t f3 = f[3U]; + uint64_t m0 = FStar_UInt64_gte_mask(f0, (uint64_t)0xffffffffffffffedU); + uint64_t m1 = FStar_UInt64_eq_mask(f1, (uint64_t)0xffffffffffffffffU); + uint64_t m2 = FStar_UInt64_eq_mask(f2, 
(uint64_t)0xffffffffffffffffU); + uint64_t m3 = FStar_UInt64_eq_mask(f3, (uint64_t)0x7fffffffffffffffU); + uint64_t mask = ((m0 & m1) & m2) & m3; + uint64_t f0_ = f0 - (mask & (uint64_t)0xffffffffffffffedU); + uint64_t f1_ = f1 - (mask & (uint64_t)0xffffffffffffffffU); + uint64_t f2_ = f2 - (mask & (uint64_t)0xffffffffffffffffU); + uint64_t f3_ = f3 - (mask & (uint64_t)0x7fffffffffffffffU); + uint64_t o0 = f0_; + uint64_t o1 = f1_; + uint64_t o2 = f2_; + uint64_t o3 = f3_; + b[0U] = o0; + b[1U] = o1; + b[2U] = o2; + b[3U] = o3; +} + +/* Converts the projective pair in i (x at +0, z at +4) to 32 output bytes: tmp = finv(z) * x, store_felem reduces tmp into four 64-bit words, and each word is written to o with store64_le. */ static void +encode_point(uint8_t *o, uint64_t *i) +{ + uint64_t *x = i; + uint64_t *z = i + (uint32_t)4U; + uint64_t tmp[4U] = { 0U }; + uint64_t u64s[4U] = { 0U }; + uint64_t tmp_w[16U] = { 0U }; + finv(tmp, z, tmp_w); + fmul0(tmp, tmp, x, tmp_w); + store_felem(u64s, tmp); + KRML_MAYBE_FOR4(i0, + (uint32_t)0U, + (uint32_t)4U, + (uint32_t)1U, + store64_le(o + i0 * (uint32_t)8U, u64s[i0]);); +} + +/* Scalar multiplication entry point: out receives encode_point of the ladder result for scalar priv and point pub. pub is read as four 64-bit words via load64_le; the top bit of the last word is cleared and the four words are used directly as the limbs of x, with z set to 1. out, priv and pub are each accessed as 32 bytes. */ void +Hacl_Curve25519_64_scalarmult(uint8_t *out, uint8_t *priv, uint8_t *pub) +{ + uint64_t init[8U] = { 0U }; + uint64_t tmp[4U] = { 0U }; + KRML_MAYBE_FOR4(i, + (uint32_t)0U, + (uint32_t)4U, + (uint32_t)1U, + uint64_t *os = tmp; + uint8_t *bj = pub + i * (uint32_t)8U; + uint64_t u = load64_le(bj); + uint64_t r = u; + uint64_t x = r; + os[i] = x;); + uint64_t tmp3 = tmp[3U]; + tmp[3U] = tmp3 & (uint64_t)0x7fffffffffffffffU; + uint64_t *x = init; + uint64_t *z = init + (uint32_t)4U; + z[0U] = (uint64_t)1U; + z[1U] = (uint64_t)0U; + z[2U] = (uint64_t)0U; + z[3U] = (uint64_t)0U; + x[0U] = tmp[0U]; + x[1U] = tmp[1U]; + x[2U] = tmp[2U]; + x[3U] = tmp[3U]; + montgomery_ladder(init, priv, init); + encode_point(out, init); +} + +/* Copies g25519 into a local 32-byte base point and calls Hacl_Curve25519_64_scalarmult(pub, priv, basepoint). */ void +Hacl_Curve25519_64_secret_to_public(uint8_t *pub, uint8_t *priv) +{ + uint8_t basepoint[32U] = { 0U }; + for (uint32_t i = (uint32_t)0U; i < (uint32_t)32U; i++) { + uint8_t *os = basepoint; + uint8_t x = g25519[i]; + os[i] = x; + } + Hacl_Curve25519_64_scalarmult(pub, priv, basepoint); +} + +/* Runs Hacl_Curve25519_64_scalarmult into out, then ANDs FStar_UInt8_eq_mask(out[i], 0) over all 32 bytes with no early exit. Returns false when the accumulated mask equals 0xFF and true otherwise; out is written in both cases. NOTE(review): assumes eq_mask yields 0xFF for equal bytes, i.e. false means an all-zero result -- confirm. */ bool 
+Hacl_Curve25519_64_ecdh(uint8_t *out, uint8_t *priv, uint8_t *pub) +{ + uint8_t zeros[32U] = { 0U }; + Hacl_Curve25519_64_scalarmult(out, priv, pub); + uint8_t res = (uint8_t)255U; + for (uint32_t i = (uint32_t)0U; i < (uint32_t)32U; i++) { + uint8_t uu____0 = FStar_UInt8_eq_mask(out[i], zeros[i]); + res = uu____0 & res; + } + uint8_t z = res; + bool r = z == (uint8_t)255U; + return !r; +} diff --git a/security/nss/lib/freebl/verified/Hacl_Curve25519_64.h b/security/nss/lib/freebl/verified/Hacl_Curve25519_64.h new file mode 100644 index 0000000000..e9dec2b9a0 --- /dev/null +++ b/security/nss/lib/freebl/verified/Hacl_Curve25519_64.h @@ -0,0 +1,49 @@ +/* MIT License + * + * Copyright (c) 2016-2020 INRIA, CMU and Microsoft Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#ifndef __Hacl_Curve25519_64_H +#define __Hacl_Curve25519_64_H + +#if defined(__cplusplus) +extern "C" { +#endif + +#include <string.h> +#include "krml/internal/types.h" +#include "krml/lowstar_endianness.h" +#include "krml/internal/target.h" + +#include "Hacl_Krmllib.h" + +/* Scalar multiplication: writes 32 bytes to out, computed from the 32-byte scalar priv and the 32-byte point encoding pub. */ void Hacl_Curve25519_64_scalarmult(uint8_t *out, uint8_t *priv, uint8_t *pub); + +/* Writes to pub the scalarmult result of priv with a fixed base point whose encoding is the byte 9 followed by 31 zero bytes. */ void Hacl_Curve25519_64_secret_to_public(uint8_t *pub, uint8_t *priv); + +/* Same computation as scalarmult, followed by a comparison of all 32 output bytes against zero; the boolean result reports that comparison and out is written regardless. NOTE(review): false presumably signals an all-zero shared secret -- confirm. */ bool Hacl_Curve25519_64_ecdh(uint8_t *out, uint8_t *priv, uint8_t *pub); + +#if defined(__cplusplus) +} +#endif + +#define __Hacl_Curve25519_64_H_DEFINED +#endif diff --git a/security/nss/lib/freebl/verified/Hacl_Krmllib.h b/security/nss/lib/freebl/verified/Hacl_Krmllib.h new file mode 100644 index 0000000000..453492b5cc --- /dev/null +++ b/security/nss/lib/freebl/verified/Hacl_Krmllib.h @@ -0,0 +1,81 @@ +/* MIT License + * + * Copyright (c) 2016-2020 INRIA, CMU and Microsoft Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef __Hacl_Krmllib_H +#define __Hacl_Krmllib_H + +#if defined(__cplusplus) +extern "C" { +#endif + +#include <string.h> +#include "krml/internal/types.h" +#include "krml/lowstar_endianness.h" +#include "krml/internal/target.h" + /* Forward declarations of static inline integer helpers; no bodies appear in this header. NOTE(review): the definitions presumably come from the krml headers included above -- confirm. The eq_mask and gte_mask helpers take two operands and return a value of the same width; Hacl_Curve25519_64_ecdh, for example, treats 0xFF from FStar_UInt8_eq_mask as its matched value. */ +static inline uint32_t FStar_UInt32_eq_mask(uint32_t a, uint32_t b); + +static inline uint32_t FStar_UInt32_gte_mask(uint32_t a, uint32_t b); + +static inline uint8_t FStar_UInt8_eq_mask(uint8_t a, uint8_t b); + +static inline uint64_t FStar_UInt64_eq_mask(uint64_t a, uint64_t b); + +static inline uint64_t FStar_UInt64_gte_mask(uint64_t a, uint64_t b); + +static inline uint16_t FStar_UInt16_eq_mask(uint16_t a, uint16_t b); + /* Helpers over FStar_UInt128_uint128, a type declared outside this header. NOTE(review): by their names these are 128-bit add, modular add and subtract, bitwise or, shifts, width conversions and a 64x64 widening multiply -- confirm overflow behaviour against the definitions. */ +static inline FStar_UInt128_uint128 +FStar_UInt128_add(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b); + +static inline FStar_UInt128_uint128 +FStar_UInt128_add_mod(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b); + +static inline FStar_UInt128_uint128 +FStar_UInt128_sub_mod(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b); + +static inline FStar_UInt128_uint128 +FStar_UInt128_logor(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b); + +static inline FStar_UInt128_uint128 +FStar_UInt128_shift_left(FStar_UInt128_uint128 a, uint32_t s); + +static inline FStar_UInt128_uint128 +FStar_UInt128_shift_right(FStar_UInt128_uint128 a, uint32_t s); + +static inline FStar_UInt128_uint128 FStar_UInt128_uint64_to_uint128(uint64_t a); + +static inline uint64_t FStar_UInt128_uint128_to_uint64(FStar_UInt128_uint128 a); + +static inline FStar_UInt128_uint128 FStar_UInt128_mul_wide(uint64_t x, uint64_t y); + /* NOTE(review): presumably big-endian store and load of a 128-bit value at x0 -- confirm against the definitions. */ +static inline void store128_be(uint8_t *x0, FStar_UInt128_uint128 x1); + +static inline FStar_UInt128_uint128 load128_be(uint8_t *x0); + +#if 
defined(__cplusplus) +} +#endif + +#define __Hacl_Krmllib_H_DEFINED +#endif diff --git a/security/nss/lib/freebl/verified/Hacl_Poly1305_128.c b/security/nss/lib/freebl/verified/Hacl_Poly1305_128.c new file mode 100644 index 0000000000..ae8570c751 --- /dev/null +++ b/security/nss/lib/freebl/verified/Hacl_Poly1305_128.c @@ -0,0 +1,1616 @@ +/* MIT License + * + * Copyright (c) 2016-2020 INRIA, CMU and Microsoft Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "internal/Hacl_Poly1305_128.h" + /* Hacl_Impl_Poly1305_Field32xN_128_load_acc2: reads input with two vec128_load64_le calls (at b and at b + 16), splits it into five limbs of 26 bits each (mask 0x3ffffff), ORs 0x1000000 into the top limb, and adds the limbs to acc[0..4] after passing each acc[i] through vec128_insert64(acc[i], 0, 1). acc: five-element vector accumulator, updated in place. b: input bytes. NOTE(review): presumably 32 readable bytes (two 16-byte blocks) are required -- confirm against the intrinsic definitions. */ +void +Hacl_Impl_Poly1305_Field32xN_128_load_acc2(Lib_IntVector_Intrinsics_vec128 *acc, uint8_t *b) +{ + KRML_PRE_ALIGN(16) + Lib_IntVector_Intrinsics_vec128 e[5U] KRML_POST_ALIGN(16) = { 0U }; + Lib_IntVector_Intrinsics_vec128 b1 = Lib_IntVector_Intrinsics_vec128_load64_le(b); + Lib_IntVector_Intrinsics_vec128 + b2 = Lib_IntVector_Intrinsics_vec128_load64_le(b + (uint32_t)16U); + /* Regroup the two loads: lo collects the low 64-bit words, hi the high 64-bit words. */ Lib_IntVector_Intrinsics_vec128 lo = Lib_IntVector_Intrinsics_vec128_interleave_low64(b1, b2); + Lib_IntVector_Intrinsics_vec128 hi = Lib_IntVector_Intrinsics_vec128_interleave_high64(b1, b2); + /* Radix-2^26 split of the 128-bit value hi:lo into limbs covering bits 0-25, 26-51, 52-77, 78-103 and 104-127. */ Lib_IntVector_Intrinsics_vec128 + f00 = + Lib_IntVector_Intrinsics_vec128_and(lo, + Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec128 + f10 = + Lib_IntVector_Intrinsics_vec128_and(Lib_IntVector_Intrinsics_vec128_shift_right64(lo, + (uint32_t)26U), + Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec128 + f20 = + Lib_IntVector_Intrinsics_vec128_or(Lib_IntVector_Intrinsics_vec128_shift_right64(lo, + (uint32_t)52U), + Lib_IntVector_Intrinsics_vec128_shift_left64(Lib_IntVector_Intrinsics_vec128_and(hi, + Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3fffU)), + (uint32_t)12U)); + Lib_IntVector_Intrinsics_vec128 + f30 = + Lib_IntVector_Intrinsics_vec128_and(Lib_IntVector_Intrinsics_vec128_shift_right64(hi, + (uint32_t)14U), + Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec128 + f40 = Lib_IntVector_Intrinsics_vec128_shift_right64(hi, (uint32_t)40U); + Lib_IntVector_Intrinsics_vec128 f02 = f00; + Lib_IntVector_Intrinsics_vec128 f12 = f10; + Lib_IntVector_Intrinsics_vec128 f22 = f20; + Lib_IntVector_Intrinsics_vec128 f32 = f30; + Lib_IntVector_Intrinsics_vec128 f42 = f40; + e[0U] = f02; + e[1U] = f12; + e[2U] = f22; + e[3U] = f32; + e[4U] = f42; + /* 0x1000000 is bit 24 of the top limb; that limb starts at bit 104, so this sets bit 128 of the value. */ uint64_t b10 = (uint64_t)0x1000000U; + Lib_IntVector_Intrinsics_vec128 
mask = Lib_IntVector_Intrinsics_vec128_load64(b10); + Lib_IntVector_Intrinsics_vec128 f43 = e[4U]; + e[4U] = Lib_IntVector_Intrinsics_vec128_or(f43, mask); + Lib_IntVector_Intrinsics_vec128 acc0 = acc[0U]; + Lib_IntVector_Intrinsics_vec128 acc1 = acc[1U]; + Lib_IntVector_Intrinsics_vec128 acc2 = acc[2U]; + Lib_IntVector_Intrinsics_vec128 acc3 = acc[3U]; + Lib_IntVector_Intrinsics_vec128 acc4 = acc[4U]; + Lib_IntVector_Intrinsics_vec128 e0 = e[0U]; + Lib_IntVector_Intrinsics_vec128 e1 = e[1U]; + Lib_IntVector_Intrinsics_vec128 e2 = e[2U]; + Lib_IntVector_Intrinsics_vec128 e3 = e[3U]; + Lib_IntVector_Intrinsics_vec128 e4 = e[4U]; + /* NOTE(review): insert64(x, 0, 1) presumably overwrites 64-bit lane 1 of each accumulator limb with zero -- confirm against the intrinsic definition. */ Lib_IntVector_Intrinsics_vec128 + f0 = Lib_IntVector_Intrinsics_vec128_insert64(acc0, (uint64_t)0U, (uint32_t)1U); + Lib_IntVector_Intrinsics_vec128 + f1 = Lib_IntVector_Intrinsics_vec128_insert64(acc1, (uint64_t)0U, (uint32_t)1U); + Lib_IntVector_Intrinsics_vec128 + f2 = Lib_IntVector_Intrinsics_vec128_insert64(acc2, (uint64_t)0U, (uint32_t)1U); + Lib_IntVector_Intrinsics_vec128 + f3 = Lib_IntVector_Intrinsics_vec128_insert64(acc3, (uint64_t)0U, (uint32_t)1U); + Lib_IntVector_Intrinsics_vec128 + f4 = Lib_IntVector_Intrinsics_vec128_insert64(acc4, (uint64_t)0U, (uint32_t)1U); + /* Limb-wise addition of the freshly loaded limbs into the accumulator. */ Lib_IntVector_Intrinsics_vec128 f01 = Lib_IntVector_Intrinsics_vec128_add64(f0, e0); + Lib_IntVector_Intrinsics_vec128 f11 = Lib_IntVector_Intrinsics_vec128_add64(f1, e1); + Lib_IntVector_Intrinsics_vec128 f21 = Lib_IntVector_Intrinsics_vec128_add64(f2, e2); + Lib_IntVector_Intrinsics_vec128 f31 = Lib_IntVector_Intrinsics_vec128_add64(f3, e3); + Lib_IntVector_Intrinsics_vec128 f41 = Lib_IntVector_Intrinsics_vec128_add64(f4, e4); + Lib_IntVector_Intrinsics_vec128 acc01 = f01; + Lib_IntVector_Intrinsics_vec128 acc11 = f11; + Lib_IntVector_Intrinsics_vec128 acc21 = f21; + Lib_IntVector_Intrinsics_vec128 acc31 = f31; + Lib_IntVector_Intrinsics_vec128 acc41 = f41; + acc[0U] = acc01; + acc[1U] = acc11; + acc[2U] = acc21; + acc[3U] = acc31; + acc[4U] = acc41; +} + +void 
+Hacl_Impl_Poly1305_Field32xN_128_fmul_r2_normalize( + Lib_IntVector_Intrinsics_vec128 *out, + Lib_IntVector_Intrinsics_vec128 *p) +{ + /* Multiplies the five-limb value in out[0..4] by a multiplier built from p[0..4] (named r) and p[10..14] (named r2), carry-reduces the product, combines the vector lanes, runs a final carry chain, and writes the result back to out[0..4]. NOTE(review): p presumably is the precomputed table filled by Hacl_Poly1305_128_poly1305_init (r at offset 0, its square at offset 10) -- confirm at the call sites. */ Lib_IntVector_Intrinsics_vec128 *r = p; + Lib_IntVector_Intrinsics_vec128 *r2 = p + (uint32_t)10U; + Lib_IntVector_Intrinsics_vec128 a0 = out[0U]; + Lib_IntVector_Intrinsics_vec128 a1 = out[1U]; + Lib_IntVector_Intrinsics_vec128 a2 = out[2U]; + Lib_IntVector_Intrinsics_vec128 a3 = out[3U]; + Lib_IntVector_Intrinsics_vec128 a4 = out[4U]; + Lib_IntVector_Intrinsics_vec128 r10 = r[0U]; + Lib_IntVector_Intrinsics_vec128 r11 = r[1U]; + Lib_IntVector_Intrinsics_vec128 r12 = r[2U]; + Lib_IntVector_Intrinsics_vec128 r13 = r[3U]; + Lib_IntVector_Intrinsics_vec128 r14 = r[4U]; + Lib_IntVector_Intrinsics_vec128 r20 = r2[0U]; + Lib_IntVector_Intrinsics_vec128 r21 = r2[1U]; + Lib_IntVector_Intrinsics_vec128 r22 = r2[2U]; + Lib_IntVector_Intrinsics_vec128 r23 = r2[3U]; + Lib_IntVector_Intrinsics_vec128 r24 = r2[4U]; + /* Pair limb i of r2 with limb i of r in a single vector. */ Lib_IntVector_Intrinsics_vec128 + r201 = Lib_IntVector_Intrinsics_vec128_interleave_low64(r20, r10); + Lib_IntVector_Intrinsics_vec128 + r211 = Lib_IntVector_Intrinsics_vec128_interleave_low64(r21, r11); + Lib_IntVector_Intrinsics_vec128 + r221 = Lib_IntVector_Intrinsics_vec128_interleave_low64(r22, r12); + Lib_IntVector_Intrinsics_vec128 + r231 = Lib_IntVector_Intrinsics_vec128_interleave_low64(r23, r13); + Lib_IntVector_Intrinsics_vec128 + r241 = Lib_IntVector_Intrinsics_vec128_interleave_low64(r24, r14); + /* Limbs 1..4 times 5, used for the product terms that wrap past the top limb. */ Lib_IntVector_Intrinsics_vec128 + r251 = Lib_IntVector_Intrinsics_vec128_smul64(r211, (uint64_t)5U); + Lib_IntVector_Intrinsics_vec128 + r252 = Lib_IntVector_Intrinsics_vec128_smul64(r221, (uint64_t)5U); + Lib_IntVector_Intrinsics_vec128 + r253 = Lib_IntVector_Intrinsics_vec128_smul64(r231, (uint64_t)5U); + Lib_IntVector_Intrinsics_vec128 + r254 = Lib_IntVector_Intrinsics_vec128_smul64(r241, (uint64_t)5U); + /* Schoolbook 5x5 limb multiplication; wrapped terms use the times-5 limbs, consistent with reduction modulo 2^130 - 5. */ Lib_IntVector_Intrinsics_vec128 a01 = Lib_IntVector_Intrinsics_vec128_mul64(r201, a0); + Lib_IntVector_Intrinsics_vec128 
a11 = Lib_IntVector_Intrinsics_vec128_mul64(r211, a0); + Lib_IntVector_Intrinsics_vec128 a21 = Lib_IntVector_Intrinsics_vec128_mul64(r221, a0); + Lib_IntVector_Intrinsics_vec128 a31 = Lib_IntVector_Intrinsics_vec128_mul64(r231, a0); + Lib_IntVector_Intrinsics_vec128 a41 = Lib_IntVector_Intrinsics_vec128_mul64(r241, a0); + Lib_IntVector_Intrinsics_vec128 + a02 = + Lib_IntVector_Intrinsics_vec128_add64(a01, + Lib_IntVector_Intrinsics_vec128_mul64(r254, a1)); + Lib_IntVector_Intrinsics_vec128 + a12 = + Lib_IntVector_Intrinsics_vec128_add64(a11, + Lib_IntVector_Intrinsics_vec128_mul64(r201, a1)); + Lib_IntVector_Intrinsics_vec128 + a22 = + Lib_IntVector_Intrinsics_vec128_add64(a21, + Lib_IntVector_Intrinsics_vec128_mul64(r211, a1)); + Lib_IntVector_Intrinsics_vec128 + a32 = + Lib_IntVector_Intrinsics_vec128_add64(a31, + Lib_IntVector_Intrinsics_vec128_mul64(r221, a1)); + Lib_IntVector_Intrinsics_vec128 + a42 = + Lib_IntVector_Intrinsics_vec128_add64(a41, + Lib_IntVector_Intrinsics_vec128_mul64(r231, a1)); + Lib_IntVector_Intrinsics_vec128 + a03 = + Lib_IntVector_Intrinsics_vec128_add64(a02, + Lib_IntVector_Intrinsics_vec128_mul64(r253, a2)); + Lib_IntVector_Intrinsics_vec128 + a13 = + Lib_IntVector_Intrinsics_vec128_add64(a12, + Lib_IntVector_Intrinsics_vec128_mul64(r254, a2)); + Lib_IntVector_Intrinsics_vec128 + a23 = + Lib_IntVector_Intrinsics_vec128_add64(a22, + Lib_IntVector_Intrinsics_vec128_mul64(r201, a2)); + Lib_IntVector_Intrinsics_vec128 + a33 = + Lib_IntVector_Intrinsics_vec128_add64(a32, + Lib_IntVector_Intrinsics_vec128_mul64(r211, a2)); + Lib_IntVector_Intrinsics_vec128 + a43 = + Lib_IntVector_Intrinsics_vec128_add64(a42, + Lib_IntVector_Intrinsics_vec128_mul64(r221, a2)); + Lib_IntVector_Intrinsics_vec128 + a04 = + Lib_IntVector_Intrinsics_vec128_add64(a03, + Lib_IntVector_Intrinsics_vec128_mul64(r252, a3)); + Lib_IntVector_Intrinsics_vec128 + a14 = + Lib_IntVector_Intrinsics_vec128_add64(a13, + Lib_IntVector_Intrinsics_vec128_mul64(r253, a3)); + 
Lib_IntVector_Intrinsics_vec128 + a24 = + Lib_IntVector_Intrinsics_vec128_add64(a23, + Lib_IntVector_Intrinsics_vec128_mul64(r254, a3)); + Lib_IntVector_Intrinsics_vec128 + a34 = + Lib_IntVector_Intrinsics_vec128_add64(a33, + Lib_IntVector_Intrinsics_vec128_mul64(r201, a3)); + Lib_IntVector_Intrinsics_vec128 + a44 = + Lib_IntVector_Intrinsics_vec128_add64(a43, + Lib_IntVector_Intrinsics_vec128_mul64(r211, a3)); + Lib_IntVector_Intrinsics_vec128 + a05 = + Lib_IntVector_Intrinsics_vec128_add64(a04, + Lib_IntVector_Intrinsics_vec128_mul64(r251, a4)); + Lib_IntVector_Intrinsics_vec128 + a15 = + Lib_IntVector_Intrinsics_vec128_add64(a14, + Lib_IntVector_Intrinsics_vec128_mul64(r252, a4)); + Lib_IntVector_Intrinsics_vec128 + a25 = + Lib_IntVector_Intrinsics_vec128_add64(a24, + Lib_IntVector_Intrinsics_vec128_mul64(r253, a4)); + Lib_IntVector_Intrinsics_vec128 + a35 = + Lib_IntVector_Intrinsics_vec128_add64(a34, + Lib_IntVector_Intrinsics_vec128_mul64(r254, a4)); + Lib_IntVector_Intrinsics_vec128 + a45 = + Lib_IntVector_Intrinsics_vec128_add64(a44, + Lib_IntVector_Intrinsics_vec128_mul64(r201, a4)); + Lib_IntVector_Intrinsics_vec128 t0 = a05; + Lib_IntVector_Intrinsics_vec128 t1 = a15; + Lib_IntVector_Intrinsics_vec128 t2 = a25; + Lib_IntVector_Intrinsics_vec128 t3 = a35; + Lib_IntVector_Intrinsics_vec128 t4 = a45; + /* Carry propagation in radix 2^26: carries move up one limb, and the carry out of limb 4 is multiplied by 5 (z11 + (z11 << 2)) before being added to limb 0. */ Lib_IntVector_Intrinsics_vec128 + mask26 = Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU); + Lib_IntVector_Intrinsics_vec128 + z0 = Lib_IntVector_Intrinsics_vec128_shift_right64(t0, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec128 + z1 = Lib_IntVector_Intrinsics_vec128_shift_right64(t3, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec128 x0 = Lib_IntVector_Intrinsics_vec128_and(t0, mask26); + Lib_IntVector_Intrinsics_vec128 x3 = Lib_IntVector_Intrinsics_vec128_and(t3, mask26); + Lib_IntVector_Intrinsics_vec128 x1 = Lib_IntVector_Intrinsics_vec128_add64(t1, z0); + Lib_IntVector_Intrinsics_vec128 x4 = Lib_IntVector_Intrinsics_vec128_add64(t4, z1); 
+ Lib_IntVector_Intrinsics_vec128 + z01 = Lib_IntVector_Intrinsics_vec128_shift_right64(x1, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec128 + z11 = Lib_IntVector_Intrinsics_vec128_shift_right64(x4, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec128 + t = Lib_IntVector_Intrinsics_vec128_shift_left64(z11, (uint32_t)2U); + Lib_IntVector_Intrinsics_vec128 z12 = Lib_IntVector_Intrinsics_vec128_add64(z11, t); + Lib_IntVector_Intrinsics_vec128 x11 = Lib_IntVector_Intrinsics_vec128_and(x1, mask26); + Lib_IntVector_Intrinsics_vec128 x41 = Lib_IntVector_Intrinsics_vec128_and(x4, mask26); + Lib_IntVector_Intrinsics_vec128 x2 = Lib_IntVector_Intrinsics_vec128_add64(t2, z01); + Lib_IntVector_Intrinsics_vec128 x01 = Lib_IntVector_Intrinsics_vec128_add64(x0, z12); + Lib_IntVector_Intrinsics_vec128 + z02 = Lib_IntVector_Intrinsics_vec128_shift_right64(x2, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec128 + z13 = Lib_IntVector_Intrinsics_vec128_shift_right64(x01, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec128 x21 = Lib_IntVector_Intrinsics_vec128_and(x2, mask26); + Lib_IntVector_Intrinsics_vec128 x02 = Lib_IntVector_Intrinsics_vec128_and(x01, mask26); + Lib_IntVector_Intrinsics_vec128 x31 = Lib_IntVector_Intrinsics_vec128_add64(x3, z02); + Lib_IntVector_Intrinsics_vec128 x12 = Lib_IntVector_Intrinsics_vec128_add64(x11, z13); + Lib_IntVector_Intrinsics_vec128 + z03 = Lib_IntVector_Intrinsics_vec128_shift_right64(x31, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec128 x32 = Lib_IntVector_Intrinsics_vec128_and(x31, mask26); + Lib_IntVector_Intrinsics_vec128 x42 = Lib_IntVector_Intrinsics_vec128_add64(x41, z03); + Lib_IntVector_Intrinsics_vec128 o0 = x02; + Lib_IntVector_Intrinsics_vec128 o10 = x12; + Lib_IntVector_Intrinsics_vec128 o20 = x21; + Lib_IntVector_Intrinsics_vec128 o30 = x32; + Lib_IntVector_Intrinsics_vec128 o40 = x42; + /* NOTE(review): o + interleave_high64(o, o) presumably adds 64-bit lane 1 into lane 0, combining the two lanes -- confirm against the intrinsic definitions. */ Lib_IntVector_Intrinsics_vec128 + o01 = + Lib_IntVector_Intrinsics_vec128_add64(o0, + Lib_IntVector_Intrinsics_vec128_interleave_high64(o0, o0)); + 
Lib_IntVector_Intrinsics_vec128 + o11 = + Lib_IntVector_Intrinsics_vec128_add64(o10, + Lib_IntVector_Intrinsics_vec128_interleave_high64(o10, o10)); + Lib_IntVector_Intrinsics_vec128 + o21 = + Lib_IntVector_Intrinsics_vec128_add64(o20, + Lib_IntVector_Intrinsics_vec128_interleave_high64(o20, o20)); + Lib_IntVector_Intrinsics_vec128 + o31 = + Lib_IntVector_Intrinsics_vec128_add64(o30, + Lib_IntVector_Intrinsics_vec128_interleave_high64(o30, o30)); + Lib_IntVector_Intrinsics_vec128 + o41 = + Lib_IntVector_Intrinsics_vec128_add64(o40, + Lib_IntVector_Intrinsics_vec128_interleave_high64(o40, o40)); + /* Final sequential carry chain over the five limbs; the carry out of the top limb is multiplied by 5 and added to limb 0. */ Lib_IntVector_Intrinsics_vec128 + l = Lib_IntVector_Intrinsics_vec128_add64(o01, Lib_IntVector_Intrinsics_vec128_zero); + Lib_IntVector_Intrinsics_vec128 + tmp0 = + Lib_IntVector_Intrinsics_vec128_and(l, + Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec128 + c0 = Lib_IntVector_Intrinsics_vec128_shift_right64(l, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec128 l0 = Lib_IntVector_Intrinsics_vec128_add64(o11, c0); + Lib_IntVector_Intrinsics_vec128 + tmp1 = + Lib_IntVector_Intrinsics_vec128_and(l0, + Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec128 + c1 = Lib_IntVector_Intrinsics_vec128_shift_right64(l0, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec128 l1 = Lib_IntVector_Intrinsics_vec128_add64(o21, c1); + Lib_IntVector_Intrinsics_vec128 + tmp2 = + Lib_IntVector_Intrinsics_vec128_and(l1, + Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec128 + c2 = Lib_IntVector_Intrinsics_vec128_shift_right64(l1, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec128 l2 = Lib_IntVector_Intrinsics_vec128_add64(o31, c2); + Lib_IntVector_Intrinsics_vec128 + tmp3 = + Lib_IntVector_Intrinsics_vec128_and(l2, + Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec128 + c3 = Lib_IntVector_Intrinsics_vec128_shift_right64(l2, 
(uint32_t)26U); + Lib_IntVector_Intrinsics_vec128 l3 = Lib_IntVector_Intrinsics_vec128_add64(o41, c3); + Lib_IntVector_Intrinsics_vec128 + tmp4 = + Lib_IntVector_Intrinsics_vec128_and(l3, + Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec128 + c4 = Lib_IntVector_Intrinsics_vec128_shift_right64(l3, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec128 + o00 = + Lib_IntVector_Intrinsics_vec128_add64(tmp0, + Lib_IntVector_Intrinsics_vec128_smul64(c4, (uint64_t)5U)); + Lib_IntVector_Intrinsics_vec128 o1 = tmp1; + Lib_IntVector_Intrinsics_vec128 o2 = tmp2; + Lib_IntVector_Intrinsics_vec128 o3 = tmp3; + Lib_IntVector_Intrinsics_vec128 o4 = tmp4; + out[0U] = o00; + out[1U] = o1; + out[2U] = o2; + out[3U] = o3; + out[4U] = o4; +} + +void +Hacl_Poly1305_128_poly1305_init(Lib_IntVector_Intrinsics_vec128 *ctx, uint8_t *key) +{ + /* Hacl_Poly1305_128_poly1305_init: zeroes the accumulator ctx[0..4] and fills the precomputation area pre = ctx + 5 from two 64-bit little-endian loads at key and key + 8. Layout written by this function: pre[0..4] = r (masked key as five 26-bit limbs), pre[5..9] = 5 * r, pre[10..14] = rn (r multiplied by r, carry-reduced), pre[15..19] = 5 * rn. */ Lib_IntVector_Intrinsics_vec128 *acc = ctx; + Lib_IntVector_Intrinsics_vec128 *pre = ctx + (uint32_t)5U; + uint8_t *kr = key; + acc[0U] = Lib_IntVector_Intrinsics_vec128_zero; + acc[1U] = Lib_IntVector_Intrinsics_vec128_zero; + acc[2U] = Lib_IntVector_Intrinsics_vec128_zero; + acc[3U] = Lib_IntVector_Intrinsics_vec128_zero; + acc[4U] = Lib_IntVector_Intrinsics_vec128_zero; + uint64_t u0 = load64_le(kr); + uint64_t lo = u0; + uint64_t u = load64_le(kr + (uint32_t)8U); + uint64_t hi = u; + /* Bit masks applied to the two key words before use (NOTE(review): these match the Poly1305 clamping of r -- confirm against the specification). */ uint64_t mask0 = (uint64_t)0x0ffffffc0fffffffU; + uint64_t mask1 = (uint64_t)0x0ffffffc0ffffffcU; + uint64_t lo1 = lo & mask0; + uint64_t hi1 = hi & mask1; + Lib_IntVector_Intrinsics_vec128 *r = pre; + Lib_IntVector_Intrinsics_vec128 *r5 = pre + (uint32_t)5U; + Lib_IntVector_Intrinsics_vec128 *rn = pre + (uint32_t)10U; + Lib_IntVector_Intrinsics_vec128 *rn_5 = pre + (uint32_t)15U; + Lib_IntVector_Intrinsics_vec128 r_vec0 = Lib_IntVector_Intrinsics_vec128_load64(lo1); + Lib_IntVector_Intrinsics_vec128 r_vec1 = Lib_IntVector_Intrinsics_vec128_load64(hi1); + Lib_IntVector_Intrinsics_vec128 + f00 = + 
Lib_IntVector_Intrinsics_vec128_and(r_vec0, + Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU)); + /* Remaining limbs of the radix-2^26 split of the masked key words (r_vec0 is the low word, r_vec1 the high word). */ Lib_IntVector_Intrinsics_vec128 + f15 = + Lib_IntVector_Intrinsics_vec128_and(Lib_IntVector_Intrinsics_vec128_shift_right64(r_vec0, + (uint32_t)26U), + Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec128 + f20 = + Lib_IntVector_Intrinsics_vec128_or(Lib_IntVector_Intrinsics_vec128_shift_right64(r_vec0, + (uint32_t)52U), + Lib_IntVector_Intrinsics_vec128_shift_left64(Lib_IntVector_Intrinsics_vec128_and(r_vec1, + Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3fffU)), + (uint32_t)12U)); + Lib_IntVector_Intrinsics_vec128 + f30 = + Lib_IntVector_Intrinsics_vec128_and(Lib_IntVector_Intrinsics_vec128_shift_right64(r_vec1, + (uint32_t)14U), + Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec128 + f40 = Lib_IntVector_Intrinsics_vec128_shift_right64(r_vec1, (uint32_t)40U); + Lib_IntVector_Intrinsics_vec128 f0 = f00; + Lib_IntVector_Intrinsics_vec128 f1 = f15; + Lib_IntVector_Intrinsics_vec128 f2 = f20; + Lib_IntVector_Intrinsics_vec128 f3 = f30; + Lib_IntVector_Intrinsics_vec128 f4 = f40; + r[0U] = f0; + r[1U] = f1; + r[2U] = f2; + r[3U] = f3; + r[4U] = f4; + Lib_IntVector_Intrinsics_vec128 f200 = r[0U]; + Lib_IntVector_Intrinsics_vec128 f210 = r[1U]; + Lib_IntVector_Intrinsics_vec128 f220 = r[2U]; + Lib_IntVector_Intrinsics_vec128 f230 = r[3U]; + Lib_IntVector_Intrinsics_vec128 f240 = r[4U]; + /* r5[i] = 5 * r[i]. */ r5[0U] = Lib_IntVector_Intrinsics_vec128_smul64(f200, (uint64_t)5U); + r5[1U] = Lib_IntVector_Intrinsics_vec128_smul64(f210, (uint64_t)5U); + r5[2U] = Lib_IntVector_Intrinsics_vec128_smul64(f220, (uint64_t)5U); + r5[3U] = Lib_IntVector_Intrinsics_vec128_smul64(f230, (uint64_t)5U); + r5[4U] = Lib_IntVector_Intrinsics_vec128_smul64(f240, (uint64_t)5U); + Lib_IntVector_Intrinsics_vec128 r0 = r[0U]; + Lib_IntVector_Intrinsics_vec128 r1 = r[1U]; + Lib_IntVector_Intrinsics_vec128 r2 = 
r[2U]; + Lib_IntVector_Intrinsics_vec128 r3 = r[3U]; + Lib_IntVector_Intrinsics_vec128 r4 = r[4U]; + Lib_IntVector_Intrinsics_vec128 r51 = r5[1U]; + Lib_IntVector_Intrinsics_vec128 r52 = r5[2U]; + Lib_IntVector_Intrinsics_vec128 r53 = r5[3U]; + Lib_IntVector_Intrinsics_vec128 r54 = r5[4U]; + Lib_IntVector_Intrinsics_vec128 f10 = r[0U]; + Lib_IntVector_Intrinsics_vec128 f11 = r[1U]; + Lib_IntVector_Intrinsics_vec128 f12 = r[2U]; + Lib_IntVector_Intrinsics_vec128 f13 = r[3U]; + Lib_IntVector_Intrinsics_vec128 f14 = r[4U]; + /* r times r: both operands are read from r[0..4]; schoolbook limb multiplication with the wrapped terms taken from r5. */ Lib_IntVector_Intrinsics_vec128 a0 = Lib_IntVector_Intrinsics_vec128_mul64(r0, f10); + Lib_IntVector_Intrinsics_vec128 a1 = Lib_IntVector_Intrinsics_vec128_mul64(r1, f10); + Lib_IntVector_Intrinsics_vec128 a2 = Lib_IntVector_Intrinsics_vec128_mul64(r2, f10); + Lib_IntVector_Intrinsics_vec128 a3 = Lib_IntVector_Intrinsics_vec128_mul64(r3, f10); + Lib_IntVector_Intrinsics_vec128 a4 = Lib_IntVector_Intrinsics_vec128_mul64(r4, f10); + Lib_IntVector_Intrinsics_vec128 + a01 = + Lib_IntVector_Intrinsics_vec128_add64(a0, + Lib_IntVector_Intrinsics_vec128_mul64(r54, f11)); + Lib_IntVector_Intrinsics_vec128 + a11 = Lib_IntVector_Intrinsics_vec128_add64(a1, Lib_IntVector_Intrinsics_vec128_mul64(r0, f11)); + Lib_IntVector_Intrinsics_vec128 + a21 = Lib_IntVector_Intrinsics_vec128_add64(a2, Lib_IntVector_Intrinsics_vec128_mul64(r1, f11)); + Lib_IntVector_Intrinsics_vec128 + a31 = Lib_IntVector_Intrinsics_vec128_add64(a3, Lib_IntVector_Intrinsics_vec128_mul64(r2, f11)); + Lib_IntVector_Intrinsics_vec128 + a41 = Lib_IntVector_Intrinsics_vec128_add64(a4, Lib_IntVector_Intrinsics_vec128_mul64(r3, f11)); + Lib_IntVector_Intrinsics_vec128 + a02 = + Lib_IntVector_Intrinsics_vec128_add64(a01, + Lib_IntVector_Intrinsics_vec128_mul64(r53, f12)); + Lib_IntVector_Intrinsics_vec128 + a12 = + Lib_IntVector_Intrinsics_vec128_add64(a11, + Lib_IntVector_Intrinsics_vec128_mul64(r54, f12)); + Lib_IntVector_Intrinsics_vec128 + a22 = + Lib_IntVector_Intrinsics_vec128_add64(a21, + 
Lib_IntVector_Intrinsics_vec128_mul64(r0, f12)); + Lib_IntVector_Intrinsics_vec128 + a32 = + Lib_IntVector_Intrinsics_vec128_add64(a31, + Lib_IntVector_Intrinsics_vec128_mul64(r1, f12)); + Lib_IntVector_Intrinsics_vec128 + a42 = + Lib_IntVector_Intrinsics_vec128_add64(a41, + Lib_IntVector_Intrinsics_vec128_mul64(r2, f12)); + Lib_IntVector_Intrinsics_vec128 + a03 = + Lib_IntVector_Intrinsics_vec128_add64(a02, + Lib_IntVector_Intrinsics_vec128_mul64(r52, f13)); + Lib_IntVector_Intrinsics_vec128 + a13 = + Lib_IntVector_Intrinsics_vec128_add64(a12, + Lib_IntVector_Intrinsics_vec128_mul64(r53, f13)); + Lib_IntVector_Intrinsics_vec128 + a23 = + Lib_IntVector_Intrinsics_vec128_add64(a22, + Lib_IntVector_Intrinsics_vec128_mul64(r54, f13)); + Lib_IntVector_Intrinsics_vec128 + a33 = + Lib_IntVector_Intrinsics_vec128_add64(a32, + Lib_IntVector_Intrinsics_vec128_mul64(r0, f13)); + Lib_IntVector_Intrinsics_vec128 + a43 = + Lib_IntVector_Intrinsics_vec128_add64(a42, + Lib_IntVector_Intrinsics_vec128_mul64(r1, f13)); + Lib_IntVector_Intrinsics_vec128 + a04 = + Lib_IntVector_Intrinsics_vec128_add64(a03, + Lib_IntVector_Intrinsics_vec128_mul64(r51, f14)); + Lib_IntVector_Intrinsics_vec128 + a14 = + Lib_IntVector_Intrinsics_vec128_add64(a13, + Lib_IntVector_Intrinsics_vec128_mul64(r52, f14)); + Lib_IntVector_Intrinsics_vec128 + a24 = + Lib_IntVector_Intrinsics_vec128_add64(a23, + Lib_IntVector_Intrinsics_vec128_mul64(r53, f14)); + Lib_IntVector_Intrinsics_vec128 + a34 = + Lib_IntVector_Intrinsics_vec128_add64(a33, + Lib_IntVector_Intrinsics_vec128_mul64(r54, f14)); + Lib_IntVector_Intrinsics_vec128 + a44 = + Lib_IntVector_Intrinsics_vec128_add64(a43, + Lib_IntVector_Intrinsics_vec128_mul64(r0, f14)); + Lib_IntVector_Intrinsics_vec128 t0 = a04; + Lib_IntVector_Intrinsics_vec128 t1 = a14; + Lib_IntVector_Intrinsics_vec128 t2 = a24; + Lib_IntVector_Intrinsics_vec128 t3 = a34; + Lib_IntVector_Intrinsics_vec128 t4 = a44; + /* Carry propagation in radix 2^26: the carry out of limb 4 is multiplied by 5 (z11 + (z11 << 2)) and added to limb 0. */ Lib_IntVector_Intrinsics_vec128 + mask26 = 
Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU); + Lib_IntVector_Intrinsics_vec128 + z0 = Lib_IntVector_Intrinsics_vec128_shift_right64(t0, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec128 + z1 = Lib_IntVector_Intrinsics_vec128_shift_right64(t3, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec128 x0 = Lib_IntVector_Intrinsics_vec128_and(t0, mask26); + Lib_IntVector_Intrinsics_vec128 x3 = Lib_IntVector_Intrinsics_vec128_and(t3, mask26); + Lib_IntVector_Intrinsics_vec128 x1 = Lib_IntVector_Intrinsics_vec128_add64(t1, z0); + Lib_IntVector_Intrinsics_vec128 x4 = Lib_IntVector_Intrinsics_vec128_add64(t4, z1); + Lib_IntVector_Intrinsics_vec128 + z01 = Lib_IntVector_Intrinsics_vec128_shift_right64(x1, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec128 + z11 = Lib_IntVector_Intrinsics_vec128_shift_right64(x4, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec128 + t = Lib_IntVector_Intrinsics_vec128_shift_left64(z11, (uint32_t)2U); + Lib_IntVector_Intrinsics_vec128 z12 = Lib_IntVector_Intrinsics_vec128_add64(z11, t); + Lib_IntVector_Intrinsics_vec128 x11 = Lib_IntVector_Intrinsics_vec128_and(x1, mask26); + Lib_IntVector_Intrinsics_vec128 x41 = Lib_IntVector_Intrinsics_vec128_and(x4, mask26); + Lib_IntVector_Intrinsics_vec128 x2 = Lib_IntVector_Intrinsics_vec128_add64(t2, z01); + Lib_IntVector_Intrinsics_vec128 x01 = Lib_IntVector_Intrinsics_vec128_add64(x0, z12); + Lib_IntVector_Intrinsics_vec128 + z02 = Lib_IntVector_Intrinsics_vec128_shift_right64(x2, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec128 + z13 = Lib_IntVector_Intrinsics_vec128_shift_right64(x01, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec128 x21 = Lib_IntVector_Intrinsics_vec128_and(x2, mask26); + Lib_IntVector_Intrinsics_vec128 x02 = Lib_IntVector_Intrinsics_vec128_and(x01, mask26); + Lib_IntVector_Intrinsics_vec128 x31 = Lib_IntVector_Intrinsics_vec128_add64(x3, z02); + Lib_IntVector_Intrinsics_vec128 x12 = Lib_IntVector_Intrinsics_vec128_add64(x11, z13); + Lib_IntVector_Intrinsics_vec128 + z03 = 
Lib_IntVector_Intrinsics_vec128_shift_right64(x31, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec128 x32 = Lib_IntVector_Intrinsics_vec128_and(x31, mask26); + Lib_IntVector_Intrinsics_vec128 x42 = Lib_IntVector_Intrinsics_vec128_add64(x41, z03); + Lib_IntVector_Intrinsics_vec128 o0 = x02; + Lib_IntVector_Intrinsics_vec128 o1 = x12; + Lib_IntVector_Intrinsics_vec128 o2 = x21; + Lib_IntVector_Intrinsics_vec128 o3 = x32; + Lib_IntVector_Intrinsics_vec128 o4 = x42; + /* Store the carry-reduced product in rn[0..4]. */ rn[0U] = o0; + rn[1U] = o1; + rn[2U] = o2; + rn[3U] = o3; + rn[4U] = o4; + Lib_IntVector_Intrinsics_vec128 f201 = rn[0U]; + Lib_IntVector_Intrinsics_vec128 f21 = rn[1U]; + Lib_IntVector_Intrinsics_vec128 f22 = rn[2U]; + Lib_IntVector_Intrinsics_vec128 f23 = rn[3U]; + Lib_IntVector_Intrinsics_vec128 f24 = rn[4U]; + /* rn_5[i] = 5 * rn[i]. */ rn_5[0U] = Lib_IntVector_Intrinsics_vec128_smul64(f201, (uint64_t)5U); + rn_5[1U] = Lib_IntVector_Intrinsics_vec128_smul64(f21, (uint64_t)5U); + rn_5[2U] = Lib_IntVector_Intrinsics_vec128_smul64(f22, (uint64_t)5U); + rn_5[3U] = Lib_IntVector_Intrinsics_vec128_smul64(f23, (uint64_t)5U); + rn_5[4U] = Lib_IntVector_Intrinsics_vec128_smul64(f24, (uint64_t)5U); +} + +void +Hacl_Poly1305_128_poly1305_update1(Lib_IntVector_Intrinsics_vec128 *ctx, uint8_t *text) +{ + Lib_IntVector_Intrinsics_vec128 *pre = ctx + (uint32_t)5U; + Lib_IntVector_Intrinsics_vec128 *acc = ctx; + KRML_PRE_ALIGN(16) + Lib_IntVector_Intrinsics_vec128 e[5U] KRML_POST_ALIGN(16) = { 0U }; + uint64_t u0 = load64_le(text); + uint64_t lo = u0; + uint64_t u = load64_le(text + (uint32_t)8U); + uint64_t hi = u; + Lib_IntVector_Intrinsics_vec128 f0 = Lib_IntVector_Intrinsics_vec128_load64(lo); + Lib_IntVector_Intrinsics_vec128 f1 = Lib_IntVector_Intrinsics_vec128_load64(hi); + Lib_IntVector_Intrinsics_vec128 + f010 = + Lib_IntVector_Intrinsics_vec128_and(f0, + Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec128 + f110 = + 
Lib_IntVector_Intrinsics_vec128_and(Lib_IntVector_Intrinsics_vec128_shift_right64(f0, + (uint32_t)26U), + Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec128 + f20 = + Lib_IntVector_Intrinsics_vec128_or(Lib_IntVector_Intrinsics_vec128_shift_right64(f0, + (uint32_t)52U), + Lib_IntVector_Intrinsics_vec128_shift_left64(Lib_IntVector_Intrinsics_vec128_and(f1, + Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3fffU)), + (uint32_t)12U)); + Lib_IntVector_Intrinsics_vec128 + f30 = + Lib_IntVector_Intrinsics_vec128_and(Lib_IntVector_Intrinsics_vec128_shift_right64(f1, + (uint32_t)14U), + Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec128 + f40 = Lib_IntVector_Intrinsics_vec128_shift_right64(f1, (uint32_t)40U); + Lib_IntVector_Intrinsics_vec128 f01 = f010; + Lib_IntVector_Intrinsics_vec128 f111 = f110; + Lib_IntVector_Intrinsics_vec128 f2 = f20; + Lib_IntVector_Intrinsics_vec128 f3 = f30; + Lib_IntVector_Intrinsics_vec128 f41 = f40; + e[0U] = f01; + e[1U] = f111; + e[2U] = f2; + e[3U] = f3; + e[4U] = f41; + uint64_t b = (uint64_t)0x1000000U; + Lib_IntVector_Intrinsics_vec128 mask = Lib_IntVector_Intrinsics_vec128_load64(b); + Lib_IntVector_Intrinsics_vec128 f4 = e[4U]; + e[4U] = Lib_IntVector_Intrinsics_vec128_or(f4, mask); + Lib_IntVector_Intrinsics_vec128 *r = pre; + Lib_IntVector_Intrinsics_vec128 *r5 = pre + (uint32_t)5U; + Lib_IntVector_Intrinsics_vec128 r0 = r[0U]; + Lib_IntVector_Intrinsics_vec128 r1 = r[1U]; + Lib_IntVector_Intrinsics_vec128 r2 = r[2U]; + Lib_IntVector_Intrinsics_vec128 r3 = r[3U]; + Lib_IntVector_Intrinsics_vec128 r4 = r[4U]; + Lib_IntVector_Intrinsics_vec128 r51 = r5[1U]; + Lib_IntVector_Intrinsics_vec128 r52 = r5[2U]; + Lib_IntVector_Intrinsics_vec128 r53 = r5[3U]; + Lib_IntVector_Intrinsics_vec128 r54 = r5[4U]; + Lib_IntVector_Intrinsics_vec128 f10 = e[0U]; + Lib_IntVector_Intrinsics_vec128 f11 = e[1U]; + Lib_IntVector_Intrinsics_vec128 f12 = e[2U]; + 
Lib_IntVector_Intrinsics_vec128 f13 = e[3U]; + Lib_IntVector_Intrinsics_vec128 f14 = e[4U]; + Lib_IntVector_Intrinsics_vec128 a0 = acc[0U]; + Lib_IntVector_Intrinsics_vec128 a1 = acc[1U]; + Lib_IntVector_Intrinsics_vec128 a2 = acc[2U]; + Lib_IntVector_Intrinsics_vec128 a3 = acc[3U]; + Lib_IntVector_Intrinsics_vec128 a4 = acc[4U]; + Lib_IntVector_Intrinsics_vec128 a01 = Lib_IntVector_Intrinsics_vec128_add64(a0, f10); + Lib_IntVector_Intrinsics_vec128 a11 = Lib_IntVector_Intrinsics_vec128_add64(a1, f11); + Lib_IntVector_Intrinsics_vec128 a21 = Lib_IntVector_Intrinsics_vec128_add64(a2, f12); + Lib_IntVector_Intrinsics_vec128 a31 = Lib_IntVector_Intrinsics_vec128_add64(a3, f13); + Lib_IntVector_Intrinsics_vec128 a41 = Lib_IntVector_Intrinsics_vec128_add64(a4, f14); + Lib_IntVector_Intrinsics_vec128 a02 = Lib_IntVector_Intrinsics_vec128_mul64(r0, a01); + Lib_IntVector_Intrinsics_vec128 a12 = Lib_IntVector_Intrinsics_vec128_mul64(r1, a01); + Lib_IntVector_Intrinsics_vec128 a22 = Lib_IntVector_Intrinsics_vec128_mul64(r2, a01); + Lib_IntVector_Intrinsics_vec128 a32 = Lib_IntVector_Intrinsics_vec128_mul64(r3, a01); + Lib_IntVector_Intrinsics_vec128 a42 = Lib_IntVector_Intrinsics_vec128_mul64(r4, a01); + Lib_IntVector_Intrinsics_vec128 + a03 = + Lib_IntVector_Intrinsics_vec128_add64(a02, + Lib_IntVector_Intrinsics_vec128_mul64(r54, a11)); + Lib_IntVector_Intrinsics_vec128 + a13 = + Lib_IntVector_Intrinsics_vec128_add64(a12, + Lib_IntVector_Intrinsics_vec128_mul64(r0, a11)); + Lib_IntVector_Intrinsics_vec128 + a23 = + Lib_IntVector_Intrinsics_vec128_add64(a22, + Lib_IntVector_Intrinsics_vec128_mul64(r1, a11)); + Lib_IntVector_Intrinsics_vec128 + a33 = + Lib_IntVector_Intrinsics_vec128_add64(a32, + Lib_IntVector_Intrinsics_vec128_mul64(r2, a11)); + Lib_IntVector_Intrinsics_vec128 + a43 = + Lib_IntVector_Intrinsics_vec128_add64(a42, + Lib_IntVector_Intrinsics_vec128_mul64(r3, a11)); + Lib_IntVector_Intrinsics_vec128 + a04 = + Lib_IntVector_Intrinsics_vec128_add64(a03, + 
Lib_IntVector_Intrinsics_vec128_mul64(r53, a21)); + Lib_IntVector_Intrinsics_vec128 + a14 = + Lib_IntVector_Intrinsics_vec128_add64(a13, + Lib_IntVector_Intrinsics_vec128_mul64(r54, a21)); + Lib_IntVector_Intrinsics_vec128 + a24 = + Lib_IntVector_Intrinsics_vec128_add64(a23, + Lib_IntVector_Intrinsics_vec128_mul64(r0, a21)); + Lib_IntVector_Intrinsics_vec128 + a34 = + Lib_IntVector_Intrinsics_vec128_add64(a33, + Lib_IntVector_Intrinsics_vec128_mul64(r1, a21)); + Lib_IntVector_Intrinsics_vec128 + a44 = + Lib_IntVector_Intrinsics_vec128_add64(a43, + Lib_IntVector_Intrinsics_vec128_mul64(r2, a21)); + Lib_IntVector_Intrinsics_vec128 + a05 = + Lib_IntVector_Intrinsics_vec128_add64(a04, + Lib_IntVector_Intrinsics_vec128_mul64(r52, a31)); + Lib_IntVector_Intrinsics_vec128 + a15 = + Lib_IntVector_Intrinsics_vec128_add64(a14, + Lib_IntVector_Intrinsics_vec128_mul64(r53, a31)); + Lib_IntVector_Intrinsics_vec128 + a25 = + Lib_IntVector_Intrinsics_vec128_add64(a24, + Lib_IntVector_Intrinsics_vec128_mul64(r54, a31)); + Lib_IntVector_Intrinsics_vec128 + a35 = + Lib_IntVector_Intrinsics_vec128_add64(a34, + Lib_IntVector_Intrinsics_vec128_mul64(r0, a31)); + Lib_IntVector_Intrinsics_vec128 + a45 = + Lib_IntVector_Intrinsics_vec128_add64(a44, + Lib_IntVector_Intrinsics_vec128_mul64(r1, a31)); + Lib_IntVector_Intrinsics_vec128 + a06 = + Lib_IntVector_Intrinsics_vec128_add64(a05, + Lib_IntVector_Intrinsics_vec128_mul64(r51, a41)); + Lib_IntVector_Intrinsics_vec128 + a16 = + Lib_IntVector_Intrinsics_vec128_add64(a15, + Lib_IntVector_Intrinsics_vec128_mul64(r52, a41)); + Lib_IntVector_Intrinsics_vec128 + a26 = + Lib_IntVector_Intrinsics_vec128_add64(a25, + Lib_IntVector_Intrinsics_vec128_mul64(r53, a41)); + Lib_IntVector_Intrinsics_vec128 + a36 = + Lib_IntVector_Intrinsics_vec128_add64(a35, + Lib_IntVector_Intrinsics_vec128_mul64(r54, a41)); + Lib_IntVector_Intrinsics_vec128 + a46 = + Lib_IntVector_Intrinsics_vec128_add64(a45, + Lib_IntVector_Intrinsics_vec128_mul64(r0, a41)); + 
Lib_IntVector_Intrinsics_vec128 t0 = a06; + Lib_IntVector_Intrinsics_vec128 t1 = a16; + Lib_IntVector_Intrinsics_vec128 t2 = a26; + Lib_IntVector_Intrinsics_vec128 t3 = a36; + Lib_IntVector_Intrinsics_vec128 t4 = a46; + Lib_IntVector_Intrinsics_vec128 + mask26 = Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU); + Lib_IntVector_Intrinsics_vec128 + z0 = Lib_IntVector_Intrinsics_vec128_shift_right64(t0, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec128 + z1 = Lib_IntVector_Intrinsics_vec128_shift_right64(t3, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec128 x0 = Lib_IntVector_Intrinsics_vec128_and(t0, mask26); + Lib_IntVector_Intrinsics_vec128 x3 = Lib_IntVector_Intrinsics_vec128_and(t3, mask26); + Lib_IntVector_Intrinsics_vec128 x1 = Lib_IntVector_Intrinsics_vec128_add64(t1, z0); + Lib_IntVector_Intrinsics_vec128 x4 = Lib_IntVector_Intrinsics_vec128_add64(t4, z1); + Lib_IntVector_Intrinsics_vec128 + z01 = Lib_IntVector_Intrinsics_vec128_shift_right64(x1, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec128 + z11 = Lib_IntVector_Intrinsics_vec128_shift_right64(x4, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec128 + t = Lib_IntVector_Intrinsics_vec128_shift_left64(z11, (uint32_t)2U); + Lib_IntVector_Intrinsics_vec128 z12 = Lib_IntVector_Intrinsics_vec128_add64(z11, t); + Lib_IntVector_Intrinsics_vec128 x11 = Lib_IntVector_Intrinsics_vec128_and(x1, mask26); + Lib_IntVector_Intrinsics_vec128 x41 = Lib_IntVector_Intrinsics_vec128_and(x4, mask26); + Lib_IntVector_Intrinsics_vec128 x2 = Lib_IntVector_Intrinsics_vec128_add64(t2, z01); + Lib_IntVector_Intrinsics_vec128 x01 = Lib_IntVector_Intrinsics_vec128_add64(x0, z12); + Lib_IntVector_Intrinsics_vec128 + z02 = Lib_IntVector_Intrinsics_vec128_shift_right64(x2, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec128 + z13 = Lib_IntVector_Intrinsics_vec128_shift_right64(x01, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec128 x21 = Lib_IntVector_Intrinsics_vec128_and(x2, mask26); + Lib_IntVector_Intrinsics_vec128 x02 = 
Lib_IntVector_Intrinsics_vec128_and(x01, mask26); + Lib_IntVector_Intrinsics_vec128 x31 = Lib_IntVector_Intrinsics_vec128_add64(x3, z02); + Lib_IntVector_Intrinsics_vec128 x12 = Lib_IntVector_Intrinsics_vec128_add64(x11, z13); + Lib_IntVector_Intrinsics_vec128 + z03 = Lib_IntVector_Intrinsics_vec128_shift_right64(x31, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec128 x32 = Lib_IntVector_Intrinsics_vec128_and(x31, mask26); + Lib_IntVector_Intrinsics_vec128 x42 = Lib_IntVector_Intrinsics_vec128_add64(x41, z03); + Lib_IntVector_Intrinsics_vec128 o0 = x02; + Lib_IntVector_Intrinsics_vec128 o1 = x12; + Lib_IntVector_Intrinsics_vec128 o2 = x21; + Lib_IntVector_Intrinsics_vec128 o3 = x32; + Lib_IntVector_Intrinsics_vec128 o4 = x42; + acc[0U] = o0; + acc[1U] = o1; + acc[2U] = o2; + acc[3U] = o3; + acc[4U] = o4; +} + +void +Hacl_Poly1305_128_poly1305_update( + Lib_IntVector_Intrinsics_vec128 *ctx, + uint32_t len, + uint8_t *text) +{ + Lib_IntVector_Intrinsics_vec128 *pre = ctx + (uint32_t)5U; + Lib_IntVector_Intrinsics_vec128 *acc = ctx; + uint32_t sz_block = (uint32_t)32U; + uint32_t len0 = len / sz_block * sz_block; + uint8_t *t0 = text; + if (len0 > (uint32_t)0U) { + uint32_t bs = (uint32_t)32U; + uint8_t *text0 = t0; + Hacl_Impl_Poly1305_Field32xN_128_load_acc2(acc, text0); + uint32_t len1 = len0 - bs; + uint8_t *text1 = t0 + bs; + uint32_t nb = len1 / bs; + for (uint32_t i = (uint32_t)0U; i < nb; i++) { + uint8_t *block = text1 + i * bs; + KRML_PRE_ALIGN(16) + Lib_IntVector_Intrinsics_vec128 e[5U] KRML_POST_ALIGN(16) = { 0U }; + Lib_IntVector_Intrinsics_vec128 b1 = Lib_IntVector_Intrinsics_vec128_load64_le(block); + Lib_IntVector_Intrinsics_vec128 + b2 = Lib_IntVector_Intrinsics_vec128_load64_le(block + (uint32_t)16U); + Lib_IntVector_Intrinsics_vec128 lo = Lib_IntVector_Intrinsics_vec128_interleave_low64(b1, b2); + Lib_IntVector_Intrinsics_vec128 + hi = Lib_IntVector_Intrinsics_vec128_interleave_high64(b1, b2); + Lib_IntVector_Intrinsics_vec128 + f00 = + 
Lib_IntVector_Intrinsics_vec128_and(lo, + Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec128 + f15 = + Lib_IntVector_Intrinsics_vec128_and(Lib_IntVector_Intrinsics_vec128_shift_right64(lo, + (uint32_t)26U), + Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec128 + f25 = + Lib_IntVector_Intrinsics_vec128_or(Lib_IntVector_Intrinsics_vec128_shift_right64(lo, + (uint32_t)52U), + Lib_IntVector_Intrinsics_vec128_shift_left64(Lib_IntVector_Intrinsics_vec128_and(hi, + Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3fffU)), + (uint32_t)12U)); + Lib_IntVector_Intrinsics_vec128 + f30 = + Lib_IntVector_Intrinsics_vec128_and(Lib_IntVector_Intrinsics_vec128_shift_right64(hi, + (uint32_t)14U), + Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec128 + f40 = Lib_IntVector_Intrinsics_vec128_shift_right64(hi, (uint32_t)40U); + Lib_IntVector_Intrinsics_vec128 f0 = f00; + Lib_IntVector_Intrinsics_vec128 f1 = f15; + Lib_IntVector_Intrinsics_vec128 f2 = f25; + Lib_IntVector_Intrinsics_vec128 f3 = f30; + Lib_IntVector_Intrinsics_vec128 f41 = f40; + e[0U] = f0; + e[1U] = f1; + e[2U] = f2; + e[3U] = f3; + e[4U] = f41; + uint64_t b = (uint64_t)0x1000000U; + Lib_IntVector_Intrinsics_vec128 mask = Lib_IntVector_Intrinsics_vec128_load64(b); + Lib_IntVector_Intrinsics_vec128 f4 = e[4U]; + e[4U] = Lib_IntVector_Intrinsics_vec128_or(f4, mask); + Lib_IntVector_Intrinsics_vec128 *rn = pre + (uint32_t)10U; + Lib_IntVector_Intrinsics_vec128 *rn5 = pre + (uint32_t)15U; + Lib_IntVector_Intrinsics_vec128 r0 = rn[0U]; + Lib_IntVector_Intrinsics_vec128 r1 = rn[1U]; + Lib_IntVector_Intrinsics_vec128 r2 = rn[2U]; + Lib_IntVector_Intrinsics_vec128 r3 = rn[3U]; + Lib_IntVector_Intrinsics_vec128 r4 = rn[4U]; + Lib_IntVector_Intrinsics_vec128 r51 = rn5[1U]; + Lib_IntVector_Intrinsics_vec128 r52 = rn5[2U]; + Lib_IntVector_Intrinsics_vec128 r53 = rn5[3U]; + 
Lib_IntVector_Intrinsics_vec128 r54 = rn5[4U]; + Lib_IntVector_Intrinsics_vec128 f10 = acc[0U]; + Lib_IntVector_Intrinsics_vec128 f110 = acc[1U]; + Lib_IntVector_Intrinsics_vec128 f120 = acc[2U]; + Lib_IntVector_Intrinsics_vec128 f130 = acc[3U]; + Lib_IntVector_Intrinsics_vec128 f140 = acc[4U]; + Lib_IntVector_Intrinsics_vec128 a0 = Lib_IntVector_Intrinsics_vec128_mul64(r0, f10); + Lib_IntVector_Intrinsics_vec128 a1 = Lib_IntVector_Intrinsics_vec128_mul64(r1, f10); + Lib_IntVector_Intrinsics_vec128 a2 = Lib_IntVector_Intrinsics_vec128_mul64(r2, f10); + Lib_IntVector_Intrinsics_vec128 a3 = Lib_IntVector_Intrinsics_vec128_mul64(r3, f10); + Lib_IntVector_Intrinsics_vec128 a4 = Lib_IntVector_Intrinsics_vec128_mul64(r4, f10); + Lib_IntVector_Intrinsics_vec128 + a01 = + Lib_IntVector_Intrinsics_vec128_add64(a0, + Lib_IntVector_Intrinsics_vec128_mul64(r54, f110)); + Lib_IntVector_Intrinsics_vec128 + a11 = + Lib_IntVector_Intrinsics_vec128_add64(a1, + Lib_IntVector_Intrinsics_vec128_mul64(r0, f110)); + Lib_IntVector_Intrinsics_vec128 + a21 = + Lib_IntVector_Intrinsics_vec128_add64(a2, + Lib_IntVector_Intrinsics_vec128_mul64(r1, f110)); + Lib_IntVector_Intrinsics_vec128 + a31 = + Lib_IntVector_Intrinsics_vec128_add64(a3, + Lib_IntVector_Intrinsics_vec128_mul64(r2, f110)); + Lib_IntVector_Intrinsics_vec128 + a41 = + Lib_IntVector_Intrinsics_vec128_add64(a4, + Lib_IntVector_Intrinsics_vec128_mul64(r3, f110)); + Lib_IntVector_Intrinsics_vec128 + a02 = + Lib_IntVector_Intrinsics_vec128_add64(a01, + Lib_IntVector_Intrinsics_vec128_mul64(r53, f120)); + Lib_IntVector_Intrinsics_vec128 + a12 = + Lib_IntVector_Intrinsics_vec128_add64(a11, + Lib_IntVector_Intrinsics_vec128_mul64(r54, f120)); + Lib_IntVector_Intrinsics_vec128 + a22 = + Lib_IntVector_Intrinsics_vec128_add64(a21, + Lib_IntVector_Intrinsics_vec128_mul64(r0, f120)); + Lib_IntVector_Intrinsics_vec128 + a32 = + Lib_IntVector_Intrinsics_vec128_add64(a31, + Lib_IntVector_Intrinsics_vec128_mul64(r1, f120)); + 
Lib_IntVector_Intrinsics_vec128 + a42 = + Lib_IntVector_Intrinsics_vec128_add64(a41, + Lib_IntVector_Intrinsics_vec128_mul64(r2, f120)); + Lib_IntVector_Intrinsics_vec128 + a03 = + Lib_IntVector_Intrinsics_vec128_add64(a02, + Lib_IntVector_Intrinsics_vec128_mul64(r52, f130)); + Lib_IntVector_Intrinsics_vec128 + a13 = + Lib_IntVector_Intrinsics_vec128_add64(a12, + Lib_IntVector_Intrinsics_vec128_mul64(r53, f130)); + Lib_IntVector_Intrinsics_vec128 + a23 = + Lib_IntVector_Intrinsics_vec128_add64(a22, + Lib_IntVector_Intrinsics_vec128_mul64(r54, f130)); + Lib_IntVector_Intrinsics_vec128 + a33 = + Lib_IntVector_Intrinsics_vec128_add64(a32, + Lib_IntVector_Intrinsics_vec128_mul64(r0, f130)); + Lib_IntVector_Intrinsics_vec128 + a43 = + Lib_IntVector_Intrinsics_vec128_add64(a42, + Lib_IntVector_Intrinsics_vec128_mul64(r1, f130)); + Lib_IntVector_Intrinsics_vec128 + a04 = + Lib_IntVector_Intrinsics_vec128_add64(a03, + Lib_IntVector_Intrinsics_vec128_mul64(r51, f140)); + Lib_IntVector_Intrinsics_vec128 + a14 = + Lib_IntVector_Intrinsics_vec128_add64(a13, + Lib_IntVector_Intrinsics_vec128_mul64(r52, f140)); + Lib_IntVector_Intrinsics_vec128 + a24 = + Lib_IntVector_Intrinsics_vec128_add64(a23, + Lib_IntVector_Intrinsics_vec128_mul64(r53, f140)); + Lib_IntVector_Intrinsics_vec128 + a34 = + Lib_IntVector_Intrinsics_vec128_add64(a33, + Lib_IntVector_Intrinsics_vec128_mul64(r54, f140)); + Lib_IntVector_Intrinsics_vec128 + a44 = + Lib_IntVector_Intrinsics_vec128_add64(a43, + Lib_IntVector_Intrinsics_vec128_mul64(r0, f140)); + Lib_IntVector_Intrinsics_vec128 t01 = a04; + Lib_IntVector_Intrinsics_vec128 t1 = a14; + Lib_IntVector_Intrinsics_vec128 t2 = a24; + Lib_IntVector_Intrinsics_vec128 t3 = a34; + Lib_IntVector_Intrinsics_vec128 t4 = a44; + Lib_IntVector_Intrinsics_vec128 + mask26 = Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU); + Lib_IntVector_Intrinsics_vec128 + z0 = Lib_IntVector_Intrinsics_vec128_shift_right64(t01, (uint32_t)26U); + 
Lib_IntVector_Intrinsics_vec128 + z1 = Lib_IntVector_Intrinsics_vec128_shift_right64(t3, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec128 x0 = Lib_IntVector_Intrinsics_vec128_and(t01, mask26); + Lib_IntVector_Intrinsics_vec128 x3 = Lib_IntVector_Intrinsics_vec128_and(t3, mask26); + Lib_IntVector_Intrinsics_vec128 x1 = Lib_IntVector_Intrinsics_vec128_add64(t1, z0); + Lib_IntVector_Intrinsics_vec128 x4 = Lib_IntVector_Intrinsics_vec128_add64(t4, z1); + Lib_IntVector_Intrinsics_vec128 + z01 = Lib_IntVector_Intrinsics_vec128_shift_right64(x1, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec128 + z11 = Lib_IntVector_Intrinsics_vec128_shift_right64(x4, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec128 + t = Lib_IntVector_Intrinsics_vec128_shift_left64(z11, (uint32_t)2U); + Lib_IntVector_Intrinsics_vec128 z12 = Lib_IntVector_Intrinsics_vec128_add64(z11, t); + Lib_IntVector_Intrinsics_vec128 x11 = Lib_IntVector_Intrinsics_vec128_and(x1, mask26); + Lib_IntVector_Intrinsics_vec128 x41 = Lib_IntVector_Intrinsics_vec128_and(x4, mask26); + Lib_IntVector_Intrinsics_vec128 x2 = Lib_IntVector_Intrinsics_vec128_add64(t2, z01); + Lib_IntVector_Intrinsics_vec128 x01 = Lib_IntVector_Intrinsics_vec128_add64(x0, z12); + Lib_IntVector_Intrinsics_vec128 + z02 = Lib_IntVector_Intrinsics_vec128_shift_right64(x2, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec128 + z13 = Lib_IntVector_Intrinsics_vec128_shift_right64(x01, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec128 x21 = Lib_IntVector_Intrinsics_vec128_and(x2, mask26); + Lib_IntVector_Intrinsics_vec128 x02 = Lib_IntVector_Intrinsics_vec128_and(x01, mask26); + Lib_IntVector_Intrinsics_vec128 x31 = Lib_IntVector_Intrinsics_vec128_add64(x3, z02); + Lib_IntVector_Intrinsics_vec128 x12 = Lib_IntVector_Intrinsics_vec128_add64(x11, z13); + Lib_IntVector_Intrinsics_vec128 + z03 = Lib_IntVector_Intrinsics_vec128_shift_right64(x31, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec128 x32 = Lib_IntVector_Intrinsics_vec128_and(x31, mask26); + 
Lib_IntVector_Intrinsics_vec128 x42 = Lib_IntVector_Intrinsics_vec128_add64(x41, z03); + Lib_IntVector_Intrinsics_vec128 o00 = x02; + Lib_IntVector_Intrinsics_vec128 o10 = x12; + Lib_IntVector_Intrinsics_vec128 o20 = x21; + Lib_IntVector_Intrinsics_vec128 o30 = x32; + Lib_IntVector_Intrinsics_vec128 o40 = x42; + acc[0U] = o00; + acc[1U] = o10; + acc[2U] = o20; + acc[3U] = o30; + acc[4U] = o40; + Lib_IntVector_Intrinsics_vec128 f100 = acc[0U]; + Lib_IntVector_Intrinsics_vec128 f11 = acc[1U]; + Lib_IntVector_Intrinsics_vec128 f12 = acc[2U]; + Lib_IntVector_Intrinsics_vec128 f13 = acc[3U]; + Lib_IntVector_Intrinsics_vec128 f14 = acc[4U]; + Lib_IntVector_Intrinsics_vec128 f20 = e[0U]; + Lib_IntVector_Intrinsics_vec128 f21 = e[1U]; + Lib_IntVector_Intrinsics_vec128 f22 = e[2U]; + Lib_IntVector_Intrinsics_vec128 f23 = e[3U]; + Lib_IntVector_Intrinsics_vec128 f24 = e[4U]; + Lib_IntVector_Intrinsics_vec128 o0 = Lib_IntVector_Intrinsics_vec128_add64(f100, f20); + Lib_IntVector_Intrinsics_vec128 o1 = Lib_IntVector_Intrinsics_vec128_add64(f11, f21); + Lib_IntVector_Intrinsics_vec128 o2 = Lib_IntVector_Intrinsics_vec128_add64(f12, f22); + Lib_IntVector_Intrinsics_vec128 o3 = Lib_IntVector_Intrinsics_vec128_add64(f13, f23); + Lib_IntVector_Intrinsics_vec128 o4 = Lib_IntVector_Intrinsics_vec128_add64(f14, f24); + acc[0U] = o0; + acc[1U] = o1; + acc[2U] = o2; + acc[3U] = o3; + acc[4U] = o4; + } + Hacl_Impl_Poly1305_Field32xN_128_fmul_r2_normalize(acc, pre); + } + uint32_t len1 = len - len0; + uint8_t *t1 = text + len0; + uint32_t nb = len1 / (uint32_t)16U; + uint32_t rem = len1 % (uint32_t)16U; + for (uint32_t i = (uint32_t)0U; i < nb; i++) { + uint8_t *block = t1 + i * (uint32_t)16U; + KRML_PRE_ALIGN(16) + Lib_IntVector_Intrinsics_vec128 e[5U] KRML_POST_ALIGN(16) = { 0U }; + uint64_t u0 = load64_le(block); + uint64_t lo = u0; + uint64_t u = load64_le(block + (uint32_t)8U); + uint64_t hi = u; + Lib_IntVector_Intrinsics_vec128 f0 = Lib_IntVector_Intrinsics_vec128_load64(lo); + 
Lib_IntVector_Intrinsics_vec128 f1 = Lib_IntVector_Intrinsics_vec128_load64(hi); + Lib_IntVector_Intrinsics_vec128 + f010 = + Lib_IntVector_Intrinsics_vec128_and(f0, + Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec128 + f110 = + Lib_IntVector_Intrinsics_vec128_and(Lib_IntVector_Intrinsics_vec128_shift_right64(f0, + (uint32_t)26U), + Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec128 + f20 = + Lib_IntVector_Intrinsics_vec128_or(Lib_IntVector_Intrinsics_vec128_shift_right64(f0, + (uint32_t)52U), + Lib_IntVector_Intrinsics_vec128_shift_left64(Lib_IntVector_Intrinsics_vec128_and(f1, + Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3fffU)), + (uint32_t)12U)); + Lib_IntVector_Intrinsics_vec128 + f30 = + Lib_IntVector_Intrinsics_vec128_and(Lib_IntVector_Intrinsics_vec128_shift_right64(f1, + (uint32_t)14U), + Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec128 + f40 = Lib_IntVector_Intrinsics_vec128_shift_right64(f1, (uint32_t)40U); + Lib_IntVector_Intrinsics_vec128 f01 = f010; + Lib_IntVector_Intrinsics_vec128 f111 = f110; + Lib_IntVector_Intrinsics_vec128 f2 = f20; + Lib_IntVector_Intrinsics_vec128 f3 = f30; + Lib_IntVector_Intrinsics_vec128 f41 = f40; + e[0U] = f01; + e[1U] = f111; + e[2U] = f2; + e[3U] = f3; + e[4U] = f41; + uint64_t b = (uint64_t)0x1000000U; + Lib_IntVector_Intrinsics_vec128 mask = Lib_IntVector_Intrinsics_vec128_load64(b); + Lib_IntVector_Intrinsics_vec128 f4 = e[4U]; + e[4U] = Lib_IntVector_Intrinsics_vec128_or(f4, mask); + Lib_IntVector_Intrinsics_vec128 *r = pre; + Lib_IntVector_Intrinsics_vec128 *r5 = pre + (uint32_t)5U; + Lib_IntVector_Intrinsics_vec128 r0 = r[0U]; + Lib_IntVector_Intrinsics_vec128 r1 = r[1U]; + Lib_IntVector_Intrinsics_vec128 r2 = r[2U]; + Lib_IntVector_Intrinsics_vec128 r3 = r[3U]; + Lib_IntVector_Intrinsics_vec128 r4 = r[4U]; + Lib_IntVector_Intrinsics_vec128 r51 = r5[1U]; + 
Lib_IntVector_Intrinsics_vec128 r52 = r5[2U]; + Lib_IntVector_Intrinsics_vec128 r53 = r5[3U]; + Lib_IntVector_Intrinsics_vec128 r54 = r5[4U]; + Lib_IntVector_Intrinsics_vec128 f10 = e[0U]; + Lib_IntVector_Intrinsics_vec128 f11 = e[1U]; + Lib_IntVector_Intrinsics_vec128 f12 = e[2U]; + Lib_IntVector_Intrinsics_vec128 f13 = e[3U]; + Lib_IntVector_Intrinsics_vec128 f14 = e[4U]; + Lib_IntVector_Intrinsics_vec128 a0 = acc[0U]; + Lib_IntVector_Intrinsics_vec128 a1 = acc[1U]; + Lib_IntVector_Intrinsics_vec128 a2 = acc[2U]; + Lib_IntVector_Intrinsics_vec128 a3 = acc[3U]; + Lib_IntVector_Intrinsics_vec128 a4 = acc[4U]; + Lib_IntVector_Intrinsics_vec128 a01 = Lib_IntVector_Intrinsics_vec128_add64(a0, f10); + Lib_IntVector_Intrinsics_vec128 a11 = Lib_IntVector_Intrinsics_vec128_add64(a1, f11); + Lib_IntVector_Intrinsics_vec128 a21 = Lib_IntVector_Intrinsics_vec128_add64(a2, f12); + Lib_IntVector_Intrinsics_vec128 a31 = Lib_IntVector_Intrinsics_vec128_add64(a3, f13); + Lib_IntVector_Intrinsics_vec128 a41 = Lib_IntVector_Intrinsics_vec128_add64(a4, f14); + Lib_IntVector_Intrinsics_vec128 a02 = Lib_IntVector_Intrinsics_vec128_mul64(r0, a01); + Lib_IntVector_Intrinsics_vec128 a12 = Lib_IntVector_Intrinsics_vec128_mul64(r1, a01); + Lib_IntVector_Intrinsics_vec128 a22 = Lib_IntVector_Intrinsics_vec128_mul64(r2, a01); + Lib_IntVector_Intrinsics_vec128 a32 = Lib_IntVector_Intrinsics_vec128_mul64(r3, a01); + Lib_IntVector_Intrinsics_vec128 a42 = Lib_IntVector_Intrinsics_vec128_mul64(r4, a01); + Lib_IntVector_Intrinsics_vec128 + a03 = + Lib_IntVector_Intrinsics_vec128_add64(a02, + Lib_IntVector_Intrinsics_vec128_mul64(r54, a11)); + Lib_IntVector_Intrinsics_vec128 + a13 = + Lib_IntVector_Intrinsics_vec128_add64(a12, + Lib_IntVector_Intrinsics_vec128_mul64(r0, a11)); + Lib_IntVector_Intrinsics_vec128 + a23 = + Lib_IntVector_Intrinsics_vec128_add64(a22, + Lib_IntVector_Intrinsics_vec128_mul64(r1, a11)); + Lib_IntVector_Intrinsics_vec128 + a33 = + Lib_IntVector_Intrinsics_vec128_add64(a32, 
+ Lib_IntVector_Intrinsics_vec128_mul64(r2, a11)); + Lib_IntVector_Intrinsics_vec128 + a43 = + Lib_IntVector_Intrinsics_vec128_add64(a42, + Lib_IntVector_Intrinsics_vec128_mul64(r3, a11)); + Lib_IntVector_Intrinsics_vec128 + a04 = + Lib_IntVector_Intrinsics_vec128_add64(a03, + Lib_IntVector_Intrinsics_vec128_mul64(r53, a21)); + Lib_IntVector_Intrinsics_vec128 + a14 = + Lib_IntVector_Intrinsics_vec128_add64(a13, + Lib_IntVector_Intrinsics_vec128_mul64(r54, a21)); + Lib_IntVector_Intrinsics_vec128 + a24 = + Lib_IntVector_Intrinsics_vec128_add64(a23, + Lib_IntVector_Intrinsics_vec128_mul64(r0, a21)); + Lib_IntVector_Intrinsics_vec128 + a34 = + Lib_IntVector_Intrinsics_vec128_add64(a33, + Lib_IntVector_Intrinsics_vec128_mul64(r1, a21)); + Lib_IntVector_Intrinsics_vec128 + a44 = + Lib_IntVector_Intrinsics_vec128_add64(a43, + Lib_IntVector_Intrinsics_vec128_mul64(r2, a21)); + Lib_IntVector_Intrinsics_vec128 + a05 = + Lib_IntVector_Intrinsics_vec128_add64(a04, + Lib_IntVector_Intrinsics_vec128_mul64(r52, a31)); + Lib_IntVector_Intrinsics_vec128 + a15 = + Lib_IntVector_Intrinsics_vec128_add64(a14, + Lib_IntVector_Intrinsics_vec128_mul64(r53, a31)); + Lib_IntVector_Intrinsics_vec128 + a25 = + Lib_IntVector_Intrinsics_vec128_add64(a24, + Lib_IntVector_Intrinsics_vec128_mul64(r54, a31)); + Lib_IntVector_Intrinsics_vec128 + a35 = + Lib_IntVector_Intrinsics_vec128_add64(a34, + Lib_IntVector_Intrinsics_vec128_mul64(r0, a31)); + Lib_IntVector_Intrinsics_vec128 + a45 = + Lib_IntVector_Intrinsics_vec128_add64(a44, + Lib_IntVector_Intrinsics_vec128_mul64(r1, a31)); + Lib_IntVector_Intrinsics_vec128 + a06 = + Lib_IntVector_Intrinsics_vec128_add64(a05, + Lib_IntVector_Intrinsics_vec128_mul64(r51, a41)); + Lib_IntVector_Intrinsics_vec128 + a16 = + Lib_IntVector_Intrinsics_vec128_add64(a15, + Lib_IntVector_Intrinsics_vec128_mul64(r52, a41)); + Lib_IntVector_Intrinsics_vec128 + a26 = + Lib_IntVector_Intrinsics_vec128_add64(a25, + Lib_IntVector_Intrinsics_vec128_mul64(r53, a41)); + 
Lib_IntVector_Intrinsics_vec128 + a36 = + Lib_IntVector_Intrinsics_vec128_add64(a35, + Lib_IntVector_Intrinsics_vec128_mul64(r54, a41)); + Lib_IntVector_Intrinsics_vec128 + a46 = + Lib_IntVector_Intrinsics_vec128_add64(a45, + Lib_IntVector_Intrinsics_vec128_mul64(r0, a41)); + Lib_IntVector_Intrinsics_vec128 t01 = a06; + Lib_IntVector_Intrinsics_vec128 t11 = a16; + Lib_IntVector_Intrinsics_vec128 t2 = a26; + Lib_IntVector_Intrinsics_vec128 t3 = a36; + Lib_IntVector_Intrinsics_vec128 t4 = a46; + Lib_IntVector_Intrinsics_vec128 + mask26 = Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU); + Lib_IntVector_Intrinsics_vec128 + z0 = Lib_IntVector_Intrinsics_vec128_shift_right64(t01, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec128 + z1 = Lib_IntVector_Intrinsics_vec128_shift_right64(t3, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec128 x0 = Lib_IntVector_Intrinsics_vec128_and(t01, mask26); + Lib_IntVector_Intrinsics_vec128 x3 = Lib_IntVector_Intrinsics_vec128_and(t3, mask26); + Lib_IntVector_Intrinsics_vec128 x1 = Lib_IntVector_Intrinsics_vec128_add64(t11, z0); + Lib_IntVector_Intrinsics_vec128 x4 = Lib_IntVector_Intrinsics_vec128_add64(t4, z1); + Lib_IntVector_Intrinsics_vec128 + z01 = Lib_IntVector_Intrinsics_vec128_shift_right64(x1, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec128 + z11 = Lib_IntVector_Intrinsics_vec128_shift_right64(x4, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec128 + t = Lib_IntVector_Intrinsics_vec128_shift_left64(z11, (uint32_t)2U); + Lib_IntVector_Intrinsics_vec128 z12 = Lib_IntVector_Intrinsics_vec128_add64(z11, t); + Lib_IntVector_Intrinsics_vec128 x11 = Lib_IntVector_Intrinsics_vec128_and(x1, mask26); + Lib_IntVector_Intrinsics_vec128 x41 = Lib_IntVector_Intrinsics_vec128_and(x4, mask26); + Lib_IntVector_Intrinsics_vec128 x2 = Lib_IntVector_Intrinsics_vec128_add64(t2, z01); + Lib_IntVector_Intrinsics_vec128 x01 = Lib_IntVector_Intrinsics_vec128_add64(x0, z12); + Lib_IntVector_Intrinsics_vec128 + z02 = 
Lib_IntVector_Intrinsics_vec128_shift_right64(x2, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec128 + z13 = Lib_IntVector_Intrinsics_vec128_shift_right64(x01, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec128 x21 = Lib_IntVector_Intrinsics_vec128_and(x2, mask26); + Lib_IntVector_Intrinsics_vec128 x02 = Lib_IntVector_Intrinsics_vec128_and(x01, mask26); + Lib_IntVector_Intrinsics_vec128 x31 = Lib_IntVector_Intrinsics_vec128_add64(x3, z02); + Lib_IntVector_Intrinsics_vec128 x12 = Lib_IntVector_Intrinsics_vec128_add64(x11, z13); + Lib_IntVector_Intrinsics_vec128 + z03 = Lib_IntVector_Intrinsics_vec128_shift_right64(x31, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec128 x32 = Lib_IntVector_Intrinsics_vec128_and(x31, mask26); + Lib_IntVector_Intrinsics_vec128 x42 = Lib_IntVector_Intrinsics_vec128_add64(x41, z03); + Lib_IntVector_Intrinsics_vec128 o0 = x02; + Lib_IntVector_Intrinsics_vec128 o1 = x12; + Lib_IntVector_Intrinsics_vec128 o2 = x21; + Lib_IntVector_Intrinsics_vec128 o3 = x32; + Lib_IntVector_Intrinsics_vec128 o4 = x42; + acc[0U] = o0; + acc[1U] = o1; + acc[2U] = o2; + acc[3U] = o3; + acc[4U] = o4; + } + if (rem > (uint32_t)0U) { + uint8_t *last = t1 + nb * (uint32_t)16U; + KRML_PRE_ALIGN(16) + Lib_IntVector_Intrinsics_vec128 e[5U] KRML_POST_ALIGN(16) = { 0U }; + uint8_t tmp[16U] = { 0U }; + memcpy(tmp, last, rem * sizeof(uint8_t)); + uint64_t u0 = load64_le(tmp); + uint64_t lo = u0; + uint64_t u = load64_le(tmp + (uint32_t)8U); + uint64_t hi = u; + Lib_IntVector_Intrinsics_vec128 f0 = Lib_IntVector_Intrinsics_vec128_load64(lo); + Lib_IntVector_Intrinsics_vec128 f1 = Lib_IntVector_Intrinsics_vec128_load64(hi); + Lib_IntVector_Intrinsics_vec128 + f010 = + Lib_IntVector_Intrinsics_vec128_and(f0, + Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec128 + f110 = + Lib_IntVector_Intrinsics_vec128_and(Lib_IntVector_Intrinsics_vec128_shift_right64(f0, + (uint32_t)26U), + 
Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec128 + f20 = + Lib_IntVector_Intrinsics_vec128_or(Lib_IntVector_Intrinsics_vec128_shift_right64(f0, + (uint32_t)52U), + Lib_IntVector_Intrinsics_vec128_shift_left64(Lib_IntVector_Intrinsics_vec128_and(f1, + Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3fffU)), + (uint32_t)12U)); + Lib_IntVector_Intrinsics_vec128 + f30 = + Lib_IntVector_Intrinsics_vec128_and(Lib_IntVector_Intrinsics_vec128_shift_right64(f1, + (uint32_t)14U), + Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec128 + f40 = Lib_IntVector_Intrinsics_vec128_shift_right64(f1, (uint32_t)40U); + Lib_IntVector_Intrinsics_vec128 f01 = f010; + Lib_IntVector_Intrinsics_vec128 f111 = f110; + Lib_IntVector_Intrinsics_vec128 f2 = f20; + Lib_IntVector_Intrinsics_vec128 f3 = f30; + Lib_IntVector_Intrinsics_vec128 f4 = f40; + e[0U] = f01; + e[1U] = f111; + e[2U] = f2; + e[3U] = f3; + e[4U] = f4; + uint64_t b = (uint64_t)1U << rem * (uint32_t)8U % (uint32_t)26U; + Lib_IntVector_Intrinsics_vec128 mask = Lib_IntVector_Intrinsics_vec128_load64(b); + Lib_IntVector_Intrinsics_vec128 fi = e[rem * (uint32_t)8U / (uint32_t)26U]; + e[rem * (uint32_t)8U / (uint32_t)26U] = Lib_IntVector_Intrinsics_vec128_or(fi, mask); + Lib_IntVector_Intrinsics_vec128 *r = pre; + Lib_IntVector_Intrinsics_vec128 *r5 = pre + (uint32_t)5U; + Lib_IntVector_Intrinsics_vec128 r0 = r[0U]; + Lib_IntVector_Intrinsics_vec128 r1 = r[1U]; + Lib_IntVector_Intrinsics_vec128 r2 = r[2U]; + Lib_IntVector_Intrinsics_vec128 r3 = r[3U]; + Lib_IntVector_Intrinsics_vec128 r4 = r[4U]; + Lib_IntVector_Intrinsics_vec128 r51 = r5[1U]; + Lib_IntVector_Intrinsics_vec128 r52 = r5[2U]; + Lib_IntVector_Intrinsics_vec128 r53 = r5[3U]; + Lib_IntVector_Intrinsics_vec128 r54 = r5[4U]; + Lib_IntVector_Intrinsics_vec128 f10 = e[0U]; + Lib_IntVector_Intrinsics_vec128 f11 = e[1U]; + Lib_IntVector_Intrinsics_vec128 f12 = e[2U]; + 
Lib_IntVector_Intrinsics_vec128 f13 = e[3U]; + Lib_IntVector_Intrinsics_vec128 f14 = e[4U]; + Lib_IntVector_Intrinsics_vec128 a0 = acc[0U]; + Lib_IntVector_Intrinsics_vec128 a1 = acc[1U]; + Lib_IntVector_Intrinsics_vec128 a2 = acc[2U]; + Lib_IntVector_Intrinsics_vec128 a3 = acc[3U]; + Lib_IntVector_Intrinsics_vec128 a4 = acc[4U]; + Lib_IntVector_Intrinsics_vec128 a01 = Lib_IntVector_Intrinsics_vec128_add64(a0, f10); + Lib_IntVector_Intrinsics_vec128 a11 = Lib_IntVector_Intrinsics_vec128_add64(a1, f11); + Lib_IntVector_Intrinsics_vec128 a21 = Lib_IntVector_Intrinsics_vec128_add64(a2, f12); + Lib_IntVector_Intrinsics_vec128 a31 = Lib_IntVector_Intrinsics_vec128_add64(a3, f13); + Lib_IntVector_Intrinsics_vec128 a41 = Lib_IntVector_Intrinsics_vec128_add64(a4, f14); + Lib_IntVector_Intrinsics_vec128 a02 = Lib_IntVector_Intrinsics_vec128_mul64(r0, a01); + Lib_IntVector_Intrinsics_vec128 a12 = Lib_IntVector_Intrinsics_vec128_mul64(r1, a01); + Lib_IntVector_Intrinsics_vec128 a22 = Lib_IntVector_Intrinsics_vec128_mul64(r2, a01); + Lib_IntVector_Intrinsics_vec128 a32 = Lib_IntVector_Intrinsics_vec128_mul64(r3, a01); + Lib_IntVector_Intrinsics_vec128 a42 = Lib_IntVector_Intrinsics_vec128_mul64(r4, a01); + Lib_IntVector_Intrinsics_vec128 + a03 = + Lib_IntVector_Intrinsics_vec128_add64(a02, + Lib_IntVector_Intrinsics_vec128_mul64(r54, a11)); + Lib_IntVector_Intrinsics_vec128 + a13 = + Lib_IntVector_Intrinsics_vec128_add64(a12, + Lib_IntVector_Intrinsics_vec128_mul64(r0, a11)); + Lib_IntVector_Intrinsics_vec128 + a23 = + Lib_IntVector_Intrinsics_vec128_add64(a22, + Lib_IntVector_Intrinsics_vec128_mul64(r1, a11)); + Lib_IntVector_Intrinsics_vec128 + a33 = + Lib_IntVector_Intrinsics_vec128_add64(a32, + Lib_IntVector_Intrinsics_vec128_mul64(r2, a11)); + Lib_IntVector_Intrinsics_vec128 + a43 = + Lib_IntVector_Intrinsics_vec128_add64(a42, + Lib_IntVector_Intrinsics_vec128_mul64(r3, a11)); + Lib_IntVector_Intrinsics_vec128 + a04 = + Lib_IntVector_Intrinsics_vec128_add64(a03, + 
Lib_IntVector_Intrinsics_vec128_mul64(r53, a21)); + Lib_IntVector_Intrinsics_vec128 + a14 = + Lib_IntVector_Intrinsics_vec128_add64(a13, + Lib_IntVector_Intrinsics_vec128_mul64(r54, a21)); + Lib_IntVector_Intrinsics_vec128 + a24 = + Lib_IntVector_Intrinsics_vec128_add64(a23, + Lib_IntVector_Intrinsics_vec128_mul64(r0, a21)); + Lib_IntVector_Intrinsics_vec128 + a34 = + Lib_IntVector_Intrinsics_vec128_add64(a33, + Lib_IntVector_Intrinsics_vec128_mul64(r1, a21)); + Lib_IntVector_Intrinsics_vec128 + a44 = + Lib_IntVector_Intrinsics_vec128_add64(a43, + Lib_IntVector_Intrinsics_vec128_mul64(r2, a21)); + Lib_IntVector_Intrinsics_vec128 + a05 = + Lib_IntVector_Intrinsics_vec128_add64(a04, + Lib_IntVector_Intrinsics_vec128_mul64(r52, a31)); + Lib_IntVector_Intrinsics_vec128 + a15 = + Lib_IntVector_Intrinsics_vec128_add64(a14, + Lib_IntVector_Intrinsics_vec128_mul64(r53, a31)); + Lib_IntVector_Intrinsics_vec128 + a25 = + Lib_IntVector_Intrinsics_vec128_add64(a24, + Lib_IntVector_Intrinsics_vec128_mul64(r54, a31)); + Lib_IntVector_Intrinsics_vec128 + a35 = + Lib_IntVector_Intrinsics_vec128_add64(a34, + Lib_IntVector_Intrinsics_vec128_mul64(r0, a31)); + Lib_IntVector_Intrinsics_vec128 + a45 = + Lib_IntVector_Intrinsics_vec128_add64(a44, + Lib_IntVector_Intrinsics_vec128_mul64(r1, a31)); + Lib_IntVector_Intrinsics_vec128 + a06 = + Lib_IntVector_Intrinsics_vec128_add64(a05, + Lib_IntVector_Intrinsics_vec128_mul64(r51, a41)); + Lib_IntVector_Intrinsics_vec128 + a16 = + Lib_IntVector_Intrinsics_vec128_add64(a15, + Lib_IntVector_Intrinsics_vec128_mul64(r52, a41)); + Lib_IntVector_Intrinsics_vec128 + a26 = + Lib_IntVector_Intrinsics_vec128_add64(a25, + Lib_IntVector_Intrinsics_vec128_mul64(r53, a41)); + Lib_IntVector_Intrinsics_vec128 + a36 = + Lib_IntVector_Intrinsics_vec128_add64(a35, + Lib_IntVector_Intrinsics_vec128_mul64(r54, a41)); + Lib_IntVector_Intrinsics_vec128 + a46 = + Lib_IntVector_Intrinsics_vec128_add64(a45, + Lib_IntVector_Intrinsics_vec128_mul64(r0, a41)); + 
Lib_IntVector_Intrinsics_vec128 t01 = a06; + Lib_IntVector_Intrinsics_vec128 t11 = a16; + Lib_IntVector_Intrinsics_vec128 t2 = a26; + Lib_IntVector_Intrinsics_vec128 t3 = a36; + Lib_IntVector_Intrinsics_vec128 t4 = a46; + Lib_IntVector_Intrinsics_vec128 + mask26 = Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU); + Lib_IntVector_Intrinsics_vec128 + z0 = Lib_IntVector_Intrinsics_vec128_shift_right64(t01, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec128 + z1 = Lib_IntVector_Intrinsics_vec128_shift_right64(t3, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec128 x0 = Lib_IntVector_Intrinsics_vec128_and(t01, mask26); + Lib_IntVector_Intrinsics_vec128 x3 = Lib_IntVector_Intrinsics_vec128_and(t3, mask26); + Lib_IntVector_Intrinsics_vec128 x1 = Lib_IntVector_Intrinsics_vec128_add64(t11, z0); + Lib_IntVector_Intrinsics_vec128 x4 = Lib_IntVector_Intrinsics_vec128_add64(t4, z1); + Lib_IntVector_Intrinsics_vec128 + z01 = Lib_IntVector_Intrinsics_vec128_shift_right64(x1, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec128 + z11 = Lib_IntVector_Intrinsics_vec128_shift_right64(x4, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec128 + t = Lib_IntVector_Intrinsics_vec128_shift_left64(z11, (uint32_t)2U); + Lib_IntVector_Intrinsics_vec128 z12 = Lib_IntVector_Intrinsics_vec128_add64(z11, t); + Lib_IntVector_Intrinsics_vec128 x11 = Lib_IntVector_Intrinsics_vec128_and(x1, mask26); + Lib_IntVector_Intrinsics_vec128 x41 = Lib_IntVector_Intrinsics_vec128_and(x4, mask26); + Lib_IntVector_Intrinsics_vec128 x2 = Lib_IntVector_Intrinsics_vec128_add64(t2, z01); + Lib_IntVector_Intrinsics_vec128 x01 = Lib_IntVector_Intrinsics_vec128_add64(x0, z12); + Lib_IntVector_Intrinsics_vec128 + z02 = Lib_IntVector_Intrinsics_vec128_shift_right64(x2, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec128 + z13 = Lib_IntVector_Intrinsics_vec128_shift_right64(x01, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec128 x21 = Lib_IntVector_Intrinsics_vec128_and(x2, mask26); + Lib_IntVector_Intrinsics_vec128 x02 = 
Lib_IntVector_Intrinsics_vec128_and(x01, mask26);
+        Lib_IntVector_Intrinsics_vec128 x31 = Lib_IntVector_Intrinsics_vec128_add64(x3, z02);
+        Lib_IntVector_Intrinsics_vec128 x12 = Lib_IntVector_Intrinsics_vec128_add64(x11, z13);
+        Lib_IntVector_Intrinsics_vec128
+            z03 = Lib_IntVector_Intrinsics_vec128_shift_right64(x31, (uint32_t)26U);
+        Lib_IntVector_Intrinsics_vec128 x32 = Lib_IntVector_Intrinsics_vec128_and(x31, mask26);
+        Lib_IntVector_Intrinsics_vec128 x42 = Lib_IntVector_Intrinsics_vec128_add64(x41, z03);
+        Lib_IntVector_Intrinsics_vec128 o0 = x02;
+        Lib_IntVector_Intrinsics_vec128 o1 = x12;
+        Lib_IntVector_Intrinsics_vec128 o2 = x21;
+        Lib_IntVector_Intrinsics_vec128 o3 = x32;
+        Lib_IntVector_Intrinsics_vec128 o4 = x42;
+        acc[0U] = o0;
+        acc[1U] = o1;
+        acc[2U] = o2;
+        acc[3U] = o3;
+        acc[4U] = o4;
+        return;
+    }
+}
+/*
+ * Finish a Poly1305 computation and write the 16-byte tag.
+ *
+ * tag: output; receives two little-endian 64-bit words (16 bytes).
+ * key: only the bytes from key + 16 onward are read here, as two
+ *      little-endian 64-bit words that are added to the reduced accumulator.
+ * ctx: the first five vectors are the accumulator limbs; they are read and
+ *      then overwritten with the fully reduced value.
+ *
+ * Steps, in order:
+ *   1. Two carry passes over the five limbs. Each limb is masked to 26 bits
+ *      (0x3ffffff) and its upper bits are added to the next limb; the carry
+ *      out of limb 4 is multiplied by 5 and added to limb 0.
+ *   2. A masked subtraction of the limbs (0x3fffffb, 0x3ffffff, 0x3ffffff,
+ *      0x3ffffff, 0x3ffffff), which are the radix-2^26 limbs of 2^130 - 5.
+ *   3. Lane 0 of each limb is packed into two 64-bit words and added to the
+ *      two key words with carry propagation between them.
+ */
+void
+Hacl_Poly1305_128_poly1305_finish(
+    uint8_t *tag,
+    uint8_t *key,
+    Lib_IntVector_Intrinsics_vec128 *ctx)
+{
+    Lib_IntVector_Intrinsics_vec128 *acc = ctx;
+    uint8_t *ks = key + (uint32_t)16U; /* second 16 bytes of the key, added to the result at the end */
+    Lib_IntVector_Intrinsics_vec128 f0 = acc[0U];
+    Lib_IntVector_Intrinsics_vec128 f13 = acc[1U];
+    Lib_IntVector_Intrinsics_vec128 f23 = acc[2U];
+    Lib_IntVector_Intrinsics_vec128 f33 = acc[3U];
+    Lib_IntVector_Intrinsics_vec128 f40 = acc[4U];
+    Lib_IntVector_Intrinsics_vec128
+        l0 = Lib_IntVector_Intrinsics_vec128_add64(f0, Lib_IntVector_Intrinsics_vec128_zero); /* carry pass 1: limb 0 has no incoming carry */
+    Lib_IntVector_Intrinsics_vec128
+        tmp00 =
+            Lib_IntVector_Intrinsics_vec128_and(l0,
+                Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU));
+    Lib_IntVector_Intrinsics_vec128
+        c00 = Lib_IntVector_Intrinsics_vec128_shift_right64(l0, (uint32_t)26U);
+    Lib_IntVector_Intrinsics_vec128 l1 = Lib_IntVector_Intrinsics_vec128_add64(f13, c00);
+    Lib_IntVector_Intrinsics_vec128
+        tmp10 =
+            Lib_IntVector_Intrinsics_vec128_and(l1,
+                Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU));
+    Lib_IntVector_Intrinsics_vec128
+        c10 = Lib_IntVector_Intrinsics_vec128_shift_right64(l1, (uint32_t)26U);
+    Lib_IntVector_Intrinsics_vec128 l2 = Lib_IntVector_Intrinsics_vec128_add64(f23, c10);
+    Lib_IntVector_Intrinsics_vec128
+        tmp20 =
+            Lib_IntVector_Intrinsics_vec128_and(l2,
+                Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU));
+    Lib_IntVector_Intrinsics_vec128
+        c20 = Lib_IntVector_Intrinsics_vec128_shift_right64(l2, (uint32_t)26U);
+    Lib_IntVector_Intrinsics_vec128 l3 = Lib_IntVector_Intrinsics_vec128_add64(f33, c20);
+    Lib_IntVector_Intrinsics_vec128
+        tmp30 =
+            Lib_IntVector_Intrinsics_vec128_and(l3,
+                Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU));
+    Lib_IntVector_Intrinsics_vec128
+        c30 = Lib_IntVector_Intrinsics_vec128_shift_right64(l3, (uint32_t)26U);
+    Lib_IntVector_Intrinsics_vec128 l4 = Lib_IntVector_Intrinsics_vec128_add64(f40, c30);
+    Lib_IntVector_Intrinsics_vec128
+        tmp40 =
+            Lib_IntVector_Intrinsics_vec128_and(l4,
+                Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU));
+    Lib_IntVector_Intrinsics_vec128
+        c40 = Lib_IntVector_Intrinsics_vec128_shift_right64(l4, (uint32_t)26U);
+    Lib_IntVector_Intrinsics_vec128
+        f010 =
+            Lib_IntVector_Intrinsics_vec128_add64(tmp00,
+                Lib_IntVector_Intrinsics_vec128_smul64(c40, (uint64_t)5U)); /* fold the carry out of limb 4 into limb 0 as carry * 5 */
+    Lib_IntVector_Intrinsics_vec128 f110 = tmp10;
+    Lib_IntVector_Intrinsics_vec128 f210 = tmp20;
+    Lib_IntVector_Intrinsics_vec128 f310 = tmp30;
+    Lib_IntVector_Intrinsics_vec128 f410 = tmp40;
+    Lib_IntVector_Intrinsics_vec128
+        l = Lib_IntVector_Intrinsics_vec128_add64(f010, Lib_IntVector_Intrinsics_vec128_zero); /* carry pass 2: same shape as pass 1 */
+    Lib_IntVector_Intrinsics_vec128
+        tmp0 =
+            Lib_IntVector_Intrinsics_vec128_and(l,
+                Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU));
+    Lib_IntVector_Intrinsics_vec128
+        c0 = Lib_IntVector_Intrinsics_vec128_shift_right64(l, (uint32_t)26U);
+    Lib_IntVector_Intrinsics_vec128 l5 = Lib_IntVector_Intrinsics_vec128_add64(f110, c0);
+    Lib_IntVector_Intrinsics_vec128
+        tmp1 =
+            Lib_IntVector_Intrinsics_vec128_and(l5,
+                Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU));
+    Lib_IntVector_Intrinsics_vec128
+        c1 = Lib_IntVector_Intrinsics_vec128_shift_right64(l5, (uint32_t)26U);
+    Lib_IntVector_Intrinsics_vec128 l6 = Lib_IntVector_Intrinsics_vec128_add64(f210, c1);
+    Lib_IntVector_Intrinsics_vec128
+        tmp2 =
+            Lib_IntVector_Intrinsics_vec128_and(l6,
+                Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU));
+    Lib_IntVector_Intrinsics_vec128
+        c2 = Lib_IntVector_Intrinsics_vec128_shift_right64(l6, (uint32_t)26U);
+    Lib_IntVector_Intrinsics_vec128 l7 = Lib_IntVector_Intrinsics_vec128_add64(f310, c2);
+    Lib_IntVector_Intrinsics_vec128
+        tmp3 =
+            Lib_IntVector_Intrinsics_vec128_and(l7,
+                Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU));
+    Lib_IntVector_Intrinsics_vec128
+        c3 = Lib_IntVector_Intrinsics_vec128_shift_right64(l7, (uint32_t)26U);
+    Lib_IntVector_Intrinsics_vec128 l8 = Lib_IntVector_Intrinsics_vec128_add64(f410, c3);
+    Lib_IntVector_Intrinsics_vec128
+        tmp4 =
+            Lib_IntVector_Intrinsics_vec128_and(l8,
+                Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU));
+    Lib_IntVector_Intrinsics_vec128
+        c4 = Lib_IntVector_Intrinsics_vec128_shift_right64(l8, (uint32_t)26U);
+    Lib_IntVector_Intrinsics_vec128
+        f02 =
+            Lib_IntVector_Intrinsics_vec128_add64(tmp0,
+                Lib_IntVector_Intrinsics_vec128_smul64(c4, (uint64_t)5U)); /* second fold; limb 0 is not masked again after this */
+    Lib_IntVector_Intrinsics_vec128 f12 = tmp1;
+    Lib_IntVector_Intrinsics_vec128 f22 = tmp2;
+    Lib_IntVector_Intrinsics_vec128 f32 = tmp3;
+    Lib_IntVector_Intrinsics_vec128 f42 = tmp4;
+    Lib_IntVector_Intrinsics_vec128
+        mh = Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3ffffffU); /* limbs 1..4 of 2^130 - 5 */
+    Lib_IntVector_Intrinsics_vec128
+        ml = Lib_IntVector_Intrinsics_vec128_load64((uint64_t)0x3fffffbU); /* limb 0 of 2^130 - 5 */
+    Lib_IntVector_Intrinsics_vec128 mask = Lib_IntVector_Intrinsics_vec128_eq64(f42, mh);
+    Lib_IntVector_Intrinsics_vec128
+        mask1 =
+            Lib_IntVector_Intrinsics_vec128_and(mask,
+                Lib_IntVector_Intrinsics_vec128_eq64(f32, mh));
+    Lib_IntVector_Intrinsics_vec128
+        mask2 =
+            Lib_IntVector_Intrinsics_vec128_and(mask1,
+                Lib_IntVector_Intrinsics_vec128_eq64(f22, mh));
+    Lib_IntVector_Intrinsics_vec128
+        mask3 =
+            Lib_IntVector_Intrinsics_vec128_and(mask2,
+                Lib_IntVector_Intrinsics_vec128_eq64(f12, mh));
+    Lib_IntVector_Intrinsics_vec128
+        mask4 =
+            Lib_IntVector_Intrinsics_vec128_and(mask3,
+                Lib_IntVector_Intrinsics_vec128_lognot(Lib_IntVector_Intrinsics_vec128_gt64(ml, f02))); /* set when limbs 1..4 all equal mh and not (ml > limb 0) */
+    Lib_IntVector_Intrinsics_vec128 ph = Lib_IntVector_Intrinsics_vec128_and(mask4, mh);
+    Lib_IntVector_Intrinsics_vec128 pl = Lib_IntVector_Intrinsics_vec128_and(mask4, ml);
+    Lib_IntVector_Intrinsics_vec128 o0 = Lib_IntVector_Intrinsics_vec128_sub64(f02, pl); /* ph and pl are zero where mask4 is clear, so the subtraction is a no-op there */
+    Lib_IntVector_Intrinsics_vec128 o1 = Lib_IntVector_Intrinsics_vec128_sub64(f12, ph);
+    Lib_IntVector_Intrinsics_vec128 o2 = Lib_IntVector_Intrinsics_vec128_sub64(f22, ph);
+    Lib_IntVector_Intrinsics_vec128 o3 = Lib_IntVector_Intrinsics_vec128_sub64(f32, ph);
+    Lib_IntVector_Intrinsics_vec128 o4 = Lib_IntVector_Intrinsics_vec128_sub64(f42, ph);
+    Lib_IntVector_Intrinsics_vec128 f011 = o0;
+    Lib_IntVector_Intrinsics_vec128 f111 = o1;
+    Lib_IntVector_Intrinsics_vec128 f211 = o2;
+    Lib_IntVector_Intrinsics_vec128 f311 = o3;
+    Lib_IntVector_Intrinsics_vec128 f411 = o4;
+    acc[0U] = f011;
+    acc[1U] = f111;
+    acc[2U] = f211;
+    acc[3U] = f311;
+    acc[4U] = f411;
+    Lib_IntVector_Intrinsics_vec128 f00 = acc[0U];
+    Lib_IntVector_Intrinsics_vec128 f1 = acc[1U];
+    Lib_IntVector_Intrinsics_vec128 f2 = acc[2U];
+    Lib_IntVector_Intrinsics_vec128 f3 = acc[3U];
+    Lib_IntVector_Intrinsics_vec128 f4 = acc[4U];
+    uint64_t f01 = Lib_IntVector_Intrinsics_vec128_extract64(f00, (uint32_t)0U); /* only lane 0 of each limb is used from here on */
+    uint64_t f112 = Lib_IntVector_Intrinsics_vec128_extract64(f1, (uint32_t)0U);
+    uint64_t f212 = Lib_IntVector_Intrinsics_vec128_extract64(f2, (uint32_t)0U);
+    uint64_t f312 = Lib_IntVector_Intrinsics_vec128_extract64(f3, (uint32_t)0U);
+    uint64_t f41 = Lib_IntVector_Intrinsics_vec128_extract64(f4, (uint32_t)0U);
+    uint64_t lo = (f01 | f112 << (uint32_t)26U) | f212 << (uint32_t)52U; /* limbs 0, 1 and the low 12 bits of limb 2 */
+    uint64_t hi = (f212 >> (uint32_t)12U | f312 << (uint32_t)14U) | f41 << (uint32_t)40U; /* rest of limb 2, then limbs 3 and 4 */
+    uint64_t f10 = lo;
+    uint64_t f11 = hi;
+    uint64_t u0 = load64_le(ks);
+    uint64_t lo0 = u0;
+    uint64_t u = load64_le(ks + (uint32_t)8U);
+    uint64_t hi0 = u;
+    uint64_t f20 = lo0;
+    uint64_t f21 = hi0;
+    uint64_t r0 = f10 + f20;
+    uint64_t r1 = f11 + f21;
+    uint64_t c = (r0 ^ ((r0 ^ f20) | ((r0 - f20) ^ f20))) >> (uint32_t)63U; /* branch-free carry: 1 exactly when r0 < f20, i.e. the low-word add wrapped */
+    uint64_t r11 = r1 + c;
+    uint64_t f30 = r0;
+    uint64_t f31 = r11;
+    store64_le(tag, f30);
+    store64_le(tag + (uint32_t)8U, f31); /* any carry out of the high word is dropped */
+}
+/*
+ * One-shot MAC: runs init, update and finish over a local context.
+ *
+ * tag:  output buffer, handed to the finish step.
+ * len:  handed to the update step together with text.
+ * text: input, handed to the update step.
+ * key:  handed to both the init step and the finish step.
+ *
+ * The context is a zero-initialized local array of 25 vectors declared with
+ * the KRML_PRE_ALIGN(16) / KRML_POST_ALIGN(16) alignment macros.
+ * NOTE(review): the context is not cleared before returning; confirm that
+ * this is acceptable for callers.
+ */
+void
+Hacl_Poly1305_128_poly1305_mac(uint8_t *tag, uint32_t len, uint8_t *text, uint8_t *key)
+{
+    KRML_PRE_ALIGN(16)
+    Lib_IntVector_Intrinsics_vec128 ctx[25U] KRML_POST_ALIGN(16) = { 0U };
+    Hacl_Poly1305_128_poly1305_init(ctx, key);
+    Hacl_Poly1305_128_poly1305_update(ctx, len, text);
+    Hacl_Poly1305_128_poly1305_finish(tag, key, ctx);
+}
diff --git a/security/nss/lib/freebl/verified/Hacl_Poly1305_128.h b/security/nss/lib/freebl/verified/Hacl_Poly1305_128.h
new file mode 100644
index 0000000000..03069fdb4e
--- /dev/null
+++ b/security/nss/lib/freebl/verified/Hacl_Poly1305_128.h
@@ -0,0 +1,63 @@
+/* MIT License
+ *
+ * Copyright (c) 2016-2020 INRIA, CMU and Microsoft Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __Hacl_Poly1305_128_H
+#define __Hacl_Poly1305_128_H
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+#include <string.h>
+#include "krml/internal/types.h"
+#include "krml/lowstar_endianness.h"
+#include "krml/internal/target.h"
+
+#include "Hacl_Krmllib.h"
+#include "libintvector.h"
+typedef Lib_IntVector_Intrinsics_vec128 *Hacl_Poly1305_128_poly1305_ctx; /* context handle: a pointer to 128-bit vectors */
+/*
+ * First step of the MAC. The definition is not part of this header; the
+ * one-shot Hacl_Poly1305_128_poly1305_mac calls it with a zeroed array of
+ * 25 vectors and the key.
+ * NOTE(review): presumably derives the per-key state stored in ctx - confirm
+ * against the definition.
+ */
+void Hacl_Poly1305_128_poly1305_init(Lib_IntVector_Intrinsics_vec128 *ctx, uint8_t *key);
+/*
+ * NOTE(review): presumably absorbs a single block read from text into ctx;
+ * the block size is not visible from this header - confirm against the
+ * definition.
+ */
+void Hacl_Poly1305_128_poly1305_update1(Lib_IntVector_Intrinsics_vec128 *ctx, uint8_t *text);
+/*
+ * Middle step of the MAC; the one-shot helper calls it once with the whole
+ * input between init and finish.
+ * NOTE(review): presumably absorbs len bytes starting at text - confirm
+ * against the definition.
+ */
+void
+Hacl_Poly1305_128_poly1305_update(
+    Lib_IntVector_Intrinsics_vec128 *ctx,
+    uint32_t len,
+    uint8_t *text);
+/*
+ * Last step of the MAC: reduces the accumulator held in the first five
+ * vectors of ctx, adds the two little-endian 64-bit words found at key + 16,
+ * and stores the 16-byte result to tag.
+ */
+void
+Hacl_Poly1305_128_poly1305_finish(
+    uint8_t *tag,
+    uint8_t *key,
+    Lib_IntVector_Intrinsics_vec128 *ctx);
+/*
+ * One-shot helper: calls init, update and finish in that order on a local
+ * context, so the caller does not manage a context itself.
+ */
+void Hacl_Poly1305_128_poly1305_mac(uint8_t *tag, uint32_t len, uint8_t *text, uint8_t *key);
+
+#if defined(__cplusplus)
+}
+#endif
+
+#define __Hacl_Poly1305_128_H_DEFINED
+#endif
diff --git a/security/nss/lib/freebl/verified/Hacl_Poly1305_256.c b/security/nss/lib/freebl/verified/Hacl_Poly1305_256.c
new file mode 100644
index 0000000000..c5a2a5908e
--- /dev/null
+++ b/security/nss/lib/freebl/verified/Hacl_Poly1305_256.c
@@ -0,0 +1,2087 @@
+/* MIT License
+ *
+ * Copyright (c) 2016-2020 INRIA, CMU and Microsoft Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+
* in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "internal/Hacl_Poly1305_256.h"
+/*
+ * Split the input at b into five limb vectors, set bit 24 of the top limb,
+ * and add the result into the accumulator.
+ *
+ * acc: five vectors, read and overwritten. Only lane 0 of the old value is
+ *      carried over (it is inserted into a zero vector before the add), so
+ *      the other lanes of the result come from the input alone.
+ * b:   input bytes, loaded as two vectors from b and from b + 32.
+ *      NOTE(review): presumably 64 bytes in total, i.e. four 16-byte blocks
+ *      as the function name suggests - confirm against libintvector.h.
+ *
+ * Limbs 0..3 are masked to 26 bits (0x3ffffff); limb 4 is whatever remains
+ * after the final right shift and is not masked.
+ * NOTE(review): the shift amounts 0, 26, 48 + 4, 48 + 30 and 64 + 40 match
+ * bit offsets 0, 26, 52, 78 and 104 of a 128-bit value, assuming
+ * shift_right(..., 48) shifts within each 128-bit half - confirm.
+ */
+void
+Hacl_Impl_Poly1305_Field32xN_256_load_acc4(Lib_IntVector_Intrinsics_vec256 *acc, uint8_t *b)
+{
+    KRML_PRE_ALIGN(32)
+    Lib_IntVector_Intrinsics_vec256 e[5U] KRML_POST_ALIGN(32) = { 0U };
+    Lib_IntVector_Intrinsics_vec256 lo = Lib_IntVector_Intrinsics_vec256_load64_le(b);
+    Lib_IntVector_Intrinsics_vec256
+        hi = Lib_IntVector_Intrinsics_vec256_load64_le(b + (uint32_t)32U);
+    Lib_IntVector_Intrinsics_vec256
+        mask26 = Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU);
+    Lib_IntVector_Intrinsics_vec256 m0 = Lib_IntVector_Intrinsics_vec256_interleave_low128(lo, hi);
+    Lib_IntVector_Intrinsics_vec256
+        m1 = Lib_IntVector_Intrinsics_vec256_interleave_high128(lo, hi);
+    Lib_IntVector_Intrinsics_vec256
+        m2 = Lib_IntVector_Intrinsics_vec256_shift_right(m0, (uint32_t)48U);
+    Lib_IntVector_Intrinsics_vec256
+        m3 = Lib_IntVector_Intrinsics_vec256_shift_right(m1, (uint32_t)48U);
+    Lib_IntVector_Intrinsics_vec256 m4 = Lib_IntVector_Intrinsics_vec256_interleave_high64(m0, m1);
+    Lib_IntVector_Intrinsics_vec256 t0 = Lib_IntVector_Intrinsics_vec256_interleave_low64(m0, m1);
+    Lib_IntVector_Intrinsics_vec256 t3 = Lib_IntVector_Intrinsics_vec256_interleave_low64(m2, m3);
+    Lib_IntVector_Intrinsics_vec256
+        t2 = Lib_IntVector_Intrinsics_vec256_shift_right64(t3, (uint32_t)4U);
+    Lib_IntVector_Intrinsics_vec256 o20 = Lib_IntVector_Intrinsics_vec256_and(t2, mask26); /* limb 2 */
+    Lib_IntVector_Intrinsics_vec256
+        t1 = Lib_IntVector_Intrinsics_vec256_shift_right64(t0, (uint32_t)26U);
+    Lib_IntVector_Intrinsics_vec256 o10 = Lib_IntVector_Intrinsics_vec256_and(t1, mask26); /* limb 1 */
+    Lib_IntVector_Intrinsics_vec256 o5 = Lib_IntVector_Intrinsics_vec256_and(t0, mask26); /* limb 0 */
+    Lib_IntVector_Intrinsics_vec256
+        t31 = Lib_IntVector_Intrinsics_vec256_shift_right64(t3, (uint32_t)30U);
+    Lib_IntVector_Intrinsics_vec256 o30 = Lib_IntVector_Intrinsics_vec256_and(t31, mask26); /* limb 3 */
+    Lib_IntVector_Intrinsics_vec256
+        o40 = Lib_IntVector_Intrinsics_vec256_shift_right64(m4, (uint32_t)40U); /* limb 4, left unmasked */
+    Lib_IntVector_Intrinsics_vec256 o0 = o5;
+    Lib_IntVector_Intrinsics_vec256 o1 = o10;
+    Lib_IntVector_Intrinsics_vec256 o2 = o20;
+    Lib_IntVector_Intrinsics_vec256 o3 = o30;
+    Lib_IntVector_Intrinsics_vec256 o4 = o40;
+    e[0U] = o0;
+    e[1U] = o1;
+    e[2U] = o2;
+    e[3U] = o3;
+    e[4U] = o4;
+    uint64_t b1 = (uint64_t)0x1000000U; /* bit 24; with four 26-bit limbs below it this is bit 4 * 26 + 24 = 128 */
+    Lib_IntVector_Intrinsics_vec256 mask = Lib_IntVector_Intrinsics_vec256_load64(b1);
+    Lib_IntVector_Intrinsics_vec256 f40 = e[4U];
+    e[4U] = Lib_IntVector_Intrinsics_vec256_or(f40, mask);
+    Lib_IntVector_Intrinsics_vec256 acc0 = acc[0U];
+    Lib_IntVector_Intrinsics_vec256 acc1 = acc[1U];
+    Lib_IntVector_Intrinsics_vec256 acc2 = acc[2U];
+    Lib_IntVector_Intrinsics_vec256 acc3 = acc[3U];
+    Lib_IntVector_Intrinsics_vec256 acc4 = acc[4U];
+    Lib_IntVector_Intrinsics_vec256 e0 = e[0U];
+    Lib_IntVector_Intrinsics_vec256 e1 = e[1U];
+    Lib_IntVector_Intrinsics_vec256 e2 = e[2U];
+    Lib_IntVector_Intrinsics_vec256 e3 = e[3U];
+    Lib_IntVector_Intrinsics_vec256 e4 = e[4U];
+    Lib_IntVector_Intrinsics_vec256 r0 = Lib_IntVector_Intrinsics_vec256_zero;
+    Lib_IntVector_Intrinsics_vec256 r1 = Lib_IntVector_Intrinsics_vec256_zero;
+    Lib_IntVector_Intrinsics_vec256 r2 = Lib_IntVector_Intrinsics_vec256_zero;
+    Lib_IntVector_Intrinsics_vec256 r3 = Lib_IntVector_Intrinsics_vec256_zero;
+    Lib_IntVector_Intrinsics_vec256 r4 = Lib_IntVector_Intrinsics_vec256_zero;
+    Lib_IntVector_Intrinsics_vec256
+        r01 =
+            Lib_IntVector_Intrinsics_vec256_insert64(r0,
+                Lib_IntVector_Intrinsics_vec256_extract64(acc0, (uint32_t)0U),
+                (uint32_t)0U); /* zero vector with lane 0 of the old accumulator limb in lane 0 */
+    Lib_IntVector_Intrinsics_vec256
+        r11 =
+            Lib_IntVector_Intrinsics_vec256_insert64(r1,
+                Lib_IntVector_Intrinsics_vec256_extract64(acc1, (uint32_t)0U),
+                (uint32_t)0U);
+    Lib_IntVector_Intrinsics_vec256
+        r21 =
+            Lib_IntVector_Intrinsics_vec256_insert64(r2,
+                Lib_IntVector_Intrinsics_vec256_extract64(acc2, (uint32_t)0U),
+                (uint32_t)0U);
+    Lib_IntVector_Intrinsics_vec256
+        r31 =
+            Lib_IntVector_Intrinsics_vec256_insert64(r3,
+                Lib_IntVector_Intrinsics_vec256_extract64(acc3, (uint32_t)0U),
+                (uint32_t)0U);
+    Lib_IntVector_Intrinsics_vec256
+        r41 =
+            Lib_IntVector_Intrinsics_vec256_insert64(r4,
+                Lib_IntVector_Intrinsics_vec256_extract64(acc4, (uint32_t)0U),
+                (uint32_t)0U);
+    Lib_IntVector_Intrinsics_vec256 f0 = Lib_IntVector_Intrinsics_vec256_add64(r01, e0); /* limb-wise add, no carry propagation here */
+    Lib_IntVector_Intrinsics_vec256 f1 = Lib_IntVector_Intrinsics_vec256_add64(r11, e1);
+    Lib_IntVector_Intrinsics_vec256 f2 = Lib_IntVector_Intrinsics_vec256_add64(r21, e2);
+    Lib_IntVector_Intrinsics_vec256 f3 = Lib_IntVector_Intrinsics_vec256_add64(r31, e3);
+    Lib_IntVector_Intrinsics_vec256 f4 = Lib_IntVector_Intrinsics_vec256_add64(r41, e4);
+    Lib_IntVector_Intrinsics_vec256 acc01 = f0;
+    Lib_IntVector_Intrinsics_vec256 acc11 = f1;
+    Lib_IntVector_Intrinsics_vec256 acc21 = f2;
+    Lib_IntVector_Intrinsics_vec256 acc31 = f3;
+    Lib_IntVector_Intrinsics_vec256 acc41 = f4;
+    acc[0U] = acc01;
+    acc[1U] = acc11;
+    acc[2U] = acc21;
+    acc[3U] = acc31;
+    acc[4U] = acc41;
+}
+
+void
+Hacl_Impl_Poly1305_Field32xN_256_fmul_r4_normalize(
+    Lib_IntVector_Intrinsics_vec256 *out,
+    Lib_IntVector_Intrinsics_vec256 *p)
+{
+    Lib_IntVector_Intrinsics_vec256 *r = p;
+    Lib_IntVector_Intrinsics_vec256 *r_5 = p + (uint32_t)5U;
+    Lib_IntVector_Intrinsics_vec256 *r4 = p + (uint32_t)10U;
+    Lib_IntVector_Intrinsics_vec256 a0 = out[0U];
+    Lib_IntVector_Intrinsics_vec256 a1 = out[1U];
+    Lib_IntVector_Intrinsics_vec256 a2 = out[2U];
+    Lib_IntVector_Intrinsics_vec256 a3 = out[3U];
+    Lib_IntVector_Intrinsics_vec256 a4 = out[4U];
+    Lib_IntVector_Intrinsics_vec256 r10 = r[0U];
+    Lib_IntVector_Intrinsics_vec256 r11 = r[1U];
+    Lib_IntVector_Intrinsics_vec256 r12 = r[2U];
+    Lib_IntVector_Intrinsics_vec256 r13 = r[3U];
+    Lib_IntVector_Intrinsics_vec256 r14 = r[4U];
+    Lib_IntVector_Intrinsics_vec256 r151 = r_5[1U];
+    Lib_IntVector_Intrinsics_vec256 r152 = r_5[2U];
+    Lib_IntVector_Intrinsics_vec256 r153 = r_5[3U];
+    Lib_IntVector_Intrinsics_vec256 r154 = r_5[4U];
+    Lib_IntVector_Intrinsics_vec256 r40 = r4[0U];
+    Lib_IntVector_Intrinsics_vec256 r41 = r4[1U];
+    Lib_IntVector_Intrinsics_vec256 r42 = r4[2U];
+    Lib_IntVector_Intrinsics_vec256 r43 = r4[3U];
+    Lib_IntVector_Intrinsics_vec256 r44 = r4[4U];
+    Lib_IntVector_Intrinsics_vec256 a010 = Lib_IntVector_Intrinsics_vec256_mul64(r10, r10);
+    Lib_IntVector_Intrinsics_vec256 a110 = Lib_IntVector_Intrinsics_vec256_mul64(r11, r10);
+    Lib_IntVector_Intrinsics_vec256 a210 = Lib_IntVector_Intrinsics_vec256_mul64(r12, r10);
+    Lib_IntVector_Intrinsics_vec256 a310 = Lib_IntVector_Intrinsics_vec256_mul64(r13, r10);
+    Lib_IntVector_Intrinsics_vec256 a410 = Lib_IntVector_Intrinsics_vec256_mul64(r14, r10);
+    Lib_IntVector_Intrinsics_vec256
+        a020 =
+            Lib_IntVector_Intrinsics_vec256_add64(a010,
+                Lib_IntVector_Intrinsics_vec256_mul64(r154, r11));
+    Lib_IntVector_Intrinsics_vec256
+        a120 =
+            Lib_IntVector_Intrinsics_vec256_add64(a110,
+                Lib_IntVector_Intrinsics_vec256_mul64(r10, r11));
+    Lib_IntVector_Intrinsics_vec256
+        a220 =
Lib_IntVector_Intrinsics_vec256_add64(a210, + Lib_IntVector_Intrinsics_vec256_mul64(r11, r11)); + Lib_IntVector_Intrinsics_vec256 + a320 = + Lib_IntVector_Intrinsics_vec256_add64(a310, + Lib_IntVector_Intrinsics_vec256_mul64(r12, r11)); + Lib_IntVector_Intrinsics_vec256 + a420 = + Lib_IntVector_Intrinsics_vec256_add64(a410, + Lib_IntVector_Intrinsics_vec256_mul64(r13, r11)); + Lib_IntVector_Intrinsics_vec256 + a030 = + Lib_IntVector_Intrinsics_vec256_add64(a020, + Lib_IntVector_Intrinsics_vec256_mul64(r153, r12)); + Lib_IntVector_Intrinsics_vec256 + a130 = + Lib_IntVector_Intrinsics_vec256_add64(a120, + Lib_IntVector_Intrinsics_vec256_mul64(r154, r12)); + Lib_IntVector_Intrinsics_vec256 + a230 = + Lib_IntVector_Intrinsics_vec256_add64(a220, + Lib_IntVector_Intrinsics_vec256_mul64(r10, r12)); + Lib_IntVector_Intrinsics_vec256 + a330 = + Lib_IntVector_Intrinsics_vec256_add64(a320, + Lib_IntVector_Intrinsics_vec256_mul64(r11, r12)); + Lib_IntVector_Intrinsics_vec256 + a430 = + Lib_IntVector_Intrinsics_vec256_add64(a420, + Lib_IntVector_Intrinsics_vec256_mul64(r12, r12)); + Lib_IntVector_Intrinsics_vec256 + a040 = + Lib_IntVector_Intrinsics_vec256_add64(a030, + Lib_IntVector_Intrinsics_vec256_mul64(r152, r13)); + Lib_IntVector_Intrinsics_vec256 + a140 = + Lib_IntVector_Intrinsics_vec256_add64(a130, + Lib_IntVector_Intrinsics_vec256_mul64(r153, r13)); + Lib_IntVector_Intrinsics_vec256 + a240 = + Lib_IntVector_Intrinsics_vec256_add64(a230, + Lib_IntVector_Intrinsics_vec256_mul64(r154, r13)); + Lib_IntVector_Intrinsics_vec256 + a340 = + Lib_IntVector_Intrinsics_vec256_add64(a330, + Lib_IntVector_Intrinsics_vec256_mul64(r10, r13)); + Lib_IntVector_Intrinsics_vec256 + a440 = + Lib_IntVector_Intrinsics_vec256_add64(a430, + Lib_IntVector_Intrinsics_vec256_mul64(r11, r13)); + Lib_IntVector_Intrinsics_vec256 + a050 = + Lib_IntVector_Intrinsics_vec256_add64(a040, + Lib_IntVector_Intrinsics_vec256_mul64(r151, r14)); + Lib_IntVector_Intrinsics_vec256 + a150 = + 
Lib_IntVector_Intrinsics_vec256_add64(a140, + Lib_IntVector_Intrinsics_vec256_mul64(r152, r14)); + Lib_IntVector_Intrinsics_vec256 + a250 = + Lib_IntVector_Intrinsics_vec256_add64(a240, + Lib_IntVector_Intrinsics_vec256_mul64(r153, r14)); + Lib_IntVector_Intrinsics_vec256 + a350 = + Lib_IntVector_Intrinsics_vec256_add64(a340, + Lib_IntVector_Intrinsics_vec256_mul64(r154, r14)); + Lib_IntVector_Intrinsics_vec256 + a450 = + Lib_IntVector_Intrinsics_vec256_add64(a440, + Lib_IntVector_Intrinsics_vec256_mul64(r10, r14)); + Lib_IntVector_Intrinsics_vec256 t00 = a050; + Lib_IntVector_Intrinsics_vec256 t10 = a150; + Lib_IntVector_Intrinsics_vec256 t20 = a250; + Lib_IntVector_Intrinsics_vec256 t30 = a350; + Lib_IntVector_Intrinsics_vec256 t40 = a450; + Lib_IntVector_Intrinsics_vec256 + mask260 = Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU); + Lib_IntVector_Intrinsics_vec256 + z00 = Lib_IntVector_Intrinsics_vec256_shift_right64(t00, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec256 + z10 = Lib_IntVector_Intrinsics_vec256_shift_right64(t30, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec256 x00 = Lib_IntVector_Intrinsics_vec256_and(t00, mask260); + Lib_IntVector_Intrinsics_vec256 x30 = Lib_IntVector_Intrinsics_vec256_and(t30, mask260); + Lib_IntVector_Intrinsics_vec256 x10 = Lib_IntVector_Intrinsics_vec256_add64(t10, z00); + Lib_IntVector_Intrinsics_vec256 x40 = Lib_IntVector_Intrinsics_vec256_add64(t40, z10); + Lib_IntVector_Intrinsics_vec256 + z010 = Lib_IntVector_Intrinsics_vec256_shift_right64(x10, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec256 + z110 = Lib_IntVector_Intrinsics_vec256_shift_right64(x40, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec256 + t5 = Lib_IntVector_Intrinsics_vec256_shift_left64(z110, (uint32_t)2U); + Lib_IntVector_Intrinsics_vec256 z12 = Lib_IntVector_Intrinsics_vec256_add64(z110, t5); + Lib_IntVector_Intrinsics_vec256 x110 = Lib_IntVector_Intrinsics_vec256_and(x10, mask260); + Lib_IntVector_Intrinsics_vec256 x410 = 
Lib_IntVector_Intrinsics_vec256_and(x40, mask260); + Lib_IntVector_Intrinsics_vec256 x20 = Lib_IntVector_Intrinsics_vec256_add64(t20, z010); + Lib_IntVector_Intrinsics_vec256 x010 = Lib_IntVector_Intrinsics_vec256_add64(x00, z12); + Lib_IntVector_Intrinsics_vec256 + z020 = Lib_IntVector_Intrinsics_vec256_shift_right64(x20, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec256 + z130 = Lib_IntVector_Intrinsics_vec256_shift_right64(x010, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec256 x210 = Lib_IntVector_Intrinsics_vec256_and(x20, mask260); + Lib_IntVector_Intrinsics_vec256 x020 = Lib_IntVector_Intrinsics_vec256_and(x010, mask260); + Lib_IntVector_Intrinsics_vec256 x310 = Lib_IntVector_Intrinsics_vec256_add64(x30, z020); + Lib_IntVector_Intrinsics_vec256 x120 = Lib_IntVector_Intrinsics_vec256_add64(x110, z130); + Lib_IntVector_Intrinsics_vec256 + z030 = Lib_IntVector_Intrinsics_vec256_shift_right64(x310, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec256 x320 = Lib_IntVector_Intrinsics_vec256_and(x310, mask260); + Lib_IntVector_Intrinsics_vec256 x420 = Lib_IntVector_Intrinsics_vec256_add64(x410, z030); + Lib_IntVector_Intrinsics_vec256 r20 = x020; + Lib_IntVector_Intrinsics_vec256 r21 = x120; + Lib_IntVector_Intrinsics_vec256 r22 = x210; + Lib_IntVector_Intrinsics_vec256 r23 = x320; + Lib_IntVector_Intrinsics_vec256 r24 = x420; + Lib_IntVector_Intrinsics_vec256 a011 = Lib_IntVector_Intrinsics_vec256_mul64(r10, r20); + Lib_IntVector_Intrinsics_vec256 a111 = Lib_IntVector_Intrinsics_vec256_mul64(r11, r20); + Lib_IntVector_Intrinsics_vec256 a211 = Lib_IntVector_Intrinsics_vec256_mul64(r12, r20); + Lib_IntVector_Intrinsics_vec256 a311 = Lib_IntVector_Intrinsics_vec256_mul64(r13, r20); + Lib_IntVector_Intrinsics_vec256 a411 = Lib_IntVector_Intrinsics_vec256_mul64(r14, r20); + Lib_IntVector_Intrinsics_vec256 + a021 = + Lib_IntVector_Intrinsics_vec256_add64(a011, + Lib_IntVector_Intrinsics_vec256_mul64(r154, r21)); + Lib_IntVector_Intrinsics_vec256 + a121 = + 
Lib_IntVector_Intrinsics_vec256_add64(a111, + Lib_IntVector_Intrinsics_vec256_mul64(r10, r21)); + Lib_IntVector_Intrinsics_vec256 + a221 = + Lib_IntVector_Intrinsics_vec256_add64(a211, + Lib_IntVector_Intrinsics_vec256_mul64(r11, r21)); + Lib_IntVector_Intrinsics_vec256 + a321 = + Lib_IntVector_Intrinsics_vec256_add64(a311, + Lib_IntVector_Intrinsics_vec256_mul64(r12, r21)); + Lib_IntVector_Intrinsics_vec256 + a421 = + Lib_IntVector_Intrinsics_vec256_add64(a411, + Lib_IntVector_Intrinsics_vec256_mul64(r13, r21)); + Lib_IntVector_Intrinsics_vec256 + a031 = + Lib_IntVector_Intrinsics_vec256_add64(a021, + Lib_IntVector_Intrinsics_vec256_mul64(r153, r22)); + Lib_IntVector_Intrinsics_vec256 + a131 = + Lib_IntVector_Intrinsics_vec256_add64(a121, + Lib_IntVector_Intrinsics_vec256_mul64(r154, r22)); + Lib_IntVector_Intrinsics_vec256 + a231 = + Lib_IntVector_Intrinsics_vec256_add64(a221, + Lib_IntVector_Intrinsics_vec256_mul64(r10, r22)); + Lib_IntVector_Intrinsics_vec256 + a331 = + Lib_IntVector_Intrinsics_vec256_add64(a321, + Lib_IntVector_Intrinsics_vec256_mul64(r11, r22)); + Lib_IntVector_Intrinsics_vec256 + a431 = + Lib_IntVector_Intrinsics_vec256_add64(a421, + Lib_IntVector_Intrinsics_vec256_mul64(r12, r22)); + Lib_IntVector_Intrinsics_vec256 + a041 = + Lib_IntVector_Intrinsics_vec256_add64(a031, + Lib_IntVector_Intrinsics_vec256_mul64(r152, r23)); + Lib_IntVector_Intrinsics_vec256 + a141 = + Lib_IntVector_Intrinsics_vec256_add64(a131, + Lib_IntVector_Intrinsics_vec256_mul64(r153, r23)); + Lib_IntVector_Intrinsics_vec256 + a241 = + Lib_IntVector_Intrinsics_vec256_add64(a231, + Lib_IntVector_Intrinsics_vec256_mul64(r154, r23)); + Lib_IntVector_Intrinsics_vec256 + a341 = + Lib_IntVector_Intrinsics_vec256_add64(a331, + Lib_IntVector_Intrinsics_vec256_mul64(r10, r23)); + Lib_IntVector_Intrinsics_vec256 + a441 = + Lib_IntVector_Intrinsics_vec256_add64(a431, + Lib_IntVector_Intrinsics_vec256_mul64(r11, r23)); + Lib_IntVector_Intrinsics_vec256 + a051 = + 
Lib_IntVector_Intrinsics_vec256_add64(a041, + Lib_IntVector_Intrinsics_vec256_mul64(r151, r24)); + Lib_IntVector_Intrinsics_vec256 + a151 = + Lib_IntVector_Intrinsics_vec256_add64(a141, + Lib_IntVector_Intrinsics_vec256_mul64(r152, r24)); + Lib_IntVector_Intrinsics_vec256 + a251 = + Lib_IntVector_Intrinsics_vec256_add64(a241, + Lib_IntVector_Intrinsics_vec256_mul64(r153, r24)); + Lib_IntVector_Intrinsics_vec256 + a351 = + Lib_IntVector_Intrinsics_vec256_add64(a341, + Lib_IntVector_Intrinsics_vec256_mul64(r154, r24)); + Lib_IntVector_Intrinsics_vec256 + a451 = + Lib_IntVector_Intrinsics_vec256_add64(a441, + Lib_IntVector_Intrinsics_vec256_mul64(r10, r24)); + Lib_IntVector_Intrinsics_vec256 t01 = a051; + Lib_IntVector_Intrinsics_vec256 t11 = a151; + Lib_IntVector_Intrinsics_vec256 t21 = a251; + Lib_IntVector_Intrinsics_vec256 t31 = a351; + Lib_IntVector_Intrinsics_vec256 t41 = a451; + Lib_IntVector_Intrinsics_vec256 + mask261 = Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU); + Lib_IntVector_Intrinsics_vec256 + z04 = Lib_IntVector_Intrinsics_vec256_shift_right64(t01, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec256 + z14 = Lib_IntVector_Intrinsics_vec256_shift_right64(t31, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec256 x03 = Lib_IntVector_Intrinsics_vec256_and(t01, mask261); + Lib_IntVector_Intrinsics_vec256 x33 = Lib_IntVector_Intrinsics_vec256_and(t31, mask261); + Lib_IntVector_Intrinsics_vec256 x13 = Lib_IntVector_Intrinsics_vec256_add64(t11, z04); + Lib_IntVector_Intrinsics_vec256 x43 = Lib_IntVector_Intrinsics_vec256_add64(t41, z14); + Lib_IntVector_Intrinsics_vec256 + z011 = Lib_IntVector_Intrinsics_vec256_shift_right64(x13, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec256 + z111 = Lib_IntVector_Intrinsics_vec256_shift_right64(x43, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec256 + t6 = Lib_IntVector_Intrinsics_vec256_shift_left64(z111, (uint32_t)2U); + Lib_IntVector_Intrinsics_vec256 z120 = Lib_IntVector_Intrinsics_vec256_add64(z111, t6); + 
Lib_IntVector_Intrinsics_vec256 x111 = Lib_IntVector_Intrinsics_vec256_and(x13, mask261); + Lib_IntVector_Intrinsics_vec256 x411 = Lib_IntVector_Intrinsics_vec256_and(x43, mask261); + Lib_IntVector_Intrinsics_vec256 x22 = Lib_IntVector_Intrinsics_vec256_add64(t21, z011); + Lib_IntVector_Intrinsics_vec256 x011 = Lib_IntVector_Intrinsics_vec256_add64(x03, z120); + Lib_IntVector_Intrinsics_vec256 + z021 = Lib_IntVector_Intrinsics_vec256_shift_right64(x22, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec256 + z131 = Lib_IntVector_Intrinsics_vec256_shift_right64(x011, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec256 x211 = Lib_IntVector_Intrinsics_vec256_and(x22, mask261); + Lib_IntVector_Intrinsics_vec256 x021 = Lib_IntVector_Intrinsics_vec256_and(x011, mask261); + Lib_IntVector_Intrinsics_vec256 x311 = Lib_IntVector_Intrinsics_vec256_add64(x33, z021); + Lib_IntVector_Intrinsics_vec256 x121 = Lib_IntVector_Intrinsics_vec256_add64(x111, z131); + Lib_IntVector_Intrinsics_vec256 + z031 = Lib_IntVector_Intrinsics_vec256_shift_right64(x311, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec256 x321 = Lib_IntVector_Intrinsics_vec256_and(x311, mask261); + Lib_IntVector_Intrinsics_vec256 x421 = Lib_IntVector_Intrinsics_vec256_add64(x411, z031); + Lib_IntVector_Intrinsics_vec256 r30 = x021; + Lib_IntVector_Intrinsics_vec256 r31 = x121; + Lib_IntVector_Intrinsics_vec256 r32 = x211; + Lib_IntVector_Intrinsics_vec256 r33 = x321; + Lib_IntVector_Intrinsics_vec256 r34 = x421; + Lib_IntVector_Intrinsics_vec256 + v12120 = Lib_IntVector_Intrinsics_vec256_interleave_low64(r20, r10); + Lib_IntVector_Intrinsics_vec256 + v34340 = Lib_IntVector_Intrinsics_vec256_interleave_low64(r40, r30); + Lib_IntVector_Intrinsics_vec256 + r12340 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v34340, v12120); + Lib_IntVector_Intrinsics_vec256 + v12121 = Lib_IntVector_Intrinsics_vec256_interleave_low64(r21, r11); + Lib_IntVector_Intrinsics_vec256 + v34341 = 
Lib_IntVector_Intrinsics_vec256_interleave_low64(r41, r31); + Lib_IntVector_Intrinsics_vec256 + r12341 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v34341, v12121); + Lib_IntVector_Intrinsics_vec256 + v12122 = Lib_IntVector_Intrinsics_vec256_interleave_low64(r22, r12); + Lib_IntVector_Intrinsics_vec256 + v34342 = Lib_IntVector_Intrinsics_vec256_interleave_low64(r42, r32); + Lib_IntVector_Intrinsics_vec256 + r12342 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v34342, v12122); + Lib_IntVector_Intrinsics_vec256 + v12123 = Lib_IntVector_Intrinsics_vec256_interleave_low64(r23, r13); + Lib_IntVector_Intrinsics_vec256 + v34343 = Lib_IntVector_Intrinsics_vec256_interleave_low64(r43, r33); + Lib_IntVector_Intrinsics_vec256 + r12343 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v34343, v12123); + Lib_IntVector_Intrinsics_vec256 + v12124 = Lib_IntVector_Intrinsics_vec256_interleave_low64(r24, r14); + Lib_IntVector_Intrinsics_vec256 + v34344 = Lib_IntVector_Intrinsics_vec256_interleave_low64(r44, r34); + Lib_IntVector_Intrinsics_vec256 + r12344 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v34344, v12124); + Lib_IntVector_Intrinsics_vec256 + r123451 = Lib_IntVector_Intrinsics_vec256_smul64(r12341, (uint64_t)5U); + Lib_IntVector_Intrinsics_vec256 + r123452 = Lib_IntVector_Intrinsics_vec256_smul64(r12342, (uint64_t)5U); + Lib_IntVector_Intrinsics_vec256 + r123453 = Lib_IntVector_Intrinsics_vec256_smul64(r12343, (uint64_t)5U); + Lib_IntVector_Intrinsics_vec256 + r123454 = Lib_IntVector_Intrinsics_vec256_smul64(r12344, (uint64_t)5U); + Lib_IntVector_Intrinsics_vec256 a01 = Lib_IntVector_Intrinsics_vec256_mul64(r12340, a0); + Lib_IntVector_Intrinsics_vec256 a11 = Lib_IntVector_Intrinsics_vec256_mul64(r12341, a0); + Lib_IntVector_Intrinsics_vec256 a21 = Lib_IntVector_Intrinsics_vec256_mul64(r12342, a0); + Lib_IntVector_Intrinsics_vec256 a31 = Lib_IntVector_Intrinsics_vec256_mul64(r12343, a0); + Lib_IntVector_Intrinsics_vec256 a41 = 
Lib_IntVector_Intrinsics_vec256_mul64(r12344, a0); + Lib_IntVector_Intrinsics_vec256 + a02 = + Lib_IntVector_Intrinsics_vec256_add64(a01, + Lib_IntVector_Intrinsics_vec256_mul64(r123454, a1)); + Lib_IntVector_Intrinsics_vec256 + a12 = + Lib_IntVector_Intrinsics_vec256_add64(a11, + Lib_IntVector_Intrinsics_vec256_mul64(r12340, a1)); + Lib_IntVector_Intrinsics_vec256 + a22 = + Lib_IntVector_Intrinsics_vec256_add64(a21, + Lib_IntVector_Intrinsics_vec256_mul64(r12341, a1)); + Lib_IntVector_Intrinsics_vec256 + a32 = + Lib_IntVector_Intrinsics_vec256_add64(a31, + Lib_IntVector_Intrinsics_vec256_mul64(r12342, a1)); + Lib_IntVector_Intrinsics_vec256 + a42 = + Lib_IntVector_Intrinsics_vec256_add64(a41, + Lib_IntVector_Intrinsics_vec256_mul64(r12343, a1)); + Lib_IntVector_Intrinsics_vec256 + a03 = + Lib_IntVector_Intrinsics_vec256_add64(a02, + Lib_IntVector_Intrinsics_vec256_mul64(r123453, a2)); + Lib_IntVector_Intrinsics_vec256 + a13 = + Lib_IntVector_Intrinsics_vec256_add64(a12, + Lib_IntVector_Intrinsics_vec256_mul64(r123454, a2)); + Lib_IntVector_Intrinsics_vec256 + a23 = + Lib_IntVector_Intrinsics_vec256_add64(a22, + Lib_IntVector_Intrinsics_vec256_mul64(r12340, a2)); + Lib_IntVector_Intrinsics_vec256 + a33 = + Lib_IntVector_Intrinsics_vec256_add64(a32, + Lib_IntVector_Intrinsics_vec256_mul64(r12341, a2)); + Lib_IntVector_Intrinsics_vec256 + a43 = + Lib_IntVector_Intrinsics_vec256_add64(a42, + Lib_IntVector_Intrinsics_vec256_mul64(r12342, a2)); + Lib_IntVector_Intrinsics_vec256 + a04 = + Lib_IntVector_Intrinsics_vec256_add64(a03, + Lib_IntVector_Intrinsics_vec256_mul64(r123452, a3)); + Lib_IntVector_Intrinsics_vec256 + a14 = + Lib_IntVector_Intrinsics_vec256_add64(a13, + Lib_IntVector_Intrinsics_vec256_mul64(r123453, a3)); + Lib_IntVector_Intrinsics_vec256 + a24 = + Lib_IntVector_Intrinsics_vec256_add64(a23, + Lib_IntVector_Intrinsics_vec256_mul64(r123454, a3)); + Lib_IntVector_Intrinsics_vec256 + a34 = + Lib_IntVector_Intrinsics_vec256_add64(a33, + 
Lib_IntVector_Intrinsics_vec256_mul64(r12340, a3)); + Lib_IntVector_Intrinsics_vec256 + a44 = + Lib_IntVector_Intrinsics_vec256_add64(a43, + Lib_IntVector_Intrinsics_vec256_mul64(r12341, a3)); + Lib_IntVector_Intrinsics_vec256 + a05 = + Lib_IntVector_Intrinsics_vec256_add64(a04, + Lib_IntVector_Intrinsics_vec256_mul64(r123451, a4)); + Lib_IntVector_Intrinsics_vec256 + a15 = + Lib_IntVector_Intrinsics_vec256_add64(a14, + Lib_IntVector_Intrinsics_vec256_mul64(r123452, a4)); + Lib_IntVector_Intrinsics_vec256 + a25 = + Lib_IntVector_Intrinsics_vec256_add64(a24, + Lib_IntVector_Intrinsics_vec256_mul64(r123453, a4)); + Lib_IntVector_Intrinsics_vec256 + a35 = + Lib_IntVector_Intrinsics_vec256_add64(a34, + Lib_IntVector_Intrinsics_vec256_mul64(r123454, a4)); + Lib_IntVector_Intrinsics_vec256 + a45 = + Lib_IntVector_Intrinsics_vec256_add64(a44, + Lib_IntVector_Intrinsics_vec256_mul64(r12340, a4)); + Lib_IntVector_Intrinsics_vec256 t0 = a05; + Lib_IntVector_Intrinsics_vec256 t1 = a15; + Lib_IntVector_Intrinsics_vec256 t2 = a25; + Lib_IntVector_Intrinsics_vec256 t3 = a35; + Lib_IntVector_Intrinsics_vec256 t4 = a45; + Lib_IntVector_Intrinsics_vec256 + mask26 = Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU); + Lib_IntVector_Intrinsics_vec256 + z0 = Lib_IntVector_Intrinsics_vec256_shift_right64(t0, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec256 + z1 = Lib_IntVector_Intrinsics_vec256_shift_right64(t3, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec256 x0 = Lib_IntVector_Intrinsics_vec256_and(t0, mask26); + Lib_IntVector_Intrinsics_vec256 x3 = Lib_IntVector_Intrinsics_vec256_and(t3, mask26); + Lib_IntVector_Intrinsics_vec256 x1 = Lib_IntVector_Intrinsics_vec256_add64(t1, z0); + Lib_IntVector_Intrinsics_vec256 x4 = Lib_IntVector_Intrinsics_vec256_add64(t4, z1); + Lib_IntVector_Intrinsics_vec256 + z01 = Lib_IntVector_Intrinsics_vec256_shift_right64(x1, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec256 + z11 = Lib_IntVector_Intrinsics_vec256_shift_right64(x4, 
(uint32_t)26U); + Lib_IntVector_Intrinsics_vec256 + t = Lib_IntVector_Intrinsics_vec256_shift_left64(z11, (uint32_t)2U); + Lib_IntVector_Intrinsics_vec256 z121 = Lib_IntVector_Intrinsics_vec256_add64(z11, t); + Lib_IntVector_Intrinsics_vec256 x11 = Lib_IntVector_Intrinsics_vec256_and(x1, mask26); + Lib_IntVector_Intrinsics_vec256 x41 = Lib_IntVector_Intrinsics_vec256_and(x4, mask26); + Lib_IntVector_Intrinsics_vec256 x2 = Lib_IntVector_Intrinsics_vec256_add64(t2, z01); + Lib_IntVector_Intrinsics_vec256 x01 = Lib_IntVector_Intrinsics_vec256_add64(x0, z121); + Lib_IntVector_Intrinsics_vec256 + z02 = Lib_IntVector_Intrinsics_vec256_shift_right64(x2, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec256 + z13 = Lib_IntVector_Intrinsics_vec256_shift_right64(x01, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec256 x21 = Lib_IntVector_Intrinsics_vec256_and(x2, mask26); + Lib_IntVector_Intrinsics_vec256 x02 = Lib_IntVector_Intrinsics_vec256_and(x01, mask26); + Lib_IntVector_Intrinsics_vec256 x31 = Lib_IntVector_Intrinsics_vec256_add64(x3, z02); + Lib_IntVector_Intrinsics_vec256 x12 = Lib_IntVector_Intrinsics_vec256_add64(x11, z13); + Lib_IntVector_Intrinsics_vec256 + z03 = Lib_IntVector_Intrinsics_vec256_shift_right64(x31, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec256 x32 = Lib_IntVector_Intrinsics_vec256_and(x31, mask26); + Lib_IntVector_Intrinsics_vec256 x42 = Lib_IntVector_Intrinsics_vec256_add64(x41, z03); + Lib_IntVector_Intrinsics_vec256 o0 = x02; + Lib_IntVector_Intrinsics_vec256 o10 = x12; + Lib_IntVector_Intrinsics_vec256 o20 = x21; + Lib_IntVector_Intrinsics_vec256 o30 = x32; + Lib_IntVector_Intrinsics_vec256 o40 = x42; + Lib_IntVector_Intrinsics_vec256 + v00 = Lib_IntVector_Intrinsics_vec256_interleave_high128(o0, o0); + Lib_IntVector_Intrinsics_vec256 v10 = Lib_IntVector_Intrinsics_vec256_add64(o0, v00); + Lib_IntVector_Intrinsics_vec256 + v10h = Lib_IntVector_Intrinsics_vec256_interleave_high64(v10, v10); + Lib_IntVector_Intrinsics_vec256 v20 = 
Lib_IntVector_Intrinsics_vec256_add64(v10, v10h); + Lib_IntVector_Intrinsics_vec256 + v01 = Lib_IntVector_Intrinsics_vec256_interleave_high128(o10, o10); + Lib_IntVector_Intrinsics_vec256 v11 = Lib_IntVector_Intrinsics_vec256_add64(o10, v01); + Lib_IntVector_Intrinsics_vec256 + v11h = Lib_IntVector_Intrinsics_vec256_interleave_high64(v11, v11); + Lib_IntVector_Intrinsics_vec256 v21 = Lib_IntVector_Intrinsics_vec256_add64(v11, v11h); + Lib_IntVector_Intrinsics_vec256 + v02 = Lib_IntVector_Intrinsics_vec256_interleave_high128(o20, o20); + Lib_IntVector_Intrinsics_vec256 v12 = Lib_IntVector_Intrinsics_vec256_add64(o20, v02); + Lib_IntVector_Intrinsics_vec256 + v12h = Lib_IntVector_Intrinsics_vec256_interleave_high64(v12, v12); + Lib_IntVector_Intrinsics_vec256 v22 = Lib_IntVector_Intrinsics_vec256_add64(v12, v12h); + Lib_IntVector_Intrinsics_vec256 + v03 = Lib_IntVector_Intrinsics_vec256_interleave_high128(o30, o30); + Lib_IntVector_Intrinsics_vec256 v13 = Lib_IntVector_Intrinsics_vec256_add64(o30, v03); + Lib_IntVector_Intrinsics_vec256 + v13h = Lib_IntVector_Intrinsics_vec256_interleave_high64(v13, v13); + Lib_IntVector_Intrinsics_vec256 v23 = Lib_IntVector_Intrinsics_vec256_add64(v13, v13h); + Lib_IntVector_Intrinsics_vec256 + v04 = Lib_IntVector_Intrinsics_vec256_interleave_high128(o40, o40); + Lib_IntVector_Intrinsics_vec256 v14 = Lib_IntVector_Intrinsics_vec256_add64(o40, v04); + Lib_IntVector_Intrinsics_vec256 + v14h = Lib_IntVector_Intrinsics_vec256_interleave_high64(v14, v14); + Lib_IntVector_Intrinsics_vec256 v24 = Lib_IntVector_Intrinsics_vec256_add64(v14, v14h); + Lib_IntVector_Intrinsics_vec256 + l = Lib_IntVector_Intrinsics_vec256_add64(v20, Lib_IntVector_Intrinsics_vec256_zero); + Lib_IntVector_Intrinsics_vec256 + tmp0 = + Lib_IntVector_Intrinsics_vec256_and(l, + Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec256 + c0 = Lib_IntVector_Intrinsics_vec256_shift_right64(l, (uint32_t)26U); + 
Lib_IntVector_Intrinsics_vec256 l0 = Lib_IntVector_Intrinsics_vec256_add64(v21, c0); + Lib_IntVector_Intrinsics_vec256 + tmp1 = + Lib_IntVector_Intrinsics_vec256_and(l0, + Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec256 + c1 = Lib_IntVector_Intrinsics_vec256_shift_right64(l0, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec256 l1 = Lib_IntVector_Intrinsics_vec256_add64(v22, c1); + Lib_IntVector_Intrinsics_vec256 + tmp2 = + Lib_IntVector_Intrinsics_vec256_and(l1, + Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec256 + c2 = Lib_IntVector_Intrinsics_vec256_shift_right64(l1, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec256 l2 = Lib_IntVector_Intrinsics_vec256_add64(v23, c2); + Lib_IntVector_Intrinsics_vec256 + tmp3 = + Lib_IntVector_Intrinsics_vec256_and(l2, + Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec256 + c3 = Lib_IntVector_Intrinsics_vec256_shift_right64(l2, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec256 l3 = Lib_IntVector_Intrinsics_vec256_add64(v24, c3); + Lib_IntVector_Intrinsics_vec256 + tmp4 = + Lib_IntVector_Intrinsics_vec256_and(l3, + Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec256 + c4 = Lib_IntVector_Intrinsics_vec256_shift_right64(l3, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec256 + o00 = + Lib_IntVector_Intrinsics_vec256_add64(tmp0, + Lib_IntVector_Intrinsics_vec256_smul64(c4, (uint64_t)5U)); + Lib_IntVector_Intrinsics_vec256 o1 = tmp1; + Lib_IntVector_Intrinsics_vec256 o2 = tmp2; + Lib_IntVector_Intrinsics_vec256 o3 = tmp3; + Lib_IntVector_Intrinsics_vec256 o4 = tmp4; + out[0U] = o00; + out[1U] = o1; + out[2U] = o2; + out[3U] = o3; + out[4U] = o4; +} + +void +Hacl_Poly1305_256_poly1305_init(Lib_IntVector_Intrinsics_vec256 *ctx, uint8_t *key) +{ + Lib_IntVector_Intrinsics_vec256 *acc = ctx; + Lib_IntVector_Intrinsics_vec256 *pre = ctx + (uint32_t)5U; + uint8_t 
*kr = key; + acc[0U] = Lib_IntVector_Intrinsics_vec256_zero; + acc[1U] = Lib_IntVector_Intrinsics_vec256_zero; + acc[2U] = Lib_IntVector_Intrinsics_vec256_zero; + acc[3U] = Lib_IntVector_Intrinsics_vec256_zero; + acc[4U] = Lib_IntVector_Intrinsics_vec256_zero; + uint64_t u0 = load64_le(kr); + uint64_t lo = u0; + uint64_t u = load64_le(kr + (uint32_t)8U); + uint64_t hi = u; + uint64_t mask0 = (uint64_t)0x0ffffffc0fffffffU; + uint64_t mask1 = (uint64_t)0x0ffffffc0ffffffcU; + uint64_t lo1 = lo & mask0; + uint64_t hi1 = hi & mask1; + Lib_IntVector_Intrinsics_vec256 *r = pre; + Lib_IntVector_Intrinsics_vec256 *r5 = pre + (uint32_t)5U; + Lib_IntVector_Intrinsics_vec256 *rn = pre + (uint32_t)10U; + Lib_IntVector_Intrinsics_vec256 *rn_5 = pre + (uint32_t)15U; + Lib_IntVector_Intrinsics_vec256 r_vec0 = Lib_IntVector_Intrinsics_vec256_load64(lo1); + Lib_IntVector_Intrinsics_vec256 r_vec1 = Lib_IntVector_Intrinsics_vec256_load64(hi1); + Lib_IntVector_Intrinsics_vec256 + f00 = + Lib_IntVector_Intrinsics_vec256_and(r_vec0, + Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec256 + f15 = + Lib_IntVector_Intrinsics_vec256_and(Lib_IntVector_Intrinsics_vec256_shift_right64(r_vec0, + (uint32_t)26U), + Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec256 + f20 = + Lib_IntVector_Intrinsics_vec256_or(Lib_IntVector_Intrinsics_vec256_shift_right64(r_vec0, + (uint32_t)52U), + Lib_IntVector_Intrinsics_vec256_shift_left64(Lib_IntVector_Intrinsics_vec256_and(r_vec1, + Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3fffU)), + (uint32_t)12U)); + Lib_IntVector_Intrinsics_vec256 + f30 = + Lib_IntVector_Intrinsics_vec256_and(Lib_IntVector_Intrinsics_vec256_shift_right64(r_vec1, + (uint32_t)14U), + Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec256 + f40 = Lib_IntVector_Intrinsics_vec256_shift_right64(r_vec1, (uint32_t)40U); + Lib_IntVector_Intrinsics_vec256 f0 = 
f00; + Lib_IntVector_Intrinsics_vec256 f1 = f15; + Lib_IntVector_Intrinsics_vec256 f2 = f20; + Lib_IntVector_Intrinsics_vec256 f3 = f30; + Lib_IntVector_Intrinsics_vec256 f4 = f40; + r[0U] = f0; + r[1U] = f1; + r[2U] = f2; + r[3U] = f3; + r[4U] = f4; + Lib_IntVector_Intrinsics_vec256 f200 = r[0U]; + Lib_IntVector_Intrinsics_vec256 f210 = r[1U]; + Lib_IntVector_Intrinsics_vec256 f220 = r[2U]; + Lib_IntVector_Intrinsics_vec256 f230 = r[3U]; + Lib_IntVector_Intrinsics_vec256 f240 = r[4U]; + r5[0U] = Lib_IntVector_Intrinsics_vec256_smul64(f200, (uint64_t)5U); + r5[1U] = Lib_IntVector_Intrinsics_vec256_smul64(f210, (uint64_t)5U); + r5[2U] = Lib_IntVector_Intrinsics_vec256_smul64(f220, (uint64_t)5U); + r5[3U] = Lib_IntVector_Intrinsics_vec256_smul64(f230, (uint64_t)5U); + r5[4U] = Lib_IntVector_Intrinsics_vec256_smul64(f240, (uint64_t)5U); + Lib_IntVector_Intrinsics_vec256 r0 = r[0U]; + Lib_IntVector_Intrinsics_vec256 r10 = r[1U]; + Lib_IntVector_Intrinsics_vec256 r20 = r[2U]; + Lib_IntVector_Intrinsics_vec256 r30 = r[3U]; + Lib_IntVector_Intrinsics_vec256 r40 = r[4U]; + Lib_IntVector_Intrinsics_vec256 r510 = r5[1U]; + Lib_IntVector_Intrinsics_vec256 r520 = r5[2U]; + Lib_IntVector_Intrinsics_vec256 r530 = r5[3U]; + Lib_IntVector_Intrinsics_vec256 r540 = r5[4U]; + Lib_IntVector_Intrinsics_vec256 f100 = r[0U]; + Lib_IntVector_Intrinsics_vec256 f110 = r[1U]; + Lib_IntVector_Intrinsics_vec256 f120 = r[2U]; + Lib_IntVector_Intrinsics_vec256 f130 = r[3U]; + Lib_IntVector_Intrinsics_vec256 f140 = r[4U]; + Lib_IntVector_Intrinsics_vec256 a00 = Lib_IntVector_Intrinsics_vec256_mul64(r0, f100); + Lib_IntVector_Intrinsics_vec256 a10 = Lib_IntVector_Intrinsics_vec256_mul64(r10, f100); + Lib_IntVector_Intrinsics_vec256 a20 = Lib_IntVector_Intrinsics_vec256_mul64(r20, f100); + Lib_IntVector_Intrinsics_vec256 a30 = Lib_IntVector_Intrinsics_vec256_mul64(r30, f100); + Lib_IntVector_Intrinsics_vec256 a40 = Lib_IntVector_Intrinsics_vec256_mul64(r40, f100); + Lib_IntVector_Intrinsics_vec256 
+ a010 = + Lib_IntVector_Intrinsics_vec256_add64(a00, + Lib_IntVector_Intrinsics_vec256_mul64(r540, f110)); + Lib_IntVector_Intrinsics_vec256 + a110 = + Lib_IntVector_Intrinsics_vec256_add64(a10, + Lib_IntVector_Intrinsics_vec256_mul64(r0, f110)); + Lib_IntVector_Intrinsics_vec256 + a210 = + Lib_IntVector_Intrinsics_vec256_add64(a20, + Lib_IntVector_Intrinsics_vec256_mul64(r10, f110)); + Lib_IntVector_Intrinsics_vec256 + a310 = + Lib_IntVector_Intrinsics_vec256_add64(a30, + Lib_IntVector_Intrinsics_vec256_mul64(r20, f110)); + Lib_IntVector_Intrinsics_vec256 + a410 = + Lib_IntVector_Intrinsics_vec256_add64(a40, + Lib_IntVector_Intrinsics_vec256_mul64(r30, f110)); + Lib_IntVector_Intrinsics_vec256 + a020 = + Lib_IntVector_Intrinsics_vec256_add64(a010, + Lib_IntVector_Intrinsics_vec256_mul64(r530, f120)); + Lib_IntVector_Intrinsics_vec256 + a120 = + Lib_IntVector_Intrinsics_vec256_add64(a110, + Lib_IntVector_Intrinsics_vec256_mul64(r540, f120)); + Lib_IntVector_Intrinsics_vec256 + a220 = + Lib_IntVector_Intrinsics_vec256_add64(a210, + Lib_IntVector_Intrinsics_vec256_mul64(r0, f120)); + Lib_IntVector_Intrinsics_vec256 + a320 = + Lib_IntVector_Intrinsics_vec256_add64(a310, + Lib_IntVector_Intrinsics_vec256_mul64(r10, f120)); + Lib_IntVector_Intrinsics_vec256 + a420 = + Lib_IntVector_Intrinsics_vec256_add64(a410, + Lib_IntVector_Intrinsics_vec256_mul64(r20, f120)); + Lib_IntVector_Intrinsics_vec256 + a030 = + Lib_IntVector_Intrinsics_vec256_add64(a020, + Lib_IntVector_Intrinsics_vec256_mul64(r520, f130)); + Lib_IntVector_Intrinsics_vec256 + a130 = + Lib_IntVector_Intrinsics_vec256_add64(a120, + Lib_IntVector_Intrinsics_vec256_mul64(r530, f130)); + Lib_IntVector_Intrinsics_vec256 + a230 = + Lib_IntVector_Intrinsics_vec256_add64(a220, + Lib_IntVector_Intrinsics_vec256_mul64(r540, f130)); + Lib_IntVector_Intrinsics_vec256 + a330 = + Lib_IntVector_Intrinsics_vec256_add64(a320, + Lib_IntVector_Intrinsics_vec256_mul64(r0, f130)); + Lib_IntVector_Intrinsics_vec256 + a430 = + 
Lib_IntVector_Intrinsics_vec256_add64(a420, + Lib_IntVector_Intrinsics_vec256_mul64(r10, f130)); + Lib_IntVector_Intrinsics_vec256 + a040 = + Lib_IntVector_Intrinsics_vec256_add64(a030, + Lib_IntVector_Intrinsics_vec256_mul64(r510, f140)); + Lib_IntVector_Intrinsics_vec256 + a140 = + Lib_IntVector_Intrinsics_vec256_add64(a130, + Lib_IntVector_Intrinsics_vec256_mul64(r520, f140)); + Lib_IntVector_Intrinsics_vec256 + a240 = + Lib_IntVector_Intrinsics_vec256_add64(a230, + Lib_IntVector_Intrinsics_vec256_mul64(r530, f140)); + Lib_IntVector_Intrinsics_vec256 + a340 = + Lib_IntVector_Intrinsics_vec256_add64(a330, + Lib_IntVector_Intrinsics_vec256_mul64(r540, f140)); + Lib_IntVector_Intrinsics_vec256 + a440 = + Lib_IntVector_Intrinsics_vec256_add64(a430, + Lib_IntVector_Intrinsics_vec256_mul64(r0, f140)); + Lib_IntVector_Intrinsics_vec256 t00 = a040; + Lib_IntVector_Intrinsics_vec256 t10 = a140; + Lib_IntVector_Intrinsics_vec256 t20 = a240; + Lib_IntVector_Intrinsics_vec256 t30 = a340; + Lib_IntVector_Intrinsics_vec256 t40 = a440; + Lib_IntVector_Intrinsics_vec256 + mask260 = Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU); + Lib_IntVector_Intrinsics_vec256 + z00 = Lib_IntVector_Intrinsics_vec256_shift_right64(t00, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec256 + z10 = Lib_IntVector_Intrinsics_vec256_shift_right64(t30, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec256 x00 = Lib_IntVector_Intrinsics_vec256_and(t00, mask260); + Lib_IntVector_Intrinsics_vec256 x30 = Lib_IntVector_Intrinsics_vec256_and(t30, mask260); + Lib_IntVector_Intrinsics_vec256 x10 = Lib_IntVector_Intrinsics_vec256_add64(t10, z00); + Lib_IntVector_Intrinsics_vec256 x40 = Lib_IntVector_Intrinsics_vec256_add64(t40, z10); + Lib_IntVector_Intrinsics_vec256 + z010 = Lib_IntVector_Intrinsics_vec256_shift_right64(x10, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec256 + z110 = Lib_IntVector_Intrinsics_vec256_shift_right64(x40, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec256 + t5 = 
Lib_IntVector_Intrinsics_vec256_shift_left64(z110, (uint32_t)2U); + Lib_IntVector_Intrinsics_vec256 z12 = Lib_IntVector_Intrinsics_vec256_add64(z110, t5); + Lib_IntVector_Intrinsics_vec256 x110 = Lib_IntVector_Intrinsics_vec256_and(x10, mask260); + Lib_IntVector_Intrinsics_vec256 x410 = Lib_IntVector_Intrinsics_vec256_and(x40, mask260); + Lib_IntVector_Intrinsics_vec256 x20 = Lib_IntVector_Intrinsics_vec256_add64(t20, z010); + Lib_IntVector_Intrinsics_vec256 x010 = Lib_IntVector_Intrinsics_vec256_add64(x00, z12); + Lib_IntVector_Intrinsics_vec256 + z020 = Lib_IntVector_Intrinsics_vec256_shift_right64(x20, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec256 + z130 = Lib_IntVector_Intrinsics_vec256_shift_right64(x010, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec256 x210 = Lib_IntVector_Intrinsics_vec256_and(x20, mask260); + Lib_IntVector_Intrinsics_vec256 x020 = Lib_IntVector_Intrinsics_vec256_and(x010, mask260); + Lib_IntVector_Intrinsics_vec256 x310 = Lib_IntVector_Intrinsics_vec256_add64(x30, z020); + Lib_IntVector_Intrinsics_vec256 x120 = Lib_IntVector_Intrinsics_vec256_add64(x110, z130); + Lib_IntVector_Intrinsics_vec256 + z030 = Lib_IntVector_Intrinsics_vec256_shift_right64(x310, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec256 x320 = Lib_IntVector_Intrinsics_vec256_and(x310, mask260); + Lib_IntVector_Intrinsics_vec256 x420 = Lib_IntVector_Intrinsics_vec256_add64(x410, z030); + Lib_IntVector_Intrinsics_vec256 o00 = x020; + Lib_IntVector_Intrinsics_vec256 o10 = x120; + Lib_IntVector_Intrinsics_vec256 o20 = x210; + Lib_IntVector_Intrinsics_vec256 o30 = x320; + Lib_IntVector_Intrinsics_vec256 o40 = x420; + rn[0U] = o00; + rn[1U] = o10; + rn[2U] = o20; + rn[3U] = o30; + rn[4U] = o40; + Lib_IntVector_Intrinsics_vec256 f201 = rn[0U]; + Lib_IntVector_Intrinsics_vec256 f211 = rn[1U]; + Lib_IntVector_Intrinsics_vec256 f221 = rn[2U]; + Lib_IntVector_Intrinsics_vec256 f231 = rn[3U]; + Lib_IntVector_Intrinsics_vec256 f241 = rn[4U]; + rn_5[0U] = 
Lib_IntVector_Intrinsics_vec256_smul64(f201, (uint64_t)5U); + rn_5[1U] = Lib_IntVector_Intrinsics_vec256_smul64(f211, (uint64_t)5U); + rn_5[2U] = Lib_IntVector_Intrinsics_vec256_smul64(f221, (uint64_t)5U); + rn_5[3U] = Lib_IntVector_Intrinsics_vec256_smul64(f231, (uint64_t)5U); + rn_5[4U] = Lib_IntVector_Intrinsics_vec256_smul64(f241, (uint64_t)5U); + Lib_IntVector_Intrinsics_vec256 r00 = rn[0U]; + Lib_IntVector_Intrinsics_vec256 r1 = rn[1U]; + Lib_IntVector_Intrinsics_vec256 r2 = rn[2U]; + Lib_IntVector_Intrinsics_vec256 r3 = rn[3U]; + Lib_IntVector_Intrinsics_vec256 r4 = rn[4U]; + Lib_IntVector_Intrinsics_vec256 r51 = rn_5[1U]; + Lib_IntVector_Intrinsics_vec256 r52 = rn_5[2U]; + Lib_IntVector_Intrinsics_vec256 r53 = rn_5[3U]; + Lib_IntVector_Intrinsics_vec256 r54 = rn_5[4U]; + Lib_IntVector_Intrinsics_vec256 f10 = rn[0U]; + Lib_IntVector_Intrinsics_vec256 f11 = rn[1U]; + Lib_IntVector_Intrinsics_vec256 f12 = rn[2U]; + Lib_IntVector_Intrinsics_vec256 f13 = rn[3U]; + Lib_IntVector_Intrinsics_vec256 f14 = rn[4U]; + Lib_IntVector_Intrinsics_vec256 a0 = Lib_IntVector_Intrinsics_vec256_mul64(r00, f10); + Lib_IntVector_Intrinsics_vec256 a1 = Lib_IntVector_Intrinsics_vec256_mul64(r1, f10); + Lib_IntVector_Intrinsics_vec256 a2 = Lib_IntVector_Intrinsics_vec256_mul64(r2, f10); + Lib_IntVector_Intrinsics_vec256 a3 = Lib_IntVector_Intrinsics_vec256_mul64(r3, f10); + Lib_IntVector_Intrinsics_vec256 a4 = Lib_IntVector_Intrinsics_vec256_mul64(r4, f10); + Lib_IntVector_Intrinsics_vec256 + a01 = + Lib_IntVector_Intrinsics_vec256_add64(a0, + Lib_IntVector_Intrinsics_vec256_mul64(r54, f11)); + Lib_IntVector_Intrinsics_vec256 + a11 = + Lib_IntVector_Intrinsics_vec256_add64(a1, + Lib_IntVector_Intrinsics_vec256_mul64(r00, f11)); + Lib_IntVector_Intrinsics_vec256 + a21 = Lib_IntVector_Intrinsics_vec256_add64(a2, Lib_IntVector_Intrinsics_vec256_mul64(r1, f11)); + Lib_IntVector_Intrinsics_vec256 + a31 = Lib_IntVector_Intrinsics_vec256_add64(a3, Lib_IntVector_Intrinsics_vec256_mul64(r2, 
f11)); + Lib_IntVector_Intrinsics_vec256 + a41 = Lib_IntVector_Intrinsics_vec256_add64(a4, Lib_IntVector_Intrinsics_vec256_mul64(r3, f11)); + Lib_IntVector_Intrinsics_vec256 + a02 = + Lib_IntVector_Intrinsics_vec256_add64(a01, + Lib_IntVector_Intrinsics_vec256_mul64(r53, f12)); + Lib_IntVector_Intrinsics_vec256 + a12 = + Lib_IntVector_Intrinsics_vec256_add64(a11, + Lib_IntVector_Intrinsics_vec256_mul64(r54, f12)); + Lib_IntVector_Intrinsics_vec256 + a22 = + Lib_IntVector_Intrinsics_vec256_add64(a21, + Lib_IntVector_Intrinsics_vec256_mul64(r00, f12)); + Lib_IntVector_Intrinsics_vec256 + a32 = + Lib_IntVector_Intrinsics_vec256_add64(a31, + Lib_IntVector_Intrinsics_vec256_mul64(r1, f12)); + Lib_IntVector_Intrinsics_vec256 + a42 = + Lib_IntVector_Intrinsics_vec256_add64(a41, + Lib_IntVector_Intrinsics_vec256_mul64(r2, f12)); + Lib_IntVector_Intrinsics_vec256 + a03 = + Lib_IntVector_Intrinsics_vec256_add64(a02, + Lib_IntVector_Intrinsics_vec256_mul64(r52, f13)); + Lib_IntVector_Intrinsics_vec256 + a13 = + Lib_IntVector_Intrinsics_vec256_add64(a12, + Lib_IntVector_Intrinsics_vec256_mul64(r53, f13)); + Lib_IntVector_Intrinsics_vec256 + a23 = + Lib_IntVector_Intrinsics_vec256_add64(a22, + Lib_IntVector_Intrinsics_vec256_mul64(r54, f13)); + Lib_IntVector_Intrinsics_vec256 + a33 = + Lib_IntVector_Intrinsics_vec256_add64(a32, + Lib_IntVector_Intrinsics_vec256_mul64(r00, f13)); + Lib_IntVector_Intrinsics_vec256 + a43 = + Lib_IntVector_Intrinsics_vec256_add64(a42, + Lib_IntVector_Intrinsics_vec256_mul64(r1, f13)); + Lib_IntVector_Intrinsics_vec256 + a04 = + Lib_IntVector_Intrinsics_vec256_add64(a03, + Lib_IntVector_Intrinsics_vec256_mul64(r51, f14)); + Lib_IntVector_Intrinsics_vec256 + a14 = + Lib_IntVector_Intrinsics_vec256_add64(a13, + Lib_IntVector_Intrinsics_vec256_mul64(r52, f14)); + Lib_IntVector_Intrinsics_vec256 + a24 = + Lib_IntVector_Intrinsics_vec256_add64(a23, + Lib_IntVector_Intrinsics_vec256_mul64(r53, f14)); + Lib_IntVector_Intrinsics_vec256 + a34 = + 
Lib_IntVector_Intrinsics_vec256_add64(a33, + Lib_IntVector_Intrinsics_vec256_mul64(r54, f14)); + Lib_IntVector_Intrinsics_vec256 + a44 = + Lib_IntVector_Intrinsics_vec256_add64(a43, + Lib_IntVector_Intrinsics_vec256_mul64(r00, f14)); + Lib_IntVector_Intrinsics_vec256 t0 = a04; + Lib_IntVector_Intrinsics_vec256 t1 = a14; + Lib_IntVector_Intrinsics_vec256 t2 = a24; + Lib_IntVector_Intrinsics_vec256 t3 = a34; + Lib_IntVector_Intrinsics_vec256 t4 = a44; + Lib_IntVector_Intrinsics_vec256 + mask26 = Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU); + Lib_IntVector_Intrinsics_vec256 + z0 = Lib_IntVector_Intrinsics_vec256_shift_right64(t0, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec256 + z1 = Lib_IntVector_Intrinsics_vec256_shift_right64(t3, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec256 x0 = Lib_IntVector_Intrinsics_vec256_and(t0, mask26); + Lib_IntVector_Intrinsics_vec256 x3 = Lib_IntVector_Intrinsics_vec256_and(t3, mask26); + Lib_IntVector_Intrinsics_vec256 x1 = Lib_IntVector_Intrinsics_vec256_add64(t1, z0); + Lib_IntVector_Intrinsics_vec256 x4 = Lib_IntVector_Intrinsics_vec256_add64(t4, z1); + Lib_IntVector_Intrinsics_vec256 + z01 = Lib_IntVector_Intrinsics_vec256_shift_right64(x1, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec256 + z11 = Lib_IntVector_Intrinsics_vec256_shift_right64(x4, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec256 + t = Lib_IntVector_Intrinsics_vec256_shift_left64(z11, (uint32_t)2U); + Lib_IntVector_Intrinsics_vec256 z120 = Lib_IntVector_Intrinsics_vec256_add64(z11, t); + Lib_IntVector_Intrinsics_vec256 x11 = Lib_IntVector_Intrinsics_vec256_and(x1, mask26); + Lib_IntVector_Intrinsics_vec256 x41 = Lib_IntVector_Intrinsics_vec256_and(x4, mask26); + Lib_IntVector_Intrinsics_vec256 x2 = Lib_IntVector_Intrinsics_vec256_add64(t2, z01); + Lib_IntVector_Intrinsics_vec256 x01 = Lib_IntVector_Intrinsics_vec256_add64(x0, z120); + Lib_IntVector_Intrinsics_vec256 + z02 = Lib_IntVector_Intrinsics_vec256_shift_right64(x2, (uint32_t)26U); + 
Lib_IntVector_Intrinsics_vec256 + z13 = Lib_IntVector_Intrinsics_vec256_shift_right64(x01, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec256 x21 = Lib_IntVector_Intrinsics_vec256_and(x2, mask26); + Lib_IntVector_Intrinsics_vec256 x02 = Lib_IntVector_Intrinsics_vec256_and(x01, mask26); + Lib_IntVector_Intrinsics_vec256 x31 = Lib_IntVector_Intrinsics_vec256_add64(x3, z02); + Lib_IntVector_Intrinsics_vec256 x12 = Lib_IntVector_Intrinsics_vec256_add64(x11, z13); + Lib_IntVector_Intrinsics_vec256 + z03 = Lib_IntVector_Intrinsics_vec256_shift_right64(x31, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec256 x32 = Lib_IntVector_Intrinsics_vec256_and(x31, mask26); + Lib_IntVector_Intrinsics_vec256 x42 = Lib_IntVector_Intrinsics_vec256_add64(x41, z03); + Lib_IntVector_Intrinsics_vec256 o0 = x02; + Lib_IntVector_Intrinsics_vec256 o1 = x12; + Lib_IntVector_Intrinsics_vec256 o2 = x21; + Lib_IntVector_Intrinsics_vec256 o3 = x32; + Lib_IntVector_Intrinsics_vec256 o4 = x42; + rn[0U] = o0; + rn[1U] = o1; + rn[2U] = o2; + rn[3U] = o3; + rn[4U] = o4; + Lib_IntVector_Intrinsics_vec256 f202 = rn[0U]; + Lib_IntVector_Intrinsics_vec256 f21 = rn[1U]; + Lib_IntVector_Intrinsics_vec256 f22 = rn[2U]; + Lib_IntVector_Intrinsics_vec256 f23 = rn[3U]; + Lib_IntVector_Intrinsics_vec256 f24 = rn[4U]; + rn_5[0U] = Lib_IntVector_Intrinsics_vec256_smul64(f202, (uint64_t)5U); + rn_5[1U] = Lib_IntVector_Intrinsics_vec256_smul64(f21, (uint64_t)5U); + rn_5[2U] = Lib_IntVector_Intrinsics_vec256_smul64(f22, (uint64_t)5U); + rn_5[3U] = Lib_IntVector_Intrinsics_vec256_smul64(f23, (uint64_t)5U); + rn_5[4U] = Lib_IntVector_Intrinsics_vec256_smul64(f24, (uint64_t)5U); +} + +void +Hacl_Poly1305_256_poly1305_update1(Lib_IntVector_Intrinsics_vec256 *ctx, uint8_t *text) +{ + Lib_IntVector_Intrinsics_vec256 *pre = ctx + (uint32_t)5U; + Lib_IntVector_Intrinsics_vec256 *acc = ctx; + KRML_PRE_ALIGN(32) + Lib_IntVector_Intrinsics_vec256 e[5U] KRML_POST_ALIGN(32) = { 0U }; + uint64_t u0 = load64_le(text); + uint64_t lo = 
u0; + uint64_t u = load64_le(text + (uint32_t)8U); + uint64_t hi = u; + Lib_IntVector_Intrinsics_vec256 f0 = Lib_IntVector_Intrinsics_vec256_load64(lo); + Lib_IntVector_Intrinsics_vec256 f1 = Lib_IntVector_Intrinsics_vec256_load64(hi); + Lib_IntVector_Intrinsics_vec256 + f010 = + Lib_IntVector_Intrinsics_vec256_and(f0, + Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec256 + f110 = + Lib_IntVector_Intrinsics_vec256_and(Lib_IntVector_Intrinsics_vec256_shift_right64(f0, + (uint32_t)26U), + Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec256 + f20 = + Lib_IntVector_Intrinsics_vec256_or(Lib_IntVector_Intrinsics_vec256_shift_right64(f0, + (uint32_t)52U), + Lib_IntVector_Intrinsics_vec256_shift_left64(Lib_IntVector_Intrinsics_vec256_and(f1, + Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3fffU)), + (uint32_t)12U)); + Lib_IntVector_Intrinsics_vec256 + f30 = + Lib_IntVector_Intrinsics_vec256_and(Lib_IntVector_Intrinsics_vec256_shift_right64(f1, + (uint32_t)14U), + Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec256 + f40 = Lib_IntVector_Intrinsics_vec256_shift_right64(f1, (uint32_t)40U); + Lib_IntVector_Intrinsics_vec256 f01 = f010; + Lib_IntVector_Intrinsics_vec256 f111 = f110; + Lib_IntVector_Intrinsics_vec256 f2 = f20; + Lib_IntVector_Intrinsics_vec256 f3 = f30; + Lib_IntVector_Intrinsics_vec256 f41 = f40; + e[0U] = f01; + e[1U] = f111; + e[2U] = f2; + e[3U] = f3; + e[4U] = f41; + uint64_t b = (uint64_t)0x1000000U; + Lib_IntVector_Intrinsics_vec256 mask = Lib_IntVector_Intrinsics_vec256_load64(b); + Lib_IntVector_Intrinsics_vec256 f4 = e[4U]; + e[4U] = Lib_IntVector_Intrinsics_vec256_or(f4, mask); + Lib_IntVector_Intrinsics_vec256 *r = pre; + Lib_IntVector_Intrinsics_vec256 *r5 = pre + (uint32_t)5U; + Lib_IntVector_Intrinsics_vec256 r0 = r[0U]; + Lib_IntVector_Intrinsics_vec256 r1 = r[1U]; + Lib_IntVector_Intrinsics_vec256 r2 = 
r[2U]; + Lib_IntVector_Intrinsics_vec256 r3 = r[3U]; + Lib_IntVector_Intrinsics_vec256 r4 = r[4U]; + Lib_IntVector_Intrinsics_vec256 r51 = r5[1U]; + Lib_IntVector_Intrinsics_vec256 r52 = r5[2U]; + Lib_IntVector_Intrinsics_vec256 r53 = r5[3U]; + Lib_IntVector_Intrinsics_vec256 r54 = r5[4U]; + Lib_IntVector_Intrinsics_vec256 f10 = e[0U]; + Lib_IntVector_Intrinsics_vec256 f11 = e[1U]; + Lib_IntVector_Intrinsics_vec256 f12 = e[2U]; + Lib_IntVector_Intrinsics_vec256 f13 = e[3U]; + Lib_IntVector_Intrinsics_vec256 f14 = e[4U]; + Lib_IntVector_Intrinsics_vec256 a0 = acc[0U]; + Lib_IntVector_Intrinsics_vec256 a1 = acc[1U]; + Lib_IntVector_Intrinsics_vec256 a2 = acc[2U]; + Lib_IntVector_Intrinsics_vec256 a3 = acc[3U]; + Lib_IntVector_Intrinsics_vec256 a4 = acc[4U]; + Lib_IntVector_Intrinsics_vec256 a01 = Lib_IntVector_Intrinsics_vec256_add64(a0, f10); + Lib_IntVector_Intrinsics_vec256 a11 = Lib_IntVector_Intrinsics_vec256_add64(a1, f11); + Lib_IntVector_Intrinsics_vec256 a21 = Lib_IntVector_Intrinsics_vec256_add64(a2, f12); + Lib_IntVector_Intrinsics_vec256 a31 = Lib_IntVector_Intrinsics_vec256_add64(a3, f13); + Lib_IntVector_Intrinsics_vec256 a41 = Lib_IntVector_Intrinsics_vec256_add64(a4, f14); + Lib_IntVector_Intrinsics_vec256 a02 = Lib_IntVector_Intrinsics_vec256_mul64(r0, a01); + Lib_IntVector_Intrinsics_vec256 a12 = Lib_IntVector_Intrinsics_vec256_mul64(r1, a01); + Lib_IntVector_Intrinsics_vec256 a22 = Lib_IntVector_Intrinsics_vec256_mul64(r2, a01); + Lib_IntVector_Intrinsics_vec256 a32 = Lib_IntVector_Intrinsics_vec256_mul64(r3, a01); + Lib_IntVector_Intrinsics_vec256 a42 = Lib_IntVector_Intrinsics_vec256_mul64(r4, a01); + Lib_IntVector_Intrinsics_vec256 + a03 = + Lib_IntVector_Intrinsics_vec256_add64(a02, + Lib_IntVector_Intrinsics_vec256_mul64(r54, a11)); + Lib_IntVector_Intrinsics_vec256 + a13 = + Lib_IntVector_Intrinsics_vec256_add64(a12, + Lib_IntVector_Intrinsics_vec256_mul64(r0, a11)); + Lib_IntVector_Intrinsics_vec256 + a23 = + 
Lib_IntVector_Intrinsics_vec256_add64(a22, + Lib_IntVector_Intrinsics_vec256_mul64(r1, a11)); + Lib_IntVector_Intrinsics_vec256 + a33 = + Lib_IntVector_Intrinsics_vec256_add64(a32, + Lib_IntVector_Intrinsics_vec256_mul64(r2, a11)); + Lib_IntVector_Intrinsics_vec256 + a43 = + Lib_IntVector_Intrinsics_vec256_add64(a42, + Lib_IntVector_Intrinsics_vec256_mul64(r3, a11)); + Lib_IntVector_Intrinsics_vec256 + a04 = + Lib_IntVector_Intrinsics_vec256_add64(a03, + Lib_IntVector_Intrinsics_vec256_mul64(r53, a21)); + Lib_IntVector_Intrinsics_vec256 + a14 = + Lib_IntVector_Intrinsics_vec256_add64(a13, + Lib_IntVector_Intrinsics_vec256_mul64(r54, a21)); + Lib_IntVector_Intrinsics_vec256 + a24 = + Lib_IntVector_Intrinsics_vec256_add64(a23, + Lib_IntVector_Intrinsics_vec256_mul64(r0, a21)); + Lib_IntVector_Intrinsics_vec256 + a34 = + Lib_IntVector_Intrinsics_vec256_add64(a33, + Lib_IntVector_Intrinsics_vec256_mul64(r1, a21)); + Lib_IntVector_Intrinsics_vec256 + a44 = + Lib_IntVector_Intrinsics_vec256_add64(a43, + Lib_IntVector_Intrinsics_vec256_mul64(r2, a21)); + Lib_IntVector_Intrinsics_vec256 + a05 = + Lib_IntVector_Intrinsics_vec256_add64(a04, + Lib_IntVector_Intrinsics_vec256_mul64(r52, a31)); + Lib_IntVector_Intrinsics_vec256 + a15 = + Lib_IntVector_Intrinsics_vec256_add64(a14, + Lib_IntVector_Intrinsics_vec256_mul64(r53, a31)); + Lib_IntVector_Intrinsics_vec256 + a25 = + Lib_IntVector_Intrinsics_vec256_add64(a24, + Lib_IntVector_Intrinsics_vec256_mul64(r54, a31)); + Lib_IntVector_Intrinsics_vec256 + a35 = + Lib_IntVector_Intrinsics_vec256_add64(a34, + Lib_IntVector_Intrinsics_vec256_mul64(r0, a31)); + Lib_IntVector_Intrinsics_vec256 + a45 = + Lib_IntVector_Intrinsics_vec256_add64(a44, + Lib_IntVector_Intrinsics_vec256_mul64(r1, a31)); + Lib_IntVector_Intrinsics_vec256 + a06 = + Lib_IntVector_Intrinsics_vec256_add64(a05, + Lib_IntVector_Intrinsics_vec256_mul64(r51, a41)); + Lib_IntVector_Intrinsics_vec256 + a16 = + Lib_IntVector_Intrinsics_vec256_add64(a15, + 
Lib_IntVector_Intrinsics_vec256_mul64(r52, a41)); + Lib_IntVector_Intrinsics_vec256 + a26 = + Lib_IntVector_Intrinsics_vec256_add64(a25, + Lib_IntVector_Intrinsics_vec256_mul64(r53, a41)); + Lib_IntVector_Intrinsics_vec256 + a36 = + Lib_IntVector_Intrinsics_vec256_add64(a35, + Lib_IntVector_Intrinsics_vec256_mul64(r54, a41)); + Lib_IntVector_Intrinsics_vec256 + a46 = + Lib_IntVector_Intrinsics_vec256_add64(a45, + Lib_IntVector_Intrinsics_vec256_mul64(r0, a41)); + Lib_IntVector_Intrinsics_vec256 t0 = a06; + Lib_IntVector_Intrinsics_vec256 t1 = a16; + Lib_IntVector_Intrinsics_vec256 t2 = a26; + Lib_IntVector_Intrinsics_vec256 t3 = a36; + Lib_IntVector_Intrinsics_vec256 t4 = a46; + Lib_IntVector_Intrinsics_vec256 + mask26 = Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU); + Lib_IntVector_Intrinsics_vec256 + z0 = Lib_IntVector_Intrinsics_vec256_shift_right64(t0, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec256 + z1 = Lib_IntVector_Intrinsics_vec256_shift_right64(t3, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec256 x0 = Lib_IntVector_Intrinsics_vec256_and(t0, mask26); + Lib_IntVector_Intrinsics_vec256 x3 = Lib_IntVector_Intrinsics_vec256_and(t3, mask26); + Lib_IntVector_Intrinsics_vec256 x1 = Lib_IntVector_Intrinsics_vec256_add64(t1, z0); + Lib_IntVector_Intrinsics_vec256 x4 = Lib_IntVector_Intrinsics_vec256_add64(t4, z1); + Lib_IntVector_Intrinsics_vec256 + z01 = Lib_IntVector_Intrinsics_vec256_shift_right64(x1, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec256 + z11 = Lib_IntVector_Intrinsics_vec256_shift_right64(x4, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec256 + t = Lib_IntVector_Intrinsics_vec256_shift_left64(z11, (uint32_t)2U); + Lib_IntVector_Intrinsics_vec256 z12 = Lib_IntVector_Intrinsics_vec256_add64(z11, t); + Lib_IntVector_Intrinsics_vec256 x11 = Lib_IntVector_Intrinsics_vec256_and(x1, mask26); + Lib_IntVector_Intrinsics_vec256 x41 = Lib_IntVector_Intrinsics_vec256_and(x4, mask26); + Lib_IntVector_Intrinsics_vec256 x2 = 
Lib_IntVector_Intrinsics_vec256_add64(t2, z01); + Lib_IntVector_Intrinsics_vec256 x01 = Lib_IntVector_Intrinsics_vec256_add64(x0, z12); + Lib_IntVector_Intrinsics_vec256 + z02 = Lib_IntVector_Intrinsics_vec256_shift_right64(x2, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec256 + z13 = Lib_IntVector_Intrinsics_vec256_shift_right64(x01, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec256 x21 = Lib_IntVector_Intrinsics_vec256_and(x2, mask26); + Lib_IntVector_Intrinsics_vec256 x02 = Lib_IntVector_Intrinsics_vec256_and(x01, mask26); + Lib_IntVector_Intrinsics_vec256 x31 = Lib_IntVector_Intrinsics_vec256_add64(x3, z02); + Lib_IntVector_Intrinsics_vec256 x12 = Lib_IntVector_Intrinsics_vec256_add64(x11, z13); + Lib_IntVector_Intrinsics_vec256 + z03 = Lib_IntVector_Intrinsics_vec256_shift_right64(x31, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec256 x32 = Lib_IntVector_Intrinsics_vec256_and(x31, mask26); + Lib_IntVector_Intrinsics_vec256 x42 = Lib_IntVector_Intrinsics_vec256_add64(x41, z03); + Lib_IntVector_Intrinsics_vec256 o0 = x02; + Lib_IntVector_Intrinsics_vec256 o1 = x12; + Lib_IntVector_Intrinsics_vec256 o2 = x21; + Lib_IntVector_Intrinsics_vec256 o3 = x32; + Lib_IntVector_Intrinsics_vec256 o4 = x42; + acc[0U] = o0; + acc[1U] = o1; + acc[2U] = o2; + acc[3U] = o3; + acc[4U] = o4; +} + +/* Hacl_Poly1305_256_poly1305_update: absorbs `len` bytes of `text` into the Poly1305 state held in `ctx`. Parameters: ctx - state array; ctx[0..4] is the five-limb accumulator (acc) and the precomputed table (pre) starts at ctx + 5; len - number of input bytes; text - input bytes. Returns nothing; the result is written back to acc. The input is consumed in three stages. (1) The largest multiple of 64 bytes takes the wide path: the first 64 bytes are handed to load_acc4, then for every further 64-byte chunk acc is multiplied by the limbs at pre + 10 (using the companion limbs at pre + 15), carries are propagated with a 26-bit mask, and the chunk's five limbs (with bit 24 of limb 4 set) are added; the stage ends with fmul_r4_normalize. (2) Every remaining full 16-byte block is split into five 26-bit limbs, bit 24 of limb 4 is set, the limbs are added to acc, and acc is multiplied by the limbs at pre (companion limbs at pre + 5) followed by the same carry propagation. (3) A trailing partial block of rem = len1 % 16 bytes is copied into a zeroed 16-byte buffer and processed like stage 2, except that the extra bit is set in limb (rem * 8) / 26 at bit (rem * 8) % 26. */ void +Hacl_Poly1305_256_poly1305_update( + Lib_IntVector_Intrinsics_vec256 *ctx, + uint32_t len, + uint8_t *text) +{ + Lib_IntVector_Intrinsics_vec256 *pre = ctx + (uint32_t)5U; + Lib_IntVector_Intrinsics_vec256 *acc = ctx; + uint32_t sz_block = (uint32_t)64U; + /* len0 is len rounded down to a multiple of 64 */ uint32_t len0 = len / sz_block * sz_block; + uint8_t *t0 = text; + /* wide path: entered only when at least one whole 64-byte chunk is present */ if (len0 > (uint32_t)0U) { + uint32_t bs = (uint32_t)64U; + uint8_t *text0 = t0; + Hacl_Impl_Poly1305_Field32xN_256_load_acc4(acc, text0); + uint32_t len1 = len0 - bs; + uint8_t *text1 = t0 + bs; + /* nb counts the 64-byte chunks that follow the first one passed to load_acc4 */ uint32_t nb = len1 / bs; + for (uint32_t i = (uint32_t)0U; i < nb; i++) { + uint8_t *block = text1 + i * bs; + KRML_PRE_ALIGN(32) + Lib_IntVector_Intrinsics_vec256
e[5U] KRML_POST_ALIGN(32) = { 0U }; + Lib_IntVector_Intrinsics_vec256 lo = Lib_IntVector_Intrinsics_vec256_load64_le(block); + Lib_IntVector_Intrinsics_vec256 + hi = Lib_IntVector_Intrinsics_vec256_load64_le(block + (uint32_t)32U); + Lib_IntVector_Intrinsics_vec256 + mask260 = Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU); + Lib_IntVector_Intrinsics_vec256 + m0 = Lib_IntVector_Intrinsics_vec256_interleave_low128(lo, hi); + Lib_IntVector_Intrinsics_vec256 + m1 = Lib_IntVector_Intrinsics_vec256_interleave_high128(lo, hi); + Lib_IntVector_Intrinsics_vec256 + m2 = Lib_IntVector_Intrinsics_vec256_shift_right(m0, (uint32_t)48U); + Lib_IntVector_Intrinsics_vec256 + m3 = Lib_IntVector_Intrinsics_vec256_shift_right(m1, (uint32_t)48U); + Lib_IntVector_Intrinsics_vec256 + m4 = Lib_IntVector_Intrinsics_vec256_interleave_high64(m0, m1); + Lib_IntVector_Intrinsics_vec256 + t010 = Lib_IntVector_Intrinsics_vec256_interleave_low64(m0, m1); + Lib_IntVector_Intrinsics_vec256 + t30 = Lib_IntVector_Intrinsics_vec256_interleave_low64(m2, m3); + Lib_IntVector_Intrinsics_vec256 + t20 = Lib_IntVector_Intrinsics_vec256_shift_right64(t30, (uint32_t)4U); + Lib_IntVector_Intrinsics_vec256 o20 = Lib_IntVector_Intrinsics_vec256_and(t20, mask260); + Lib_IntVector_Intrinsics_vec256 + t10 = Lib_IntVector_Intrinsics_vec256_shift_right64(t010, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec256 o10 = Lib_IntVector_Intrinsics_vec256_and(t10, mask260); + Lib_IntVector_Intrinsics_vec256 o5 = Lib_IntVector_Intrinsics_vec256_and(t010, mask260); + Lib_IntVector_Intrinsics_vec256 + t31 = Lib_IntVector_Intrinsics_vec256_shift_right64(t30, (uint32_t)30U); + Lib_IntVector_Intrinsics_vec256 o30 = Lib_IntVector_Intrinsics_vec256_and(t31, mask260); + Lib_IntVector_Intrinsics_vec256 + o40 = Lib_IntVector_Intrinsics_vec256_shift_right64(m4, (uint32_t)40U); + Lib_IntVector_Intrinsics_vec256 o00 = o5; + Lib_IntVector_Intrinsics_vec256 o11 = o10; + Lib_IntVector_Intrinsics_vec256 o21 = o20; + 
Lib_IntVector_Intrinsics_vec256 o31 = o30; + Lib_IntVector_Intrinsics_vec256 o41 = o40; + e[0U] = o00; + e[1U] = o11; + e[2U] = o21; + e[3U] = o31; + e[4U] = o41; + uint64_t b = (uint64_t)0x1000000U; + Lib_IntVector_Intrinsics_vec256 mask = Lib_IntVector_Intrinsics_vec256_load64(b); + Lib_IntVector_Intrinsics_vec256 f4 = e[4U]; + e[4U] = Lib_IntVector_Intrinsics_vec256_or(f4, mask); + Lib_IntVector_Intrinsics_vec256 *rn = pre + (uint32_t)10U; + Lib_IntVector_Intrinsics_vec256 *rn5 = pre + (uint32_t)15U; + Lib_IntVector_Intrinsics_vec256 r0 = rn[0U]; + Lib_IntVector_Intrinsics_vec256 r1 = rn[1U]; + Lib_IntVector_Intrinsics_vec256 r2 = rn[2U]; + Lib_IntVector_Intrinsics_vec256 r3 = rn[3U]; + Lib_IntVector_Intrinsics_vec256 r4 = rn[4U]; + Lib_IntVector_Intrinsics_vec256 r51 = rn5[1U]; + Lib_IntVector_Intrinsics_vec256 r52 = rn5[2U]; + Lib_IntVector_Intrinsics_vec256 r53 = rn5[3U]; + Lib_IntVector_Intrinsics_vec256 r54 = rn5[4U]; + Lib_IntVector_Intrinsics_vec256 f10 = acc[0U]; + Lib_IntVector_Intrinsics_vec256 f110 = acc[1U]; + Lib_IntVector_Intrinsics_vec256 f120 = acc[2U]; + Lib_IntVector_Intrinsics_vec256 f130 = acc[3U]; + Lib_IntVector_Intrinsics_vec256 f140 = acc[4U]; + Lib_IntVector_Intrinsics_vec256 a0 = Lib_IntVector_Intrinsics_vec256_mul64(r0, f10); + Lib_IntVector_Intrinsics_vec256 a1 = Lib_IntVector_Intrinsics_vec256_mul64(r1, f10); + Lib_IntVector_Intrinsics_vec256 a2 = Lib_IntVector_Intrinsics_vec256_mul64(r2, f10); + Lib_IntVector_Intrinsics_vec256 a3 = Lib_IntVector_Intrinsics_vec256_mul64(r3, f10); + Lib_IntVector_Intrinsics_vec256 a4 = Lib_IntVector_Intrinsics_vec256_mul64(r4, f10); + Lib_IntVector_Intrinsics_vec256 + a01 = + Lib_IntVector_Intrinsics_vec256_add64(a0, + Lib_IntVector_Intrinsics_vec256_mul64(r54, f110)); + Lib_IntVector_Intrinsics_vec256 + a11 = + Lib_IntVector_Intrinsics_vec256_add64(a1, + Lib_IntVector_Intrinsics_vec256_mul64(r0, f110)); + Lib_IntVector_Intrinsics_vec256 + a21 = + Lib_IntVector_Intrinsics_vec256_add64(a2, + 
Lib_IntVector_Intrinsics_vec256_mul64(r1, f110)); + Lib_IntVector_Intrinsics_vec256 + a31 = + Lib_IntVector_Intrinsics_vec256_add64(a3, + Lib_IntVector_Intrinsics_vec256_mul64(r2, f110)); + Lib_IntVector_Intrinsics_vec256 + a41 = + Lib_IntVector_Intrinsics_vec256_add64(a4, + Lib_IntVector_Intrinsics_vec256_mul64(r3, f110)); + Lib_IntVector_Intrinsics_vec256 + a02 = + Lib_IntVector_Intrinsics_vec256_add64(a01, + Lib_IntVector_Intrinsics_vec256_mul64(r53, f120)); + Lib_IntVector_Intrinsics_vec256 + a12 = + Lib_IntVector_Intrinsics_vec256_add64(a11, + Lib_IntVector_Intrinsics_vec256_mul64(r54, f120)); + Lib_IntVector_Intrinsics_vec256 + a22 = + Lib_IntVector_Intrinsics_vec256_add64(a21, + Lib_IntVector_Intrinsics_vec256_mul64(r0, f120)); + Lib_IntVector_Intrinsics_vec256 + a32 = + Lib_IntVector_Intrinsics_vec256_add64(a31, + Lib_IntVector_Intrinsics_vec256_mul64(r1, f120)); + Lib_IntVector_Intrinsics_vec256 + a42 = + Lib_IntVector_Intrinsics_vec256_add64(a41, + Lib_IntVector_Intrinsics_vec256_mul64(r2, f120)); + Lib_IntVector_Intrinsics_vec256 + a03 = + Lib_IntVector_Intrinsics_vec256_add64(a02, + Lib_IntVector_Intrinsics_vec256_mul64(r52, f130)); + Lib_IntVector_Intrinsics_vec256 + a13 = + Lib_IntVector_Intrinsics_vec256_add64(a12, + Lib_IntVector_Intrinsics_vec256_mul64(r53, f130)); + Lib_IntVector_Intrinsics_vec256 + a23 = + Lib_IntVector_Intrinsics_vec256_add64(a22, + Lib_IntVector_Intrinsics_vec256_mul64(r54, f130)); + Lib_IntVector_Intrinsics_vec256 + a33 = + Lib_IntVector_Intrinsics_vec256_add64(a32, + Lib_IntVector_Intrinsics_vec256_mul64(r0, f130)); + Lib_IntVector_Intrinsics_vec256 + a43 = + Lib_IntVector_Intrinsics_vec256_add64(a42, + Lib_IntVector_Intrinsics_vec256_mul64(r1, f130)); + Lib_IntVector_Intrinsics_vec256 + a04 = + Lib_IntVector_Intrinsics_vec256_add64(a03, + Lib_IntVector_Intrinsics_vec256_mul64(r51, f140)); + Lib_IntVector_Intrinsics_vec256 + a14 = + Lib_IntVector_Intrinsics_vec256_add64(a13, + Lib_IntVector_Intrinsics_vec256_mul64(r52, 
f140)); + Lib_IntVector_Intrinsics_vec256 + a24 = + Lib_IntVector_Intrinsics_vec256_add64(a23, + Lib_IntVector_Intrinsics_vec256_mul64(r53, f140)); + Lib_IntVector_Intrinsics_vec256 + a34 = + Lib_IntVector_Intrinsics_vec256_add64(a33, + Lib_IntVector_Intrinsics_vec256_mul64(r54, f140)); + Lib_IntVector_Intrinsics_vec256 + a44 = + Lib_IntVector_Intrinsics_vec256_add64(a43, + Lib_IntVector_Intrinsics_vec256_mul64(r0, f140)); + Lib_IntVector_Intrinsics_vec256 t01 = a04; + Lib_IntVector_Intrinsics_vec256 t1 = a14; + Lib_IntVector_Intrinsics_vec256 t2 = a24; + Lib_IntVector_Intrinsics_vec256 t3 = a34; + Lib_IntVector_Intrinsics_vec256 t4 = a44; + Lib_IntVector_Intrinsics_vec256 + mask26 = Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU); + Lib_IntVector_Intrinsics_vec256 + z0 = Lib_IntVector_Intrinsics_vec256_shift_right64(t01, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec256 + z1 = Lib_IntVector_Intrinsics_vec256_shift_right64(t3, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec256 x0 = Lib_IntVector_Intrinsics_vec256_and(t01, mask26); + Lib_IntVector_Intrinsics_vec256 x3 = Lib_IntVector_Intrinsics_vec256_and(t3, mask26); + Lib_IntVector_Intrinsics_vec256 x1 = Lib_IntVector_Intrinsics_vec256_add64(t1, z0); + Lib_IntVector_Intrinsics_vec256 x4 = Lib_IntVector_Intrinsics_vec256_add64(t4, z1); + Lib_IntVector_Intrinsics_vec256 + z01 = Lib_IntVector_Intrinsics_vec256_shift_right64(x1, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec256 + z11 = Lib_IntVector_Intrinsics_vec256_shift_right64(x4, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec256 + t = Lib_IntVector_Intrinsics_vec256_shift_left64(z11, (uint32_t)2U); + Lib_IntVector_Intrinsics_vec256 z12 = Lib_IntVector_Intrinsics_vec256_add64(z11, t); + Lib_IntVector_Intrinsics_vec256 x11 = Lib_IntVector_Intrinsics_vec256_and(x1, mask26); + Lib_IntVector_Intrinsics_vec256 x41 = Lib_IntVector_Intrinsics_vec256_and(x4, mask26); + Lib_IntVector_Intrinsics_vec256 x2 = Lib_IntVector_Intrinsics_vec256_add64(t2, z01); + 
Lib_IntVector_Intrinsics_vec256 x01 = Lib_IntVector_Intrinsics_vec256_add64(x0, z12); + Lib_IntVector_Intrinsics_vec256 + z02 = Lib_IntVector_Intrinsics_vec256_shift_right64(x2, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec256 + z13 = Lib_IntVector_Intrinsics_vec256_shift_right64(x01, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec256 x21 = Lib_IntVector_Intrinsics_vec256_and(x2, mask26); + Lib_IntVector_Intrinsics_vec256 x02 = Lib_IntVector_Intrinsics_vec256_and(x01, mask26); + Lib_IntVector_Intrinsics_vec256 x31 = Lib_IntVector_Intrinsics_vec256_add64(x3, z02); + Lib_IntVector_Intrinsics_vec256 x12 = Lib_IntVector_Intrinsics_vec256_add64(x11, z13); + Lib_IntVector_Intrinsics_vec256 + z03 = Lib_IntVector_Intrinsics_vec256_shift_right64(x31, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec256 x32 = Lib_IntVector_Intrinsics_vec256_and(x31, mask26); + Lib_IntVector_Intrinsics_vec256 x42 = Lib_IntVector_Intrinsics_vec256_add64(x41, z03); + Lib_IntVector_Intrinsics_vec256 o01 = x02; + Lib_IntVector_Intrinsics_vec256 o12 = x12; + Lib_IntVector_Intrinsics_vec256 o22 = x21; + Lib_IntVector_Intrinsics_vec256 o32 = x32; + Lib_IntVector_Intrinsics_vec256 o42 = x42; + acc[0U] = o01; + acc[1U] = o12; + acc[2U] = o22; + acc[3U] = o32; + acc[4U] = o42; + Lib_IntVector_Intrinsics_vec256 f100 = acc[0U]; + Lib_IntVector_Intrinsics_vec256 f11 = acc[1U]; + Lib_IntVector_Intrinsics_vec256 f12 = acc[2U]; + Lib_IntVector_Intrinsics_vec256 f13 = acc[3U]; + Lib_IntVector_Intrinsics_vec256 f14 = acc[4U]; + Lib_IntVector_Intrinsics_vec256 f20 = e[0U]; + Lib_IntVector_Intrinsics_vec256 f21 = e[1U]; + Lib_IntVector_Intrinsics_vec256 f22 = e[2U]; + Lib_IntVector_Intrinsics_vec256 f23 = e[3U]; + Lib_IntVector_Intrinsics_vec256 f24 = e[4U]; + Lib_IntVector_Intrinsics_vec256 o0 = Lib_IntVector_Intrinsics_vec256_add64(f100, f20); + Lib_IntVector_Intrinsics_vec256 o1 = Lib_IntVector_Intrinsics_vec256_add64(f11, f21); + Lib_IntVector_Intrinsics_vec256 o2 = Lib_IntVector_Intrinsics_vec256_add64(f12, 
f22); + Lib_IntVector_Intrinsics_vec256 o3 = Lib_IntVector_Intrinsics_vec256_add64(f13, f23); + Lib_IntVector_Intrinsics_vec256 o4 = Lib_IntVector_Intrinsics_vec256_add64(f14, f24); + acc[0U] = o0; + acc[1U] = o1; + acc[2U] = o2; + acc[3U] = o3; + acc[4U] = o4; + } + Hacl_Impl_Poly1305_Field32xN_256_fmul_r4_normalize(acc, pre); + } + uint32_t len1 = len - len0; + uint8_t *t1 = text + len0; + uint32_t nb = len1 / (uint32_t)16U; + uint32_t rem = len1 % (uint32_t)16U; + for (uint32_t i = (uint32_t)0U; i < nb; i++) { + uint8_t *block = t1 + i * (uint32_t)16U; + KRML_PRE_ALIGN(32) + Lib_IntVector_Intrinsics_vec256 e[5U] KRML_POST_ALIGN(32) = { 0U }; + uint64_t u0 = load64_le(block); + uint64_t lo = u0; + uint64_t u = load64_le(block + (uint32_t)8U); + uint64_t hi = u; + Lib_IntVector_Intrinsics_vec256 f0 = Lib_IntVector_Intrinsics_vec256_load64(lo); + Lib_IntVector_Intrinsics_vec256 f1 = Lib_IntVector_Intrinsics_vec256_load64(hi); + Lib_IntVector_Intrinsics_vec256 + f010 = + Lib_IntVector_Intrinsics_vec256_and(f0, + Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec256 + f110 = + Lib_IntVector_Intrinsics_vec256_and(Lib_IntVector_Intrinsics_vec256_shift_right64(f0, + (uint32_t)26U), + Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec256 + f20 = + Lib_IntVector_Intrinsics_vec256_or(Lib_IntVector_Intrinsics_vec256_shift_right64(f0, + (uint32_t)52U), + Lib_IntVector_Intrinsics_vec256_shift_left64(Lib_IntVector_Intrinsics_vec256_and(f1, + Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3fffU)), + (uint32_t)12U)); + Lib_IntVector_Intrinsics_vec256 + f30 = + Lib_IntVector_Intrinsics_vec256_and(Lib_IntVector_Intrinsics_vec256_shift_right64(f1, + (uint32_t)14U), + Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec256 + f40 = Lib_IntVector_Intrinsics_vec256_shift_right64(f1, (uint32_t)40U); + Lib_IntVector_Intrinsics_vec256 f01 = f010; + 
Lib_IntVector_Intrinsics_vec256 f111 = f110; + Lib_IntVector_Intrinsics_vec256 f2 = f20; + Lib_IntVector_Intrinsics_vec256 f3 = f30; + Lib_IntVector_Intrinsics_vec256 f41 = f40; + e[0U] = f01; + e[1U] = f111; + e[2U] = f2; + e[3U] = f3; + e[4U] = f41; + uint64_t b = (uint64_t)0x1000000U; + Lib_IntVector_Intrinsics_vec256 mask = Lib_IntVector_Intrinsics_vec256_load64(b); + Lib_IntVector_Intrinsics_vec256 f4 = e[4U]; + e[4U] = Lib_IntVector_Intrinsics_vec256_or(f4, mask); + Lib_IntVector_Intrinsics_vec256 *r = pre; + Lib_IntVector_Intrinsics_vec256 *r5 = pre + (uint32_t)5U; + Lib_IntVector_Intrinsics_vec256 r0 = r[0U]; + Lib_IntVector_Intrinsics_vec256 r1 = r[1U]; + Lib_IntVector_Intrinsics_vec256 r2 = r[2U]; + Lib_IntVector_Intrinsics_vec256 r3 = r[3U]; + Lib_IntVector_Intrinsics_vec256 r4 = r[4U]; + Lib_IntVector_Intrinsics_vec256 r51 = r5[1U]; + Lib_IntVector_Intrinsics_vec256 r52 = r5[2U]; + Lib_IntVector_Intrinsics_vec256 r53 = r5[3U]; + Lib_IntVector_Intrinsics_vec256 r54 = r5[4U]; + Lib_IntVector_Intrinsics_vec256 f10 = e[0U]; + Lib_IntVector_Intrinsics_vec256 f11 = e[1U]; + Lib_IntVector_Intrinsics_vec256 f12 = e[2U]; + Lib_IntVector_Intrinsics_vec256 f13 = e[3U]; + Lib_IntVector_Intrinsics_vec256 f14 = e[4U]; + Lib_IntVector_Intrinsics_vec256 a0 = acc[0U]; + Lib_IntVector_Intrinsics_vec256 a1 = acc[1U]; + Lib_IntVector_Intrinsics_vec256 a2 = acc[2U]; + Lib_IntVector_Intrinsics_vec256 a3 = acc[3U]; + Lib_IntVector_Intrinsics_vec256 a4 = acc[4U]; + Lib_IntVector_Intrinsics_vec256 a01 = Lib_IntVector_Intrinsics_vec256_add64(a0, f10); + Lib_IntVector_Intrinsics_vec256 a11 = Lib_IntVector_Intrinsics_vec256_add64(a1, f11); + Lib_IntVector_Intrinsics_vec256 a21 = Lib_IntVector_Intrinsics_vec256_add64(a2, f12); + Lib_IntVector_Intrinsics_vec256 a31 = Lib_IntVector_Intrinsics_vec256_add64(a3, f13); + Lib_IntVector_Intrinsics_vec256 a41 = Lib_IntVector_Intrinsics_vec256_add64(a4, f14); + Lib_IntVector_Intrinsics_vec256 a02 = Lib_IntVector_Intrinsics_vec256_mul64(r0, 
a01); + Lib_IntVector_Intrinsics_vec256 a12 = Lib_IntVector_Intrinsics_vec256_mul64(r1, a01); + Lib_IntVector_Intrinsics_vec256 a22 = Lib_IntVector_Intrinsics_vec256_mul64(r2, a01); + Lib_IntVector_Intrinsics_vec256 a32 = Lib_IntVector_Intrinsics_vec256_mul64(r3, a01); + Lib_IntVector_Intrinsics_vec256 a42 = Lib_IntVector_Intrinsics_vec256_mul64(r4, a01); + Lib_IntVector_Intrinsics_vec256 + a03 = + Lib_IntVector_Intrinsics_vec256_add64(a02, + Lib_IntVector_Intrinsics_vec256_mul64(r54, a11)); + Lib_IntVector_Intrinsics_vec256 + a13 = + Lib_IntVector_Intrinsics_vec256_add64(a12, + Lib_IntVector_Intrinsics_vec256_mul64(r0, a11)); + Lib_IntVector_Intrinsics_vec256 + a23 = + Lib_IntVector_Intrinsics_vec256_add64(a22, + Lib_IntVector_Intrinsics_vec256_mul64(r1, a11)); + Lib_IntVector_Intrinsics_vec256 + a33 = + Lib_IntVector_Intrinsics_vec256_add64(a32, + Lib_IntVector_Intrinsics_vec256_mul64(r2, a11)); + Lib_IntVector_Intrinsics_vec256 + a43 = + Lib_IntVector_Intrinsics_vec256_add64(a42, + Lib_IntVector_Intrinsics_vec256_mul64(r3, a11)); + Lib_IntVector_Intrinsics_vec256 + a04 = + Lib_IntVector_Intrinsics_vec256_add64(a03, + Lib_IntVector_Intrinsics_vec256_mul64(r53, a21)); + Lib_IntVector_Intrinsics_vec256 + a14 = + Lib_IntVector_Intrinsics_vec256_add64(a13, + Lib_IntVector_Intrinsics_vec256_mul64(r54, a21)); + Lib_IntVector_Intrinsics_vec256 + a24 = + Lib_IntVector_Intrinsics_vec256_add64(a23, + Lib_IntVector_Intrinsics_vec256_mul64(r0, a21)); + Lib_IntVector_Intrinsics_vec256 + a34 = + Lib_IntVector_Intrinsics_vec256_add64(a33, + Lib_IntVector_Intrinsics_vec256_mul64(r1, a21)); + Lib_IntVector_Intrinsics_vec256 + a44 = + Lib_IntVector_Intrinsics_vec256_add64(a43, + Lib_IntVector_Intrinsics_vec256_mul64(r2, a21)); + Lib_IntVector_Intrinsics_vec256 + a05 = + Lib_IntVector_Intrinsics_vec256_add64(a04, + Lib_IntVector_Intrinsics_vec256_mul64(r52, a31)); + Lib_IntVector_Intrinsics_vec256 + a15 = + Lib_IntVector_Intrinsics_vec256_add64(a14, + 
Lib_IntVector_Intrinsics_vec256_mul64(r53, a31)); + Lib_IntVector_Intrinsics_vec256 + a25 = + Lib_IntVector_Intrinsics_vec256_add64(a24, + Lib_IntVector_Intrinsics_vec256_mul64(r54, a31)); + Lib_IntVector_Intrinsics_vec256 + a35 = + Lib_IntVector_Intrinsics_vec256_add64(a34, + Lib_IntVector_Intrinsics_vec256_mul64(r0, a31)); + Lib_IntVector_Intrinsics_vec256 + a45 = + Lib_IntVector_Intrinsics_vec256_add64(a44, + Lib_IntVector_Intrinsics_vec256_mul64(r1, a31)); + Lib_IntVector_Intrinsics_vec256 + a06 = + Lib_IntVector_Intrinsics_vec256_add64(a05, + Lib_IntVector_Intrinsics_vec256_mul64(r51, a41)); + Lib_IntVector_Intrinsics_vec256 + a16 = + Lib_IntVector_Intrinsics_vec256_add64(a15, + Lib_IntVector_Intrinsics_vec256_mul64(r52, a41)); + Lib_IntVector_Intrinsics_vec256 + a26 = + Lib_IntVector_Intrinsics_vec256_add64(a25, + Lib_IntVector_Intrinsics_vec256_mul64(r53, a41)); + Lib_IntVector_Intrinsics_vec256 + a36 = + Lib_IntVector_Intrinsics_vec256_add64(a35, + Lib_IntVector_Intrinsics_vec256_mul64(r54, a41)); + Lib_IntVector_Intrinsics_vec256 + a46 = + Lib_IntVector_Intrinsics_vec256_add64(a45, + Lib_IntVector_Intrinsics_vec256_mul64(r0, a41)); + Lib_IntVector_Intrinsics_vec256 t01 = a06; + Lib_IntVector_Intrinsics_vec256 t11 = a16; + Lib_IntVector_Intrinsics_vec256 t2 = a26; + Lib_IntVector_Intrinsics_vec256 t3 = a36; + Lib_IntVector_Intrinsics_vec256 t4 = a46; + Lib_IntVector_Intrinsics_vec256 + mask26 = Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU); + Lib_IntVector_Intrinsics_vec256 + z0 = Lib_IntVector_Intrinsics_vec256_shift_right64(t01, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec256 + z1 = Lib_IntVector_Intrinsics_vec256_shift_right64(t3, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec256 x0 = Lib_IntVector_Intrinsics_vec256_and(t01, mask26); + Lib_IntVector_Intrinsics_vec256 x3 = Lib_IntVector_Intrinsics_vec256_and(t3, mask26); + Lib_IntVector_Intrinsics_vec256 x1 = Lib_IntVector_Intrinsics_vec256_add64(t11, z0); + 
Lib_IntVector_Intrinsics_vec256 x4 = Lib_IntVector_Intrinsics_vec256_add64(t4, z1); + Lib_IntVector_Intrinsics_vec256 + z01 = Lib_IntVector_Intrinsics_vec256_shift_right64(x1, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec256 + z11 = Lib_IntVector_Intrinsics_vec256_shift_right64(x4, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec256 + t = Lib_IntVector_Intrinsics_vec256_shift_left64(z11, (uint32_t)2U); + Lib_IntVector_Intrinsics_vec256 z12 = Lib_IntVector_Intrinsics_vec256_add64(z11, t); + Lib_IntVector_Intrinsics_vec256 x11 = Lib_IntVector_Intrinsics_vec256_and(x1, mask26); + Lib_IntVector_Intrinsics_vec256 x41 = Lib_IntVector_Intrinsics_vec256_and(x4, mask26); + Lib_IntVector_Intrinsics_vec256 x2 = Lib_IntVector_Intrinsics_vec256_add64(t2, z01); + Lib_IntVector_Intrinsics_vec256 x01 = Lib_IntVector_Intrinsics_vec256_add64(x0, z12); + Lib_IntVector_Intrinsics_vec256 + z02 = Lib_IntVector_Intrinsics_vec256_shift_right64(x2, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec256 + z13 = Lib_IntVector_Intrinsics_vec256_shift_right64(x01, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec256 x21 = Lib_IntVector_Intrinsics_vec256_and(x2, mask26); + Lib_IntVector_Intrinsics_vec256 x02 = Lib_IntVector_Intrinsics_vec256_and(x01, mask26); + Lib_IntVector_Intrinsics_vec256 x31 = Lib_IntVector_Intrinsics_vec256_add64(x3, z02); + Lib_IntVector_Intrinsics_vec256 x12 = Lib_IntVector_Intrinsics_vec256_add64(x11, z13); + Lib_IntVector_Intrinsics_vec256 + z03 = Lib_IntVector_Intrinsics_vec256_shift_right64(x31, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec256 x32 = Lib_IntVector_Intrinsics_vec256_and(x31, mask26); + Lib_IntVector_Intrinsics_vec256 x42 = Lib_IntVector_Intrinsics_vec256_add64(x41, z03); + Lib_IntVector_Intrinsics_vec256 o0 = x02; + Lib_IntVector_Intrinsics_vec256 o1 = x12; + Lib_IntVector_Intrinsics_vec256 o2 = x21; + Lib_IntVector_Intrinsics_vec256 o3 = x32; + Lib_IntVector_Intrinsics_vec256 o4 = x42; + acc[0U] = o0; + acc[1U] = o1; + acc[2U] = o2; + acc[3U] = o3; + acc[4U] 
= o4; + } + if (rem > (uint32_t)0U) { + uint8_t *last = t1 + nb * (uint32_t)16U; + KRML_PRE_ALIGN(32) + Lib_IntVector_Intrinsics_vec256 e[5U] KRML_POST_ALIGN(32) = { 0U }; + uint8_t tmp[16U] = { 0U }; + memcpy(tmp, last, rem * sizeof(uint8_t)); + uint64_t u0 = load64_le(tmp); + uint64_t lo = u0; + uint64_t u = load64_le(tmp + (uint32_t)8U); + uint64_t hi = u; + Lib_IntVector_Intrinsics_vec256 f0 = Lib_IntVector_Intrinsics_vec256_load64(lo); + Lib_IntVector_Intrinsics_vec256 f1 = Lib_IntVector_Intrinsics_vec256_load64(hi); + Lib_IntVector_Intrinsics_vec256 + f010 = + Lib_IntVector_Intrinsics_vec256_and(f0, + Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec256 + f110 = + Lib_IntVector_Intrinsics_vec256_and(Lib_IntVector_Intrinsics_vec256_shift_right64(f0, + (uint32_t)26U), + Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec256 + f20 = + Lib_IntVector_Intrinsics_vec256_or(Lib_IntVector_Intrinsics_vec256_shift_right64(f0, + (uint32_t)52U), + Lib_IntVector_Intrinsics_vec256_shift_left64(Lib_IntVector_Intrinsics_vec256_and(f1, + Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3fffU)), + (uint32_t)12U)); + Lib_IntVector_Intrinsics_vec256 + f30 = + Lib_IntVector_Intrinsics_vec256_and(Lib_IntVector_Intrinsics_vec256_shift_right64(f1, + (uint32_t)14U), + Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec256 + f40 = Lib_IntVector_Intrinsics_vec256_shift_right64(f1, (uint32_t)40U); + Lib_IntVector_Intrinsics_vec256 f01 = f010; + Lib_IntVector_Intrinsics_vec256 f111 = f110; + Lib_IntVector_Intrinsics_vec256 f2 = f20; + Lib_IntVector_Intrinsics_vec256 f3 = f30; + Lib_IntVector_Intrinsics_vec256 f4 = f40; + e[0U] = f01; + e[1U] = f111; + e[2U] = f2; + e[3U] = f3; + e[4U] = f4; + uint64_t b = (uint64_t)1U << rem * (uint32_t)8U % (uint32_t)26U; + Lib_IntVector_Intrinsics_vec256 mask = Lib_IntVector_Intrinsics_vec256_load64(b); + 
Lib_IntVector_Intrinsics_vec256 fi = e[rem * (uint32_t)8U / (uint32_t)26U]; + e[rem * (uint32_t)8U / (uint32_t)26U] = Lib_IntVector_Intrinsics_vec256_or(fi, mask); + Lib_IntVector_Intrinsics_vec256 *r = pre; + Lib_IntVector_Intrinsics_vec256 *r5 = pre + (uint32_t)5U; + Lib_IntVector_Intrinsics_vec256 r0 = r[0U]; + Lib_IntVector_Intrinsics_vec256 r1 = r[1U]; + Lib_IntVector_Intrinsics_vec256 r2 = r[2U]; + Lib_IntVector_Intrinsics_vec256 r3 = r[3U]; + Lib_IntVector_Intrinsics_vec256 r4 = r[4U]; + Lib_IntVector_Intrinsics_vec256 r51 = r5[1U]; + Lib_IntVector_Intrinsics_vec256 r52 = r5[2U]; + Lib_IntVector_Intrinsics_vec256 r53 = r5[3U]; + Lib_IntVector_Intrinsics_vec256 r54 = r5[4U]; + Lib_IntVector_Intrinsics_vec256 f10 = e[0U]; + Lib_IntVector_Intrinsics_vec256 f11 = e[1U]; + Lib_IntVector_Intrinsics_vec256 f12 = e[2U]; + Lib_IntVector_Intrinsics_vec256 f13 = e[3U]; + Lib_IntVector_Intrinsics_vec256 f14 = e[4U]; + Lib_IntVector_Intrinsics_vec256 a0 = acc[0U]; + Lib_IntVector_Intrinsics_vec256 a1 = acc[1U]; + Lib_IntVector_Intrinsics_vec256 a2 = acc[2U]; + Lib_IntVector_Intrinsics_vec256 a3 = acc[3U]; + Lib_IntVector_Intrinsics_vec256 a4 = acc[4U]; + Lib_IntVector_Intrinsics_vec256 a01 = Lib_IntVector_Intrinsics_vec256_add64(a0, f10); + Lib_IntVector_Intrinsics_vec256 a11 = Lib_IntVector_Intrinsics_vec256_add64(a1, f11); + Lib_IntVector_Intrinsics_vec256 a21 = Lib_IntVector_Intrinsics_vec256_add64(a2, f12); + Lib_IntVector_Intrinsics_vec256 a31 = Lib_IntVector_Intrinsics_vec256_add64(a3, f13); + Lib_IntVector_Intrinsics_vec256 a41 = Lib_IntVector_Intrinsics_vec256_add64(a4, f14); + Lib_IntVector_Intrinsics_vec256 a02 = Lib_IntVector_Intrinsics_vec256_mul64(r0, a01); + Lib_IntVector_Intrinsics_vec256 a12 = Lib_IntVector_Intrinsics_vec256_mul64(r1, a01); + Lib_IntVector_Intrinsics_vec256 a22 = Lib_IntVector_Intrinsics_vec256_mul64(r2, a01); + Lib_IntVector_Intrinsics_vec256 a32 = Lib_IntVector_Intrinsics_vec256_mul64(r3, a01); + Lib_IntVector_Intrinsics_vec256 a42 = 
Lib_IntVector_Intrinsics_vec256_mul64(r4, a01); + Lib_IntVector_Intrinsics_vec256 + a03 = + Lib_IntVector_Intrinsics_vec256_add64(a02, + Lib_IntVector_Intrinsics_vec256_mul64(r54, a11)); + Lib_IntVector_Intrinsics_vec256 + a13 = + Lib_IntVector_Intrinsics_vec256_add64(a12, + Lib_IntVector_Intrinsics_vec256_mul64(r0, a11)); + Lib_IntVector_Intrinsics_vec256 + a23 = + Lib_IntVector_Intrinsics_vec256_add64(a22, + Lib_IntVector_Intrinsics_vec256_mul64(r1, a11)); + Lib_IntVector_Intrinsics_vec256 + a33 = + Lib_IntVector_Intrinsics_vec256_add64(a32, + Lib_IntVector_Intrinsics_vec256_mul64(r2, a11)); + Lib_IntVector_Intrinsics_vec256 + a43 = + Lib_IntVector_Intrinsics_vec256_add64(a42, + Lib_IntVector_Intrinsics_vec256_mul64(r3, a11)); + Lib_IntVector_Intrinsics_vec256 + a04 = + Lib_IntVector_Intrinsics_vec256_add64(a03, + Lib_IntVector_Intrinsics_vec256_mul64(r53, a21)); + Lib_IntVector_Intrinsics_vec256 + a14 = + Lib_IntVector_Intrinsics_vec256_add64(a13, + Lib_IntVector_Intrinsics_vec256_mul64(r54, a21)); + Lib_IntVector_Intrinsics_vec256 + a24 = + Lib_IntVector_Intrinsics_vec256_add64(a23, + Lib_IntVector_Intrinsics_vec256_mul64(r0, a21)); + Lib_IntVector_Intrinsics_vec256 + a34 = + Lib_IntVector_Intrinsics_vec256_add64(a33, + Lib_IntVector_Intrinsics_vec256_mul64(r1, a21)); + Lib_IntVector_Intrinsics_vec256 + a44 = + Lib_IntVector_Intrinsics_vec256_add64(a43, + Lib_IntVector_Intrinsics_vec256_mul64(r2, a21)); + Lib_IntVector_Intrinsics_vec256 + a05 = + Lib_IntVector_Intrinsics_vec256_add64(a04, + Lib_IntVector_Intrinsics_vec256_mul64(r52, a31)); + Lib_IntVector_Intrinsics_vec256 + a15 = + Lib_IntVector_Intrinsics_vec256_add64(a14, + Lib_IntVector_Intrinsics_vec256_mul64(r53, a31)); + Lib_IntVector_Intrinsics_vec256 + a25 = + Lib_IntVector_Intrinsics_vec256_add64(a24, + Lib_IntVector_Intrinsics_vec256_mul64(r54, a31)); + Lib_IntVector_Intrinsics_vec256 + a35 = + Lib_IntVector_Intrinsics_vec256_add64(a34, + Lib_IntVector_Intrinsics_vec256_mul64(r0, a31)); + 
Lib_IntVector_Intrinsics_vec256 + a45 = + Lib_IntVector_Intrinsics_vec256_add64(a44, + Lib_IntVector_Intrinsics_vec256_mul64(r1, a31)); + Lib_IntVector_Intrinsics_vec256 + a06 = + Lib_IntVector_Intrinsics_vec256_add64(a05, + Lib_IntVector_Intrinsics_vec256_mul64(r51, a41)); + Lib_IntVector_Intrinsics_vec256 + a16 = + Lib_IntVector_Intrinsics_vec256_add64(a15, + Lib_IntVector_Intrinsics_vec256_mul64(r52, a41)); + Lib_IntVector_Intrinsics_vec256 + a26 = + Lib_IntVector_Intrinsics_vec256_add64(a25, + Lib_IntVector_Intrinsics_vec256_mul64(r53, a41)); + Lib_IntVector_Intrinsics_vec256 + a36 = + Lib_IntVector_Intrinsics_vec256_add64(a35, + Lib_IntVector_Intrinsics_vec256_mul64(r54, a41)); + Lib_IntVector_Intrinsics_vec256 + a46 = + Lib_IntVector_Intrinsics_vec256_add64(a45, + Lib_IntVector_Intrinsics_vec256_mul64(r0, a41)); + Lib_IntVector_Intrinsics_vec256 t01 = a06; + Lib_IntVector_Intrinsics_vec256 t11 = a16; + Lib_IntVector_Intrinsics_vec256 t2 = a26; + Lib_IntVector_Intrinsics_vec256 t3 = a36; + Lib_IntVector_Intrinsics_vec256 t4 = a46; + Lib_IntVector_Intrinsics_vec256 + mask26 = Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU); + Lib_IntVector_Intrinsics_vec256 + z0 = Lib_IntVector_Intrinsics_vec256_shift_right64(t01, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec256 + z1 = Lib_IntVector_Intrinsics_vec256_shift_right64(t3, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec256 x0 = Lib_IntVector_Intrinsics_vec256_and(t01, mask26); + Lib_IntVector_Intrinsics_vec256 x3 = Lib_IntVector_Intrinsics_vec256_and(t3, mask26); + Lib_IntVector_Intrinsics_vec256 x1 = Lib_IntVector_Intrinsics_vec256_add64(t11, z0); + Lib_IntVector_Intrinsics_vec256 x4 = Lib_IntVector_Intrinsics_vec256_add64(t4, z1); + Lib_IntVector_Intrinsics_vec256 + z01 = Lib_IntVector_Intrinsics_vec256_shift_right64(x1, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec256 + z11 = Lib_IntVector_Intrinsics_vec256_shift_right64(x4, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec256 + t = 
Lib_IntVector_Intrinsics_vec256_shift_left64(z11, (uint32_t)2U); + Lib_IntVector_Intrinsics_vec256 z12 = Lib_IntVector_Intrinsics_vec256_add64(z11, t); + Lib_IntVector_Intrinsics_vec256 x11 = Lib_IntVector_Intrinsics_vec256_and(x1, mask26); + Lib_IntVector_Intrinsics_vec256 x41 = Lib_IntVector_Intrinsics_vec256_and(x4, mask26); + Lib_IntVector_Intrinsics_vec256 x2 = Lib_IntVector_Intrinsics_vec256_add64(t2, z01); + Lib_IntVector_Intrinsics_vec256 x01 = Lib_IntVector_Intrinsics_vec256_add64(x0, z12); + Lib_IntVector_Intrinsics_vec256 + z02 = Lib_IntVector_Intrinsics_vec256_shift_right64(x2, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec256 + z13 = Lib_IntVector_Intrinsics_vec256_shift_right64(x01, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec256 x21 = Lib_IntVector_Intrinsics_vec256_and(x2, mask26); + Lib_IntVector_Intrinsics_vec256 x02 = Lib_IntVector_Intrinsics_vec256_and(x01, mask26); + Lib_IntVector_Intrinsics_vec256 x31 = Lib_IntVector_Intrinsics_vec256_add64(x3, z02); + Lib_IntVector_Intrinsics_vec256 x12 = Lib_IntVector_Intrinsics_vec256_add64(x11, z13); + Lib_IntVector_Intrinsics_vec256 + z03 = Lib_IntVector_Intrinsics_vec256_shift_right64(x31, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec256 x32 = Lib_IntVector_Intrinsics_vec256_and(x31, mask26); + Lib_IntVector_Intrinsics_vec256 x42 = Lib_IntVector_Intrinsics_vec256_add64(x41, z03); + Lib_IntVector_Intrinsics_vec256 o0 = x02; + Lib_IntVector_Intrinsics_vec256 o1 = x12; + Lib_IntVector_Intrinsics_vec256 o2 = x21; + Lib_IntVector_Intrinsics_vec256 o3 = x32; + Lib_IntVector_Intrinsics_vec256 o4 = x42; + acc[0U] = o0; + acc[1U] = o1; + acc[2U] = o2; + acc[3U] = o3; + acc[4U] = o4; + return; + } +} + +void +Hacl_Poly1305_256_poly1305_finish( + uint8_t *tag, + uint8_t *key, + Lib_IntVector_Intrinsics_vec256 *ctx) +{ + Lib_IntVector_Intrinsics_vec256 *acc = ctx; + uint8_t *ks = key + (uint32_t)16U; + Lib_IntVector_Intrinsics_vec256 f0 = acc[0U]; + Lib_IntVector_Intrinsics_vec256 f13 = acc[1U]; + 
Lib_IntVector_Intrinsics_vec256 f23 = acc[2U]; + Lib_IntVector_Intrinsics_vec256 f33 = acc[3U]; + Lib_IntVector_Intrinsics_vec256 f40 = acc[4U]; + Lib_IntVector_Intrinsics_vec256 + l0 = Lib_IntVector_Intrinsics_vec256_add64(f0, Lib_IntVector_Intrinsics_vec256_zero); + Lib_IntVector_Intrinsics_vec256 + tmp00 = + Lib_IntVector_Intrinsics_vec256_and(l0, + Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec256 + c00 = Lib_IntVector_Intrinsics_vec256_shift_right64(l0, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec256 l1 = Lib_IntVector_Intrinsics_vec256_add64(f13, c00); + Lib_IntVector_Intrinsics_vec256 + tmp10 = + Lib_IntVector_Intrinsics_vec256_and(l1, + Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec256 + c10 = Lib_IntVector_Intrinsics_vec256_shift_right64(l1, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec256 l2 = Lib_IntVector_Intrinsics_vec256_add64(f23, c10); + Lib_IntVector_Intrinsics_vec256 + tmp20 = + Lib_IntVector_Intrinsics_vec256_and(l2, + Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec256 + c20 = Lib_IntVector_Intrinsics_vec256_shift_right64(l2, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec256 l3 = Lib_IntVector_Intrinsics_vec256_add64(f33, c20); + Lib_IntVector_Intrinsics_vec256 + tmp30 = + Lib_IntVector_Intrinsics_vec256_and(l3, + Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec256 + c30 = Lib_IntVector_Intrinsics_vec256_shift_right64(l3, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec256 l4 = Lib_IntVector_Intrinsics_vec256_add64(f40, c30); + Lib_IntVector_Intrinsics_vec256 + tmp40 = + Lib_IntVector_Intrinsics_vec256_and(l4, + Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec256 + c40 = Lib_IntVector_Intrinsics_vec256_shift_right64(l4, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec256 + f010 = + 
Lib_IntVector_Intrinsics_vec256_add64(tmp00, + Lib_IntVector_Intrinsics_vec256_smul64(c40, (uint64_t)5U)); + Lib_IntVector_Intrinsics_vec256 f110 = tmp10; + Lib_IntVector_Intrinsics_vec256 f210 = tmp20; + Lib_IntVector_Intrinsics_vec256 f310 = tmp30; + Lib_IntVector_Intrinsics_vec256 f410 = tmp40; + Lib_IntVector_Intrinsics_vec256 + l = Lib_IntVector_Intrinsics_vec256_add64(f010, Lib_IntVector_Intrinsics_vec256_zero); + Lib_IntVector_Intrinsics_vec256 + tmp0 = + Lib_IntVector_Intrinsics_vec256_and(l, + Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec256 + c0 = Lib_IntVector_Intrinsics_vec256_shift_right64(l, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec256 l5 = Lib_IntVector_Intrinsics_vec256_add64(f110, c0); + Lib_IntVector_Intrinsics_vec256 + tmp1 = + Lib_IntVector_Intrinsics_vec256_and(l5, + Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec256 + c1 = Lib_IntVector_Intrinsics_vec256_shift_right64(l5, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec256 l6 = Lib_IntVector_Intrinsics_vec256_add64(f210, c1); + Lib_IntVector_Intrinsics_vec256 + tmp2 = + Lib_IntVector_Intrinsics_vec256_and(l6, + Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec256 + c2 = Lib_IntVector_Intrinsics_vec256_shift_right64(l6, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec256 l7 = Lib_IntVector_Intrinsics_vec256_add64(f310, c2); + Lib_IntVector_Intrinsics_vec256 + tmp3 = + Lib_IntVector_Intrinsics_vec256_and(l7, + Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec256 + c3 = Lib_IntVector_Intrinsics_vec256_shift_right64(l7, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec256 l8 = Lib_IntVector_Intrinsics_vec256_add64(f410, c3); + Lib_IntVector_Intrinsics_vec256 + tmp4 = + Lib_IntVector_Intrinsics_vec256_and(l8, + Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU)); + Lib_IntVector_Intrinsics_vec256 + c4 = 
Lib_IntVector_Intrinsics_vec256_shift_right64(l8, (uint32_t)26U); + Lib_IntVector_Intrinsics_vec256 + f02 = + Lib_IntVector_Intrinsics_vec256_add64(tmp0, + Lib_IntVector_Intrinsics_vec256_smul64(c4, (uint64_t)5U)); + Lib_IntVector_Intrinsics_vec256 f12 = tmp1; + Lib_IntVector_Intrinsics_vec256 f22 = tmp2; + Lib_IntVector_Intrinsics_vec256 f32 = tmp3; + Lib_IntVector_Intrinsics_vec256 f42 = tmp4; + Lib_IntVector_Intrinsics_vec256 + mh = Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU); + Lib_IntVector_Intrinsics_vec256 + ml = Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3fffffbU); + Lib_IntVector_Intrinsics_vec256 mask = Lib_IntVector_Intrinsics_vec256_eq64(f42, mh); + Lib_IntVector_Intrinsics_vec256 + mask1 = + Lib_IntVector_Intrinsics_vec256_and(mask, + Lib_IntVector_Intrinsics_vec256_eq64(f32, mh)); + Lib_IntVector_Intrinsics_vec256 + mask2 = + Lib_IntVector_Intrinsics_vec256_and(mask1, + Lib_IntVector_Intrinsics_vec256_eq64(f22, mh)); + Lib_IntVector_Intrinsics_vec256 + mask3 = + Lib_IntVector_Intrinsics_vec256_and(mask2, + Lib_IntVector_Intrinsics_vec256_eq64(f12, mh)); + Lib_IntVector_Intrinsics_vec256 + mask4 = + Lib_IntVector_Intrinsics_vec256_and(mask3, + Lib_IntVector_Intrinsics_vec256_lognot(Lib_IntVector_Intrinsics_vec256_gt64(ml, f02))); + Lib_IntVector_Intrinsics_vec256 ph = Lib_IntVector_Intrinsics_vec256_and(mask4, mh); + Lib_IntVector_Intrinsics_vec256 pl = Lib_IntVector_Intrinsics_vec256_and(mask4, ml); + Lib_IntVector_Intrinsics_vec256 o0 = Lib_IntVector_Intrinsics_vec256_sub64(f02, pl); + Lib_IntVector_Intrinsics_vec256 o1 = Lib_IntVector_Intrinsics_vec256_sub64(f12, ph); + Lib_IntVector_Intrinsics_vec256 o2 = Lib_IntVector_Intrinsics_vec256_sub64(f22, ph); + Lib_IntVector_Intrinsics_vec256 o3 = Lib_IntVector_Intrinsics_vec256_sub64(f32, ph); + Lib_IntVector_Intrinsics_vec256 o4 = Lib_IntVector_Intrinsics_vec256_sub64(f42, ph); + Lib_IntVector_Intrinsics_vec256 f011 = o0; + Lib_IntVector_Intrinsics_vec256 f111 = o1; + 
Lib_IntVector_Intrinsics_vec256 f211 = o2; + Lib_IntVector_Intrinsics_vec256 f311 = o3; + Lib_IntVector_Intrinsics_vec256 f411 = o4; + acc[0U] = f011; + acc[1U] = f111; + acc[2U] = f211; + acc[3U] = f311; + acc[4U] = f411; + Lib_IntVector_Intrinsics_vec256 f00 = acc[0U]; + Lib_IntVector_Intrinsics_vec256 f1 = acc[1U]; + Lib_IntVector_Intrinsics_vec256 f2 = acc[2U]; + Lib_IntVector_Intrinsics_vec256 f3 = acc[3U]; + Lib_IntVector_Intrinsics_vec256 f4 = acc[4U]; + uint64_t f01 = Lib_IntVector_Intrinsics_vec256_extract64(f00, (uint32_t)0U); + uint64_t f112 = Lib_IntVector_Intrinsics_vec256_extract64(f1, (uint32_t)0U); + uint64_t f212 = Lib_IntVector_Intrinsics_vec256_extract64(f2, (uint32_t)0U); + uint64_t f312 = Lib_IntVector_Intrinsics_vec256_extract64(f3, (uint32_t)0U); + uint64_t f41 = Lib_IntVector_Intrinsics_vec256_extract64(f4, (uint32_t)0U); + uint64_t lo = (f01 | f112 << (uint32_t)26U) | f212 << (uint32_t)52U; + uint64_t hi = (f212 >> (uint32_t)12U | f312 << (uint32_t)14U) | f41 << (uint32_t)40U; + uint64_t f10 = lo; + uint64_t f11 = hi; + uint64_t u0 = load64_le(ks); + uint64_t lo0 = u0; + uint64_t u = load64_le(ks + (uint32_t)8U); + uint64_t hi0 = u; + uint64_t f20 = lo0; + uint64_t f21 = hi0; + uint64_t r0 = f10 + f20; + uint64_t r1 = f11 + f21; + uint64_t c = (r0 ^ ((r0 ^ f20) | ((r0 - f20) ^ f20))) >> (uint32_t)63U; + uint64_t r11 = r1 + c; + uint64_t f30 = r0; + uint64_t f31 = r11; + store64_le(tag, f30); + store64_le(tag + (uint32_t)8U, f31); +} + +void +Hacl_Poly1305_256_poly1305_mac(uint8_t *tag, uint32_t len, uint8_t *text, uint8_t *key) +{ + KRML_PRE_ALIGN(32) + Lib_IntVector_Intrinsics_vec256 ctx[25U] KRML_POST_ALIGN(32) = { 0U }; + Hacl_Poly1305_256_poly1305_init(ctx, key); + Hacl_Poly1305_256_poly1305_update(ctx, len, text); + Hacl_Poly1305_256_poly1305_finish(tag, key, ctx); +} diff --git a/security/nss/lib/freebl/verified/Hacl_Poly1305_256.h b/security/nss/lib/freebl/verified/Hacl_Poly1305_256.h new file mode 100644 index 0000000000..d9bf5fd831 
--- /dev/null +++ b/security/nss/lib/freebl/verified/Hacl_Poly1305_256.h @@ -0,0 +1,63 @@ +/* MIT License + * + * Copyright (c) 2016-2020 INRIA, CMU and Microsoft Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#ifndef __Hacl_Poly1305_256_H +#define __Hacl_Poly1305_256_H + +#if defined(__cplusplus) +extern "C" { +#endif + +#include <string.h> +#include "krml/internal/types.h" +#include "krml/lowstar_endianness.h" +#include "krml/internal/target.h" + +#include "Hacl_Krmllib.h" +#include "libintvector.h" +typedef Lib_IntVector_Intrinsics_vec256 *Hacl_Poly1305_256_poly1305_ctx; + +void Hacl_Poly1305_256_poly1305_init(Lib_IntVector_Intrinsics_vec256 *ctx, uint8_t *key); + +void Hacl_Poly1305_256_poly1305_update1(Lib_IntVector_Intrinsics_vec256 *ctx, uint8_t *text); + +void +Hacl_Poly1305_256_poly1305_update( + Lib_IntVector_Intrinsics_vec256 *ctx, + uint32_t len, + uint8_t *text); + +void +Hacl_Poly1305_256_poly1305_finish( + uint8_t *tag, + uint8_t *key, + Lib_IntVector_Intrinsics_vec256 *ctx); + +void Hacl_Poly1305_256_poly1305_mac(uint8_t *tag, uint32_t len, uint8_t *text, uint8_t *key); + +#if defined(__cplusplus) +} +#endif + +#define __Hacl_Poly1305_256_H_DEFINED +#endif diff --git a/security/nss/lib/freebl/verified/Hacl_Poly1305_32.c b/security/nss/lib/freebl/verified/Hacl_Poly1305_32.c new file mode 100644 index 0000000000..8de2eca7f1 --- /dev/null +++ b/security/nss/lib/freebl/verified/Hacl_Poly1305_32.c @@ -0,0 +1,574 @@ +/* MIT License + * + * Copyright (c) 2016-2020 INRIA, CMU and Microsoft Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "Hacl_Poly1305_32.h" + +#include "internal/Hacl_Krmllib.h" + +void +Hacl_Poly1305_32_poly1305_init(uint64_t *ctx, uint8_t *key) +{ + uint64_t *acc = ctx; + uint64_t *pre = ctx + (uint32_t)5U; + uint8_t *kr = key; + acc[0U] = (uint64_t)0U; + acc[1U] = (uint64_t)0U; + acc[2U] = (uint64_t)0U; + acc[3U] = (uint64_t)0U; + acc[4U] = (uint64_t)0U; + uint64_t u0 = load64_le(kr); + uint64_t lo = u0; + uint64_t u = load64_le(kr + (uint32_t)8U); + uint64_t hi = u; + uint64_t mask0 = (uint64_t)0x0ffffffc0fffffffU; + uint64_t mask1 = (uint64_t)0x0ffffffc0ffffffcU; + uint64_t lo1 = lo & mask0; + uint64_t hi1 = hi & mask1; + uint64_t *r = pre; + uint64_t *r5 = pre + (uint32_t)5U; + uint64_t *rn = pre + (uint32_t)10U; + uint64_t *rn_5 = pre + (uint32_t)15U; + uint64_t r_vec0 = lo1; + uint64_t r_vec1 = hi1; + uint64_t f00 = r_vec0 & (uint64_t)0x3ffffffU; + uint64_t f10 = r_vec0 >> (uint32_t)26U & (uint64_t)0x3ffffffU; + uint64_t f20 = r_vec0 >> (uint32_t)52U | (r_vec1 & (uint64_t)0x3fffU) << (uint32_t)12U; + uint64_t f30 = r_vec1 >> (uint32_t)14U & (uint64_t)0x3ffffffU; + uint64_t f40 = r_vec1 >> (uint32_t)40U; + uint64_t f0 = f00; + uint64_t f1 = f10; + uint64_t f2 = f20; + uint64_t f3 = f30; + uint64_t f4 = f40; + r[0U] = f0; + r[1U] = f1; + r[2U] = f2; + r[3U] = f3; + r[4U] = f4; + uint64_t f200 = r[0U]; + uint64_t f21 = r[1U]; + uint64_t f22 = r[2U]; + uint64_t f23 = r[3U]; + uint64_t f24 = r[4U]; + r5[0U] = f200 * (uint64_t)5U; + r5[1U] = f21 * (uint64_t)5U; + r5[2U] = f22 * 
(uint64_t)5U; + r5[3U] = f23 * (uint64_t)5U; + r5[4U] = f24 * (uint64_t)5U; + rn[0U] = r[0U]; + rn[1U] = r[1U]; + rn[2U] = r[2U]; + rn[3U] = r[3U]; + rn[4U] = r[4U]; + rn_5[0U] = r5[0U]; + rn_5[1U] = r5[1U]; + rn_5[2U] = r5[2U]; + rn_5[3U] = r5[3U]; + rn_5[4U] = r5[4U]; +} + +void +Hacl_Poly1305_32_poly1305_update1(uint64_t *ctx, uint8_t *text) +{ + uint64_t *pre = ctx + (uint32_t)5U; + uint64_t *acc = ctx; + uint64_t e[5U] = { 0U }; + uint64_t u0 = load64_le(text); + uint64_t lo = u0; + uint64_t u = load64_le(text + (uint32_t)8U); + uint64_t hi = u; + uint64_t f0 = lo; + uint64_t f1 = hi; + uint64_t f010 = f0 & (uint64_t)0x3ffffffU; + uint64_t f110 = f0 >> (uint32_t)26U & (uint64_t)0x3ffffffU; + uint64_t f20 = f0 >> (uint32_t)52U | (f1 & (uint64_t)0x3fffU) << (uint32_t)12U; + uint64_t f30 = f1 >> (uint32_t)14U & (uint64_t)0x3ffffffU; + uint64_t f40 = f1 >> (uint32_t)40U; + uint64_t f01 = f010; + uint64_t f111 = f110; + uint64_t f2 = f20; + uint64_t f3 = f30; + uint64_t f41 = f40; + e[0U] = f01; + e[1U] = f111; + e[2U] = f2; + e[3U] = f3; + e[4U] = f41; + uint64_t b = (uint64_t)0x1000000U; + uint64_t mask = b; + uint64_t f4 = e[4U]; + e[4U] = f4 | mask; + uint64_t *r = pre; + uint64_t *r5 = pre + (uint32_t)5U; + uint64_t r0 = r[0U]; + uint64_t r1 = r[1U]; + uint64_t r2 = r[2U]; + uint64_t r3 = r[3U]; + uint64_t r4 = r[4U]; + uint64_t r51 = r5[1U]; + uint64_t r52 = r5[2U]; + uint64_t r53 = r5[3U]; + uint64_t r54 = r5[4U]; + uint64_t f10 = e[0U]; + uint64_t f11 = e[1U]; + uint64_t f12 = e[2U]; + uint64_t f13 = e[3U]; + uint64_t f14 = e[4U]; + uint64_t a0 = acc[0U]; + uint64_t a1 = acc[1U]; + uint64_t a2 = acc[2U]; + uint64_t a3 = acc[3U]; + uint64_t a4 = acc[4U]; + uint64_t a01 = a0 + f10; + uint64_t a11 = a1 + f11; + uint64_t a21 = a2 + f12; + uint64_t a31 = a3 + f13; + uint64_t a41 = a4 + f14; + uint64_t a02 = r0 * a01; + uint64_t a12 = r1 * a01; + uint64_t a22 = r2 * a01; + uint64_t a32 = r3 * a01; + uint64_t a42 = r4 * a01; + uint64_t a03 = a02 + r54 * a11; + 
uint64_t a13 = a12 + r0 * a11; + uint64_t a23 = a22 + r1 * a11; + uint64_t a33 = a32 + r2 * a11; + uint64_t a43 = a42 + r3 * a11; + uint64_t a04 = a03 + r53 * a21; + uint64_t a14 = a13 + r54 * a21; + uint64_t a24 = a23 + r0 * a21; + uint64_t a34 = a33 + r1 * a21; + uint64_t a44 = a43 + r2 * a21; + uint64_t a05 = a04 + r52 * a31; + uint64_t a15 = a14 + r53 * a31; + uint64_t a25 = a24 + r54 * a31; + uint64_t a35 = a34 + r0 * a31; + uint64_t a45 = a44 + r1 * a31; + uint64_t a06 = a05 + r51 * a41; + uint64_t a16 = a15 + r52 * a41; + uint64_t a26 = a25 + r53 * a41; + uint64_t a36 = a35 + r54 * a41; + uint64_t a46 = a45 + r0 * a41; + uint64_t t0 = a06; + uint64_t t1 = a16; + uint64_t t2 = a26; + uint64_t t3 = a36; + uint64_t t4 = a46; + uint64_t mask26 = (uint64_t)0x3ffffffU; + uint64_t z0 = t0 >> (uint32_t)26U; + uint64_t z1 = t3 >> (uint32_t)26U; + uint64_t x0 = t0 & mask26; + uint64_t x3 = t3 & mask26; + uint64_t x1 = t1 + z0; + uint64_t x4 = t4 + z1; + uint64_t z01 = x1 >> (uint32_t)26U; + uint64_t z11 = x4 >> (uint32_t)26U; + uint64_t t = z11 << (uint32_t)2U; + uint64_t z12 = z11 + t; + uint64_t x11 = x1 & mask26; + uint64_t x41 = x4 & mask26; + uint64_t x2 = t2 + z01; + uint64_t x01 = x0 + z12; + uint64_t z02 = x2 >> (uint32_t)26U; + uint64_t z13 = x01 >> (uint32_t)26U; + uint64_t x21 = x2 & mask26; + uint64_t x02 = x01 & mask26; + uint64_t x31 = x3 + z02; + uint64_t x12 = x11 + z13; + uint64_t z03 = x31 >> (uint32_t)26U; + uint64_t x32 = x31 & mask26; + uint64_t x42 = x41 + z03; + uint64_t o0 = x02; + uint64_t o1 = x12; + uint64_t o2 = x21; + uint64_t o3 = x32; + uint64_t o4 = x42; + acc[0U] = o0; + acc[1U] = o1; + acc[2U] = o2; + acc[3U] = o3; + acc[4U] = o4; +} + +void +Hacl_Poly1305_32_poly1305_update(uint64_t *ctx, uint32_t len, uint8_t *text) +{ + uint64_t *pre = ctx + (uint32_t)5U; + uint64_t *acc = ctx; + uint32_t nb = len / (uint32_t)16U; + uint32_t rem = len % (uint32_t)16U; + for (uint32_t i = (uint32_t)0U; i < nb; i++) { + uint8_t *block = text + i * 
(uint32_t)16U; + uint64_t e[5U] = { 0U }; + uint64_t u0 = load64_le(block); + uint64_t lo = u0; + uint64_t u = load64_le(block + (uint32_t)8U); + uint64_t hi = u; + uint64_t f0 = lo; + uint64_t f1 = hi; + uint64_t f010 = f0 & (uint64_t)0x3ffffffU; + uint64_t f110 = f0 >> (uint32_t)26U & (uint64_t)0x3ffffffU; + uint64_t f20 = f0 >> (uint32_t)52U | (f1 & (uint64_t)0x3fffU) << (uint32_t)12U; + uint64_t f30 = f1 >> (uint32_t)14U & (uint64_t)0x3ffffffU; + uint64_t f40 = f1 >> (uint32_t)40U; + uint64_t f01 = f010; + uint64_t f111 = f110; + uint64_t f2 = f20; + uint64_t f3 = f30; + uint64_t f41 = f40; + e[0U] = f01; + e[1U] = f111; + e[2U] = f2; + e[3U] = f3; + e[4U] = f41; + uint64_t b = (uint64_t)0x1000000U; + uint64_t mask = b; + uint64_t f4 = e[4U]; + e[4U] = f4 | mask; + uint64_t *r = pre; + uint64_t *r5 = pre + (uint32_t)5U; + uint64_t r0 = r[0U]; + uint64_t r1 = r[1U]; + uint64_t r2 = r[2U]; + uint64_t r3 = r[3U]; + uint64_t r4 = r[4U]; + uint64_t r51 = r5[1U]; + uint64_t r52 = r5[2U]; + uint64_t r53 = r5[3U]; + uint64_t r54 = r5[4U]; + uint64_t f10 = e[0U]; + uint64_t f11 = e[1U]; + uint64_t f12 = e[2U]; + uint64_t f13 = e[3U]; + uint64_t f14 = e[4U]; + uint64_t a0 = acc[0U]; + uint64_t a1 = acc[1U]; + uint64_t a2 = acc[2U]; + uint64_t a3 = acc[3U]; + uint64_t a4 = acc[4U]; + uint64_t a01 = a0 + f10; + uint64_t a11 = a1 + f11; + uint64_t a21 = a2 + f12; + uint64_t a31 = a3 + f13; + uint64_t a41 = a4 + f14; + uint64_t a02 = r0 * a01; + uint64_t a12 = r1 * a01; + uint64_t a22 = r2 * a01; + uint64_t a32 = r3 * a01; + uint64_t a42 = r4 * a01; + uint64_t a03 = a02 + r54 * a11; + uint64_t a13 = a12 + r0 * a11; + uint64_t a23 = a22 + r1 * a11; + uint64_t a33 = a32 + r2 * a11; + uint64_t a43 = a42 + r3 * a11; + uint64_t a04 = a03 + r53 * a21; + uint64_t a14 = a13 + r54 * a21; + uint64_t a24 = a23 + r0 * a21; + uint64_t a34 = a33 + r1 * a21; + uint64_t a44 = a43 + r2 * a21; + uint64_t a05 = a04 + r52 * a31; + uint64_t a15 = a14 + r53 * a31; + uint64_t a25 = a24 + r54 * 
a31; + uint64_t a35 = a34 + r0 * a31; + uint64_t a45 = a44 + r1 * a31; + uint64_t a06 = a05 + r51 * a41; + uint64_t a16 = a15 + r52 * a41; + uint64_t a26 = a25 + r53 * a41; + uint64_t a36 = a35 + r54 * a41; + uint64_t a46 = a45 + r0 * a41; + uint64_t t0 = a06; + uint64_t t1 = a16; + uint64_t t2 = a26; + uint64_t t3 = a36; + uint64_t t4 = a46; + uint64_t mask26 = (uint64_t)0x3ffffffU; + uint64_t z0 = t0 >> (uint32_t)26U; + uint64_t z1 = t3 >> (uint32_t)26U; + uint64_t x0 = t0 & mask26; + uint64_t x3 = t3 & mask26; + uint64_t x1 = t1 + z0; + uint64_t x4 = t4 + z1; + uint64_t z01 = x1 >> (uint32_t)26U; + uint64_t z11 = x4 >> (uint32_t)26U; + uint64_t t = z11 << (uint32_t)2U; + uint64_t z12 = z11 + t; + uint64_t x11 = x1 & mask26; + uint64_t x41 = x4 & mask26; + uint64_t x2 = t2 + z01; + uint64_t x01 = x0 + z12; + uint64_t z02 = x2 >> (uint32_t)26U; + uint64_t z13 = x01 >> (uint32_t)26U; + uint64_t x21 = x2 & mask26; + uint64_t x02 = x01 & mask26; + uint64_t x31 = x3 + z02; + uint64_t x12 = x11 + z13; + uint64_t z03 = x31 >> (uint32_t)26U; + uint64_t x32 = x31 & mask26; + uint64_t x42 = x41 + z03; + uint64_t o0 = x02; + uint64_t o1 = x12; + uint64_t o2 = x21; + uint64_t o3 = x32; + uint64_t o4 = x42; + acc[0U] = o0; + acc[1U] = o1; + acc[2U] = o2; + acc[3U] = o3; + acc[4U] = o4; + } + if (rem > (uint32_t)0U) { + uint8_t *last = text + nb * (uint32_t)16U; + uint64_t e[5U] = { 0U }; + uint8_t tmp[16U] = { 0U }; + memcpy(tmp, last, rem * sizeof(uint8_t)); + uint64_t u0 = load64_le(tmp); + uint64_t lo = u0; + uint64_t u = load64_le(tmp + (uint32_t)8U); + uint64_t hi = u; + uint64_t f0 = lo; + uint64_t f1 = hi; + uint64_t f010 = f0 & (uint64_t)0x3ffffffU; + uint64_t f110 = f0 >> (uint32_t)26U & (uint64_t)0x3ffffffU; + uint64_t f20 = f0 >> (uint32_t)52U | (f1 & (uint64_t)0x3fffU) << (uint32_t)12U; + uint64_t f30 = f1 >> (uint32_t)14U & (uint64_t)0x3ffffffU; + uint64_t f40 = f1 >> (uint32_t)40U; + uint64_t f01 = f010; + uint64_t f111 = f110; + uint64_t f2 = f20; + uint64_t f3 
= f30; + uint64_t f4 = f40; + e[0U] = f01; + e[1U] = f111; + e[2U] = f2; + e[3U] = f3; + e[4U] = f4; + uint64_t b = (uint64_t)1U << rem * (uint32_t)8U % (uint32_t)26U; + uint64_t mask = b; + uint64_t fi = e[rem * (uint32_t)8U / (uint32_t)26U]; + e[rem * (uint32_t)8U / (uint32_t)26U] = fi | mask; + uint64_t *r = pre; + uint64_t *r5 = pre + (uint32_t)5U; + uint64_t r0 = r[0U]; + uint64_t r1 = r[1U]; + uint64_t r2 = r[2U]; + uint64_t r3 = r[3U]; + uint64_t r4 = r[4U]; + uint64_t r51 = r5[1U]; + uint64_t r52 = r5[2U]; + uint64_t r53 = r5[3U]; + uint64_t r54 = r5[4U]; + uint64_t f10 = e[0U]; + uint64_t f11 = e[1U]; + uint64_t f12 = e[2U]; + uint64_t f13 = e[3U]; + uint64_t f14 = e[4U]; + uint64_t a0 = acc[0U]; + uint64_t a1 = acc[1U]; + uint64_t a2 = acc[2U]; + uint64_t a3 = acc[3U]; + uint64_t a4 = acc[4U]; + uint64_t a01 = a0 + f10; + uint64_t a11 = a1 + f11; + uint64_t a21 = a2 + f12; + uint64_t a31 = a3 + f13; + uint64_t a41 = a4 + f14; + uint64_t a02 = r0 * a01; + uint64_t a12 = r1 * a01; + uint64_t a22 = r2 * a01; + uint64_t a32 = r3 * a01; + uint64_t a42 = r4 * a01; + uint64_t a03 = a02 + r54 * a11; + uint64_t a13 = a12 + r0 * a11; + uint64_t a23 = a22 + r1 * a11; + uint64_t a33 = a32 + r2 * a11; + uint64_t a43 = a42 + r3 * a11; + uint64_t a04 = a03 + r53 * a21; + uint64_t a14 = a13 + r54 * a21; + uint64_t a24 = a23 + r0 * a21; + uint64_t a34 = a33 + r1 * a21; + uint64_t a44 = a43 + r2 * a21; + uint64_t a05 = a04 + r52 * a31; + uint64_t a15 = a14 + r53 * a31; + uint64_t a25 = a24 + r54 * a31; + uint64_t a35 = a34 + r0 * a31; + uint64_t a45 = a44 + r1 * a31; + uint64_t a06 = a05 + r51 * a41; + uint64_t a16 = a15 + r52 * a41; + uint64_t a26 = a25 + r53 * a41; + uint64_t a36 = a35 + r54 * a41; + uint64_t a46 = a45 + r0 * a41; + uint64_t t0 = a06; + uint64_t t1 = a16; + uint64_t t2 = a26; + uint64_t t3 = a36; + uint64_t t4 = a46; + uint64_t mask26 = (uint64_t)0x3ffffffU; + uint64_t z0 = t0 >> (uint32_t)26U; + uint64_t z1 = t3 >> (uint32_t)26U; + uint64_t x0 = t0 & 
mask26; + uint64_t x3 = t3 & mask26; + uint64_t x1 = t1 + z0; + uint64_t x4 = t4 + z1; + uint64_t z01 = x1 >> (uint32_t)26U; + uint64_t z11 = x4 >> (uint32_t)26U; + uint64_t t = z11 << (uint32_t)2U; + uint64_t z12 = z11 + t; + uint64_t x11 = x1 & mask26; + uint64_t x41 = x4 & mask26; + uint64_t x2 = t2 + z01; + uint64_t x01 = x0 + z12; + uint64_t z02 = x2 >> (uint32_t)26U; + uint64_t z13 = x01 >> (uint32_t)26U; + uint64_t x21 = x2 & mask26; + uint64_t x02 = x01 & mask26; + uint64_t x31 = x3 + z02; + uint64_t x12 = x11 + z13; + uint64_t z03 = x31 >> (uint32_t)26U; + uint64_t x32 = x31 & mask26; + uint64_t x42 = x41 + z03; + uint64_t o0 = x02; + uint64_t o1 = x12; + uint64_t o2 = x21; + uint64_t o3 = x32; + uint64_t o4 = x42; + acc[0U] = o0; + acc[1U] = o1; + acc[2U] = o2; + acc[3U] = o3; + acc[4U] = o4; + return; + } +} + +void +Hacl_Poly1305_32_poly1305_finish(uint8_t *tag, uint8_t *key, uint64_t *ctx) +{ + uint64_t *acc = ctx; + uint8_t *ks = key + (uint32_t)16U; + uint64_t f0 = acc[0U]; + uint64_t f13 = acc[1U]; + uint64_t f23 = acc[2U]; + uint64_t f33 = acc[3U]; + uint64_t f40 = acc[4U]; + uint64_t l0 = f0 + (uint64_t)0U; + uint64_t tmp00 = l0 & (uint64_t)0x3ffffffU; + uint64_t c00 = l0 >> (uint32_t)26U; + uint64_t l1 = f13 + c00; + uint64_t tmp10 = l1 & (uint64_t)0x3ffffffU; + uint64_t c10 = l1 >> (uint32_t)26U; + uint64_t l2 = f23 + c10; + uint64_t tmp20 = l2 & (uint64_t)0x3ffffffU; + uint64_t c20 = l2 >> (uint32_t)26U; + uint64_t l3 = f33 + c20; + uint64_t tmp30 = l3 & (uint64_t)0x3ffffffU; + uint64_t c30 = l3 >> (uint32_t)26U; + uint64_t l4 = f40 + c30; + uint64_t tmp40 = l4 & (uint64_t)0x3ffffffU; + uint64_t c40 = l4 >> (uint32_t)26U; + uint64_t f010 = tmp00 + c40 * (uint64_t)5U; + uint64_t f110 = tmp10; + uint64_t f210 = tmp20; + uint64_t f310 = tmp30; + uint64_t f410 = tmp40; + uint64_t l = f010 + (uint64_t)0U; + uint64_t tmp0 = l & (uint64_t)0x3ffffffU; + uint64_t c0 = l >> (uint32_t)26U; + uint64_t l5 = f110 + c0; + uint64_t tmp1 = l5 & 
(uint64_t)0x3ffffffU; + uint64_t c1 = l5 >> (uint32_t)26U; + uint64_t l6 = f210 + c1; + uint64_t tmp2 = l6 & (uint64_t)0x3ffffffU; + uint64_t c2 = l6 >> (uint32_t)26U; + uint64_t l7 = f310 + c2; + uint64_t tmp3 = l7 & (uint64_t)0x3ffffffU; + uint64_t c3 = l7 >> (uint32_t)26U; + uint64_t l8 = f410 + c3; + uint64_t tmp4 = l8 & (uint64_t)0x3ffffffU; + uint64_t c4 = l8 >> (uint32_t)26U; + uint64_t f02 = tmp0 + c4 * (uint64_t)5U; + uint64_t f12 = tmp1; + uint64_t f22 = tmp2; + uint64_t f32 = tmp3; + uint64_t f42 = tmp4; + uint64_t mh = (uint64_t)0x3ffffffU; + uint64_t ml = (uint64_t)0x3fffffbU; + uint64_t mask = FStar_UInt64_eq_mask(f42, mh); + uint64_t mask1 = mask & FStar_UInt64_eq_mask(f32, mh); + uint64_t mask2 = mask1 & FStar_UInt64_eq_mask(f22, mh); + uint64_t mask3 = mask2 & FStar_UInt64_eq_mask(f12, mh); + uint64_t mask4 = mask3 & ~~FStar_UInt64_gte_mask(f02, ml); + uint64_t ph = mask4 & mh; + uint64_t pl = mask4 & ml; + uint64_t o0 = f02 - pl; + uint64_t o1 = f12 - ph; + uint64_t o2 = f22 - ph; + uint64_t o3 = f32 - ph; + uint64_t o4 = f42 - ph; + uint64_t f011 = o0; + uint64_t f111 = o1; + uint64_t f211 = o2; + uint64_t f311 = o3; + uint64_t f411 = o4; + acc[0U] = f011; + acc[1U] = f111; + acc[2U] = f211; + acc[3U] = f311; + acc[4U] = f411; + uint64_t f00 = acc[0U]; + uint64_t f1 = acc[1U]; + uint64_t f2 = acc[2U]; + uint64_t f3 = acc[3U]; + uint64_t f4 = acc[4U]; + uint64_t f01 = f00; + uint64_t f112 = f1; + uint64_t f212 = f2; + uint64_t f312 = f3; + uint64_t f41 = f4; + uint64_t lo = (f01 | f112 << (uint32_t)26U) | f212 << (uint32_t)52U; + uint64_t hi = (f212 >> (uint32_t)12U | f312 << (uint32_t)14U) | f41 << (uint32_t)40U; + uint64_t f10 = lo; + uint64_t f11 = hi; + uint64_t u0 = load64_le(ks); + uint64_t lo0 = u0; + uint64_t u = load64_le(ks + (uint32_t)8U); + uint64_t hi0 = u; + uint64_t f20 = lo0; + uint64_t f21 = hi0; + uint64_t r0 = f10 + f20; + uint64_t r1 = f11 + f21; + uint64_t c = (r0 ^ ((r0 ^ f20) | ((r0 - f20) ^ f20))) >> (uint32_t)63U; + 
uint64_t r11 = r1 + c; + uint64_t f30 = r0; + uint64_t f31 = r11; + store64_le(tag, f30); + store64_le(tag + (uint32_t)8U, f31); +} + +void +Hacl_Poly1305_32_poly1305_mac(uint8_t *tag, uint32_t len, uint8_t *text, uint8_t *key) +{ + uint64_t ctx[25U] = { 0U }; + Hacl_Poly1305_32_poly1305_init(ctx, key); + Hacl_Poly1305_32_poly1305_update(ctx, len, text); + Hacl_Poly1305_32_poly1305_finish(tag, key, ctx); +} diff --git a/security/nss/lib/freebl/verified/Hacl_Poly1305_32.h b/security/nss/lib/freebl/verified/Hacl_Poly1305_32.h new file mode 100644 index 0000000000..84a2f606b1 --- /dev/null +++ b/security/nss/lib/freebl/verified/Hacl_Poly1305_32.h @@ -0,0 +1,55 @@ +/* MIT License + * + * Copyright (c) 2016-2020 INRIA, CMU and Microsoft Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#ifndef __Hacl_Poly1305_32_H +#define __Hacl_Poly1305_32_H + +#if defined(__cplusplus) +extern "C" { +#endif + +#include <string.h> +#include "krml/internal/types.h" +#include "krml/lowstar_endianness.h" +#include "krml/internal/target.h" + +#include "Hacl_Krmllib.h" + +typedef uint64_t *Hacl_Poly1305_32_poly1305_ctx; + +void Hacl_Poly1305_32_poly1305_init(uint64_t *ctx, uint8_t *key); + +void Hacl_Poly1305_32_poly1305_update1(uint64_t *ctx, uint8_t *text); + +void Hacl_Poly1305_32_poly1305_update(uint64_t *ctx, uint32_t len, uint8_t *text); + +void Hacl_Poly1305_32_poly1305_finish(uint8_t *tag, uint8_t *key, uint64_t *ctx); + +void Hacl_Poly1305_32_poly1305_mac(uint8_t *tag, uint32_t len, uint8_t *text, uint8_t *key); + +#if defined(__cplusplus) +} +#endif + +#define __Hacl_Poly1305_32_H_DEFINED +#endif diff --git a/security/nss/lib/freebl/verified/config.h b/security/nss/lib/freebl/verified/config.h new file mode 100644 index 0000000000..e69de29bb2 --- /dev/null +++ b/security/nss/lib/freebl/verified/config.h diff --git a/security/nss/lib/freebl/verified/curve25519-inline.h b/security/nss/lib/freebl/verified/curve25519-inline.h new file mode 100644 index 0000000000..690e75a1b9 --- /dev/null +++ b/security/nss/lib/freebl/verified/curve25519-inline.h @@ -0,0 +1,942 @@ +#ifdef __GNUC__ +#if defined(__x86_64__) || defined(_M_X64) +#pragma once +#include <inttypes.h> + +// Computes the addition of four-element f1 with value in f2 +// and returns the carry (if any) +static inline void +add_scalar(uint64_t *out, uint64_t *f1, uint64_t f2) +{ + __asm__ volatile( + // Clear registers to propagate the carry bit + " xor %%r8d, %%r8d;" + " xor %%r9d, %%r9d;" + " xor %%r10d, %%r10d;" + " xor %%r11d, %%r11d;" + " xor %%eax, %%eax;" + + // Begin addition chain + " addq 0(%2), %0;" + " movq %0, 0(%1);" + " adcxq 8(%2), %%r8;" + " movq %%r8, 8(%1);" + " adcxq 16(%2), %%r9;" + " movq %%r9, 16(%1);" + " adcxq 24(%2), %%r10;" + " movq %%r10, 24(%1);" + + // Return the carry 
bit in a register + " adcx %%r11, %%rax;" + : "+&r"(f2) + : "r"(out), "r"(f1) + : "%rax", "%r8", "%r9", "%r10", "%r11", "memory", "cc"); +} + +// Computes the field addition of two field elements +static inline void +fadd(uint64_t *out, uint64_t *f1, uint64_t *f2) +{ + __asm__ volatile( + // Compute the raw addition of f1 + f2 + " movq 0(%0), %%r8;" + " addq 0(%2), %%r8;" + " movq 8(%0), %%r9;" + " adcxq 8(%2), %%r9;" + " movq 16(%0), %%r10;" + " adcxq 16(%2), %%r10;" + " movq 24(%0), %%r11;" + " adcxq 24(%2), %%r11;" + + /////// Wrap the result back into the field ////// + + // Step 1: Compute carry*38 + " mov $0, %%rax;" + " mov $38, %0;" + " cmovc %0, %%rax;" + + // Step 2: Add carry*38 to the original sum + " xor %%ecx, %%ecx;" + " add %%rax, %%r8;" + " adcx %%rcx, %%r9;" + " movq %%r9, 8(%1);" + " adcx %%rcx, %%r10;" + " movq %%r10, 16(%1);" + " adcx %%rcx, %%r11;" + " movq %%r11, 24(%1);" + + // Step 3: Fold the carry bit back in; guaranteed not to carry at this point + " mov $0, %%rax;" + " cmovc %0, %%rax;" + " add %%rax, %%r8;" + " movq %%r8, 0(%1);" + : "+&r"(f2) + : "r"(out), "r"(f1) + : "%rax", "%rcx", "%r8", "%r9", "%r10", "%r11", "memory", "cc"); +} + +// Computes the field substraction of two field elements +static inline void +fsub(uint64_t *out, uint64_t *f1, uint64_t *f2) +{ + __asm__ volatile( + // Compute the raw substraction of f1-f2 + " movq 0(%1), %%r8;" + " subq 0(%2), %%r8;" + " movq 8(%1), %%r9;" + " sbbq 8(%2), %%r9;" + " movq 16(%1), %%r10;" + " sbbq 16(%2), %%r10;" + " movq 24(%1), %%r11;" + " sbbq 24(%2), %%r11;" + + /////// Wrap the result back into the field ////// + + // Step 1: Compute carry*38 + " mov $0, %%rax;" + " mov $38, %%rcx;" + " cmovc %%rcx, %%rax;" + + // Step 2: Substract carry*38 from the original difference + " sub %%rax, %%r8;" + " sbb $0, %%r9;" + " sbb $0, %%r10;" + " sbb $0, %%r11;" + + // Step 3: Fold the carry bit back in; guaranteed not to carry at this point + " mov $0, %%rax;" + " cmovc %%rcx, %%rax;" + " sub 
%%rax, %%r8;" + + // Store the result + " movq %%r8, 0(%0);" + " movq %%r9, 8(%0);" + " movq %%r10, 16(%0);" + " movq %%r11, 24(%0);" + : + : "r"(out), "r"(f1), "r"(f2) + : "%rax", "%rcx", "%r8", "%r9", "%r10", "%r11", "memory", "cc"); +} + +// Computes a field multiplication: out <- f1 * f2 +// Uses the 8-element buffer tmp for intermediate results +static inline void +fmul(uint64_t *out, uint64_t *f1, uint64_t *f2, uint64_t *tmp) +{ + __asm__ volatile( + + /////// Compute the raw multiplication: tmp <- src1 * src2 ////// + + // Compute src1[0] * src2 + " movq 0(%0), %%rdx;" + " mulxq 0(%1), %%r8, %%r9;" + " xor %%r10d, %%r10d;" + " movq %%r8, 0(%2);" + " mulxq 8(%1), %%r10, %%r11;" + " adox %%r9, %%r10;" + " movq %%r10, 8(%2);" + " mulxq 16(%1), %%rbx, %%r13;" + " adox %%r11, %%rbx;" + " mulxq 24(%1), %%r14, %%rdx;" + " adox %%r13, %%r14;" + " mov $0, %%rax;" + " adox %%rdx, %%rax;" + + // Compute src1[1] * src2 + " movq 8(%0), %%rdx;" + " mulxq 0(%1), %%r8, %%r9;" + " xor %%r10d, %%r10d;" + " adcxq 8(%2), %%r8;" + " movq %%r8, 8(%2);" + " mulxq 8(%1), %%r10, %%r11;" + " adox %%r9, %%r10;" + " adcx %%rbx, %%r10;" + " movq %%r10, 16(%2);" + " mulxq 16(%1), %%rbx, %%r13;" + " adox %%r11, %%rbx;" + " adcx %%r14, %%rbx;" + " mov $0, %%r8;" + " mulxq 24(%1), %%r14, %%rdx;" + " adox %%r13, %%r14;" + " adcx %%rax, %%r14;" + " mov $0, %%rax;" + " adox %%rdx, %%rax;" + " adcx %%r8, %%rax;" + + // Compute src1[2] * src2 + " movq 16(%0), %%rdx;" + " mulxq 0(%1), %%r8, %%r9;" + " xor %%r10d, %%r10d;" + " adcxq 16(%2), %%r8;" + " movq %%r8, 16(%2);" + " mulxq 8(%1), %%r10, %%r11;" + " adox %%r9, %%r10;" + " adcx %%rbx, %%r10;" + " movq %%r10, 24(%2);" + " mulxq 16(%1), %%rbx, %%r13;" + " adox %%r11, %%rbx;" + " adcx %%r14, %%rbx;" + " mov $0, %%r8;" + " mulxq 24(%1), %%r14, %%rdx;" + " adox %%r13, %%r14;" + " adcx %%rax, %%r14;" + " mov $0, %%rax;" + " adox %%rdx, %%rax;" + " adcx %%r8, %%rax;" + + // Compute src1[3] * src2 + " movq 24(%0), %%rdx;" + " mulxq 0(%1), %%r8, 
%%r9;" + " xor %%r10d, %%r10d;" + " adcxq 24(%2), %%r8;" + " movq %%r8, 24(%2);" + " mulxq 8(%1), %%r10, %%r11;" + " adox %%r9, %%r10;" + " adcx %%rbx, %%r10;" + " movq %%r10, 32(%2);" + " mulxq 16(%1), %%rbx, %%r13;" + " adox %%r11, %%rbx;" + " adcx %%r14, %%rbx;" + " movq %%rbx, 40(%2);" + " mov $0, %%r8;" + " mulxq 24(%1), %%r14, %%rdx;" + " adox %%r13, %%r14;" + " adcx %%rax, %%r14;" + " movq %%r14, 48(%2);" + " mov $0, %%rax;" + " adox %%rdx, %%rax;" + " adcx %%r8, %%rax;" + " movq %%rax, 56(%2);" + + // Line up pointers + " mov %2, %0;" + " mov %3, %2;" + + /////// Wrap the result back into the field ////// + + // Step 1: Compute dst + carry == tmp_hi * 38 + tmp_lo + " mov $38, %%rdx;" + " mulxq 32(%0), %%r8, %%r13;" + " xor %k1, %k1;" + " adoxq 0(%0), %%r8;" + " mulxq 40(%0), %%r9, %%rbx;" + " adcx %%r13, %%r9;" + " adoxq 8(%0), %%r9;" + " mulxq 48(%0), %%r10, %%r13;" + " adcx %%rbx, %%r10;" + " adoxq 16(%0), %%r10;" + " mulxq 56(%0), %%r11, %%rax;" + " adcx %%r13, %%r11;" + " adoxq 24(%0), %%r11;" + " adcx %1, %%rax;" + " adox %1, %%rax;" + " imul %%rdx, %%rax;" + + // Step 2: Fold the carry back into dst + " add %%rax, %%r8;" + " adcx %1, %%r9;" + " movq %%r9, 8(%2);" + " adcx %1, %%r10;" + " movq %%r10, 16(%2);" + " adcx %1, %%r11;" + " movq %%r11, 24(%2);" + + // Step 3: Fold the carry bit back in; guaranteed not to carry at this point + " mov $0, %%rax;" + " cmovc %%rdx, %%rax;" + " add %%rax, %%r8;" + " movq %%r8, 0(%2);" + : "+&r"(f1), "+&r"(f2), "+&r"(tmp) + : "r"(out) + : "%rax", "%rbx", "%rdx", "%r8", "%r9", "%r10", "%r11", "%r13", "%r14", "memory", "cc"); +} + +// Computes two field multiplications: +// out[0] <- f1[0] * f2[0] +// out[1] <- f1[1] * f2[1] +// Uses the 16-element buffer tmp for intermediate results: +static inline void +fmul2(uint64_t *out, uint64_t *f1, uint64_t *f2, uint64_t *tmp) +{ + __asm__ volatile( + + /////// Compute the raw multiplication tmp[0] <- f1[0] * f2[0] ////// + + // Compute src1[0] * src2 + " movq 0(%0), %%rdx;" + 
" mulxq 0(%1), %%r8, %%r9;" + " xor %%r10d, %%r10d;" + " movq %%r8, 0(%2);" + " mulxq 8(%1), %%r10, %%r11;" + " adox %%r9, %%r10;" + " movq %%r10, 8(%2);" + " mulxq 16(%1), %%rbx, %%r13;" + " adox %%r11, %%rbx;" + " mulxq 24(%1), %%r14, %%rdx;" + " adox %%r13, %%r14;" + " mov $0, %%rax;" + " adox %%rdx, %%rax;" + + // Compute src1[1] * src2 + " movq 8(%0), %%rdx;" + " mulxq 0(%1), %%r8, %%r9;" + " xor %%r10d, %%r10d;" + " adcxq 8(%2), %%r8;" + " movq %%r8, 8(%2);" + " mulxq 8(%1), %%r10, %%r11;" + " adox %%r9, %%r10;" + " adcx %%rbx, %%r10;" + " movq %%r10, 16(%2);" + " mulxq 16(%1), %%rbx, %%r13;" + " adox %%r11, %%rbx;" + " adcx %%r14, %%rbx;" + " mov $0, %%r8;" + " mulxq 24(%1), %%r14, %%rdx;" + " adox %%r13, %%r14;" + " adcx %%rax, %%r14;" + " mov $0, %%rax;" + " adox %%rdx, %%rax;" + " adcx %%r8, %%rax;" + + // Compute src1[2] * src2 + " movq 16(%0), %%rdx;" + " mulxq 0(%1), %%r8, %%r9;" + " xor %%r10d, %%r10d;" + " adcxq 16(%2), %%r8;" + " movq %%r8, 16(%2);" + " mulxq 8(%1), %%r10, %%r11;" + " adox %%r9, %%r10;" + " adcx %%rbx, %%r10;" + " movq %%r10, 24(%2);" + " mulxq 16(%1), %%rbx, %%r13;" + " adox %%r11, %%rbx;" + " adcx %%r14, %%rbx;" + " mov $0, %%r8;" + " mulxq 24(%1), %%r14, %%rdx;" + " adox %%r13, %%r14;" + " adcx %%rax, %%r14;" + " mov $0, %%rax;" + " adox %%rdx, %%rax;" + " adcx %%r8, %%rax;" + + // Compute src1[3] * src2 + " movq 24(%0), %%rdx;" + " mulxq 0(%1), %%r8, %%r9;" + " xor %%r10d, %%r10d;" + " adcxq 24(%2), %%r8;" + " movq %%r8, 24(%2);" + " mulxq 8(%1), %%r10, %%r11;" + " adox %%r9, %%r10;" + " adcx %%rbx, %%r10;" + " movq %%r10, 32(%2);" + " mulxq 16(%1), %%rbx, %%r13;" + " adox %%r11, %%rbx;" + " adcx %%r14, %%rbx;" + " movq %%rbx, 40(%2);" + " mov $0, %%r8;" + " mulxq 24(%1), %%r14, %%rdx;" + " adox %%r13, %%r14;" + " adcx %%rax, %%r14;" + " movq %%r14, 48(%2);" + " mov $0, %%rax;" + " adox %%rdx, %%rax;" + " adcx %%r8, %%rax;" + " movq %%rax, 56(%2);" + + /////// Compute the raw multiplication tmp[1] <- f1[1] * f2[1] ////// + + // 
Compute src1[0] * src2 + " movq 32(%0), %%rdx;" + " mulxq 32(%1), %%r8, %%r9;" + " xor %%r10d, %%r10d;" + " movq %%r8, 64(%2);" + " mulxq 40(%1), %%r10, %%r11;" + " adox %%r9, %%r10;" + " movq %%r10, 72(%2);" + " mulxq 48(%1), %%rbx, %%r13;" + " adox %%r11, %%rbx;" + " mulxq 56(%1), %%r14, %%rdx;" + " adox %%r13, %%r14;" + " mov $0, %%rax;" + " adox %%rdx, %%rax;" + + // Compute src1[1] * src2 + " movq 40(%0), %%rdx;" + " mulxq 32(%1), %%r8, %%r9;" + " xor %%r10d, %%r10d;" + " adcxq 72(%2), %%r8;" + " movq %%r8, 72(%2);" + " mulxq 40(%1), %%r10, %%r11;" + " adox %%r9, %%r10;" + " adcx %%rbx, %%r10;" + " movq %%r10, 80(%2);" + " mulxq 48(%1), %%rbx, %%r13;" + " adox %%r11, %%rbx;" + " adcx %%r14, %%rbx;" + " mov $0, %%r8;" + " mulxq 56(%1), %%r14, %%rdx;" + " adox %%r13, %%r14;" + " adcx %%rax, %%r14;" + " mov $0, %%rax;" + " adox %%rdx, %%rax;" + " adcx %%r8, %%rax;" + + // Compute src1[2] * src2 + " movq 48(%0), %%rdx;" + " mulxq 32(%1), %%r8, %%r9;" + " xor %%r10d, %%r10d;" + " adcxq 80(%2), %%r8;" + " movq %%r8, 80(%2);" + " mulxq 40(%1), %%r10, %%r11;" + " adox %%r9, %%r10;" + " adcx %%rbx, %%r10;" + " movq %%r10, 88(%2);" + " mulxq 48(%1), %%rbx, %%r13;" + " adox %%r11, %%rbx;" + " adcx %%r14, %%rbx;" + " mov $0, %%r8;" + " mulxq 56(%1), %%r14, %%rdx;" + " adox %%r13, %%r14;" + " adcx %%rax, %%r14;" + " mov $0, %%rax;" + " adox %%rdx, %%rax;" + " adcx %%r8, %%rax;" + + // Compute src1[3] * src2 + " movq 56(%0), %%rdx;" + " mulxq 32(%1), %%r8, %%r9;" + " xor %%r10d, %%r10d;" + " adcxq 88(%2), %%r8;" + " movq %%r8, 88(%2);" + " mulxq 40(%1), %%r10, %%r11;" + " adox %%r9, %%r10;" + " adcx %%rbx, %%r10;" + " movq %%r10, 96(%2);" + " mulxq 48(%1), %%rbx, %%r13;" + " adox %%r11, %%rbx;" + " adcx %%r14, %%rbx;" + " movq %%rbx, 104(%2);" + " mov $0, %%r8;" + " mulxq 56(%1), %%r14, %%rdx;" + " adox %%r13, %%r14;" + " adcx %%rax, %%r14;" + " movq %%r14, 112(%2);" + " mov $0, %%rax;" + " adox %%rdx, %%rax;" + " adcx %%r8, %%rax;" + " movq %%rax, 120(%2);" + + // Line up 
pointers + " mov %2, %0;" + " mov %3, %2;" + + /////// Wrap the results back into the field ////// + + // Step 1: Compute dst + carry == tmp_hi * 38 + tmp_lo + " mov $38, %%rdx;" + " mulxq 32(%0), %%r8, %%r13;" + " xor %k1, %k1;" + " adoxq 0(%0), %%r8;" + " mulxq 40(%0), %%r9, %%rbx;" + " adcx %%r13, %%r9;" + " adoxq 8(%0), %%r9;" + " mulxq 48(%0), %%r10, %%r13;" + " adcx %%rbx, %%r10;" + " adoxq 16(%0), %%r10;" + " mulxq 56(%0), %%r11, %%rax;" + " adcx %%r13, %%r11;" + " adoxq 24(%0), %%r11;" + " adcx %1, %%rax;" + " adox %1, %%rax;" + " imul %%rdx, %%rax;" + + // Step 2: Fold the carry back into dst + " add %%rax, %%r8;" + " adcx %1, %%r9;" + " movq %%r9, 8(%2);" + " adcx %1, %%r10;" + " movq %%r10, 16(%2);" + " adcx %1, %%r11;" + " movq %%r11, 24(%2);" + + // Step 3: Fold the carry bit back in; guaranteed not to carry at this point + " mov $0, %%rax;" + " cmovc %%rdx, %%rax;" + " add %%rax, %%r8;" + " movq %%r8, 0(%2);" + + // Step 1: Compute dst + carry == tmp_hi * 38 + tmp_lo + " mov $38, %%rdx;" + " mulxq 96(%0), %%r8, %%r13;" + " xor %k1, %k1;" + " adoxq 64(%0), %%r8;" + " mulxq 104(%0), %%r9, %%rbx;" + " adcx %%r13, %%r9;" + " adoxq 72(%0), %%r9;" + " mulxq 112(%0), %%r10, %%r13;" + " adcx %%rbx, %%r10;" + " adoxq 80(%0), %%r10;" + " mulxq 120(%0), %%r11, %%rax;" + " adcx %%r13, %%r11;" + " adoxq 88(%0), %%r11;" + " adcx %1, %%rax;" + " adox %1, %%rax;" + " imul %%rdx, %%rax;" + + // Step 2: Fold the carry back into dst + " add %%rax, %%r8;" + " adcx %1, %%r9;" + " movq %%r9, 40(%2);" + " adcx %1, %%r10;" + " movq %%r10, 48(%2);" + " adcx %1, %%r11;" + " movq %%r11, 56(%2);" + + // Step 3: Fold the carry bit back in; guaranteed not to carry at this point + " mov $0, %%rax;" + " cmovc %%rdx, %%rax;" + " add %%rax, %%r8;" + " movq %%r8, 32(%2);" + : "+&r"(f1), "+&r"(f2), "+&r"(tmp) + : "r"(out) + : "%rax", "%rbx", "%rdx", "%r8", "%r9", "%r10", "%r11", "%r13", "%r14", "memory", "cc"); +} + +// Computes the field multiplication of four-element f1 with value in 
f2 +// Requires f2 to be smaller than 2^17 +static inline void +fmul_scalar(uint64_t *out, uint64_t *f1, uint64_t f2) +{ + register uint64_t f2_r __asm__("rdx") = f2; + + __asm__ volatile( + // Compute the raw multiplication of f1*f2 + " mulxq 0(%2), %%r8, %%rcx;" // f1[0]*f2 + " mulxq 8(%2), %%r9, %%rbx;" // f1[1]*f2 + " add %%rcx, %%r9;" + " mov $0, %%rcx;" + " mulxq 16(%2), %%r10, %%r13;" // f1[2]*f2 + " adcx %%rbx, %%r10;" + " mulxq 24(%2), %%r11, %%rax;" // f1[3]*f2 + " adcx %%r13, %%r11;" + " adcx %%rcx, %%rax;" + + /////// Wrap the result back into the field ////// + + // Step 1: Compute carry*38 + " mov $38, %%rdx;" + " imul %%rdx, %%rax;" + + // Step 2: Fold the carry back into dst + " add %%rax, %%r8;" + " adcx %%rcx, %%r9;" + " movq %%r9, 8(%1);" + " adcx %%rcx, %%r10;" + " movq %%r10, 16(%1);" + " adcx %%rcx, %%r11;" + " movq %%r11, 24(%1);" + + // Step 3: Fold the carry bit back in; guaranteed not to carry at this point + " mov $0, %%rax;" + " cmovc %%rdx, %%rax;" + " add %%rax, %%r8;" + " movq %%r8, 0(%1);" + : "+&r"(f2_r) + : "r"(out), "r"(f1) + : "%rax", "%rbx", "%rcx", "%r8", "%r9", "%r10", "%r11", "%r13", "memory", "cc"); +} + +// Computes p1 <- bit ? 
p2 : p1 in constant time +static inline void +cswap2(uint64_t bit, uint64_t *p1, uint64_t *p2) +{ + __asm__ volatile( + // Transfer bit into CF flag + " add $18446744073709551615, %0;" + + // cswap p1[0], p2[0] + " movq 0(%1), %%r8;" + " movq 0(%2), %%r9;" + " mov %%r8, %%r10;" + " cmovc %%r9, %%r8;" + " cmovc %%r10, %%r9;" + " movq %%r8, 0(%1);" + " movq %%r9, 0(%2);" + + // cswap p1[1], p2[1] + " movq 8(%1), %%r8;" + " movq 8(%2), %%r9;" + " mov %%r8, %%r10;" + " cmovc %%r9, %%r8;" + " cmovc %%r10, %%r9;" + " movq %%r8, 8(%1);" + " movq %%r9, 8(%2);" + + // cswap p1[2], p2[2] + " movq 16(%1), %%r8;" + " movq 16(%2), %%r9;" + " mov %%r8, %%r10;" + " cmovc %%r9, %%r8;" + " cmovc %%r10, %%r9;" + " movq %%r8, 16(%1);" + " movq %%r9, 16(%2);" + + // cswap p1[3], p2[3] + " movq 24(%1), %%r8;" + " movq 24(%2), %%r9;" + " mov %%r8, %%r10;" + " cmovc %%r9, %%r8;" + " cmovc %%r10, %%r9;" + " movq %%r8, 24(%1);" + " movq %%r9, 24(%2);" + + // cswap p1[4], p2[4] + " movq 32(%1), %%r8;" + " movq 32(%2), %%r9;" + " mov %%r8, %%r10;" + " cmovc %%r9, %%r8;" + " cmovc %%r10, %%r9;" + " movq %%r8, 32(%1);" + " movq %%r9, 32(%2);" + + // cswap p1[5], p2[5] + " movq 40(%1), %%r8;" + " movq 40(%2), %%r9;" + " mov %%r8, %%r10;" + " cmovc %%r9, %%r8;" + " cmovc %%r10, %%r9;" + " movq %%r8, 40(%1);" + " movq %%r9, 40(%2);" + + // cswap p1[6], p2[6] + " movq 48(%1), %%r8;" + " movq 48(%2), %%r9;" + " mov %%r8, %%r10;" + " cmovc %%r9, %%r8;" + " cmovc %%r10, %%r9;" + " movq %%r8, 48(%1);" + " movq %%r9, 48(%2);" + + // cswap p1[7], p2[7] + " movq 56(%1), %%r8;" + " movq 56(%2), %%r9;" + " mov %%r8, %%r10;" + " cmovc %%r9, %%r8;" + " cmovc %%r10, %%r9;" + " movq %%r8, 56(%1);" + " movq %%r9, 56(%2);" + : "+&r"(bit) + : "r"(p1), "r"(p2) + : "%r8", "%r9", "%r10", "memory", "cc"); +} + +// Computes the square of a field element: out <- f * f +// Uses the 8-element buffer tmp for intermediate results +static inline void +fsqr(uint64_t *out, uint64_t *f, uint64_t *tmp) +{ + __asm__ volatile( + 
+ /////// Compute the raw multiplication: tmp <- f * f ////// + + // Step 1: Compute all partial products + " movq 0(%0), %%rdx;" // f[0] + " mulxq 8(%0), %%r8, %%r14;" + " xor %%r15d, %%r15d;" // f[1]*f[0] + " mulxq 16(%0), %%r9, %%r10;" + " adcx %%r14, %%r9;" // f[2]*f[0] + " mulxq 24(%0), %%rax, %%rcx;" + " adcx %%rax, %%r10;" // f[3]*f[0] + " movq 24(%0), %%rdx;" // f[3] + " mulxq 8(%0), %%r11, %%rbx;" + " adcx %%rcx, %%r11;" // f[1]*f[3] + " mulxq 16(%0), %%rax, %%r13;" + " adcx %%rax, %%rbx;" // f[2]*f[3] + " movq 8(%0), %%rdx;" + " adcx %%r15, %%r13;" // f1 + " mulxq 16(%0), %%rax, %%rcx;" + " mov $0, %%r14;" // f[2]*f[1] + + // Step 2: Compute two parallel carry chains + " xor %%r15d, %%r15d;" + " adox %%rax, %%r10;" + " adcx %%r8, %%r8;" + " adox %%rcx, %%r11;" + " adcx %%r9, %%r9;" + " adox %%r15, %%rbx;" + " adcx %%r10, %%r10;" + " adox %%r15, %%r13;" + " adcx %%r11, %%r11;" + " adox %%r15, %%r14;" + " adcx %%rbx, %%rbx;" + " adcx %%r13, %%r13;" + " adcx %%r14, %%r14;" + + // Step 3: Compute intermediate squares + " movq 0(%0), %%rdx;" + " mulx %%rdx, %%rax, %%rcx;" // f[0]^2 + " movq %%rax, 0(%1);" + " add %%rcx, %%r8;" + " movq %%r8, 8(%1);" + " movq 8(%0), %%rdx;" + " mulx %%rdx, %%rax, %%rcx;" // f[1]^2 + " adcx %%rax, %%r9;" + " movq %%r9, 16(%1);" + " adcx %%rcx, %%r10;" + " movq %%r10, 24(%1);" + " movq 16(%0), %%rdx;" + " mulx %%rdx, %%rax, %%rcx;" // f[2]^2 + " adcx %%rax, %%r11;" + " movq %%r11, 32(%1);" + " adcx %%rcx, %%rbx;" + " movq %%rbx, 40(%1);" + " movq 24(%0), %%rdx;" + " mulx %%rdx, %%rax, %%rcx;" // f[3]^2 + " adcx %%rax, %%r13;" + " movq %%r13, 48(%1);" + " adcx %%rcx, %%r14;" + " movq %%r14, 56(%1);" + + // Line up pointers + " mov %1, %0;" + " mov %2, %1;" + + /////// Wrap the result back into the field ////// + + // Step 1: Compute dst + carry == tmp_hi * 38 + tmp_lo + " mov $38, %%rdx;" + " mulxq 32(%0), %%r8, %%r13;" + " xor %%ecx, %%ecx;" + " adoxq 0(%0), %%r8;" + " mulxq 40(%0), %%r9, %%rbx;" + " adcx %%r13, %%r9;" + " adoxq 
8(%0), %%r9;" + " mulxq 48(%0), %%r10, %%r13;" + " adcx %%rbx, %%r10;" + " adoxq 16(%0), %%r10;" + " mulxq 56(%0), %%r11, %%rax;" + " adcx %%r13, %%r11;" + " adoxq 24(%0), %%r11;" + " adcx %%rcx, %%rax;" + " adox %%rcx, %%rax;" + " imul %%rdx, %%rax;" + + // Step 2: Fold the carry back into dst + " add %%rax, %%r8;" + " adcx %%rcx, %%r9;" + " movq %%r9, 8(%1);" + " adcx %%rcx, %%r10;" + " movq %%r10, 16(%1);" + " adcx %%rcx, %%r11;" + " movq %%r11, 24(%1);" + + // Step 3: Fold the carry bit back in; guaranteed not to carry at this point + " mov $0, %%rax;" + " cmovc %%rdx, %%rax;" + " add %%rax, %%r8;" + " movq %%r8, 0(%1);" + : "+&r"(f), "+&r"(tmp) + : "r"(out) + : "%rax", "%rbx", "%rcx", "%rdx", "%r8", "%r9", "%r10", "%r11", "%r13", "%r14", "%r15", "memory", "cc"); +} + +// Computes two field squarings: +// out[0] <- f[0] * f[0] +// out[1] <- f[1] * f[1] +// Uses the 16-element buffer tmp for intermediate results +static inline void +fsqr2(uint64_t *out, uint64_t *f, uint64_t *tmp) +{ + __asm__ volatile( + // Step 1: Compute all partial products + " movq 0(%0), %%rdx;" // f[0] + " mulxq 8(%0), %%r8, %%r14;" + " xor %%r15d, %%r15d;" // f[1]*f[0] + " mulxq 16(%0), %%r9, %%r10;" + " adcx %%r14, %%r9;" // f[2]*f[0] + " mulxq 24(%0), %%rax, %%rcx;" + " adcx %%rax, %%r10;" // f[3]*f[0] + " movq 24(%0), %%rdx;" // f[3] + " mulxq 8(%0), %%r11, %%rbx;" + " adcx %%rcx, %%r11;" // f[1]*f[3] + " mulxq 16(%0), %%rax, %%r13;" + " adcx %%rax, %%rbx;" // f[2]*f[3] + " movq 8(%0), %%rdx;" + " adcx %%r15, %%r13;" // f1 + " mulxq 16(%0), %%rax, %%rcx;" + " mov $0, %%r14;" // f[2]*f[1] + + // Step 2: Compute two parallel carry chains + " xor %%r15d, %%r15d;" + " adox %%rax, %%r10;" + " adcx %%r8, %%r8;" + " adox %%rcx, %%r11;" + " adcx %%r9, %%r9;" + " adox %%r15, %%rbx;" + " adcx %%r10, %%r10;" + " adox %%r15, %%r13;" + " adcx %%r11, %%r11;" + " adox %%r15, %%r14;" + " adcx %%rbx, %%rbx;" + " adcx %%r13, %%r13;" + " adcx %%r14, %%r14;" + + // Step 3: Compute intermediate squares + 
" movq 0(%0), %%rdx;" + " mulx %%rdx, %%rax, %%rcx;" // f[0]^2 + " movq %%rax, 0(%1);" + " add %%rcx, %%r8;" + " movq %%r8, 8(%1);" + " movq 8(%0), %%rdx;" + " mulx %%rdx, %%rax, %%rcx;" // f[1]^2 + " adcx %%rax, %%r9;" + " movq %%r9, 16(%1);" + " adcx %%rcx, %%r10;" + " movq %%r10, 24(%1);" + " movq 16(%0), %%rdx;" + " mulx %%rdx, %%rax, %%rcx;" // f[2]^2 + " adcx %%rax, %%r11;" + " movq %%r11, 32(%1);" + " adcx %%rcx, %%rbx;" + " movq %%rbx, 40(%1);" + " movq 24(%0), %%rdx;" + " mulx %%rdx, %%rax, %%rcx;" // f[3]^2 + " adcx %%rax, %%r13;" + " movq %%r13, 48(%1);" + " adcx %%rcx, %%r14;" + " movq %%r14, 56(%1);" + + // Step 1: Compute all partial products + " movq 32(%0), %%rdx;" // f[0] + " mulxq 40(%0), %%r8, %%r14;" + " xor %%r15d, %%r15d;" // f[1]*f[0] + " mulxq 48(%0), %%r9, %%r10;" + " adcx %%r14, %%r9;" // f[2]*f[0] + " mulxq 56(%0), %%rax, %%rcx;" + " adcx %%rax, %%r10;" // f[3]*f[0] + " movq 56(%0), %%rdx;" // f[3] + " mulxq 40(%0), %%r11, %%rbx;" + " adcx %%rcx, %%r11;" // f[1]*f[3] + " mulxq 48(%0), %%rax, %%r13;" + " adcx %%rax, %%rbx;" // f[2]*f[3] + " movq 40(%0), %%rdx;" + " adcx %%r15, %%r13;" // f1 + " mulxq 48(%0), %%rax, %%rcx;" + " mov $0, %%r14;" // f[2]*f[1] + + // Step 2: Compute two parallel carry chains + " xor %%r15d, %%r15d;" + " adox %%rax, %%r10;" + " adcx %%r8, %%r8;" + " adox %%rcx, %%r11;" + " adcx %%r9, %%r9;" + " adox %%r15, %%rbx;" + " adcx %%r10, %%r10;" + " adox %%r15, %%r13;" + " adcx %%r11, %%r11;" + " adox %%r15, %%r14;" + " adcx %%rbx, %%rbx;" + " adcx %%r13, %%r13;" + " adcx %%r14, %%r14;" + + // Step 3: Compute intermediate squares + " movq 32(%0), %%rdx;" + " mulx %%rdx, %%rax, %%rcx;" // f[0]^2 + " movq %%rax, 64(%1);" + " add %%rcx, %%r8;" + " movq %%r8, 72(%1);" + " movq 40(%0), %%rdx;" + " mulx %%rdx, %%rax, %%rcx;" // f[1]^2 + " adcx %%rax, %%r9;" + " movq %%r9, 80(%1);" + " adcx %%rcx, %%r10;" + " movq %%r10, 88(%1);" + " movq 48(%0), %%rdx;" + " mulx %%rdx, %%rax, %%rcx;" // f[2]^2 + " adcx %%rax, %%r11;" + " movq 
%%r11, 96(%1);" + " adcx %%rcx, %%rbx;" + " movq %%rbx, 104(%1);" + " movq 56(%0), %%rdx;" + " mulx %%rdx, %%rax, %%rcx;" // f[3]^2 + " adcx %%rax, %%r13;" + " movq %%r13, 112(%1);" + " adcx %%rcx, %%r14;" + " movq %%r14, 120(%1);" + + // Line up pointers + " mov %1, %0;" + " mov %2, %1;" + + // Step 1: Compute dst + carry == tmp_hi * 38 + tmp_lo + " mov $38, %%rdx;" + " mulxq 32(%0), %%r8, %%r13;" + " xor %%ecx, %%ecx;" + " adoxq 0(%0), %%r8;" + " mulxq 40(%0), %%r9, %%rbx;" + " adcx %%r13, %%r9;" + " adoxq 8(%0), %%r9;" + " mulxq 48(%0), %%r10, %%r13;" + " adcx %%rbx, %%r10;" + " adoxq 16(%0), %%r10;" + " mulxq 56(%0), %%r11, %%rax;" + " adcx %%r13, %%r11;" + " adoxq 24(%0), %%r11;" + " adcx %%rcx, %%rax;" + " adox %%rcx, %%rax;" + " imul %%rdx, %%rax;" + + // Step 2: Fold the carry back into dst + " add %%rax, %%r8;" + " adcx %%rcx, %%r9;" + " movq %%r9, 8(%1);" + " adcx %%rcx, %%r10;" + " movq %%r10, 16(%1);" + " adcx %%rcx, %%r11;" + " movq %%r11, 24(%1);" + + // Step 3: Fold the carry bit back in; guaranteed not to carry at this point + " mov $0, %%rax;" + " cmovc %%rdx, %%rax;" + " add %%rax, %%r8;" + " movq %%r8, 0(%1);" + + // Step 1: Compute dst + carry == tmp_hi * 38 + tmp_lo + " mov $38, %%rdx;" + " mulxq 96(%0), %%r8, %%r13;" + " xor %%ecx, %%ecx;" + " adoxq 64(%0), %%r8;" + " mulxq 104(%0), %%r9, %%rbx;" + " adcx %%r13, %%r9;" + " adoxq 72(%0), %%r9;" + " mulxq 112(%0), %%r10, %%r13;" + " adcx %%rbx, %%r10;" + " adoxq 80(%0), %%r10;" + " mulxq 120(%0), %%r11, %%rax;" + " adcx %%r13, %%r11;" + " adoxq 88(%0), %%r11;" + " adcx %%rcx, %%rax;" + " adox %%rcx, %%rax;" + " imul %%rdx, %%rax;" + + // Step 2: Fold the carry back into dst + " add %%rax, %%r8;" + " adcx %%rcx, %%r9;" + " movq %%r9, 40(%1);" + " adcx %%rcx, %%r10;" + " movq %%r10, 48(%1);" + " adcx %%rcx, %%r11;" + " movq %%r11, 56(%1);" + + // Step 3: Fold the carry bit back in; guaranteed not to carry at this point + " mov $0, %%rax;" + " cmovc %%rdx, %%rax;" + " add %%rax, %%r8;" + " movq 
%%r8, 32(%1);" + : "+&r"(f), "+&r"(tmp) + : "r"(out) + : "%rax", "%rbx", "%rcx", "%rdx", "%r8", "%r9", "%r10", "%r11", "%r13", "%r14", "%r15", "memory", "cc"); +} + +#endif /* defined(__x86_64__) || defined(_M_X64) */ +#endif /* __GNUC__ */ diff --git a/security/nss/lib/freebl/verified/internal/Hacl_Bignum.h b/security/nss/lib/freebl/verified/internal/Hacl_Bignum.h new file mode 100644 index 0000000000..6080d37873 --- /dev/null +++ b/security/nss/lib/freebl/verified/internal/Hacl_Bignum.h @@ -0,0 +1,312 @@ +/* MIT License + * + * Copyright (c) 2016-2020 INRIA, CMU and Microsoft Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#ifndef __internal_Hacl_Bignum_H +#define __internal_Hacl_Bignum_H + +#if defined(__cplusplus) +extern "C" { +#endif + +#include <string.h> +#include "krml/internal/types.h" +#include "krml/lowstar_endianness.h" +#include "krml/internal/target.h" + +#include "internal/Hacl_Krmllib.h" +#include "../Hacl_Bignum.h" +#include "lib_intrinsics.h" +void +Hacl_Bignum_Karatsuba_bn_karatsuba_mul_uint32( + uint32_t aLen, + uint32_t *a, + uint32_t *b, + uint32_t *tmp, + uint32_t *res); + +void +Hacl_Bignum_Karatsuba_bn_karatsuba_mul_uint64( + uint32_t aLen, + uint64_t *a, + uint64_t *b, + uint64_t *tmp, + uint64_t *res); + +void +Hacl_Bignum_Karatsuba_bn_karatsuba_sqr_uint32( + uint32_t aLen, + uint32_t *a, + uint32_t *tmp, + uint32_t *res); + +void +Hacl_Bignum_Karatsuba_bn_karatsuba_sqr_uint64( + uint32_t aLen, + uint64_t *a, + uint64_t *tmp, + uint64_t *res); + +void +Hacl_Bignum_bn_add_mod_n_u32( + uint32_t len1, + uint32_t *n, + uint32_t *a, + uint32_t *b, + uint32_t *res); + +void +Hacl_Bignum_bn_add_mod_n_u64( + uint32_t len1, + uint64_t *n, + uint64_t *a, + uint64_t *b, + uint64_t *res); + +void +Hacl_Bignum_bn_sub_mod_n_u32( + uint32_t len1, + uint32_t *n, + uint32_t *a, + uint32_t *b, + uint32_t *res); + +void +Hacl_Bignum_bn_sub_mod_n_u64( + uint32_t len1, + uint64_t *n, + uint64_t *a, + uint64_t *b, + uint64_t *res); + +uint32_t Hacl_Bignum_ModInvLimb_mod_inv_uint32(uint32_t n0); + +uint64_t Hacl_Bignum_ModInvLimb_mod_inv_uint64(uint64_t n0); + +uint32_t Hacl_Bignum_Montgomery_bn_check_modulus_u32(uint32_t len, uint32_t *n); + +void +Hacl_Bignum_Montgomery_bn_precomp_r2_mod_n_u32( + uint32_t len, + uint32_t nBits, + uint32_t *n, + uint32_t *res); + +void +Hacl_Bignum_Montgomery_bn_mont_reduction_u32( + uint32_t len, + uint32_t *n, + uint32_t nInv, + uint32_t *c, + uint32_t *res); + +void +Hacl_Bignum_Montgomery_bn_to_mont_u32( + uint32_t len, + uint32_t *n, + uint32_t nInv, + uint32_t *r2, + uint32_t *a, + uint32_t *aM); + +void 
+Hacl_Bignum_Montgomery_bn_from_mont_u32( + uint32_t len, + uint32_t *n, + uint32_t nInv_u64, + uint32_t *aM, + uint32_t *a); + +void +Hacl_Bignum_Montgomery_bn_mont_mul_u32( + uint32_t len, + uint32_t *n, + uint32_t nInv_u64, + uint32_t *aM, + uint32_t *bM, + uint32_t *resM); + +void +Hacl_Bignum_Montgomery_bn_mont_sqr_u32( + uint32_t len, + uint32_t *n, + uint32_t nInv_u64, + uint32_t *aM, + uint32_t *resM); + +uint64_t Hacl_Bignum_Montgomery_bn_check_modulus_u64(uint32_t len, uint64_t *n); + +void +Hacl_Bignum_Montgomery_bn_precomp_r2_mod_n_u64( + uint32_t len, + uint32_t nBits, + uint64_t *n, + uint64_t *res); + +void +Hacl_Bignum_Montgomery_bn_mont_reduction_u64( + uint32_t len, + uint64_t *n, + uint64_t nInv, + uint64_t *c, + uint64_t *res); + +void +Hacl_Bignum_Montgomery_bn_to_mont_u64( + uint32_t len, + uint64_t *n, + uint64_t nInv, + uint64_t *r2, + uint64_t *a, + uint64_t *aM); + +void +Hacl_Bignum_Montgomery_bn_from_mont_u64( + uint32_t len, + uint64_t *n, + uint64_t nInv_u64, + uint64_t *aM, + uint64_t *a); + +void +Hacl_Bignum_Montgomery_bn_mont_mul_u64( + uint32_t len, + uint64_t *n, + uint64_t nInv_u64, + uint64_t *aM, + uint64_t *bM, + uint64_t *resM); + +void +Hacl_Bignum_Montgomery_bn_mont_sqr_u64( + uint32_t len, + uint64_t *n, + uint64_t nInv_u64, + uint64_t *aM, + uint64_t *resM); + +uint32_t +Hacl_Bignum_Exponentiation_bn_check_mod_exp_u32( + uint32_t len, + uint32_t *n, + uint32_t *a, + uint32_t bBits, + uint32_t *b); + +void +Hacl_Bignum_Exponentiation_bn_mod_exp_vartime_precomp_u32( + uint32_t len, + uint32_t *n, + uint32_t mu, + uint32_t *r2, + uint32_t *a, + uint32_t bBits, + uint32_t *b, + uint32_t *res); + +void +Hacl_Bignum_Exponentiation_bn_mod_exp_consttime_precomp_u32( + uint32_t len, + uint32_t *n, + uint32_t mu, + uint32_t *r2, + uint32_t *a, + uint32_t bBits, + uint32_t *b, + uint32_t *res); + +void +Hacl_Bignum_Exponentiation_bn_mod_exp_vartime_u32( + uint32_t len, + uint32_t nBits, + uint32_t *n, + uint32_t *a, + uint32_t 
bBits, + uint32_t *b, + uint32_t *res); + +void +Hacl_Bignum_Exponentiation_bn_mod_exp_consttime_u32( + uint32_t len, + uint32_t nBits, + uint32_t *n, + uint32_t *a, + uint32_t bBits, + uint32_t *b, + uint32_t *res); + +uint64_t +Hacl_Bignum_Exponentiation_bn_check_mod_exp_u64( + uint32_t len, + uint64_t *n, + uint64_t *a, + uint32_t bBits, + uint64_t *b); + +void +Hacl_Bignum_Exponentiation_bn_mod_exp_vartime_precomp_u64( + uint32_t len, + uint64_t *n, + uint64_t mu, + uint64_t *r2, + uint64_t *a, + uint32_t bBits, + uint64_t *b, + uint64_t *res); + +void +Hacl_Bignum_Exponentiation_bn_mod_exp_consttime_precomp_u64( + uint32_t len, + uint64_t *n, + uint64_t mu, + uint64_t *r2, + uint64_t *a, + uint32_t bBits, + uint64_t *b, + uint64_t *res); + +void +Hacl_Bignum_Exponentiation_bn_mod_exp_vartime_u64( + uint32_t len, + uint32_t nBits, + uint64_t *n, + uint64_t *a, + uint32_t bBits, + uint64_t *b, + uint64_t *res); + +void +Hacl_Bignum_Exponentiation_bn_mod_exp_consttime_u64( + uint32_t len, + uint32_t nBits, + uint64_t *n, + uint64_t *a, + uint32_t bBits, + uint64_t *b, + uint64_t *res); + +#if defined(__cplusplus) +} +#endif + +#define __internal_Hacl_Bignum_H_DEFINED +#endif diff --git a/security/nss/lib/freebl/verified/internal/Hacl_Chacha20.h b/security/nss/lib/freebl/verified/internal/Hacl_Chacha20.h new file mode 100644 index 0000000000..51ecfeef39 --- /dev/null +++ b/security/nss/lib/freebl/verified/internal/Hacl_Chacha20.h @@ -0,0 +1,50 @@ +/* MIT License + * + * Copyright (c) 2016-2020 INRIA, CMU and Microsoft Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the 
following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef __internal_Hacl_Chacha20_H +#define __internal_Hacl_Chacha20_H + +#if defined(__cplusplus) +extern "C" { +#endif + +#include <string.h> +#include "krml/internal/types.h" +#include "krml/lowstar_endianness.h" +#include "krml/internal/target.h" + +#include "../Hacl_Chacha20.h" + +extern const uint32_t Hacl_Impl_Chacha20_Vec_chacha20_constants[4U]; + +void Hacl_Impl_Chacha20_chacha20_init(uint32_t *ctx, uint8_t *k, uint8_t *n, uint32_t ctr); + +void +Hacl_Impl_Chacha20_chacha20_update(uint32_t *ctx, uint32_t len, uint8_t *out, uint8_t *text); + +#if defined(__cplusplus) +} +#endif + +#define __internal_Hacl_Chacha20_H_DEFINED +#endif diff --git a/security/nss/lib/freebl/verified/internal/Hacl_Curve25519_51.h b/security/nss/lib/freebl/verified/internal/Hacl_Curve25519_51.h new file mode 100644 index 0000000000..d7d05e89f5 --- /dev/null +++ b/security/nss/lib/freebl/verified/internal/Hacl_Curve25519_51.h @@ -0,0 +1,53 @@ +/* MIT License + * + * Copyright (c) 2016-2020 INRIA, CMU and Microsoft Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, 
and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef __internal_Hacl_Curve25519_51_H +#define __internal_Hacl_Curve25519_51_H + +#if defined(__cplusplus) +extern "C" { +#endif + +#include <string.h> +#include "krml/internal/types.h" +#include "krml/lowstar_endianness.h" +#include "krml/internal/target.h" + +#include "internal/Hacl_Krmllib.h" +#include "../Hacl_Curve25519_51.h" + +void +Hacl_Curve25519_51_fsquare_times( + uint64_t *o, + uint64_t *inp, + FStar_UInt128_uint128 *tmp, + uint32_t n); + +void Hacl_Curve25519_51_finv(uint64_t *o, uint64_t *i, FStar_UInt128_uint128 *tmp); + +#if defined(__cplusplus) +} +#endif + +#define __internal_Hacl_Curve25519_51_H_DEFINED +#endif diff --git a/security/nss/lib/freebl/verified/internal/Hacl_Hash_SHA1.h b/security/nss/lib/freebl/verified/internal/Hacl_Hash_SHA1.h new file mode 100644 index 0000000000..02ee03247a --- /dev/null +++ b/security/nss/lib/freebl/verified/internal/Hacl_Hash_SHA1.h @@ -0,0 +1,49 @@ +/* MIT License + * + * Copyright (c) 2016-2020 INRIA, CMU and Microsoft Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, 
including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef __internal_Hacl_Hash_SHA1_H +#define __internal_Hacl_Hash_SHA1_H + +#if defined(__cplusplus) +extern "C" { +#endif + +#include <string.h> +#include "krml/internal/types.h" +#include "krml/lowstar_endianness.h" +#include "krml/internal/target.h" + +#include "../Hacl_Hash_SHA1.h" + +void Hacl_Hash_Core_SHA1_legacy_init(uint32_t *s); + +void Hacl_Hash_Core_SHA1_legacy_update(uint32_t *h, uint8_t *l); + +void Hacl_Hash_Core_SHA1_legacy_finish(uint32_t *s, uint8_t *dst); + +#if defined(__cplusplus) +} +#endif + +#define __internal_Hacl_Hash_SHA1_H_DEFINED +#endif diff --git a/security/nss/lib/freebl/verified/internal/Hacl_Hash_SHA2.h b/security/nss/lib/freebl/verified/internal/Hacl_Hash_SHA2.h new file mode 100644 index 0000000000..ed9894e717 --- /dev/null +++ b/security/nss/lib/freebl/verified/internal/Hacl_Hash_SHA2.h @@ -0,0 +1,65 @@ +/* MIT License + * + * Copyright (c) 2016-2020 INRIA, CMU and Microsoft Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + 
* in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef __internal_Hacl_Hash_SHA2_H +#define __internal_Hacl_Hash_SHA2_H + +#if defined(__cplusplus) +extern "C" { +#endif + +#include <string.h> +#include "krml/internal/types.h" +#include "krml/lowstar_endianness.h" +#include "krml/internal/target.h" + +#include "../Hacl_Hash_SHA2.h" + +void Hacl_Hash_Core_SHA2_init_224(uint32_t *s); + +void Hacl_Hash_Core_SHA2_init_256(uint32_t *s); + +void Hacl_Hash_Core_SHA2_init_384(uint64_t *s); + +void Hacl_Hash_Core_SHA2_init_512(uint64_t *s); + +void Hacl_Hash_Core_SHA2_update_384(uint64_t *hash, uint8_t *block); + +void Hacl_Hash_Core_SHA2_update_512(uint64_t *hash, uint8_t *block); + +void Hacl_Hash_Core_SHA2_pad_256(uint64_t len, uint8_t *dst); + +void Hacl_Hash_Core_SHA2_finish_224(uint32_t *s, uint8_t *dst); + +void Hacl_Hash_Core_SHA2_finish_256(uint32_t *s, uint8_t *dst); + +void Hacl_Hash_Core_SHA2_finish_384(uint64_t *s, uint8_t *dst); + +void Hacl_Hash_Core_SHA2_finish_512(uint64_t *s, uint8_t *dst); + +#if defined(__cplusplus) +} +#endif + +#define __internal_Hacl_Hash_SHA2_H_DEFINED +#endif diff 
--git a/security/nss/lib/freebl/verified/internal/Hacl_Krmllib.h b/security/nss/lib/freebl/verified/internal/Hacl_Krmllib.h new file mode 100644 index 0000000000..3778437448 --- /dev/null +++ b/security/nss/lib/freebl/verified/internal/Hacl_Krmllib.h @@ -0,0 +1,45 @@ +/* MIT License + * + * Copyright (c) 2016-2020 INRIA, CMU and Microsoft Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#ifndef __internal_Hacl_Krmllib_H +#define __internal_Hacl_Krmllib_H + +#if defined(__cplusplus) +extern "C" { +#endif + +#include <string.h> +#include "krml/internal/types.h" +#include "krml/lowstar_endianness.h" +#include "krml/internal/target.h" + +#include "../Hacl_Krmllib.h" + +uint32_t LowStar_Vector_new_capacity(uint32_t cap); + +#if defined(__cplusplus) +} +#endif + +#define __internal_Hacl_Krmllib_H_DEFINED +#endif diff --git a/security/nss/lib/freebl/verified/internal/Hacl_Poly1305_128.h b/security/nss/lib/freebl/verified/internal/Hacl_Poly1305_128.h new file mode 100644 index 0000000000..d5f257302d --- /dev/null +++ b/security/nss/lib/freebl/verified/internal/Hacl_Poly1305_128.h @@ -0,0 +1,51 @@ +/* MIT License + * + * Copyright (c) 2016-2020 INRIA, CMU and Microsoft Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#ifndef __internal_Hacl_Poly1305_128_H +#define __internal_Hacl_Poly1305_128_H + +#if defined(__cplusplus) +extern "C" { +#endif + +#include <string.h> +#include "krml/internal/types.h" +#include "krml/lowstar_endianness.h" +#include "krml/internal/target.h" + +#include "../Hacl_Poly1305_128.h" +#include "libintvector.h" +void +Hacl_Impl_Poly1305_Field32xN_128_load_acc2(Lib_IntVector_Intrinsics_vec128 *acc, uint8_t *b); + +void +Hacl_Impl_Poly1305_Field32xN_128_fmul_r2_normalize( + Lib_IntVector_Intrinsics_vec128 *out, + Lib_IntVector_Intrinsics_vec128 *p); + +#if defined(__cplusplus) +} +#endif + +#define __internal_Hacl_Poly1305_128_H_DEFINED +#endif diff --git a/security/nss/lib/freebl/verified/internal/Hacl_Poly1305_256.h b/security/nss/lib/freebl/verified/internal/Hacl_Poly1305_256.h new file mode 100644 index 0000000000..9b10379237 --- /dev/null +++ b/security/nss/lib/freebl/verified/internal/Hacl_Poly1305_256.h @@ -0,0 +1,51 @@ +/* MIT License + * + * Copyright (c) 2016-2020 INRIA, CMU and Microsoft Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef __internal_Hacl_Poly1305_256_H +#define __internal_Hacl_Poly1305_256_H + +#if defined(__cplusplus) +extern "C" { +#endif + +#include <string.h> +#include "krml/internal/types.h" +#include "krml/lowstar_endianness.h" +#include "krml/internal/target.h" + +#include "../Hacl_Poly1305_256.h" +#include "libintvector.h" +void +Hacl_Impl_Poly1305_Field32xN_256_load_acc4(Lib_IntVector_Intrinsics_vec256 *acc, uint8_t *b); + +void +Hacl_Impl_Poly1305_Field32xN_256_fmul_r4_normalize( + Lib_IntVector_Intrinsics_vec256 *out, + Lib_IntVector_Intrinsics_vec256 *p); + +#if defined(__cplusplus) +} +#endif + +#define __internal_Hacl_Poly1305_256_H_DEFINED +#endif diff --git a/security/nss/lib/freebl/verified/internal/Hacl_Spec.h b/security/nss/lib/freebl/verified/internal/Hacl_Spec.h new file mode 100644 index 0000000000..cf5376abab --- /dev/null +++ b/security/nss/lib/freebl/verified/internal/Hacl_Spec.h @@ -0,0 +1,59 @@ +/* MIT License + * + * Copyright (c) 2016-2020 INRIA, CMU and Microsoft Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef __internal_Hacl_Spec_H +#define __internal_Hacl_Spec_H + +#if defined(__cplusplus) +extern "C" { +#endif + +#include <string.h> +#include "krml/internal/types.h" +#include "krml/lowstar_endianness.h" +#include "krml/internal/target.h" + +#include "../Hacl_Spec.h" + +#define Spec_ECDSA_NoHash 0 +#define Spec_ECDSA_Hash 1 + +typedef uint8_t Spec_ECDSA_hash_alg_ecdsa_tags; + +typedef struct Spec_ECDSA_hash_alg_ecdsa_s { + Spec_ECDSA_hash_alg_ecdsa_tags tag; + Spec_Hash_Definitions_hash_alg _0; +} Spec_ECDSA_hash_alg_ecdsa; + +#define Spec_Cipher_Expansion_Hacl_CHACHA20 0 +#define Spec_Cipher_Expansion_Vale_AES128 1 +#define Spec_Cipher_Expansion_Vale_AES256 2 + +typedef uint8_t Spec_Cipher_Expansion_impl; + +#if defined(__cplusplus) +} +#endif + +#define __internal_Hacl_Spec_H_DEFINED +#endif diff --git a/security/nss/lib/freebl/verified/internal/Vale.h b/security/nss/lib/freebl/verified/internal/Vale.h new file mode 100644 index 0000000000..400650e95f --- /dev/null +++ b/security/nss/lib/freebl/verified/internal/Vale.h @@ -0,0 +1,184 @@ +/* MIT License + * + * Copyright (c) 2016-2020 INRIA, CMU and Microsoft Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, 
and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef __internal_Vale_H +#define __internal_Vale_H + +#if defined(__cplusplus) +extern "C" { +#endif + +#include <string.h> +#include "krml/internal/types.h" +#include "krml/lowstar_endianness.h" +#include "krml/internal/target.h" + +extern uint64_t add_scalar_e(uint64_t *x0, uint64_t *x1, uint64_t x2); + +extern uint64_t fadd_e(uint64_t *x0, uint64_t *x1, uint64_t *x2); + +extern uint64_t sha256_update(uint32_t *x0, uint8_t *x1, uint64_t x2, uint32_t *x3); + +extern uint64_t x64_poly1305(uint8_t *x0, uint8_t *x1, uint64_t x2, uint64_t x3); + +extern uint64_t check_aesni(void); + +extern uint64_t check_sha(void); + +extern uint64_t check_adx_bmi2(void); + +extern uint64_t check_avx(void); + +extern uint64_t check_avx2(void); + +extern uint64_t check_movbe(void); + +extern uint64_t check_sse(void); + +extern uint64_t check_rdrand(void); + +extern uint64_t check_avx512(void); + +extern uint64_t check_osxsave(void); + +extern uint64_t check_avx_xcr0(void); + +extern uint64_t check_avx512_xcr0(void); + +extern uint64_t +gcm128_decrypt_opt( + uint8_t *x0, + uint64_t x1, + uint64_t x2, + uint8_t *x3, + uint8_t *x4, + uint8_t *x5, + uint8_t *x6, + uint8_t *x7, + uint8_t *x8, + uint64_t x9, + uint8_t *x10, + uint8_t *x11, + uint64_t x12, + uint8_t *x13, + uint64_t x14, + uint8_t *x15, 
+ uint8_t *x16); + +extern uint64_t +gcm256_decrypt_opt( + uint8_t *x0, + uint64_t x1, + uint64_t x2, + uint8_t *x3, + uint8_t *x4, + uint8_t *x5, + uint8_t *x6, + uint8_t *x7, + uint8_t *x8, + uint64_t x9, + uint8_t *x10, + uint8_t *x11, + uint64_t x12, + uint8_t *x13, + uint64_t x14, + uint8_t *x15, + uint8_t *x16); + +extern uint64_t aes128_key_expansion(uint8_t *x0, uint8_t *x1); + +extern uint64_t aes256_key_expansion(uint8_t *x0, uint8_t *x1); + +extern uint64_t +compute_iv_stdcall( + uint8_t *x0, + uint64_t x1, + uint64_t x2, + uint8_t *x3, + uint8_t *x4, + uint8_t *x5); + +extern uint64_t +gcm128_encrypt_opt( + uint8_t *x0, + uint64_t x1, + uint64_t x2, + uint8_t *x3, + uint8_t *x4, + uint8_t *x5, + uint8_t *x6, + uint8_t *x7, + uint8_t *x8, + uint64_t x9, + uint8_t *x10, + uint8_t *x11, + uint64_t x12, + uint8_t *x13, + uint64_t x14, + uint8_t *x15, + uint8_t *x16); + +extern uint64_t +gcm256_encrypt_opt( + uint8_t *x0, + uint64_t x1, + uint64_t x2, + uint8_t *x3, + uint8_t *x4, + uint8_t *x5, + uint8_t *x6, + uint8_t *x7, + uint8_t *x8, + uint64_t x9, + uint8_t *x10, + uint8_t *x11, + uint64_t x12, + uint8_t *x13, + uint64_t x14, + uint8_t *x15, + uint8_t *x16); + +extern uint64_t aes128_keyhash_init(uint8_t *x0, uint8_t *x1); + +extern uint64_t aes256_keyhash_init(uint8_t *x0, uint8_t *x1); + +extern uint64_t cswap2_e(uint64_t x0, uint64_t *x1, uint64_t *x2); + +extern uint64_t fsqr_e(uint64_t *x0, uint64_t *x1, uint64_t *x2); + +extern uint64_t fsqr2_e(uint64_t *x0, uint64_t *x1, uint64_t *x2); + +extern uint64_t fmul_e(uint64_t *x0, uint64_t *x1, uint64_t *x2, uint64_t *x3); + +extern uint64_t fmul2_e(uint64_t *x0, uint64_t *x1, uint64_t *x2, uint64_t *x3); + +extern uint64_t fmul_scalar_e(uint64_t *x0, uint64_t *x1, uint64_t x2); + +extern uint64_t fsub_e(uint64_t *x0, uint64_t *x1, uint64_t *x2); + +#if defined(__cplusplus) +} +#endif + +#define __internal_Vale_H_DEFINED +#endif diff --git 
a/security/nss/lib/freebl/verified/karamel/include/krml/c_endianness.h b/security/nss/lib/freebl/verified/karamel/include/krml/c_endianness.h new file mode 100644 index 0000000000..21d7e1b4f9 --- /dev/null +++ b/security/nss/lib/freebl/verified/karamel/include/krml/c_endianness.h @@ -0,0 +1,13 @@ +/* Copyright (c) INRIA and Microsoft Corporation. All rights reserved. + Licensed under the Apache 2.0 License. */ + +#ifndef __KRML_ENDIAN_H +#define __KRML_ENDIAN_H + +#ifdef __GNUC__ +#warning "c_endianness.h is deprecated, include lowstar_endianness.h instead" +#endif + +#include "lowstar_endianness.h" + +#endif diff --git a/security/nss/lib/freebl/verified/karamel/include/krml/fstar_int.h b/security/nss/lib/freebl/verified/karamel/include/krml/fstar_int.h new file mode 100644 index 0000000000..c7a5afb50a --- /dev/null +++ b/security/nss/lib/freebl/verified/karamel/include/krml/fstar_int.h @@ -0,0 +1,89 @@ +#ifndef __FSTAR_INT_H +#define __FSTAR_INT_H + +#include "internal/types.h" + +/* + * Arithmetic Shift Right operator + * + * In all C standards, a >> b is implementation-defined when a has a signed + * type and a negative value. See e.g. 6.5.7 in + * http://www.open-std.org/jtc1/sc22/wg14/www/docs/n2310.pdf + * + * GCC, MSVC, and Clang implement a >> b as an arithmetic shift. + * + * GCC: https://gcc.gnu.org/onlinedocs/gcc-9.1.0/gcc/Integers-implementation.html#Integers-implementation + * MSVC: https://docs.microsoft.com/en-us/cpp/cpp/left-shift-and-right-shift-operators-input-and-output?view=vs-2019#right-shifts + * Clang: tested that Clang 7, 8 and 9 compile this to an arithmetic shift + * + * We implement arithmetic shift right simply as >> in these compilers + * and bail out in others. 
+ */ + +#if !(defined(_MSC_VER) || defined(__GNUC__) || (defined(__clang__) && (__clang_major__ >= 7))) + +static inline int8_t +FStar_Int8_shift_arithmetic_right(int8_t a, uint32_t b) +{ + do { + KRML_HOST_EPRINTF("Could not identify compiler so could not provide an implementation of signed arithmetic shift right.\n"); + KRML_HOST_EXIT(255); + } while (0); +} + +static inline int16_t +FStar_Int16_shift_arithmetic_right(int16_t a, uint32_t b) +{ + do { + KRML_HOST_EPRINTF("Could not identify compiler so could not provide an implementation of signed arithmetic shift right.\n"); + KRML_HOST_EXIT(255); + } while (0); +} + +static inline int32_t +FStar_Int32_shift_arithmetic_right(int32_t a, uint32_t b) +{ + do { + KRML_HOST_EPRINTF("Could not identify compiler so could not provide an implementation of signed arithmetic shift right.\n"); + KRML_HOST_EXIT(255); + } while (0); +} + +static inline int64_t +FStar_Int64_shift_arithmetic_right(int64_t a, uint32_t b) +{ + do { + KRML_HOST_EPRINTF("Could not identify compiler so could not provide an implementation of signed arithmetic shift right.\n"); + KRML_HOST_EXIT(255); + } while (0); +} + +#else + +static inline int8_t +FStar_Int8_shift_arithmetic_right(int8_t a, uint32_t b) +{ + return (a >> b); +} + +static inline int16_t +FStar_Int16_shift_arithmetic_right(int16_t a, uint32_t b) +{ + return (a >> b); +} + +static inline int32_t +FStar_Int32_shift_arithmetic_right(int32_t a, uint32_t b) +{ + return (a >> b); +} + +static inline int64_t +FStar_Int64_shift_arithmetic_right(int64_t a, uint32_t b) +{ + return (a >> b); +} + +#endif /* !(defined(_MSC_VER) ... 
) */ + +#endif /* __FSTAR_INT_H */ diff --git a/security/nss/lib/freebl/verified/karamel/include/krml/internal/builtin.h b/security/nss/lib/freebl/verified/karamel/include/krml/internal/builtin.h new file mode 100644 index 0000000000..f55e5f824e --- /dev/null +++ b/security/nss/lib/freebl/verified/karamel/include/krml/internal/builtin.h @@ -0,0 +1,16 @@ +/* Copyright (c) INRIA and Microsoft Corporation. All rights reserved. + Licensed under the Apache 2.0 License. */ + +#ifndef __KRML_BUILTIN_H +#define __KRML_BUILTIN_H + +/* For alloca, when using KaRaMeL's -falloca */ +#if (defined(_WIN32) || defined(_WIN64)) +#include <malloc.h> +#endif + +/* If some globals need to be initialized before the main, then karamel will + * generate and try to link last a function with this type: */ +void krmlinit_globals(void); + +#endif diff --git a/security/nss/lib/freebl/verified/karamel/include/krml/internal/callconv.h b/security/nss/lib/freebl/verified/karamel/include/krml/internal/callconv.h new file mode 100644 index 0000000000..0d250c4450 --- /dev/null +++ b/security/nss/lib/freebl/verified/karamel/include/krml/internal/callconv.h @@ -0,0 +1,46 @@ +/* Copyright (c) INRIA and Microsoft Corporation. All rights reserved. + Licensed under the Apache 2.0 License. */ + +#ifndef __KRML_CALLCONV_H +#define __KRML_CALLCONV_H + +/******************************************************************************/ +/* Some macros to ease compatibility */ +/******************************************************************************/ + +/* We want to generate __cdecl safely without worrying about it being undefined. + * When using MSVC, these are always defined. When using MinGW, these are + * defined too. They have no meaning for other platforms, so we define them to + * be empty macros in other situations. 
*/ +#ifndef _MSC_VER +#ifndef __cdecl +#define __cdecl +#endif +#ifndef __stdcall +#define __stdcall +#endif +#ifndef __fastcall +#define __fastcall +#endif +#endif + +/* Since KaRaMeL emits the inline keyword unconditionally, we follow the + * guidelines at https://gcc.gnu.org/onlinedocs/gcc/Inline.html and make this + * __inline__ to ensure the code compiles with -std=c90 and earlier. */ +#ifdef __GNUC__ +#define inline __inline__ +#endif + +/* GCC-specific attribute syntax, selected via the predefined __GNUC__ macro; + * clang and everyone else get the standard C inline keyword. */ +#ifdef __GNUC__ +#ifndef __clang__ +#define force_inline inline __attribute__((always_inline)) +#else +#define force_inline inline +#endif +#else +#define force_inline inline +#endif + +#endif diff --git a/security/nss/lib/freebl/verified/karamel/include/krml/internal/compat.h b/security/nss/lib/freebl/verified/karamel/include/krml/internal/compat.h new file mode 100644 index 0000000000..964d1c52aa --- /dev/null +++ b/security/nss/lib/freebl/verified/karamel/include/krml/internal/compat.h @@ -0,0 +1,32 @@ +/* Copyright (c) INRIA and Microsoft Corporation. All rights reserved. + Licensed under the Apache 2.0 License. */ + +#ifndef KRML_COMPAT_H +#define KRML_COMPAT_H + +#include <inttypes.h> + +/* A series of macros that define C implementations of types that are not Low*, + * to facilitate porting programs to Low*. 
*/ + +typedef struct { + uint32_t length; + const char *data; +} FStar_Bytes_bytes; + +typedef int32_t Prims_pos, Prims_nat, Prims_nonzero, Prims_int, + krml_checked_int_t; + +#define RETURN_OR(x) \ + do { \ + int64_t __ret = x; \ + if (__ret < INT32_MIN || INT32_MAX < __ret) { \ + KRML_HOST_PRINTF( \ + "Prims.{int,nat,pos} integer overflow at %s:%d\n", __FILE__, \ + __LINE__); \ + KRML_HOST_EXIT(252); \ + } \ + return (int32_t)__ret; \ + } while (0) + +#endif diff --git a/security/nss/lib/freebl/verified/karamel/include/krml/internal/debug.h b/security/nss/lib/freebl/verified/karamel/include/krml/internal/debug.h new file mode 100644 index 0000000000..f70006bd3f --- /dev/null +++ b/security/nss/lib/freebl/verified/karamel/include/krml/internal/debug.h @@ -0,0 +1,57 @@ +/* Copyright (c) INRIA and Microsoft Corporation. All rights reserved. + Licensed under the Apache 2.0 License. */ + +#ifndef __KRML_DEBUG_H +#define __KRML_DEBUG_H + +#include <inttypes.h> + +#include "krml/internal/target.h" + +/******************************************************************************/ +/* Debugging helpers - intended only for KaRaMeL developers */ +/******************************************************************************/ + +/* In support of "-wasm -d force-c": we might need this function to be + * forward-declared, because the dependency on WasmSupport appears very late, + * after SimplifyWasm, and sadly, after the topological order has been done. */ +void WasmSupport_check_buffer_size(uint32_t s); + +/* A series of GCC atrocities to trace function calls (karamel's [-d c-calls] + * option). Useful when trying to debug, say, Wasm, to compare traces. 
*/ +/* clang-format off */ +#ifdef __GNUC__ +#define KRML_FORMAT(X) _Generic((X), \ + uint8_t : "0x%08" PRIx8, \ + uint16_t: "0x%08" PRIx16, \ + uint32_t: "0x%08" PRIx32, \ + uint64_t: "0x%08" PRIx64, \ + int8_t : "0x%08" PRIx8, \ + int16_t : "0x%08" PRIx16, \ + int32_t : "0x%08" PRIx32, \ + int64_t : "0x%08" PRIx64, \ + default : "%s") + +#define KRML_FORMAT_ARG(X) _Generic((X), \ + uint8_t : X, \ + uint16_t: X, \ + uint32_t: X, \ + uint64_t: X, \ + int8_t : X, \ + int16_t : X, \ + int32_t : X, \ + int64_t : X, \ + default : "unknown") +/* clang-format on */ + +#define KRML_DEBUG_RETURN(X) \ + ({ \ + __auto_type _ret = (X); \ + KRML_HOST_PRINTF("returning: "); \ + KRML_HOST_PRINTF(KRML_FORMAT(_ret), KRML_FORMAT_ARG(_ret)); \ + KRML_HOST_PRINTF(" \n"); \ + _ret; \ + }) +#endif + +#endif diff --git a/security/nss/lib/freebl/verified/karamel/include/krml/internal/target.h b/security/nss/lib/freebl/verified/karamel/include/krml/internal/target.h new file mode 100644 index 0000000000..929abe8081 --- /dev/null +++ b/security/nss/lib/freebl/verified/karamel/include/krml/internal/target.h @@ -0,0 +1,333 @@ +/* Copyright (c) INRIA and Microsoft Corporation. All rights reserved. + Licensed under the Apache 2.0 License. */ + +#ifndef __KRML_TARGET_H +#define __KRML_TARGET_H + +#include <stdlib.h> +#include <stdio.h> +#include <stdbool.h> +#include <inttypes.h> +#include <limits.h> + +#include "krml/internal/callconv.h" + +/******************************************************************************/ +/* Macros that KaRaMeL will generate. */ +/******************************************************************************/ + +/* For "bare" targets that do not have a C stdlib, the user might want to use + * [-add-early-include '"mydefinitions.h"'] and override these. 
*/ +#ifndef KRML_HOST_PRINTF +#define KRML_HOST_PRINTF printf +#endif + +#if ( \ + (defined __STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) && \ + (!(defined KRML_HOST_EPRINTF))) +#define KRML_HOST_EPRINTF(...) fprintf(stderr, __VA_ARGS__) +#elif !(defined KRML_HOST_EPRINTF) && defined(_MSC_VER) +#define KRML_HOST_EPRINTF(...) fprintf(stderr, __VA_ARGS__) +#endif + +#ifndef KRML_HOST_EXIT +#define KRML_HOST_EXIT exit +#endif + +#ifndef KRML_HOST_MALLOC +#define KRML_HOST_MALLOC malloc +#endif + +#ifndef KRML_HOST_CALLOC +#define KRML_HOST_CALLOC calloc +#endif + +#ifndef KRML_HOST_FREE +#define KRML_HOST_FREE free +#endif + +#ifndef KRML_PRE_ALIGN +#ifdef _MSC_VER +#define KRML_PRE_ALIGN(X) __declspec(align(X)) +#else +#define KRML_PRE_ALIGN(X) +#endif +#endif + +#ifndef KRML_POST_ALIGN +#ifdef _MSC_VER +#define KRML_POST_ALIGN(X) +#else +#define KRML_POST_ALIGN(X) __attribute__((aligned(X))) +#endif +#endif + +#ifndef KRML_ALIGNED_MALLOC +#ifdef _MSC_VER +#define KRML_ALIGNED_MALLOC(X, Y) _aligned_malloc(Y, X) +#else +#define KRML_ALIGNED_MALLOC(X, Y) aligned_alloc(X, Y) +#endif +#endif + +#ifndef KRML_ALIGNED_FREE +#ifdef _MSC_VER +#define KRML_ALIGNED_FREE(X) _aligned_free(X) +#else +#define KRML_ALIGNED_FREE(X) free(X) +#endif +#endif + +#ifndef KRML_HOST_TIME + +#include <time.h> + +/* Prims_nat not yet in scope, so the value of time(NULL) is returned as int32_t */ +inline static int32_t +krml_time(void) +{ + return (int32_t)time(NULL); +} + +#define KRML_HOST_TIME krml_time +#endif + +/* In statement position, exiting is easy. */ +#define KRML_EXIT \ + do { \ + KRML_HOST_PRINTF("Unimplemented function at %s:%d\n", __FILE__, __LINE__); \ + KRML_HOST_EXIT(254); \ + } while (0) + +/* In expression position, use the comma-operator and a malloc to return an + * expression of the right size. KaRaMeL passes t as the parameter to the macro. 
+ */ +#define KRML_EABORT(t, msg) \ + (KRML_HOST_PRINTF("KaRaMeL abort at %s:%d\n%s\n", __FILE__, __LINE__, msg), \ + KRML_HOST_EXIT(255), *((t *)KRML_HOST_MALLOC(sizeof(t)))) + +/* In FStar.Buffer.fst, the size of arrays is uint32_t, but it's a number of + * *elements*. Do an ugly, run-time check (some of which KaRaMeL can eliminate). + */ + +#if defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ > 4)) +#define _KRML_CHECK_SIZE_PRAGMA \ + _Pragma("GCC diagnostic ignored \"-Wtype-limits\"") +#else +#define _KRML_CHECK_SIZE_PRAGMA +#endif + +#define KRML_CHECK_SIZE(size_elt, sz) \ + do { \ + _KRML_CHECK_SIZE_PRAGMA \ + if (((size_t)(sz)) > ((size_t)(SIZE_MAX / (size_elt)))) { \ + KRML_HOST_PRINTF( \ + "Maximum allocatable size exceeded, aborting before overflow at " \ + "%s:%d\n", \ + __FILE__, __LINE__); \ + KRML_HOST_EXIT(253); \ + } \ + } while (0) + +#if defined(_MSC_VER) && _MSC_VER < 1900 +#define KRML_HOST_SNPRINTF(buf, sz, fmt, arg) _snprintf_s(buf, sz, _TRUNCATE, fmt, arg) +#else +#define KRML_HOST_SNPRINTF(buf, sz, fmt, arg) snprintf(buf, sz, fmt, arg) +#endif + +#if defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ > 4)) +#define KRML_DEPRECATED(x) __attribute__((deprecated(x))) +#elif defined(__GNUC__) +/* deprecated attribute is not defined in GCC < 4.5. 
*/ +#define KRML_DEPRECATED(x) +#elif defined(_MSC_VER) +#define KRML_DEPRECATED(x) __declspec(deprecated(x)) +#endif + +/* Macros for prettier unrolling of loops */ +#define KRML_LOOP1(i, n, x) \ + { \ + x \ + i += n; \ + } + +#define KRML_LOOP2(i, n, x) \ + KRML_LOOP1(i, n, x) \ + KRML_LOOP1(i, n, x) + +#define KRML_LOOP3(i, n, x) \ + KRML_LOOP2(i, n, x) \ + KRML_LOOP1(i, n, x) + +#define KRML_LOOP4(i, n, x) \ + KRML_LOOP2(i, n, x) \ + KRML_LOOP2(i, n, x) + +#define KRML_LOOP5(i, n, x) \ + KRML_LOOP4(i, n, x) \ + KRML_LOOP1(i, n, x) + +#define KRML_LOOP6(i, n, x) \ + KRML_LOOP4(i, n, x) \ + KRML_LOOP2(i, n, x) + +#define KRML_LOOP7(i, n, x) \ + KRML_LOOP4(i, n, x) \ + KRML_LOOP3(i, n, x) + +#define KRML_LOOP8(i, n, x) \ + KRML_LOOP4(i, n, x) \ + KRML_LOOP4(i, n, x) + +#define KRML_LOOP9(i, n, x) \ + KRML_LOOP8(i, n, x) \ + KRML_LOOP1(i, n, x) + +#define KRML_LOOP10(i, n, x) \ + KRML_LOOP8(i, n, x) \ + KRML_LOOP2(i, n, x) + +#define KRML_LOOP11(i, n, x) \ + KRML_LOOP8(i, n, x) \ + KRML_LOOP3(i, n, x) + +#define KRML_LOOP12(i, n, x) \ + KRML_LOOP8(i, n, x) \ + KRML_LOOP4(i, n, x) + +#define KRML_LOOP13(i, n, x) \ + KRML_LOOP8(i, n, x) \ + KRML_LOOP5(i, n, x) + +#define KRML_LOOP14(i, n, x) \ + KRML_LOOP8(i, n, x) \ + KRML_LOOP6(i, n, x) + +#define KRML_LOOP15(i, n, x) \ + KRML_LOOP8(i, n, x) \ + KRML_LOOP7(i, n, x) + +#define KRML_LOOP16(i, n, x) \ + KRML_LOOP8(i, n, x) \ + KRML_LOOP8(i, n, x) + +#define KRML_UNROLL_FOR(i, z, n, k, x) \ + do { \ + uint32_t i = z; \ + KRML_LOOP##n(i, k, x) \ + } while (0) + +#define KRML_ACTUAL_FOR(i, z, n, k, x) \ + do { \ + for (uint32_t i = z; i < n; i += k) { \ + x \ + } \ + } while (0) + +#ifndef KRML_UNROLL_MAX +#define KRML_UNROLL_MAX 16 +#endif + +/* 1 is the number of loop iterations, i.e. 
(n - z)/k as evaluated by krml */ +#if 0 <= KRML_UNROLL_MAX +#define KRML_MAYBE_FOR0(i, z, n, k, x) +#else +#define KRML_MAYBE_FOR0(i, z, n, k, x) KRML_ACTUAL_FOR(i, z, n, k, x) +#endif + +#if 1 <= KRML_UNROLL_MAX +#define KRML_MAYBE_FOR1(i, z, n, k, x) KRML_UNROLL_FOR(i, z, 1, k, x) +#else +#define KRML_MAYBE_FOR1(i, z, n, k, x) KRML_ACTUAL_FOR(i, z, n, k, x) +#endif + +#if 2 <= KRML_UNROLL_MAX +#define KRML_MAYBE_FOR2(i, z, n, k, x) KRML_UNROLL_FOR(i, z, 2, k, x) +#else +#define KRML_MAYBE_FOR2(i, z, n, k, x) KRML_ACTUAL_FOR(i, z, n, k, x) +#endif + +#if 3 <= KRML_UNROLL_MAX +#define KRML_MAYBE_FOR3(i, z, n, k, x) KRML_UNROLL_FOR(i, z, 3, k, x) +#else +#define KRML_MAYBE_FOR3(i, z, n, k, x) KRML_ACTUAL_FOR(i, z, n, k, x) +#endif + +#if 4 <= KRML_UNROLL_MAX +#define KRML_MAYBE_FOR4(i, z, n, k, x) KRML_UNROLL_FOR(i, z, 4, k, x) +#else +#define KRML_MAYBE_FOR4(i, z, n, k, x) KRML_ACTUAL_FOR(i, z, n, k, x) +#endif + +#if 5 <= KRML_UNROLL_MAX +#define KRML_MAYBE_FOR5(i, z, n, k, x) KRML_UNROLL_FOR(i, z, 5, k, x) +#else +#define KRML_MAYBE_FOR5(i, z, n, k, x) KRML_ACTUAL_FOR(i, z, n, k, x) +#endif + +#if 6 <= KRML_UNROLL_MAX +#define KRML_MAYBE_FOR6(i, z, n, k, x) KRML_UNROLL_FOR(i, z, 6, k, x) +#else +#define KRML_MAYBE_FOR6(i, z, n, k, x) KRML_ACTUAL_FOR(i, z, n, k, x) +#endif + +#if 7 <= KRML_UNROLL_MAX +#define KRML_MAYBE_FOR7(i, z, n, k, x) KRML_UNROLL_FOR(i, z, 7, k, x) +#else +#define KRML_MAYBE_FOR7(i, z, n, k, x) KRML_ACTUAL_FOR(i, z, n, k, x) +#endif + +#if 8 <= KRML_UNROLL_MAX +#define KRML_MAYBE_FOR8(i, z, n, k, x) KRML_UNROLL_FOR(i, z, 8, k, x) +#else +#define KRML_MAYBE_FOR8(i, z, n, k, x) KRML_ACTUAL_FOR(i, z, n, k, x) +#endif + +#if 9 <= KRML_UNROLL_MAX +#define KRML_MAYBE_FOR9(i, z, n, k, x) KRML_UNROLL_FOR(i, z, 9, k, x) +#else +#define KRML_MAYBE_FOR9(i, z, n, k, x) KRML_ACTUAL_FOR(i, z, n, k, x) +#endif + +#if 10 <= KRML_UNROLL_MAX +#define KRML_MAYBE_FOR10(i, z, n, k, x) KRML_UNROLL_FOR(i, z, 10, k, x) +#else +#define KRML_MAYBE_FOR10(i, z, n, k, 
x) KRML_ACTUAL_FOR(i, z, n, k, x) +#endif + +#if 11 <= KRML_UNROLL_MAX +#define KRML_MAYBE_FOR11(i, z, n, k, x) KRML_UNROLL_FOR(i, z, 11, k, x) +#else +#define KRML_MAYBE_FOR11(i, z, n, k, x) KRML_ACTUAL_FOR(i, z, n, k, x) +#endif + +#if 12 <= KRML_UNROLL_MAX +#define KRML_MAYBE_FOR12(i, z, n, k, x) KRML_UNROLL_FOR(i, z, 12, k, x) +#else +#define KRML_MAYBE_FOR12(i, z, n, k, x) KRML_ACTUAL_FOR(i, z, n, k, x) +#endif + +#if 13 <= KRML_UNROLL_MAX +#define KRML_MAYBE_FOR13(i, z, n, k, x) KRML_UNROLL_FOR(i, z, 13, k, x) +#else +#define KRML_MAYBE_FOR13(i, z, n, k, x) KRML_ACTUAL_FOR(i, z, n, k, x) +#endif + +#if 14 <= KRML_UNROLL_MAX +#define KRML_MAYBE_FOR14(i, z, n, k, x) KRML_UNROLL_FOR(i, z, 14, k, x) +#else +#define KRML_MAYBE_FOR14(i, z, n, k, x) KRML_ACTUAL_FOR(i, z, n, k, x) +#endif + +#if 15 <= KRML_UNROLL_MAX +#define KRML_MAYBE_FOR15(i, z, n, k, x) KRML_UNROLL_FOR(i, z, 15, k, x) +#else +#define KRML_MAYBE_FOR15(i, z, n, k, x) KRML_ACTUAL_FOR(i, z, n, k, x) +#endif + +#if 16 <= KRML_UNROLL_MAX +#define KRML_MAYBE_FOR16(i, z, n, k, x) KRML_UNROLL_FOR(i, z, 16, k, x) +#else +#define KRML_MAYBE_FOR16(i, z, n, k, x) KRML_ACTUAL_FOR(i, z, n, k, x) +#endif +#endif diff --git a/security/nss/lib/freebl/verified/karamel/include/krml/internal/types.h b/security/nss/lib/freebl/verified/karamel/include/krml/internal/types.h new file mode 100644 index 0000000000..2cf1887adf --- /dev/null +++ b/security/nss/lib/freebl/verified/karamel/include/krml/internal/types.h @@ -0,0 +1,105 @@ +/* Copyright (c) INRIA and Microsoft Corporation. All rights reserved. + Licensed under the Apache 2.0 License. */ + +#ifndef KRML_TYPES_H +#define KRML_TYPES_H + +#include <inttypes.h> +#include <stdio.h> +#include <stdlib.h> +#include <stdint.h> + +/* Types which are either abstract, meaning that have to be implemented in C, or + * which are models, meaning that they are swapped out at compile-time for + * hand-written C types (in which case they're marked as noextract). 
*/ + +typedef uint64_t FStar_UInt64_t, FStar_UInt64_t_; +typedef int64_t FStar_Int64_t, FStar_Int64_t_; +typedef uint32_t FStar_UInt32_t, FStar_UInt32_t_; +typedef int32_t FStar_Int32_t, FStar_Int32_t_; +typedef uint16_t FStar_UInt16_t, FStar_UInt16_t_; +typedef int16_t FStar_Int16_t, FStar_Int16_t_; +typedef uint8_t FStar_UInt8_t, FStar_UInt8_t_; +typedef int8_t FStar_Int8_t, FStar_Int8_t_; + +/* Only useful when building krmllib, because it's in the dependency graph of + * FStar.Int.Cast. */ +typedef uint64_t FStar_UInt63_t, FStar_UInt63_t_; +typedef int64_t FStar_Int63_t, FStar_Int63_t_; + +typedef double FStar_Float_float; +typedef uint32_t FStar_Char_char; +typedef FILE *FStar_IO_fd_read, *FStar_IO_fd_write; + +typedef void *FStar_Dyn_dyn; + +typedef const char *C_String_t, *C_String_t_, *C_Compat_String_t, *C_Compat_String_t_; + +typedef int exit_code; +typedef FILE *channel; + +typedef unsigned long long TestLib_cycles; + +typedef uint64_t FStar_Date_dateTime, FStar_Date_timeSpan; + +/* Now Prims.string is no longer illegal with the new model in LowStar.Printf; + * it's operations that produce Prims_string which are illegal. Bring the + * definition into scope by default. */ +typedef const char *Prims_string; + +#if (defined(_MSC_VER) && defined(_M_X64) && !defined(__clang__)) +#define IS_MSVC64 1 +#endif + +/* This code makes a number of assumptions and should be refined. In particular, + * it assumes that: any non-MSVC amd64 compiler supports int128. Maybe it would + * be easier to just test for defined(__SIZEOF_INT128__) only? 
*/ +#if (defined(__x86_64__) || \ + defined(__x86_64) || \ + defined(__aarch64__) || \ + (defined(__powerpc64__) && defined(__LITTLE_ENDIAN__)) || \ + defined(__s390x__) || \ + (defined(_MSC_VER) && defined(_M_X64) && defined(__clang__)) || \ + (defined(__mips__) && defined(__LP64__)) || \ + (defined(__riscv) && __riscv_xlen == 64) || \ + defined(__SIZEOF_INT128__)) +#define HAS_INT128 1 +#endif + +/* The uint128 type is a special case since we offer several implementations of + * it, depending on the compiler and whether the user wants the verified + * implementation or not. */ +#if !defined(KRML_VERIFIED_UINT128) && defined(IS_MSVC64) +#include <emmintrin.h> +typedef __m128i FStar_UInt128_uint128; +#elif !defined(KRML_VERIFIED_UINT128) && defined(HAS_INT128) +typedef unsigned __int128 FStar_UInt128_uint128; +#else +typedef struct FStar_UInt128_uint128_s { + uint64_t low; + uint64_t high; +} FStar_UInt128_uint128; +#endif + +/* The former is defined once, here (otherwise, conflicts for test-c89). The + * latter is for internal use. */ +typedef FStar_UInt128_uint128 FStar_UInt128_t, uint128_t; + +#include "krml/lowstar_endianness.h" + +#endif + +/* Avoid a circular loop: if this header is included via FStar_UInt8_16_32_64, + * then don't bring the uint128 definitions into scope. 
*/ +#ifndef __FStar_UInt_8_16_32_64_H + +#if !defined(KRML_VERIFIED_UINT128) && defined(IS_MSVC64) +#include "fstar_uint128_msvc.h" +#elif !defined(KRML_VERIFIED_UINT128) && defined(HAS_INT128) +#include "fstar_uint128_gcc64.h" +#else +#include "FStar_UInt128_Verified.h" +#include "fstar_uint128_struct_endianness.h" +#endif + +#endif diff --git a/security/nss/lib/freebl/verified/karamel/include/krml/internal/wasmsupport.h b/security/nss/lib/freebl/verified/karamel/include/krml/internal/wasmsupport.h new file mode 100644 index 0000000000..b44fa3f75d --- /dev/null +++ b/security/nss/lib/freebl/verified/karamel/include/krml/internal/wasmsupport.h @@ -0,0 +1,5 @@ +/* Copyright (c) INRIA and Microsoft Corporation. All rights reserved. + Licensed under the Apache 2.0 License. */ + +/* This file is automatically included when compiling with -wasm -d force-c */ +#define WasmSupport_check_buffer_size(X) diff --git a/security/nss/lib/freebl/verified/karamel/include/krml/lowstar_endianness.h b/security/nss/lib/freebl/verified/karamel/include/krml/lowstar_endianness.h new file mode 100644 index 0000000000..48e9fd5795 --- /dev/null +++ b/security/nss/lib/freebl/verified/karamel/include/krml/lowstar_endianness.h @@ -0,0 +1,242 @@ +/* Copyright (c) INRIA and Microsoft Corporation. All rights reserved. + Licensed under the Apache 2.0 License. */ + +#ifndef __LOWSTAR_ENDIANNESS_H +#define __LOWSTAR_ENDIANNESS_H + +#include <string.h> +#include <inttypes.h> + +/******************************************************************************/ +/* Implementing C.fst (part 2: endian-ness macros) */ +/******************************************************************************/ + +/* ... for Linux */ +#if defined(__linux__) || defined(__CYGWIN__) || defined(__USE_SYSTEM_ENDIAN_H__) || defined(__GLIBC__) +#include <endian.h> + +/* ... 
for OSX */ +#elif defined(__APPLE__) +#include <libkern/OSByteOrder.h> +#define htole64(x) OSSwapHostToLittleInt64(x) +#define le64toh(x) OSSwapLittleToHostInt64(x) +#define htobe64(x) OSSwapHostToBigInt64(x) +#define be64toh(x) OSSwapBigToHostInt64(x) + +#define htole16(x) OSSwapHostToLittleInt16(x) +#define le16toh(x) OSSwapLittleToHostInt16(x) +#define htobe16(x) OSSwapHostToBigInt16(x) +#define be16toh(x) OSSwapBigToHostInt16(x) + +#define htole32(x) OSSwapHostToLittleInt32(x) +#define le32toh(x) OSSwapLittleToHostInt32(x) +#define htobe32(x) OSSwapHostToBigInt32(x) +#define be32toh(x) OSSwapBigToHostInt32(x) + +/* ... for Solaris */ +#elif defined(__sun__) +#include <sys/byteorder.h> +#define htole64(x) LE_64(x) +#define le64toh(x) LE_64(x) +#define htobe64(x) BE_64(x) +#define be64toh(x) BE_64(x) + +#define htole16(x) LE_16(x) +#define le16toh(x) LE_16(x) +#define htobe16(x) BE_16(x) +#define be16toh(x) BE_16(x) + +#define htole32(x) LE_32(x) +#define le32toh(x) LE_32(x) +#define htobe32(x) BE_32(x) +#define be32toh(x) BE_32(x) + +/* ... for the BSDs */ +#elif defined(__FreeBSD__) || defined(__NetBSD__) || defined(__DragonFly__) +#include <sys/endian.h> +#elif defined(__OpenBSD__) +#include <endian.h> + +/* ... for Windows (MSVC)... not targeting XBOX 360! */ +#elif defined(_MSC_VER) + +#include <stdlib.h> +#define htobe16(x) _byteswap_ushort(x) +#define htole16(x) (x) +#define be16toh(x) _byteswap_ushort(x) +#define le16toh(x) (x) + +#define htobe32(x) _byteswap_ulong(x) +#define htole32(x) (x) +#define be32toh(x) _byteswap_ulong(x) +#define le32toh(x) (x) + +#define htobe64(x) _byteswap_uint64(x) +#define htole64(x) (x) +#define be64toh(x) _byteswap_uint64(x) +#define le64toh(x) (x) + +/* ... for Windows (GCC-like, e.g. 
mingw or clang) */ +#elif (defined(_WIN32) || defined(_WIN64)) && \ + (defined(__GNUC__) || defined(__clang__)) + +#define htobe16(x) __builtin_bswap16(x) +#define htole16(x) (x) +#define be16toh(x) __builtin_bswap16(x) +#define le16toh(x) (x) + +#define htobe32(x) __builtin_bswap32(x) +#define htole32(x) (x) +#define be32toh(x) __builtin_bswap32(x) +#define le32toh(x) (x) + +#define htobe64(x) __builtin_bswap64(x) +#define htole64(x) (x) +#define be64toh(x) __builtin_bswap64(x) +#define le64toh(x) (x) + +/* ... generic big-endian fallback code */ +#elif defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ + +/* byte swapping code inspired by: + * https://github.com/rweather/arduinolibs/blob/master/libraries/Crypto/utility/EndianUtil.h + * */ + +#define htobe32(x) (x) +#define be32toh(x) (x) +#define htole32(x) \ + (__extension__({ \ + uint32_t _temp = (x); \ + ((_temp >> 24) & 0x000000FF) | ((_temp >> 8) & 0x0000FF00) | \ + ((_temp << 8) & 0x00FF0000) | ((_temp << 24) & 0xFF000000); \ + })) +#define le32toh(x) (htole32((x))) + +#define htobe64(x) (x) +#define be64toh(x) (x) +#define htole64(x) \ + (__extension__({ \ + uint64_t __temp = (x); \ + uint32_t __low = htobe32((uint32_t)__temp); \ + uint32_t __high = htobe32((uint32_t)(__temp >> 32)); \ + (((uint64_t)__low) << 32) | __high; \ + })) +#define le64toh(x) (htole64((x))) + +/* ... 
generic little-endian fallback code */ +#elif defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ + +#define htole32(x) (x) +#define le32toh(x) (x) +#define htobe32(x) \ + (__extension__({ \ + uint32_t _temp = (x); \ + ((_temp >> 24) & 0x000000FF) | ((_temp >> 8) & 0x0000FF00) | \ + ((_temp << 8) & 0x00FF0000) | ((_temp << 24) & 0xFF000000); \ + })) +#define be32toh(x) (htobe32((x))) + +#define htole64(x) (x) +#define le64toh(x) (x) +#define htobe64(x) \ + (__extension__({ \ + uint64_t __temp = (x); \ + uint32_t __low = htobe32((uint32_t)__temp); \ + uint32_t __high = htobe32((uint32_t)(__temp >> 32)); \ + (((uint64_t)__low) << 32) | __high; \ + })) +#define be64toh(x) (htobe64((x))) + +/* ... couldn't determine endian-ness of the target platform */ +#else +#error "Please define __BYTE_ORDER__!" + +#endif /* defined(__linux__) || ... */ + +/* Loads and stores. These avoid undefined behavior due to unaligned memory + * accesses, via memcpy. */ + +inline static uint16_t +load16(uint8_t *b) +{ + uint16_t x; + memcpy(&x, b, 2); + return x; +} + +inline static uint32_t +load32(uint8_t *b) +{ + uint32_t x; + memcpy(&x, b, 4); + return x; +} + +inline static uint64_t +load64(uint8_t *b) +{ + uint64_t x; + memcpy(&x, b, 8); + return x; +} + +inline static void +store16(uint8_t *b, uint16_t i) +{ + memcpy(b, &i, 2); +} + +inline static void +store32(uint8_t *b, uint32_t i) +{ + memcpy(b, &i, 4); +} + +inline static void +store64(uint8_t *b, uint64_t i) +{ + memcpy(b, &i, 8); +} + +/* Legacy accessors so that this header can serve as an implementation of + * C.Endianness */ +#define load16_le(b) (le16toh(load16(b))) +#define store16_le(b, i) (store16(b, htole16(i))) +#define load16_be(b) (be16toh(load16(b))) +#define store16_be(b, i) (store16(b, htobe16(i))) + +#define load32_le(b) (le32toh(load32(b))) +#define store32_le(b, i) (store32(b, htole32(i))) +#define load32_be(b) (be32toh(load32(b))) +#define store32_be(b, i) (store32(b, htobe32(i))) + +#define 
load64_le(b) (le64toh(load64(b))) +#define store64_le(b, i) (store64(b, htole64(i))) +#define load64_be(b) (be64toh(load64(b))) +#define store64_be(b, i) (store64(b, htobe64(i))) + +/* Co-existence of LowStar.Endianness and FStar.Endianness generates name + * conflicts, because of course both insist on having no prefixes. Until a + * prefix is added, or until we truly retire FStar.Endianness, solve this issue + * in an elegant way. */ +#define load16_le0 load16_le +#define store16_le0 store16_le +#define load16_be0 load16_be +#define store16_be0 store16_be + +#define load32_le0 load32_le +#define store32_le0 store32_le +#define load32_be0 load32_be +#define store32_be0 store32_be + +#define load64_le0 load64_le +#define store64_le0 store64_le +#define load64_be0 load64_be +#define store64_be0 store64_be + +#define load128_le0 load128_le +#define store128_le0 store128_le +#define load128_be0 load128_be +#define store128_be0 store128_be + +#endif diff --git a/security/nss/lib/freebl/verified/karamel/include/krmllib.h b/security/nss/lib/freebl/verified/karamel/include/krmllib.h new file mode 100644 index 0000000000..1f461f351c --- /dev/null +++ b/security/nss/lib/freebl/verified/karamel/include/krmllib.h @@ -0,0 +1,28 @@ +#ifndef __KRMLLIB_H +#define __KRMLLIB_H + +/******************************************************************************/ +/* The all-in-one krmllib.h header */ +/******************************************************************************/ + +/* This is a meta-header that is included by default in KaRaMeL generated + * programs. If you wish to have a more lightweight set of headers, or are + * targeting an environment where controlling these macros yourself is + * important, consider using: + * + * krml -minimal + * + * to disable the inclusion of this file (note: this also disables the default + * argument "-bundle FStar.*"). You can then include the headers of your choice + * one by one, using -add-early-include. 
*/ + +#include "krml/internal/target.h" +#include "krml/internal/callconv.h" +#include "krml/internal/builtin.h" +#include "krml/internal/debug.h" +#include "krml/internal/types.h" + +#include "krml/lowstar_endianness.h" +#include "krml/fstar_int.h" + +#endif /* __KRMLLIB_H */ diff --git a/security/nss/lib/freebl/verified/karamel/krmllib/dist/minimal/FStar_UInt128.h b/security/nss/lib/freebl/verified/karamel/krmllib/dist/minimal/FStar_UInt128.h new file mode 100644 index 0000000000..4affcee353 --- /dev/null +++ b/security/nss/lib/freebl/verified/karamel/krmllib/dist/minimal/FStar_UInt128.h @@ -0,0 +1,75 @@ +/* + Copyright (c) INRIA and Microsoft Corporation. All rights reserved. + Licensed under the Apache 2.0 License. +*/ + +#ifndef __FStar_UInt128_H +#define __FStar_UInt128_H + +#include <inttypes.h> +#include <stdbool.h> +#include "krml/internal/compat.h" +#include "krml/lowstar_endianness.h" +#include "krml/internal/types.h" +#include "krml/internal/target.h" +static inline FStar_UInt128_uint128 +FStar_UInt128_add(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b); + +static inline FStar_UInt128_uint128 +FStar_UInt128_add_underspec(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b); + +static inline FStar_UInt128_uint128 +FStar_UInt128_add_mod(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b); + +static inline FStar_UInt128_uint128 +FStar_UInt128_sub(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b); + +static inline FStar_UInt128_uint128 +FStar_UInt128_sub_underspec(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b); + +static inline FStar_UInt128_uint128 +FStar_UInt128_sub_mod(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b); + +static inline FStar_UInt128_uint128 +FStar_UInt128_logand(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b); + +static inline FStar_UInt128_uint128 +FStar_UInt128_logxor(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b); + +static inline FStar_UInt128_uint128 +FStar_UInt128_logor(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b); + 
+static inline FStar_UInt128_uint128 FStar_UInt128_lognot(FStar_UInt128_uint128 a); + +static inline FStar_UInt128_uint128 +FStar_UInt128_shift_left(FStar_UInt128_uint128 a, uint32_t s); + +static inline FStar_UInt128_uint128 +FStar_UInt128_shift_right(FStar_UInt128_uint128 a, uint32_t s); + +static inline bool FStar_UInt128_eq(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b); + +static inline bool FStar_UInt128_gt(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b); + +static inline bool FStar_UInt128_lt(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b); + +static inline bool FStar_UInt128_gte(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b); + +static inline bool FStar_UInt128_lte(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b); + +static inline FStar_UInt128_uint128 +FStar_UInt128_eq_mask(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b); + +static inline FStar_UInt128_uint128 +FStar_UInt128_gte_mask(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b); + +static inline FStar_UInt128_uint128 FStar_UInt128_uint64_to_uint128(uint64_t a); + +static inline uint64_t FStar_UInt128_uint128_to_uint64(FStar_UInt128_uint128 a); + +static inline FStar_UInt128_uint128 FStar_UInt128_mul32(uint64_t x, uint32_t y); + +static inline FStar_UInt128_uint128 FStar_UInt128_mul_wide(uint64_t x, uint64_t y); + +#define __FStar_UInt128_H_DEFINED +#endif diff --git a/security/nss/lib/freebl/verified/karamel/krmllib/dist/minimal/FStar_UInt128_Verified.h b/security/nss/lib/freebl/verified/karamel/krmllib/dist/minimal/FStar_UInt128_Verified.h new file mode 100644 index 0000000000..8f235c3146 --- /dev/null +++ b/security/nss/lib/freebl/verified/karamel/krmllib/dist/minimal/FStar_UInt128_Verified.h @@ -0,0 +1,327 @@ +/* + Copyright (c) INRIA and Microsoft Corporation. All rights reserved. + Licensed under the Apache 2.0 License. 
+*/ + +#ifndef __FStar_UInt128_Verified_H +#define __FStar_UInt128_Verified_H + +#include "FStar_UInt_8_16_32_64.h" +#include <inttypes.h> +#include <stdbool.h> +#include "krml/internal/types.h" +#include "krml/internal/target.h" +static inline uint64_t +FStar_UInt128_constant_time_carry(uint64_t a, uint64_t b) +{ + return (a ^ ((a ^ b) | ((a - b) ^ b))) >> (uint32_t)63U; +} + +static inline uint64_t +FStar_UInt128_carry(uint64_t a, uint64_t b) +{ + return FStar_UInt128_constant_time_carry(a, b); +} + +static inline FStar_UInt128_uint128 +FStar_UInt128_add(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b) +{ + FStar_UInt128_uint128 lit; + lit.low = a.low + b.low; + lit.high = a.high + b.high + FStar_UInt128_carry(a.low + b.low, b.low); + return lit; +} + +static inline FStar_UInt128_uint128 +FStar_UInt128_add_underspec(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b) +{ + FStar_UInt128_uint128 lit; + lit.low = a.low + b.low; + lit.high = a.high + b.high + FStar_UInt128_carry(a.low + b.low, b.low); + return lit; +} + +static inline FStar_UInt128_uint128 +FStar_UInt128_add_mod(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b) +{ + FStar_UInt128_uint128 lit; + lit.low = a.low + b.low; + lit.high = a.high + b.high + FStar_UInt128_carry(a.low + b.low, b.low); + return lit; +} + +static inline FStar_UInt128_uint128 +FStar_UInt128_sub(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b) +{ + FStar_UInt128_uint128 lit; + lit.low = a.low - b.low; + lit.high = a.high - b.high - FStar_UInt128_carry(a.low, a.low - b.low); + return lit; +} + +static inline FStar_UInt128_uint128 +FStar_UInt128_sub_underspec(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b) +{ + FStar_UInt128_uint128 lit; + lit.low = a.low - b.low; + lit.high = a.high - b.high - FStar_UInt128_carry(a.low, a.low - b.low); + return lit; +} + +static inline FStar_UInt128_uint128 +FStar_UInt128_sub_mod_impl(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b) +{ + FStar_UInt128_uint128 lit; + lit.low = a.low - b.low; + 
lit.high = a.high - b.high - FStar_UInt128_carry(a.low, a.low - b.low); + return lit; +} + +static inline FStar_UInt128_uint128 +FStar_UInt128_sub_mod(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b) +{ + return FStar_UInt128_sub_mod_impl(a, b); +} + +static inline FStar_UInt128_uint128 +FStar_UInt128_logand(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b) +{ + FStar_UInt128_uint128 lit; + lit.low = a.low & b.low; + lit.high = a.high & b.high; + return lit; +} + +static inline FStar_UInt128_uint128 +FStar_UInt128_logxor(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b) +{ + FStar_UInt128_uint128 lit; + lit.low = a.low ^ b.low; + lit.high = a.high ^ b.high; + return lit; +} + +static inline FStar_UInt128_uint128 +FStar_UInt128_logor(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b) +{ + FStar_UInt128_uint128 lit; + lit.low = a.low | b.low; + lit.high = a.high | b.high; + return lit; +} + +static inline FStar_UInt128_uint128 +FStar_UInt128_lognot(FStar_UInt128_uint128 a) +{ + FStar_UInt128_uint128 lit; + lit.low = ~a.low; + lit.high = ~a.high; + return lit; +} + +static uint32_t FStar_UInt128_u32_64 = (uint32_t)64U; + +static inline uint64_t +FStar_UInt128_add_u64_shift_left(uint64_t hi, uint64_t lo, uint32_t s) +{ + return (hi << s) + (lo >> (FStar_UInt128_u32_64 - s)); +} + +static inline uint64_t +FStar_UInt128_add_u64_shift_left_respec(uint64_t hi, uint64_t lo, uint32_t s) +{ + return FStar_UInt128_add_u64_shift_left(hi, lo, s); +} + +static inline FStar_UInt128_uint128 +FStar_UInt128_shift_left_small(FStar_UInt128_uint128 a, uint32_t s) +{ + if (s == (uint32_t)0U) { + return a; + } else { + FStar_UInt128_uint128 lit; + lit.low = a.low << s; + lit.high = FStar_UInt128_add_u64_shift_left_respec(a.high, a.low, s); + return lit; + } +} + +static inline FStar_UInt128_uint128 +FStar_UInt128_shift_left_large(FStar_UInt128_uint128 a, uint32_t s) +{ + FStar_UInt128_uint128 lit; + lit.low = (uint64_t)0U; + lit.high = a.low << (s - FStar_UInt128_u32_64); + return lit; +} 
+ +static inline FStar_UInt128_uint128 +FStar_UInt128_shift_left(FStar_UInt128_uint128 a, uint32_t s) +{ + if (s < FStar_UInt128_u32_64) { + return FStar_UInt128_shift_left_small(a, s); + } else { + return FStar_UInt128_shift_left_large(a, s); + } +} + +static inline uint64_t +FStar_UInt128_add_u64_shift_right(uint64_t hi, uint64_t lo, uint32_t s) +{ + return (lo >> s) + (hi << (FStar_UInt128_u32_64 - s)); +} + +static inline uint64_t +FStar_UInt128_add_u64_shift_right_respec(uint64_t hi, uint64_t lo, uint32_t s) +{ + return FStar_UInt128_add_u64_shift_right(hi, lo, s); +} + +static inline FStar_UInt128_uint128 +FStar_UInt128_shift_right_small(FStar_UInt128_uint128 a, uint32_t s) +{ + if (s == (uint32_t)0U) { + return a; + } else { + FStar_UInt128_uint128 lit; + lit.low = FStar_UInt128_add_u64_shift_right_respec(a.high, a.low, s); + lit.high = a.high >> s; + return lit; + } +} + +static inline FStar_UInt128_uint128 +FStar_UInt128_shift_right_large(FStar_UInt128_uint128 a, uint32_t s) +{ + FStar_UInt128_uint128 lit; + lit.low = a.high >> (s - FStar_UInt128_u32_64); + lit.high = (uint64_t)0U; + return lit; +} + +static inline FStar_UInt128_uint128 +FStar_UInt128_shift_right(FStar_UInt128_uint128 a, uint32_t s) +{ + if (s < FStar_UInt128_u32_64) { + return FStar_UInt128_shift_right_small(a, s); + } else { + return FStar_UInt128_shift_right_large(a, s); + } +} + +static inline bool +FStar_UInt128_eq(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b) +{ + return a.low == b.low && a.high == b.high; +} + +static inline bool +FStar_UInt128_gt(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b) +{ + return a.high > b.high || (a.high == b.high && a.low > b.low); +} + +static inline bool +FStar_UInt128_lt(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b) +{ + return a.high < b.high || (a.high == b.high && a.low < b.low); +} + +static inline bool +FStar_UInt128_gte(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b) +{ + return a.high > b.high || (a.high == b.high && a.low >= 
b.low); +} + +static inline bool +FStar_UInt128_lte(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b) +{ + return a.high < b.high || (a.high == b.high && a.low <= b.low); +} + +static inline FStar_UInt128_uint128 +FStar_UInt128_eq_mask(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b) +{ + FStar_UInt128_uint128 lit; + lit.low = FStar_UInt64_eq_mask(a.low, b.low) & FStar_UInt64_eq_mask(a.high, b.high); + lit.high = FStar_UInt64_eq_mask(a.low, b.low) & FStar_UInt64_eq_mask(a.high, b.high); + return lit; +} + +static inline FStar_UInt128_uint128 +FStar_UInt128_gte_mask(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b) +{ + FStar_UInt128_uint128 lit; + lit.low = + (FStar_UInt64_gte_mask(a.high, b.high) & ~FStar_UInt64_eq_mask(a.high, b.high)) | (FStar_UInt64_eq_mask(a.high, b.high) & FStar_UInt64_gte_mask(a.low, b.low)); + lit.high = + (FStar_UInt64_gte_mask(a.high, b.high) & ~FStar_UInt64_eq_mask(a.high, b.high)) | (FStar_UInt64_eq_mask(a.high, b.high) & FStar_UInt64_gte_mask(a.low, b.low)); + return lit; +} + +static inline FStar_UInt128_uint128 +FStar_UInt128_uint64_to_uint128(uint64_t a) +{ + FStar_UInt128_uint128 lit; + lit.low = a; + lit.high = (uint64_t)0U; + return lit; +} + +static inline uint64_t +FStar_UInt128_uint128_to_uint64(FStar_UInt128_uint128 a) +{ + return a.low; +} + +static inline uint64_t +FStar_UInt128_u64_mod_32(uint64_t a) +{ + return a & (uint64_t)0xffffffffU; +} + +static uint32_t FStar_UInt128_u32_32 = (uint32_t)32U; + +static inline uint64_t +FStar_UInt128_u32_combine(uint64_t hi, uint64_t lo) +{ + return lo + (hi << FStar_UInt128_u32_32); +} + +static inline FStar_UInt128_uint128 +FStar_UInt128_mul32(uint64_t x, uint32_t y) +{ + FStar_UInt128_uint128 lit; + lit.low = + FStar_UInt128_u32_combine((x >> FStar_UInt128_u32_32) * (uint64_t)y + (FStar_UInt128_u64_mod_32(x) * (uint64_t)y >> FStar_UInt128_u32_32), + FStar_UInt128_u64_mod_32(FStar_UInt128_u64_mod_32(x) * (uint64_t)y)); + lit.high = + ((x >> FStar_UInt128_u32_32) * (uint64_t)y + 
(FStar_UInt128_u64_mod_32(x) * (uint64_t)y >> FStar_UInt128_u32_32)) >> FStar_UInt128_u32_32; + return lit; +} + +static inline uint64_t +FStar_UInt128_u32_combine_(uint64_t hi, uint64_t lo) +{ + return lo + (hi << FStar_UInt128_u32_32); +} + +static inline FStar_UInt128_uint128 +FStar_UInt128_mul_wide(uint64_t x, uint64_t y) +{ + FStar_UInt128_uint128 lit; + lit.low = + FStar_UInt128_u32_combine_(FStar_UInt128_u64_mod_32(x) * (y >> FStar_UInt128_u32_32) + + FStar_UInt128_u64_mod_32((x >> FStar_UInt128_u32_32) * FStar_UInt128_u64_mod_32(y) + (FStar_UInt128_u64_mod_32(x) * FStar_UInt128_u64_mod_32(y) >> FStar_UInt128_u32_32)), + FStar_UInt128_u64_mod_32(FStar_UInt128_u64_mod_32(x) * FStar_UInt128_u64_mod_32(y))); + lit.high = + (x >> FStar_UInt128_u32_32) * (y >> FStar_UInt128_u32_32) + + (((x >> FStar_UInt128_u32_32) * FStar_UInt128_u64_mod_32(y) + (FStar_UInt128_u64_mod_32(x) * FStar_UInt128_u64_mod_32(y) >> FStar_UInt128_u32_32)) >> FStar_UInt128_u32_32) + + ((FStar_UInt128_u64_mod_32(x) * (y >> FStar_UInt128_u32_32) + + FStar_UInt128_u64_mod_32((x >> FStar_UInt128_u32_32) * FStar_UInt128_u64_mod_32(y) + (FStar_UInt128_u64_mod_32(x) * FStar_UInt128_u64_mod_32(y) >> FStar_UInt128_u32_32))) >> + FStar_UInt128_u32_32); + return lit; +} + +#define __FStar_UInt128_Verified_H_DEFINED +#endif diff --git a/security/nss/lib/freebl/verified/karamel/krmllib/dist/minimal/FStar_UInt_8_16_32_64.h b/security/nss/lib/freebl/verified/karamel/krmllib/dist/minimal/FStar_UInt_8_16_32_64.h new file mode 100644 index 0000000000..51f3eead1e --- /dev/null +++ b/security/nss/lib/freebl/verified/karamel/krmllib/dist/minimal/FStar_UInt_8_16_32_64.h @@ -0,0 +1,218 @@ +/* + Copyright (c) INRIA and Microsoft Corporation. All rights reserved. + Licensed under the Apache 2.0 License. 
+*/ + +#ifndef __FStar_UInt_8_16_32_64_H +#define __FStar_UInt_8_16_32_64_H + +#include <inttypes.h> +#include <stdbool.h> +#include "krml/internal/compat.h" +#include "krml/lowstar_endianness.h" +#include "krml/internal/types.h" +#include "krml/internal/target.h" +extern Prims_int FStar_UInt64_n; + +extern bool FStar_UInt64_uu___is_Mk(uint64_t projectee); + +extern Prims_int FStar_UInt64___proj__Mk__item__v(uint64_t projectee); + +extern Prims_int FStar_UInt64_v(uint64_t x); + +extern uint64_t FStar_UInt64_uint_to_t(Prims_int x); + +extern uint64_t FStar_UInt64_zero; + +extern uint64_t FStar_UInt64_one; + +extern uint64_t FStar_UInt64_minus(uint64_t a); + +extern uint32_t FStar_UInt64_n_minus_one; + +static inline uint64_t +FStar_UInt64_eq_mask(uint64_t a, uint64_t b) +{ + uint64_t x = a ^ b; + uint64_t minus_x = ~x + (uint64_t)1U; + uint64_t x_or_minus_x = x | minus_x; + uint64_t xnx = x_or_minus_x >> (uint32_t)63U; + return xnx - (uint64_t)1U; +} + +static inline uint64_t +FStar_UInt64_gte_mask(uint64_t a, uint64_t b) +{ + uint64_t x = a; + uint64_t y = b; + uint64_t x_xor_y = x ^ y; + uint64_t x_sub_y = x - y; + uint64_t x_sub_y_xor_y = x_sub_y ^ y; + uint64_t q = x_xor_y | x_sub_y_xor_y; + uint64_t x_xor_q = x ^ q; + uint64_t x_xor_q_ = x_xor_q >> (uint32_t)63U; + return x_xor_q_ - (uint64_t)1U; +} + +extern Prims_string FStar_UInt64_to_string(uint64_t uu___); + +extern Prims_string FStar_UInt64_to_string_hex(uint64_t uu___); + +extern Prims_string FStar_UInt64_to_string_hex_pad(uint64_t uu___); + +extern uint64_t FStar_UInt64_of_string(Prims_string uu___); + +extern Prims_int FStar_UInt32_n; + +extern bool FStar_UInt32_uu___is_Mk(uint32_t projectee); + +extern Prims_int FStar_UInt32___proj__Mk__item__v(uint32_t projectee); + +extern Prims_int FStar_UInt32_v(uint32_t x); + +extern uint32_t FStar_UInt32_uint_to_t(Prims_int x); + +extern uint32_t FStar_UInt32_zero; + +extern uint32_t FStar_UInt32_one; + +extern uint32_t FStar_UInt32_minus(uint32_t a); + +extern 
uint32_t FStar_UInt32_n_minus_one; + +static inline uint32_t +FStar_UInt32_eq_mask(uint32_t a, uint32_t b) +{ + uint32_t x = a ^ b; + uint32_t minus_x = ~x + (uint32_t)1U; + uint32_t x_or_minus_x = x | minus_x; + uint32_t xnx = x_or_minus_x >> (uint32_t)31U; + return xnx - (uint32_t)1U; +} + +static inline uint32_t +FStar_UInt32_gte_mask(uint32_t a, uint32_t b) +{ + uint32_t x = a; + uint32_t y = b; + uint32_t x_xor_y = x ^ y; + uint32_t x_sub_y = x - y; + uint32_t x_sub_y_xor_y = x_sub_y ^ y; + uint32_t q = x_xor_y | x_sub_y_xor_y; + uint32_t x_xor_q = x ^ q; + uint32_t x_xor_q_ = x_xor_q >> (uint32_t)31U; + return x_xor_q_ - (uint32_t)1U; +} + +extern Prims_string FStar_UInt32_to_string(uint32_t uu___); + +extern Prims_string FStar_UInt32_to_string_hex(uint32_t uu___); + +extern Prims_string FStar_UInt32_to_string_hex_pad(uint32_t uu___); + +extern uint32_t FStar_UInt32_of_string(Prims_string uu___); + +extern Prims_int FStar_UInt16_n; + +extern bool FStar_UInt16_uu___is_Mk(uint16_t projectee); + +extern Prims_int FStar_UInt16___proj__Mk__item__v(uint16_t projectee); + +extern Prims_int FStar_UInt16_v(uint16_t x); + +extern uint16_t FStar_UInt16_uint_to_t(Prims_int x); + +extern uint16_t FStar_UInt16_zero; + +extern uint16_t FStar_UInt16_one; + +extern uint16_t FStar_UInt16_minus(uint16_t a); + +extern uint32_t FStar_UInt16_n_minus_one; + +static inline uint16_t +FStar_UInt16_eq_mask(uint16_t a, uint16_t b) +{ + uint16_t x = a ^ b; + uint16_t minus_x = ~x + (uint16_t)1U; + uint16_t x_or_minus_x = x | minus_x; + uint16_t xnx = x_or_minus_x >> (uint32_t)15U; + return xnx - (uint16_t)1U; +} + +static inline uint16_t +FStar_UInt16_gte_mask(uint16_t a, uint16_t b) +{ + uint16_t x = a; + uint16_t y = b; + uint16_t x_xor_y = x ^ y; + uint16_t x_sub_y = x - y; + uint16_t x_sub_y_xor_y = x_sub_y ^ y; + uint16_t q = x_xor_y | x_sub_y_xor_y; + uint16_t x_xor_q = x ^ q; + uint16_t x_xor_q_ = x_xor_q >> (uint32_t)15U; + return x_xor_q_ - (uint16_t)1U; +} + +extern 
Prims_string FStar_UInt16_to_string(uint16_t uu___); + +extern Prims_string FStar_UInt16_to_string_hex(uint16_t uu___); + +extern Prims_string FStar_UInt16_to_string_hex_pad(uint16_t uu___); + +extern uint16_t FStar_UInt16_of_string(Prims_string uu___); + +extern Prims_int FStar_UInt8_n; + +extern bool FStar_UInt8_uu___is_Mk(uint8_t projectee); + +extern Prims_int FStar_UInt8___proj__Mk__item__v(uint8_t projectee); + +extern Prims_int FStar_UInt8_v(uint8_t x); + +extern uint8_t FStar_UInt8_uint_to_t(Prims_int x); + +extern uint8_t FStar_UInt8_zero; + +extern uint8_t FStar_UInt8_one; + +extern uint8_t FStar_UInt8_minus(uint8_t a); + +extern uint32_t FStar_UInt8_n_minus_one; + +static inline uint8_t +FStar_UInt8_eq_mask(uint8_t a, uint8_t b) +{ + uint8_t x = a ^ b; + uint8_t minus_x = ~x + (uint8_t)1U; + uint8_t x_or_minus_x = x | minus_x; + uint8_t xnx = x_or_minus_x >> (uint32_t)7U; + return xnx - (uint8_t)1U; +} + +static inline uint8_t +FStar_UInt8_gte_mask(uint8_t a, uint8_t b) +{ + uint8_t x = a; + uint8_t y = b; + uint8_t x_xor_y = x ^ y; + uint8_t x_sub_y = x - y; + uint8_t x_sub_y_xor_y = x_sub_y ^ y; + uint8_t q = x_xor_y | x_sub_y_xor_y; + uint8_t x_xor_q = x ^ q; + uint8_t x_xor_q_ = x_xor_q >> (uint32_t)7U; + return x_xor_q_ - (uint8_t)1U; +} + +extern Prims_string FStar_UInt8_to_string(uint8_t uu___); + +extern Prims_string FStar_UInt8_to_string_hex(uint8_t uu___); + +extern Prims_string FStar_UInt8_to_string_hex_pad(uint8_t uu___); + +extern uint8_t FStar_UInt8_of_string(Prims_string uu___); + +typedef uint8_t FStar_UInt8_byte; + +#define __FStar_UInt_8_16_32_64_H_DEFINED +#endif diff --git a/security/nss/lib/freebl/verified/karamel/krmllib/dist/minimal/LowStar_Endianness.h b/security/nss/lib/freebl/verified/karamel/krmllib/dist/minimal/LowStar_Endianness.h new file mode 100644 index 0000000000..5feb077a48 --- /dev/null +++ b/security/nss/lib/freebl/verified/karamel/krmllib/dist/minimal/LowStar_Endianness.h @@ -0,0 +1,25 @@ +/* + Copyright (c) INRIA 
and Microsoft Corporation. All rights reserved. + Licensed under the Apache 2.0 License. +*/ + +#ifndef __LowStar_Endianness_H +#define __LowStar_Endianness_H + +#include "FStar_UInt128.h" +#include <inttypes.h> +#include <stdbool.h> +#include "krml/internal/compat.h" +#include "krml/lowstar_endianness.h" +#include "krml/internal/types.h" +#include "krml/internal/target.h" +static inline void store128_le(uint8_t *x0, FStar_UInt128_uint128 x1); + +static inline FStar_UInt128_uint128 load128_le(uint8_t *x0); + +static inline void store128_be(uint8_t *x0, FStar_UInt128_uint128 x1); + +static inline FStar_UInt128_uint128 load128_be(uint8_t *x0); + +#define __LowStar_Endianness_H_DEFINED +#endif diff --git a/security/nss/lib/freebl/verified/karamel/krmllib/dist/minimal/Makefile.basic b/security/nss/lib/freebl/verified/karamel/krmllib/dist/minimal/Makefile.basic new file mode 100644 index 0000000000..672b58015c --- /dev/null +++ b/security/nss/lib/freebl/verified/karamel/krmllib/dist/minimal/Makefile.basic @@ -0,0 +1,56 @@ +# A basic Makefile that KaRaMeL copies in the output directory; this is not +# guaranteed to work and will only work well for very simple projects. This +# Makefile uses: +# - the custom C files passed to your krml invocation +# - the custom C flags passed to your krml invocation +# - the -o option passed to your krml invocation + +include Makefile.include + +ifeq (,$(KRML_HOME)) + $(error please define KRML_HOME to point to the root of your KaRaMeL git checkout) +endif + +CFLAGS += -I. 
-I $(KRML_HOME)/include -I $(KRML_HOME)/krmllib/dist/minimal +CFLAGS += -Wall -Wextra -Werror -std=c11 -Wno-unused-variable \ + -Wno-unknown-warning-option -Wno-unused-but-set-variable -Wno-unused-function \ + -Wno-unused-parameter -Wno-infinite-recursion \ + -g -fwrapv -D_BSD_SOURCE -D_DEFAULT_SOURCE +ifeq ($(OS),Windows_NT) +CFLAGS += -D__USE_MINGW_ANSI_STDIO +else +CFLAGS += -fPIC +endif +CFLAGS += $(USER_CFLAGS) + +SOURCES += $(ALL_C_FILES) $(USER_C_FILES) +ifneq (,$(BLACKLIST)) + SOURCES := $(filter-out $(BLACKLIST),$(SOURCES)) +endif +OBJS += $(patsubst %.c,%.o,$(SOURCES)) + +all: $(USER_TARGET) + +$(USER_TARGET): $(OBJS) + +AR ?= ar + +%.a: + $(AR) cr $@ $^ + +%.exe: + $(CC) $(CFLAGS) -o $@ $^ $(KRML_HOME)/krmllib/dist/generic/libkrmllib.a + +%.so: + $(CC) $(CFLAGS) -shared -o $@ $^ + +%.d: %.c + @set -e; rm -f $@; \ + $(CC) -MM $(CFLAGS) $< > $@.$$$$; \ + sed 's,\($(notdir $*)\)\.o[ :]*,$(dir $@)\1.o $@ : ,g' < $@.$$$$ > $@; \ + rm -f $@.$$$$ + +include $(patsubst %.c,%.d,$(SOURCES)) + +clean: + rm -rf *.o *.d $(USER_TARGET) diff --git a/security/nss/lib/freebl/verified/karamel/krmllib/dist/minimal/Makefile.include b/security/nss/lib/freebl/verified/karamel/krmllib/dist/minimal/Makefile.include new file mode 100644 index 0000000000..ad53217184 --- /dev/null +++ b/security/nss/lib/freebl/verified/karamel/krmllib/dist/minimal/Makefile.include @@ -0,0 +1,5 @@ +USER_TARGET=libkrmllib.a +USER_CFLAGS= +USER_C_FILES=fstar_uint128.c +ALL_C_FILES= +ALL_H_FILES=FStar_UInt128.h FStar_UInt_8_16_32_64.h LowStar_Endianness.h diff --git a/security/nss/lib/freebl/verified/karamel/krmllib/dist/minimal/fstar_uint128_gcc64.h b/security/nss/lib/freebl/verified/karamel/krmllib/dist/minimal/fstar_uint128_gcc64.h new file mode 100644 index 0000000000..33cff6b6d4 --- /dev/null +++ b/security/nss/lib/freebl/verified/karamel/krmllib/dist/minimal/fstar_uint128_gcc64.h @@ -0,0 +1,225 @@ +/* Copyright (c) INRIA and Microsoft Corporation. All rights reserved. 
+ Licensed under the Apache 2.0 License. */ + +/******************************************************************************/ +/* Machine integers (128-bit arithmetic) */ +/******************************************************************************/ + +/* This header contains two things. + * + * First, an implementation of 128-bit arithmetic suitable for 64-bit GCC and + * Clang, i.e. all the operations from FStar.UInt128. + * + * Second, 128-bit operations from C.Endianness (or LowStar.Endianness), + * suitable for any compiler and platform (via a series of ifdefs). This second + * part is unfortunate, and should be fixed by moving {load,store}128_{be,le} to + * FStar.UInt128 to avoid a maze of preprocessor guards and hand-written code. + * */ + +/* This file is used for both the minimal and generic krmllib distributions. As + * such, it assumes that the machine integers have been bundled the exact same + * way in both cases. */ + +#ifndef FSTAR_UINT128_GCC64 +#define FSTAR_UINT128_GCC64 + +#include "FStar_UInt128.h" +#include "FStar_UInt_8_16_32_64.h" +#include "LowStar_Endianness.h" + +/* GCC + using native unsigned __int128 support */ + +inline static uint128_t +load128_le(uint8_t *b) +{ + uint128_t l = (uint128_t)load64_le(b); + uint128_t h = (uint128_t)load64_le(b + 8); + return (h << 64 | l); +} + +inline static void +store128_le(uint8_t *b, uint128_t n) +{ + store64_le(b, (uint64_t)n); + store64_le(b + 8, (uint64_t)(n >> 64)); +} + +inline static uint128_t +load128_be(uint8_t *b) +{ + uint128_t h = (uint128_t)load64_be(b); + uint128_t l = (uint128_t)load64_be(b + 8); + return (h << 64 | l); +} + +inline static void +store128_be(uint8_t *b, uint128_t n) +{ + store64_be(b, (uint64_t)(n >> 64)); + store64_be(b + 8, (uint64_t)n); +} + +inline static uint128_t +FStar_UInt128_add(uint128_t x, uint128_t y) +{ + return x + y; +} + +inline static uint128_t +FStar_UInt128_mul(uint128_t x, uint128_t y) +{ + return x * y; +} + +inline static uint128_t 
+FStar_UInt128_add_mod(uint128_t x, uint128_t y) +{ + return x + y; +} + +inline static uint128_t +FStar_UInt128_sub(uint128_t x, uint128_t y) +{ + return x - y; +} + +inline static uint128_t +FStar_UInt128_sub_mod(uint128_t x, uint128_t y) +{ + return x - y; +} + +inline static uint128_t +FStar_UInt128_logand(uint128_t x, uint128_t y) +{ + return x & y; +} + +inline static uint128_t +FStar_UInt128_logor(uint128_t x, uint128_t y) +{ + return x | y; +} + +inline static uint128_t +FStar_UInt128_logxor(uint128_t x, uint128_t y) +{ + return x ^ y; +} + +inline static uint128_t +FStar_UInt128_lognot(uint128_t x) +{ + return ~x; +} + +inline static uint128_t +FStar_UInt128_shift_left(uint128_t x, uint32_t y) +{ + return x << y; +} + +inline static uint128_t +FStar_UInt128_shift_right(uint128_t x, uint32_t y) +{ + return x >> y; +} + +inline static uint128_t +FStar_UInt128_uint64_to_uint128(uint64_t x) +{ + return (uint128_t)x; +} + +inline static uint64_t +FStar_UInt128_uint128_to_uint64(uint128_t x) +{ + return (uint64_t)x; +} + +inline static uint128_t +FStar_UInt128_mul_wide(uint64_t x, uint64_t y) +{ + return ((uint128_t)x) * y; +} + +inline static uint128_t +FStar_UInt128_eq_mask(uint128_t x, uint128_t y) +{ + uint64_t mask = + FStar_UInt64_eq_mask((uint64_t)(x >> 64), (uint64_t)(y >> 64)) & + FStar_UInt64_eq_mask(x, y); + return ((uint128_t)mask) << 64 | mask; +} + +inline static uint128_t +FStar_UInt128_gte_mask(uint128_t x, uint128_t y) +{ + uint64_t mask = + (FStar_UInt64_gte_mask(x >> 64, y >> 64) & + ~(FStar_UInt64_eq_mask(x >> 64, y >> 64))) | + (FStar_UInt64_eq_mask(x >> 64, y >> 64) & FStar_UInt64_gte_mask(x, y)); + return ((uint128_t)mask) << 64 | mask; +} + +inline static uint64_t +FStar_UInt128___proj__Mkuint128__item__low(uint128_t x) +{ + return (uint64_t)x; +} + +inline static uint64_t +FStar_UInt128___proj__Mkuint128__item__high(uint128_t x) +{ + return (uint64_t)(x >> 64); +} + +inline static uint128_t +FStar_UInt128_add_underspec(uint128_t x, 
uint128_t y) +{ + return x + y; +} + +inline static uint128_t +FStar_UInt128_sub_underspec(uint128_t x, uint128_t y) +{ + return x - y; +} + +inline static bool +FStar_UInt128_eq(uint128_t x, uint128_t y) +{ + return x == y; +} + +inline static bool +FStar_UInt128_gt(uint128_t x, uint128_t y) +{ + return x > y; +} + +inline static bool +FStar_UInt128_lt(uint128_t x, uint128_t y) +{ + return x < y; +} + +inline static bool +FStar_UInt128_gte(uint128_t x, uint128_t y) +{ + return x >= y; +} + +inline static bool +FStar_UInt128_lte(uint128_t x, uint128_t y) +{ + return x <= y; +} + +inline static uint128_t +FStar_UInt128_mul32(uint64_t x, uint32_t y) +{ + return (uint128_t)x * (uint128_t)y; +} + +#endif diff --git a/security/nss/lib/freebl/verified/karamel/krmllib/dist/minimal/fstar_uint128_msvc.h b/security/nss/lib/freebl/verified/karamel/krmllib/dist/minimal/fstar_uint128_msvc.h new file mode 100644 index 0000000000..e9b366e259 --- /dev/null +++ b/security/nss/lib/freebl/verified/karamel/krmllib/dist/minimal/fstar_uint128_msvc.h @@ -0,0 +1,571 @@ +/* Copyright (c) INRIA and Microsoft Corporation. All rights reserved. + Licensed under the Apache 2.0 License. 
*/ + +/* This file was generated by KaRaMeL <https://github.com/FStarLang/karamel> + * then hand-edited to use MSVC intrinsics KaRaMeL invocation: + * C:\users\barrybo\mitls2c\karamel\_build\src\Karamel.native -minimal -fnouint128 C:/users/barrybo/mitls2c/FStar/ulib/FStar.UInt128.fst -tmpdir ../secure_api/out/runtime_switch/uint128 -skip-compilation -add-include "krmllib0.h" -drop FStar.Int.Cast.Full -bundle FStar.UInt128=FStar.*,Prims + * F* version: 15104ff8 + * KaRaMeL version: 318b7fa8 + */ + +#ifndef FSTAR_UINT128_MSVC +#define FSTAR_UINT128_MSVC + +#include "krml/internal/types.h" +#include "FStar_UInt128.h" +#include "FStar_UInt_8_16_32_64.h" + +#ifndef _MSC_VER +#error This file only works with the MSVC compiler +#endif + +/* JP: need to rip out HAS_OPTIMIZED since the header guards in types.h are now + * done properly and only include this file when we know for sure we are on + * 64-bit MSVC. */ + +#if defined(_M_X64) && !defined(KRML_VERIFIED_UINT128) +#define HAS_OPTIMIZED 1 +#else +#define HAS_OPTIMIZED 0 +#endif + +// Define .low and .high in terms of the __m128i fields, to reduce +// the amount of churn in this file. 
+#if HAS_OPTIMIZED +#include <intrin.h> +#include <immintrin.h> +#define low m128i_u64[0] +#define high m128i_u64[1] +#endif + +inline static FStar_UInt128_uint128 +load128_le(uint8_t *b) +{ +#if HAS_OPTIMIZED + return _mm_loadu_si128((__m128i *)b); +#else + FStar_UInt128_uint128 lit; + lit.low = load64_le(b); + lit.high = load64_le(b + 8); + return lit; +#endif +} + +inline static void +store128_le(uint8_t *b, FStar_UInt128_uint128 n) +{ + store64_le(b, n.low); + store64_le(b + 8, n.high); +} + +inline static FStar_UInt128_uint128 +load128_be(uint8_t *b) +{ + uint64_t l = load64_be(b + 8); + uint64_t h = load64_be(b); +#if HAS_OPTIMIZED + return _mm_set_epi64x(h, l); +#else + FStar_UInt128_uint128 lit; + lit.low = l; + lit.high = h; + return lit; +#endif +} + +inline static void +store128_be(uint8_t *b, uint128_t n) +{ + store64_be(b, n.high); + store64_be(b + 8, n.low); +} + +inline static uint64_t +FStar_UInt128_constant_time_carry(uint64_t a, uint64_t b) +{ + return (a ^ (a ^ b | a - b ^ b)) >> (uint32_t)63U; +} + +inline static uint64_t +FStar_UInt128_carry(uint64_t a, uint64_t b) +{ + return FStar_UInt128_constant_time_carry(a, b); +} + +inline static FStar_UInt128_uint128 +FStar_UInt128_add(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b) +{ +#if HAS_OPTIMIZED + uint64_t l, h; + + unsigned char carry = + _addcarry_u64(0, a.low, b.low, &l); // low/CF = a.low+b.low+0 + _addcarry_u64(carry, a.high, b.high, &h); // high = a.high+b.high+CF + return _mm_set_epi64x(h, l); +#else + FStar_UInt128_uint128 lit; + lit.low = a.low + b.low; + lit.high = a.high + b.high + FStar_UInt128_carry(a.low + b.low, b.low); + return lit; +#endif +} + +inline static FStar_UInt128_uint128 +FStar_UInt128_add_underspec(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b) +{ +#if HAS_OPTIMIZED + return FStar_UInt128_add(a, b); +#else + FStar_UInt128_uint128 lit; + lit.low = a.low + b.low; + lit.high = a.high + b.high + FStar_UInt128_carry(a.low + b.low, b.low; + return lit; +#endif +} + 
+inline static FStar_UInt128_uint128 +FStar_UInt128_add_mod(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b) +{ +#if HAS_OPTIMIZED + return FStar_UInt128_add(a, b); +#else + FStar_UInt128_uint128 lit; + lit.low = a.low + b.low; + lit.high = a.high + b.high + FStar_UInt128_carry(a.low + b.low, b.low); + return lit; +#endif +} + +inline static FStar_UInt128_uint128 +FStar_UInt128_sub(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b) +{ +#if HAS_OPTIMIZED + uint64_t l, h; + + unsigned char borrow = _subborrow_u64(0, a.low, b.low, &l); + _subborrow_u64(borrow, a.high, b.high, &h); + return _mm_set_epi64x(h, l); +#else + FStar_UInt128_uint128 lit; + lit.low = a.low - b.low; + lit.high = a.high - b.high - FStar_UInt128_carry(a.low, a.low - b.low); + return lit; +#endif +} + +inline static FStar_UInt128_uint128 +FStar_UInt128_sub_underspec(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b) +{ +#if HAS_OPTIMIZED + return FStar_UInt128_sub(a, b); +#else + FStar_UInt128_uint128 lit; + lit.low = a.low - b.low; + lit.high = a.high - b.high - FStar_UInt128_carry(a.low, a.low - b.low); + return lit; +#endif +} + +inline static FStar_UInt128_uint128 +FStar_UInt128_sub_mod_impl(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b) +{ + FStar_UInt128_uint128 lit; + lit.low = a.low - b.low; + lit.high = a.high - b.high - FStar_UInt128_carry(a.low, a.low - b.low); + return lit; +} + +inline static FStar_UInt128_uint128 +FStar_UInt128_sub_mod(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b) +{ +#if HAS_OPTIMIZED + return FStar_UInt128_sub(a, b); +#else + return FStar_UInt128_sub_mod_impl(a, b); +#endif +} + +inline static FStar_UInt128_uint128 +FStar_UInt128_logand(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b) +{ +#if HAS_OPTIMIZED + return _mm_and_si128(a, b); +#else + FStar_UInt128_uint128 lit; + lit.low = a.low & b.low; + lit.high = a.high & b.high; + return lit; +#endif +} + +inline static FStar_UInt128_uint128 +FStar_UInt128_logxor(FStar_UInt128_uint128 a, FStar_UInt128_uint128 
b) +{ +#if HAS_OPTIMIZED + return _mm_xor_si128(a, b); +#else + FStar_UInt128_uint128 lit; + lit.low = a.low ^ b.low; + lit.high = a.high ^ b.high; + return lit; +#endif +} + +inline static FStar_UInt128_uint128 +FStar_UInt128_logor(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b) +{ +#if HAS_OPTIMIZED + return _mm_or_si128(a, b); +#else + FStar_UInt128_uint128 lit; + lit.low = a.low | b.low; + lit.high = a.high | b.high; + return lit; +#endif +} + +inline static FStar_UInt128_uint128 +FStar_UInt128_lognot(FStar_UInt128_uint128 a) +{ +#if HAS_OPTIMIZED + return _mm_andnot_si128(a, a); +#else + FStar_UInt128_uint128 lit; + lit.low = ~a.low; + lit.high = ~a.high; + return lit; +#endif +} + +static const uint32_t FStar_UInt128_u32_64 = (uint32_t)64U; + +inline static uint64_t +FStar_UInt128_add_u64_shift_left(uint64_t hi, uint64_t lo, uint32_t s) +{ + return (hi << s) + (lo >> FStar_UInt128_u32_64 - s); +} + +inline static uint64_t +FStar_UInt128_add_u64_shift_left_respec(uint64_t hi, uint64_t lo, uint32_t s) +{ + return FStar_UInt128_add_u64_shift_left(hi, lo, s); +} + +inline static FStar_UInt128_uint128 +FStar_UInt128_shift_left_small(FStar_UInt128_uint128 a, uint32_t s) +{ + if (s == (uint32_t)0U) + return a; + else { + FStar_UInt128_uint128 lit; + lit.low = a.low << s; + lit.high = FStar_UInt128_add_u64_shift_left_respec(a.high, a.low, s); + return lit; + } +} + +inline static FStar_UInt128_uint128 +FStar_UInt128_shift_left_large(FStar_UInt128_uint128 a, uint32_t s) +{ + FStar_UInt128_uint128 lit; + lit.low = (uint64_t)0U; + lit.high = a.low << s - FStar_UInt128_u32_64; + return lit; +} + +inline static FStar_UInt128_uint128 +FStar_UInt128_shift_left(FStar_UInt128_uint128 a, uint32_t s) +{ +#if HAS_OPTIMIZED + if (s == 0) { + return a; + } else if (s < FStar_UInt128_u32_64) { + uint64_t l = a.low << s; + uint64_t h = __shiftleft128(a.low, a.high, (unsigned char)s); + return _mm_set_epi64x(h, l); + } else { + return _mm_set_epi64x(a.low << (s - 
FStar_UInt128_u32_64), 0); + } +#else + if (s < FStar_UInt128_u32_64) + return FStar_UInt128_shift_left_small(a, s); + else + return FStar_UInt128_shift_left_large(a, s); +#endif +} + +inline static uint64_t +FStar_UInt128_add_u64_shift_right(uint64_t hi, uint64_t lo, uint32_t s) +{ + return (lo >> s) + (hi << FStar_UInt128_u32_64 - s); +} + +inline static uint64_t +FStar_UInt128_add_u64_shift_right_respec(uint64_t hi, uint64_t lo, uint32_t s) +{ + return FStar_UInt128_add_u64_shift_right(hi, lo, s); +} + +inline static FStar_UInt128_uint128 +FStar_UInt128_shift_right_small(FStar_UInt128_uint128 a, uint32_t s) +{ + if (s == (uint32_t)0U) + return a; + else { + FStar_UInt128_uint128 lit; + lit.low = FStar_UInt128_add_u64_shift_right_respec(a.high, a.low, s); + lit.high = a.high >> s; + return lit; + } +} + +inline static FStar_UInt128_uint128 +FStar_UInt128_shift_right_large(FStar_UInt128_uint128 a, uint32_t s) +{ + FStar_UInt128_uint128 lit; + lit.low = a.high >> s - FStar_UInt128_u32_64; + lit.high = (uint64_t)0U; + return lit; +} + +inline static FStar_UInt128_uint128 +FStar_UInt128_shift_right(FStar_UInt128_uint128 a, uint32_t s) +{ +#if HAS_OPTIMIZED + if (s == 0) { + return a; + } else if (s < FStar_UInt128_u32_64) { + uint64_t l = __shiftright128(a.low, a.high, (unsigned char)s); + uint64_t h = a.high >> s; + return _mm_set_epi64x(h, l); + } else { + return _mm_set_epi64x(0, a.high >> (s - FStar_UInt128_u32_64)); + } +#else + if (s < FStar_UInt128_u32_64) + return FStar_UInt128_shift_right_small(a, s); + else + return FStar_UInt128_shift_right_large(a, s); +#endif +} + +inline static bool +FStar_UInt128_eq(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b) +{ + return a.low == b.low && a.high == b.high; +} + +inline static bool +FStar_UInt128_gt(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b) +{ + return a.high > b.high || a.high == b.high && a.low > b.low; +} + +inline static bool +FStar_UInt128_lt(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b) +{ + 
return a.high < b.high || a.high == b.high && a.low < b.low; +} + +inline static bool +FStar_UInt128_gte(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b) +{ + return a.high > b.high || a.high == b.high && a.low >= b.low; +} + +inline static bool +FStar_UInt128_lte(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b) +{ + return a.high < b.high || a.high == b.high && a.low <= b.low; +} + +inline static FStar_UInt128_uint128 +FStar_UInt128_eq_mask(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b) +{ +#if HAS_OPTIMIZED + // PCMPW to produce 4 32-bit values, all either 0x0 or 0xffffffff + __m128i r32 = _mm_cmpeq_epi32(a, b); + // Shuffle 3,2,1,0 into 2,3,0,1 (swapping dwords inside each half) + __m128i s32 = _mm_shuffle_epi32(r32, _MM_SHUFFLE(2, 3, 0, 1)); + // Bitwise and to compute (3&2),(2&3),(1&0),(0&1) + __m128i ret64 = _mm_and_si128(r32, s32); + // Swap the two 64-bit values to form s64 + __m128i s64 = + _mm_shuffle_epi32(ret64, _MM_SHUFFLE(1, 0, 3, 2)); // 3,2,1,0 -> 1,0,3,2 + // And them together + return _mm_and_si128(ret64, s64); +#else + FStar_UInt128_uint128 lit; + lit.low = FStar_UInt64_eq_mask(a.low, b.low) & FStar_UInt64_eq_mask(a.high, b.high); + lit.high = FStar_UInt64_eq_mask(a.low, b.low) & FStar_UInt64_eq_mask(a.high, b.high); + return lit; +#endif +} + +inline static FStar_UInt128_uint128 +FStar_UInt128_gte_mask(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b) +{ +#if HAS_OPTIMIZED && 0 + // ge - compare 3,2,1,0 for >= and generating 0 or 0xffffffff for each + // eq - compare 3,2,1,0 for == and generating 0 or 0xffffffff for each + // slot 0 = ge0 | (eq0 & ge1) | (eq0 & eq1 & ge2) | (eq0 & eq1 & eq2 & ge3) + // then splat slot 0 to 3,2,1,0 + __m128i gt = _mm_cmpgt_epi32(a, b); + __m128i eq = _mm_cmpeq_epi32(a, b); + __m128i ge = _mm_or_si128(gt, eq); + __m128i ge0 = ge; + __m128i eq0 = eq; + __m128i ge1 = _mm_srli_si128(ge, 4); // shift ge from 3,2,1,0 to 0x0,3,2,1 + __m128i t1 = _mm_and_si128(eq0, ge1); + __m128i ret = _mm_or_si128(ge, t1); // ge0 
| (eq0 & ge1) is now in 0 + __m128i eq1 = _mm_srli_si128(eq, 4); // shift eq from 3,2,1,0 to 0x0,3,2,1 + __m128i ge2 = + _mm_srli_si128(ge1, 4); // shift original ge from 3,2,1,0 to 0x0,0x0,3,2 + __m128i t2 = + _mm_and_si128(eq0, _mm_and_si128(eq1, ge2)); // t2 = (eq0 & eq1 & ge2) + ret = _mm_or_si128(ret, t2); + __m128i eq2 = _mm_srli_si128(eq1, 4); // shift eq from 3,2,1,0 to 0x0,00,00,3 + __m128i ge3 = + _mm_srli_si128(ge2, 4); // shift original ge from 3,2,1,0 to 0x0,0x0,0x0,3 + __m128i t3 = _mm_and_si128( + eq0, _mm_and_si128( + eq1, _mm_and_si128(eq2, ge3))); // t3 = (eq0 & eq1 & eq2 & ge3) + ret = _mm_or_si128(ret, t3); + return _mm_shuffle_epi32( + ret, + _MM_SHUFFLE(0, 0, 0, 0)); // the result is in 0. Shuffle into all dwords. +#else + FStar_UInt128_uint128 lit; + lit.low = FStar_UInt64_gte_mask(a.high, b.high) & + ~FStar_UInt64_eq_mask(a.high, b.high) | + FStar_UInt64_eq_mask(a.high, b.high) & + FStar_UInt64_gte_mask(a.low, b.low); + lit.high = FStar_UInt64_gte_mask(a.high, b.high) & + ~FStar_UInt64_eq_mask(a.high, b.high) | + FStar_UInt64_eq_mask(a.high, b.high) & + FStar_UInt64_gte_mask(a.low, b.low); + return lit; +#endif +} + +inline static FStar_UInt128_uint128 +FStar_UInt128_uint64_to_uint128(uint64_t a) +{ +#if HAS_OPTIMIZED + return _mm_set_epi64x(0, a); +#else + FStar_UInt128_uint128 lit; + lit.low = a; + lit.high = (uint64_t)0U; + return lit; +#endif +} + +inline static uint64_t +FStar_UInt128_uint128_to_uint64(FStar_UInt128_uint128 a) +{ + return a.low; +} + +inline static uint64_t +FStar_UInt128_u64_mod_32(uint64_t a) +{ + return a & (uint64_t)0xffffffffU; +} + +static uint32_t FStar_UInt128_u32_32 = (uint32_t)32U; + +inline static uint64_t +FStar_UInt128_u32_combine(uint64_t hi, uint64_t lo) +{ + return lo + (hi << FStar_UInt128_u32_32); +} + +inline static FStar_UInt128_uint128 +FStar_UInt128_mul32(uint64_t x, uint32_t y) +{ +#if HAS_OPTIMIZED + uint64_t l, h; + l = _umul128(x, (uint64_t)y, &h); + return _mm_set_epi64x(h, l); +#else + 
FStar_UInt128_uint128 lit; + lit.low = FStar_UInt128_u32_combine( + (x >> FStar_UInt128_u32_32) * (uint64_t)y + + (FStar_UInt128_u64_mod_32(x) * (uint64_t)y >> + FStar_UInt128_u32_32), + FStar_UInt128_u64_mod_32(FStar_UInt128_u64_mod_32(x) * (uint64_t)y)); + lit.high = (x >> FStar_UInt128_u32_32) * (uint64_t)y + + (FStar_UInt128_u64_mod_32(x) * (uint64_t)y >> + FStar_UInt128_u32_32) >> + FStar_UInt128_u32_32; + return lit; +#endif +} + +/* Note: static headers bring scope collision issues when they define types! + * Because now client (karamel-generated) code will include this header and + * there might be type collisions if the client code uses quadruples of uint64s. + * So, we cannot use the karamel-generated name. */ +typedef struct K_quad_s { + uint64_t fst; + uint64_t snd; + uint64_t thd; + uint64_t f3; +} K_quad; + +inline static K_quad +FStar_UInt128_mul_wide_impl_t_(uint64_t x, uint64_t y) +{ + K_quad tmp; + tmp.fst = FStar_UInt128_u64_mod_32(x); + tmp.snd = FStar_UInt128_u64_mod_32( + FStar_UInt128_u64_mod_32(x) * FStar_UInt128_u64_mod_32(y)); + tmp.thd = x >> FStar_UInt128_u32_32; + tmp.f3 = (x >> FStar_UInt128_u32_32) * FStar_UInt128_u64_mod_32(y) + + (FStar_UInt128_u64_mod_32(x) * FStar_UInt128_u64_mod_32(y) >> + FStar_UInt128_u32_32); + return tmp; +} + +static uint64_t +FStar_UInt128_u32_combine_(uint64_t hi, uint64_t lo) +{ + return lo + (hi << FStar_UInt128_u32_32); +} + +inline static FStar_UInt128_uint128 +FStar_UInt128_mul_wide_impl(uint64_t x, uint64_t y) +{ + K_quad scrut = + FStar_UInt128_mul_wide_impl_t_(x, y); + uint64_t u1 = scrut.fst; + uint64_t w3 = scrut.snd; + uint64_t x_ = scrut.thd; + uint64_t t_ = scrut.f3; + FStar_UInt128_uint128 lit; + lit.low = FStar_UInt128_u32_combine_( + u1 * (y >> FStar_UInt128_u32_32) + FStar_UInt128_u64_mod_32(t_), w3); + lit.high = + x_ * (y >> FStar_UInt128_u32_32) + (t_ >> FStar_UInt128_u32_32) + + (u1 * (y >> FStar_UInt128_u32_32) + FStar_UInt128_u64_mod_32(t_) >> + FStar_UInt128_u32_32); + return lit; 
+} + +inline static FStar_UInt128_uint128 +FStar_UInt128_mul_wide(uint64_t x, uint64_t y) +{ +#if HAS_OPTIMIZED + uint64_t l, h; + l = _umul128(x, y, &h); + return _mm_set_epi64x(h, l); +#else + return FStar_UInt128_mul_wide_impl(x, y); +#endif +} + +#undef low +#undef high + +#endif diff --git a/security/nss/lib/freebl/verified/karamel/krmllib/dist/minimal/fstar_uint128_struct_endianness.h b/security/nss/lib/freebl/verified/karamel/krmllib/dist/minimal/fstar_uint128_struct_endianness.h new file mode 100644 index 0000000000..61fe85c49e --- /dev/null +++ b/security/nss/lib/freebl/verified/karamel/krmllib/dist/minimal/fstar_uint128_struct_endianness.h @@ -0,0 +1,84 @@ +/* Copyright (c) INRIA and Microsoft Corporation. All rights reserved. + Licensed under the Apache 2.0 License. */ + +#ifndef FSTAR_UINT128_STRUCT_ENDIANNESS_H +#define FSTAR_UINT128_STRUCT_ENDIANNESS_H + +/* Hand-written implementation of endianness-related uint128 functions + * for the extracted uint128 implementation */ + +/* Access 64-bit fields within the int128. */ +#define HIGH64_OF(x) ((x)->high) +#define LOW64_OF(x) ((x)->low) + +/* A series of definitions written using pointers. 
*/ + +inline static void +load128_le_(uint8_t *b, uint128_t *r) +{ + LOW64_OF(r) = load64_le(b); + HIGH64_OF(r) = load64_le(b + 8); +} + +inline static void +store128_le_(uint8_t *b, uint128_t *n) +{ + store64_le(b, LOW64_OF(n)); + store64_le(b + 8, HIGH64_OF(n)); +} + +inline static void +load128_be_(uint8_t *b, uint128_t *r) +{ + HIGH64_OF(r) = load64_be(b); + LOW64_OF(r) = load64_be(b + 8); +} + +inline static void +store128_be_(uint8_t *b, uint128_t *n) +{ + store64_be(b, HIGH64_OF(n)); + store64_be(b + 8, LOW64_OF(n)); +} + +#ifndef KRML_NOSTRUCT_PASSING + +inline static uint128_t +load128_le(uint8_t *b) +{ + uint128_t r; + load128_le_(b, &r); + return r; +} + +inline static void +store128_le(uint8_t *b, uint128_t n) +{ + store128_le_(b, &n); +} + +inline static uint128_t +load128_be(uint8_t *b) +{ + uint128_t r; + load128_be_(b, &r); + return r; +} + +inline static void +store128_be(uint8_t *b, uint128_t n) +{ + store128_be_(b, &n); +} + +#else /* !defined(KRML_STRUCT_PASSING) */ + +#define print128 print128_ +#define load128_le load128_le_ +#define store128_le store128_le_ +#define load128_be load128_be_ +#define store128_be store128_be_ + +#endif /* KRML_STRUCT_PASSING */ + +#endif diff --git a/security/nss/lib/freebl/verified/karamel/krmllib/dist/minimal/libkrmllib.def b/security/nss/lib/freebl/verified/karamel/krmllib/dist/minimal/libkrmllib.def new file mode 100644 index 0000000000..c4ab8e38ed --- /dev/null +++ b/security/nss/lib/freebl/verified/karamel/krmllib/dist/minimal/libkrmllib.def @@ -0,0 +1,11 @@ +LIBRARY libkrmllib + +EXPORTS + FStar_UInt64_eq_mask + FStar_UInt64_gte_mask + FStar_UInt32_eq_mask + FStar_UInt32_gte_mask + FStar_UInt16_eq_mask + FStar_UInt16_gte_mask + FStar_UInt8_eq_mask + FStar_UInt8_gte_mask diff --git a/security/nss/lib/freebl/verified/libintvector.h b/security/nss/lib/freebl/verified/libintvector.h new file mode 100644 index 0000000000..fab6a35d18 --- /dev/null +++ b/security/nss/lib/freebl/verified/libintvector.h @@ -0,0 
+1,915 @@ +#ifndef __Vec_Intrin_H +#define __Vec_Intrin_H + +#include <sys/types.h> + +/* We include config.h here to ensure that the various feature-flags are + * properly brought into scope. Users can either run the configure script, or + * write a config.h themselves and put it under version control. */ +#if defined(__has_include) +#if __has_include("config.h") +#include "config.h" +#endif +#endif + +/* # DEBUGGING: + * ============ + * It is possible to debug the current definitions by using libintvector_debug.h + * See the include at the bottom of the file. */ + +#define Lib_IntVector_Intrinsics_bit_mask64(x) -((x)&1) + +#if defined(__x86_64__) || defined(_M_X64) + +#if defined(HACL_CAN_COMPILE_VEC128) + +#include <emmintrin.h> +#include <tmmintrin.h> +#include <smmintrin.h> + +typedef __m128i Lib_IntVector_Intrinsics_vec128; + +#define Lib_IntVector_Intrinsics_ni_aes_enc(x0, x1) \ + (_mm_aesenc_si128(x0, x1)) + +#define Lib_IntVector_Intrinsics_ni_aes_enc_last(x0, x1) \ + (_mm_aesenclast_si128(x0, x1)) + +#define Lib_IntVector_Intrinsics_ni_aes_keygen_assist(x0, x1) \ + (_mm_aeskeygenassist_si128(x0, x1)) + +#define Lib_IntVector_Intrinsics_ni_clmul(x0, x1, x2) \ + (_mm_clmulepi64_si128(x0, x1, x2)) + +#define Lib_IntVector_Intrinsics_vec128_xor(x0, x1) \ + (_mm_xor_si128(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec128_eq64(x0, x1) \ + (_mm_cmpeq_epi64(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec128_eq32(x0, x1) \ + (_mm_cmpeq_epi32(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec128_gt64(x0, x1) \ + (_mm_cmpgt_epi64(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec128_gt32(x0, x1) \ + (_mm_cmpgt_epi32(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec128_or(x0, x1) \ + (_mm_or_si128(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec128_and(x0, x1) \ + (_mm_and_si128(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec128_lognot(x0) \ + (_mm_xor_si128(x0, _mm_set1_epi32(-1))) + +#define Lib_IntVector_Intrinsics_vec128_shift_left(x0, x1) \ + (_mm_slli_si128(x0, 
(x1) / 8)) + +#define Lib_IntVector_Intrinsics_vec128_shift_right(x0, x1) \ + (_mm_srli_si128(x0, (x1) / 8)) + +#define Lib_IntVector_Intrinsics_vec128_shift_left64(x0, x1) \ + (_mm_slli_epi64(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec128_shift_right64(x0, x1) \ + (_mm_srli_epi64(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec128_shift_left32(x0, x1) \ + (_mm_slli_epi32(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec128_shift_right32(x0, x1) \ + (_mm_srli_epi32(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec128_rotate_left32_8(x0) \ + (_mm_shuffle_epi8(x0, _mm_set_epi8(14, 13, 12, 15, 10, 9, 8, 11, 6, 5, 4, 7, 2, 1, 0, 3))) + +#define Lib_IntVector_Intrinsics_vec128_rotate_left32_16(x0) \ + (_mm_shuffle_epi8(x0, _mm_set_epi8(13, 12, 15, 14, 9, 8, 11, 10, 5, 4, 7, 6, 1, 0, 3, 2))) + +#define Lib_IntVector_Intrinsics_vec128_rotate_left32_24(x0) \ + (_mm_shuffle_epi8(x0, _mm_set_epi8(12, 15, 14, 13, 8, 11, 10, 9, 4, 7, 6, 5, 0, 3, 2, 1))) + +#define Lib_IntVector_Intrinsics_vec128_rotate_left32(x0, x1) \ + (((x1) == 8 ? Lib_IntVector_Intrinsics_vec128_rotate_left32_8(x0) : ((x1) == 16 ? Lib_IntVector_Intrinsics_vec128_rotate_left32_16(x0) : ((x1) == 24 ? 
Lib_IntVector_Intrinsics_vec128_rotate_left32_24(x0) : _mm_xor_si128(_mm_slli_epi32(x0, x1), _mm_srli_epi32(x0, 32 - (x1))))))) + +#define Lib_IntVector_Intrinsics_vec128_rotate_right32(x0, x1) \ + (Lib_IntVector_Intrinsics_vec128_rotate_left32(x0, 32 - (x1))) + +#define Lib_IntVector_Intrinsics_vec128_shuffle32(x0, x1, x2, x3, x4) \ + (_mm_shuffle_epi32(x0, _MM_SHUFFLE(x4, x3, x2, x1))) + +#define Lib_IntVector_Intrinsics_vec128_shuffle64(x0, x1, x2) \ + (_mm_shuffle_epi32(x0, _MM_SHUFFLE(2 * x1 + 1, 2 * x1, 2 * x2 + 1, 2 * x2))) + +#define Lib_IntVector_Intrinsics_vec128_rotate_right_lanes32(x0, x1) \ + (_mm_shuffle_epi32(x0, _MM_SHUFFLE((x1 + 3) % 4, (x1 + 2) % 4, (x1 + 1) % 4, x1 % 4))) + +#define Lib_IntVector_Intrinsics_vec128_rotate_right_lanes64(x0, x1) \ + (_mm_shuffle_epi32(x0, _MM_SHUFFLE((2 * x1 + 3) % 4, (2 * x1 + 2) % 4, (2 * x1 + 1) % 4, (2 * x1) % 4))) + +#define Lib_IntVector_Intrinsics_vec128_load32_le(x0) \ + (_mm_loadu_si128((__m128i*)(x0))) + +#define Lib_IntVector_Intrinsics_vec128_load64_le(x0) \ + (_mm_loadu_si128((__m128i*)(x0))) + +#define Lib_IntVector_Intrinsics_vec128_store32_le(x0, x1) \ + (_mm_storeu_si128((__m128i*)(x0), x1)) + +#define Lib_IntVector_Intrinsics_vec128_store64_le(x0, x1) \ + (_mm_storeu_si128((__m128i*)(x0), x1)) + +#define Lib_IntVector_Intrinsics_vec128_load_be(x0) \ + (_mm_shuffle_epi8(_mm_loadu_si128((__m128i*)(x0)), _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15))) + +#define Lib_IntVector_Intrinsics_vec128_load32_be(x0) \ + (_mm_shuffle_epi8(_mm_loadu_si128((__m128i*)(x0)), _mm_set_epi8(12, 13, 14, 15, 8, 9, 10, 11, 4, 5, 6, 7, 0, 1, 2, 3))) + +#define Lib_IntVector_Intrinsics_vec128_load64_be(x0) \ + (_mm_shuffle_epi8(_mm_loadu_si128((__m128i*)(x0)), _mm_set_epi8(8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7))) + +#define Lib_IntVector_Intrinsics_vec128_store_be(x0, x1) \ + (_mm_storeu_si128((__m128i*)(x0), _mm_shuffle_epi8(x1, _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 
14, 15)))) + +#define Lib_IntVector_Intrinsics_vec128_store32_be(x0, x1) \ + (_mm_storeu_si128((__m128i*)(x0), _mm_shuffle_epi8(x1, _mm_set_epi8(12, 13, 14, 15, 8, 9, 10, 11, 4, 5, 6, 7, 0, 1, 2, 3)))) + +#define Lib_IntVector_Intrinsics_vec128_store64_be(x0, x1) \ + (_mm_storeu_si128((__m128i*)(x0), _mm_shuffle_epi8(x1, _mm_set_epi8(8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7)))) + +#define Lib_IntVector_Intrinsics_vec128_insert8(x0, x1, x2) \ + (_mm_insert_epi8(x0, x1, x2)) + +#define Lib_IntVector_Intrinsics_vec128_insert32(x0, x1, x2) \ + (_mm_insert_epi32(x0, x1, x2)) + +#define Lib_IntVector_Intrinsics_vec128_insert64(x0, x1, x2) \ + (_mm_insert_epi64(x0, x1, x2)) + +#define Lib_IntVector_Intrinsics_vec128_extract8(x0, x1) \ + (_mm_extract_epi8(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec128_extract32(x0, x1) \ + (_mm_extract_epi32(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec128_extract64(x0, x1) \ + (_mm_extract_epi64(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec128_zero \ + (_mm_setzero_si128()) + +#define Lib_IntVector_Intrinsics_vec128_add64(x0, x1) \ + (_mm_add_epi64(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec128_sub64(x0, x1) \ + (_mm_sub_epi64(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec128_mul64(x0, x1) \ + (_mm_mul_epu32(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec128_smul64(x0, x1) \ + (_mm_mul_epu32(x0, _mm_set1_epi64x(x1))) + +#define Lib_IntVector_Intrinsics_vec128_add32(x0, x1) \ + (_mm_add_epi32(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec128_sub32(x0, x1) \ + (_mm_sub_epi32(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec128_mul32(x0, x1) \ + (_mm_mullo_epi32(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec128_smul32(x0, x1) \ + (_mm_mullo_epi32(x0, _mm_set1_epi32(x1))) + +#define Lib_IntVector_Intrinsics_vec128_load128(x) \ + ((__m128i)x) + +#define Lib_IntVector_Intrinsics_vec128_load64(x) \ + (_mm_set1_epi64x(x)) /* hi lo */ + +#define Lib_IntVector_Intrinsics_vec128_load64s(x0, x1) \ + 
(_mm_set_epi64x(x1, x0)) /* hi lo */ + +#define Lib_IntVector_Intrinsics_vec128_load32(x) \ + (_mm_set1_epi32(x)) + +#define Lib_IntVector_Intrinsics_vec128_load32s(x0, x1, x2, x3) \ + (_mm_set_epi32(x3, x2, x1, x0)) /* hi lo */ + +#define Lib_IntVector_Intrinsics_vec128_interleave_low32(x1, x2) \ + (_mm_unpacklo_epi32(x1, x2)) + +#define Lib_IntVector_Intrinsics_vec128_interleave_high32(x1, x2) \ + (_mm_unpackhi_epi32(x1, x2)) + +#define Lib_IntVector_Intrinsics_vec128_interleave_low64(x1, x2) \ + (_mm_unpacklo_epi64(x1, x2)) + +#define Lib_IntVector_Intrinsics_vec128_interleave_high64(x1, x2) \ + (_mm_unpackhi_epi64(x1, x2)) + +#endif /* HACL_CAN_COMPILE_VEC128 */ + +#if defined(HACL_CAN_COMPILE_VEC256) + +#include <immintrin.h> +#include <wmmintrin.h> + +typedef __m256i Lib_IntVector_Intrinsics_vec256; + +#define Lib_IntVector_Intrinsics_vec256_eq64(x0, x1) \ + (_mm256_cmpeq_epi64(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec256_eq32(x0, x1) \ + (_mm256_cmpeq_epi32(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec256_gt64(x0, x1) \ + (_mm256_cmpgt_epi64(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec256_gt32(x0, x1) \ + (_mm256_cmpgt_epi32(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec256_xor(x0, x1) \ + (_mm256_xor_si256(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec256_or(x0, x1) \ + (_mm256_or_si256(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec256_and(x0, x1) \ + (_mm256_and_si256(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec256_lognot(x0) \ + (_mm256_xor_si256(x0, _mm256_set1_epi32(-1))) + +#define Lib_IntVector_Intrinsics_vec256_shift_left(x0, x1) \ + (_mm256_slli_si256(x0, (x1) / 8)) + +#define Lib_IntVector_Intrinsics_vec256_shift_right(x0, x1) \ + (_mm256_srli_si256(x0, (x1) / 8)) + +#define Lib_IntVector_Intrinsics_vec256_shift_left64(x0, x1) \ + (_mm256_slli_epi64(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec256_shift_right64(x0, x1) \ + (_mm256_srli_epi64(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec256_shift_left32(x0, x1) \ + 
(_mm256_slli_epi32(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec256_shift_right32(x0, x1) \ + (_mm256_srli_epi32(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec256_rotate_left32_8(x0) \ + (_mm256_shuffle_epi8(x0, _mm256_set_epi8(14, 13, 12, 15, 10, 9, 8, 11, 6, 5, 4, 7, 2, 1, 0, 3, 14, 13, 12, 15, 10, 9, 8, 11, 6, 5, 4, 7, 2, 1, 0, 3))) + +#define Lib_IntVector_Intrinsics_vec256_rotate_left32_16(x0) \ + (_mm256_shuffle_epi8(x0, _mm256_set_epi8(13, 12, 15, 14, 9, 8, 11, 10, 5, 4, 7, 6, 1, 0, 3, 2, 13, 12, 15, 14, 9, 8, 11, 10, 5, 4, 7, 6, 1, 0, 3, 2))) + +#define Lib_IntVector_Intrinsics_vec256_rotate_left32_24(x0) \ + (_mm256_shuffle_epi8(x0, _mm256_set_epi8(12, 15, 14, 13, 8, 11, 10, 9, 4, 7, 6, 5, 0, 3, 2, 1, 12, 15, 14, 13, 8, 11, 10, 9, 4, 7, 6, 5, 0, 3, 2, 1))) + +#define Lib_IntVector_Intrinsics_vec256_rotate_left32(x0, x1) \ + ((x1 == 8 ? Lib_IntVector_Intrinsics_vec256_rotate_left32_8(x0) : (x1 == 16 ? Lib_IntVector_Intrinsics_vec256_rotate_left32_16(x0) : (x1 == 24 ? Lib_IntVector_Intrinsics_vec256_rotate_left32_24(x0) : _mm256_or_si256(_mm256_slli_epi32(x0, x1), _mm256_srli_epi32(x0, 32 - (x1))))))) + +#define Lib_IntVector_Intrinsics_vec256_rotate_right32(x0, x1) \ + (Lib_IntVector_Intrinsics_vec256_rotate_left32(x0, 32 - (x1))) + +#define Lib_IntVector_Intrinsics_vec256_rotate_right64_8(x0) \ + (_mm256_shuffle_epi8(x0, _mm256_set_epi8(8, 15, 14, 13, 12, 11, 10, 9, 0, 7, 6, 5, 4, 3, 2, 1, 8, 15, 14, 13, 12, 11, 10, 9, 0, 7, 6, 5, 4, 3, 2, 1))) + +#define Lib_IntVector_Intrinsics_vec256_rotate_right64_16(x0) \ + (_mm256_shuffle_epi8(x0, _mm256_set_epi8(9, 8, 15, 14, 13, 12, 11, 10, 1, 0, 7, 6, 5, 4, 3, 2, 9, 8, 15, 14, 13, 12, 11, 10, 1, 0, 7, 6, 5, 4, 3, 2))) + +#define Lib_IntVector_Intrinsics_vec256_rotate_right64_24(x0) \ + (_mm256_shuffle_epi8(x0, _mm256_set_epi8(10, 9, 8, 15, 14, 13, 12, 11, 2, 1, 0, 7, 6, 5, 4, 3, 10, 9, 8, 15, 14, 13, 12, 11, 2, 1, 0, 7, 6, 5, 4, 3))) + +#define Lib_IntVector_Intrinsics_vec256_rotate_right64_32(x0) \ + 
(_mm256_shuffle_epi8(x0, _mm256_set_epi8(11, 10, 9, 8, 15, 14, 13, 12, 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12, 3, 2, 1, 0, 7, 6, 5, 4))) + +#define Lib_IntVector_Intrinsics_vec256_rotate_right64_40(x0) \ + (_mm256_shuffle_epi8(x0, _mm256_set_epi8(12, 11, 10, 9, 8, 15, 14, 13, 4, 3, 2, 1, 0, 7, 6, 5, 12, 11, 10, 9, 8, 15, 14, 13, 4, 3, 2, 1, 0, 7, 6, 5))) + +#define Lib_IntVector_Intrinsics_vec256_rotate_right64_48(x0) \ + (_mm256_shuffle_epi8(x0, _mm256_set_epi8(13, 12, 11, 10, 9, 8, 15, 14, 5, 4, 3, 2, 1, 0, 7, 6, 13, 12, 11, 10, 9, 8, 15, 14, 5, 4, 3, 2, 1, 0, 7, 6))) + +#define Lib_IntVector_Intrinsics_vec256_rotate_right64_56(x0) \ + (_mm256_shuffle_epi8(x0, _mm256_set_epi8(14, 13, 12, 11, 10, 9, 8, 15, 6, 5, 4, 3, 2, 1, 0, 7, 14, 13, 12, 11, 10, 9, 8, 15, 6, 5, 4, 3, 2, 1, 0, 7))) + +#define Lib_IntVector_Intrinsics_vec256_rotate_right64(x0, x1) \ + ((x1 == 8 ? Lib_IntVector_Intrinsics_vec256_rotate_right64_8(x0) : (x1 == 16 ? Lib_IntVector_Intrinsics_vec256_rotate_right64_16(x0) : (x1 == 24 ? Lib_IntVector_Intrinsics_vec256_rotate_right64_24(x0) : (x1 == 32 ? Lib_IntVector_Intrinsics_vec256_rotate_right64_32(x0) : (x1 == 40 ? Lib_IntVector_Intrinsics_vec256_rotate_right64_40(x0) : (x1 == 48 ? Lib_IntVector_Intrinsics_vec256_rotate_right64_48(x0) : (x1 == 56 ? 
Lib_IntVector_Intrinsics_vec256_rotate_right64_56(x0) : _mm256_xor_si256(_mm256_srli_epi64((x0), (x1)), _mm256_slli_epi64((x0), (64 - (x1)))))))))))) + +#define Lib_IntVector_Intrinsics_vec256_rotate_left64(x0, x1) \ + (Lib_IntVector_Intrinsics_vec256_rotate_right64(x0, 64 - (x1))) + +#define Lib_IntVector_Intrinsics_vec256_shuffle64(x0, x1, x2, x3, x4) \ + (_mm256_permute4x64_epi64(x0, _MM_SHUFFLE(x4, x3, x2, x1))) + +#define Lib_IntVector_Intrinsics_vec256_shuffle32(x0, x1, x2, x3, x4, x5, x6, x7, x8) \ + (_mm256_permutevar8x32_epi32(x0, _mm256_set_epi32(x8, x7, x6, x5, x4, x3, x2, x1))) + +#define Lib_IntVector_Intrinsics_vec256_rotate_right_lanes32(x0, x1) \ + (_mm256_permutevar8x32_epi32(x0, _mm256_set_epi32((x1 + 7) % 8, (x1 + 6) % 8, (x1 + 5) % 8, (x1 + 4) % 8, (x1 + 3 % 8), (x1 + 2) % 8, (x1 + 1) % 8, x1 % 8))) + +#define Lib_IntVector_Intrinsics_vec256_rotate_right_lanes64(x0, x1) \ + (_mm256_permute4x64_epi64(x0, _MM_SHUFFLE((x1 + 3) % 4, (x1 + 2) % 4, (x1 + 1) % 4, x1 % 4))) + +#define Lib_IntVector_Intrinsics_vec256_load32_le(x0) \ + (_mm256_loadu_si256((__m256i*)(x0))) + +#define Lib_IntVector_Intrinsics_vec256_load64_le(x0) \ + (_mm256_loadu_si256((__m256i*)(x0))) + +#define Lib_IntVector_Intrinsics_vec256_load32_be(x0) \ + (_mm256_shuffle_epi8(_mm256_loadu_si256((__m256i*)(x0)), _mm256_set_epi8(12, 13, 14, 15, 8, 9, 10, 11, 4, 5, 6, 7, 0, 1, 2, 3, 12, 13, 14, 15, 8, 9, 10, 11, 4, 5, 6, 7, 0, 1, 2, 3))) + +#define Lib_IntVector_Intrinsics_vec256_load64_be(x0) \ + (_mm256_shuffle_epi8(_mm256_loadu_si256((__m256i*)(x0)), _mm256_set_epi8(8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7))) + +#define Lib_IntVector_Intrinsics_vec256_store32_le(x0, x1) \ + (_mm256_storeu_si256((__m256i*)(x0), x1)) + +#define Lib_IntVector_Intrinsics_vec256_store64_le(x0, x1) \ + (_mm256_storeu_si256((__m256i*)(x0), x1)) + +#define Lib_IntVector_Intrinsics_vec256_store32_be(x0, x1) \ + 
(_mm256_storeu_si256((__m256i*)(x0), _mm256_shuffle_epi8(x1, _mm256_set_epi8(12, 13, 14, 15, 8, 9, 10, 11, 4, 5, 6, 7, 0, 1, 2, 3, 12, 13, 14, 15, 8, 9, 10, 11, 4, 5, 6, 7, 0, 1, 2, 3)))) + +#define Lib_IntVector_Intrinsics_vec256_store64_be(x0, x1) \ + (_mm256_storeu_si256((__m256i*)(x0), _mm256_shuffle_epi8(x1, _mm256_set_epi8(8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7)))) + +#define Lib_IntVector_Intrinsics_vec256_insert8(x0, x1, x2) \ + (_mm256_insert_epi8(x0, x1, x2)) + +#define Lib_IntVector_Intrinsics_vec256_insert32(x0, x1, x2) \ + (_mm256_insert_epi32(x0, x1, x2)) + +#define Lib_IntVector_Intrinsics_vec256_insert64(x0, x1, x2) \ + (_mm256_insert_epi64(x0, x1, x2)) + +#define Lib_IntVector_Intrinsics_vec256_extract8(x0, x1) \ + (_mm256_extract_epi8(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec256_extract32(x0, x1) \ + (_mm256_extract_epi32(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec256_extract64(x0, x1) \ + (_mm256_extract_epi64(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec256_zero \ + (_mm256_setzero_si256()) + +#define Lib_IntVector_Intrinsics_vec256_add64(x0, x1) \ + (_mm256_add_epi64(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec256_sub64(x0, x1) \ + (_mm256_sub_epi64(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec256_mul64(x0, x1) \ + (_mm256_mul_epu32(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec256_smul64(x0, x1) \ + (_mm256_mul_epu32(x0, _mm256_set1_epi64x(x1))) + +#define Lib_IntVector_Intrinsics_vec256_add32(x0, x1) \ + (_mm256_add_epi32(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec256_sub32(x0, x1) \ + (_mm256_sub_epi32(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec256_mul32(x0, x1) \ + (_mm256_mullo_epi32(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec256_smul32(x0, x1) \ + (_mm256_mullo_epi32(x0, _mm256_set1_epi32(x1))) + +#define Lib_IntVector_Intrinsics_vec256_load64(x1) \ + (_mm256_set1_epi64x(x1)) /* hi lo */ + +#define 
Lib_IntVector_Intrinsics_vec256_load64s(x0, x1, x2, x3) \ + (_mm256_set_epi64x(x3, x2, x1, x0)) /* hi lo */ + +#define Lib_IntVector_Intrinsics_vec256_load32(x) \ + (_mm256_set1_epi32(x)) + +#define Lib_IntVector_Intrinsics_vec256_load32s(x0, x1, x2, x3, x4, x5, x6, x7) \ + (_mm256_set_epi32(x7, x6, x5, x4, x3, x2, x1, x0)) /* hi lo */ + +#define Lib_IntVector_Intrinsics_vec256_load128(x) \ + (_mm256_set_m128i((__m128i)x)) + +#define Lib_IntVector_Intrinsics_vec256_load128s(x0, x1) \ + (_mm256_set_m128i((__m128i)x1, (__m128i)x0)) + +#define Lib_IntVector_Intrinsics_vec256_interleave_low32(x1, x2) \ + (_mm256_unpacklo_epi32(x1, x2)) + +#define Lib_IntVector_Intrinsics_vec256_interleave_high32(x1, x2) \ + (_mm256_unpackhi_epi32(x1, x2)) + +#define Lib_IntVector_Intrinsics_vec256_interleave_low64(x1, x2) \ + (_mm256_unpacklo_epi64(x1, x2)) + +#define Lib_IntVector_Intrinsics_vec256_interleave_high64(x1, x2) \ + (_mm256_unpackhi_epi64(x1, x2)) + +#define Lib_IntVector_Intrinsics_vec256_interleave_low128(x1, x2) \ + (_mm256_permute2x128_si256(x1, x2, 0x20)) + +#define Lib_IntVector_Intrinsics_vec256_interleave_high128(x1, x2) \ + (_mm256_permute2x128_si256(x1, x2, 0x31)) + +#endif /* HACL_CAN_COMPILE_VEC256 */ + +#elif (defined(__aarch64__) || defined(_M_ARM64) || defined(__arm__) || defined(_M_ARM)) && !defined(__ARM_32BIT_STATE) + +#if defined(HACL_CAN_COMPILE_VEC128) + +#include <arm_neon.h> + +typedef uint32x4_t Lib_IntVector_Intrinsics_vec128; + +#define Lib_IntVector_Intrinsics_vec128_xor(x0, x1) \ + (veorq_u32(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec128_eq64(x0, x1) \ + (vceqq_u32(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec128_eq32(x0, x1) \ + (vceqq_u32(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec128_gt32(x0, x1) \ + (vcgtq_u32(x0, x1)) + +#define high32(x0) \ + (vmovn_u64(vshrq_n_u64(vreinterpretq_u64_u32(x0), 32))) + +#define low32(x0) \ + (vmovn_u64(vreinterpretq_u64_u32(x0))) + +#define Lib_IntVector_Intrinsics_vec128_gt64(x0, x1) \ + 
(vreinterpretq_u32_u64(vmovl_u32(vorr_u32(vcgt_u32(high32(x0), high32(x1)), vand_u32(vceq_u32(high32(x0), high32(x1)), vcgt_u32(low32(x0), low32(x1))))))) + +#define Lib_IntVector_Intrinsics_vec128_or(x0, x1) \ + (vorrq_u32(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec128_and(x0, x1) \ + (vandq_u32(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec128_lognot(x0) \ + (vmvnq_u32(x0)) + +#define Lib_IntVector_Intrinsics_vec128_shift_left(x0, x1) \ + (vextq_u32(x0, vdupq_n_u8(0), 16 - (x1) / 8)) + +#define Lib_IntVector_Intrinsics_vec128_shift_right(x0, x1) \ + (vextq_u32(x0, vdupq_n_u8(0), (x1) / 8)) + +#define Lib_IntVector_Intrinsics_vec128_shift_left64(x0, x1) \ + (vreinterpretq_u32_u64(vshlq_n_u64(vreinterpretq_u64_u32(x0), x1))) + +#define Lib_IntVector_Intrinsics_vec128_shift_right64(x0, x1) \ + (vreinterpretq_u32_u64(vshrq_n_u64(vreinterpretq_u64_u32(x0), x1))) + +#define Lib_IntVector_Intrinsics_vec128_shift_left32(x0, x1) \ + (vshlq_n_u32(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec128_shift_right32(x0, x1) \ + (vshrq_n_u32(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec128_rotate_left32_16(x1) \ + (vreinterpretq_u32_u16(vrev32q_u16(vreinterpretq_u16_u32(x1)))) + +#define Lib_IntVector_Intrinsics_vec128_rotate_left32(x0, x1) \ + (((x1) == 16 ? Lib_IntVector_Intrinsics_vec128_rotate_left32_16(x0) : vsriq_n_u32(vshlq_n_u32((x0), (x1)), (x0), 32 - (x1)))) + +#define Lib_IntVector_Intrinsics_vec128_rotate_right32_16(x1) \ + (vreinterpretq_u32_u16(vrev32q_u16(vreinterpretq_u16_u32(x1)))) + +#define Lib_IntVector_Intrinsics_vec128_rotate_right32(x0, x1) \ + (((x1) == 16 ? 
Lib_IntVector_Intrinsics_vec128_rotate_right32_16(x0) : vsriq_n_u32(vshlq_n_u32((x0), 32 - (x1)), (x0), (x1)))) + +#define Lib_IntVector_Intrinsics_vec128_rotate_right_lanes32(x0, x1) \ + (vextq_u32(x0, x0, x1)) + +#define Lib_IntVector_Intrinsics_vec128_rotate_right_lanes64(x0, x1) \ + (vextq_u64(x0, x0, x1)) + +/* +#define Lib_IntVector_Intrinsics_vec128_shuffle32(x0, x1, x2, x3, x4) \ + (_mm_shuffle_epi32(x0, _MM_SHUFFLE(x1,x2,x3,x4))) + +#define Lib_IntVector_Intrinsics_vec128_shuffle64(x0, x1, x2) \ + (_mm_shuffle_epi32(x0, _MM_SHUFFLE(2*x1+1,2*x1,2*x2+1,2*x2))) +*/ + +#define Lib_IntVector_Intrinsics_vec128_load32_le(x0) \ + (vld1q_u32((const uint32_t*)(x0))) + +#define Lib_IntVector_Intrinsics_vec128_load64_le(x0) \ + (vld1q_u32((const uint32_t*)(x0))) + +#define Lib_IntVector_Intrinsics_vec128_store32_le(x0, x1) \ + (vst1q_u32((uint32_t*)(x0), (x1))) + +#define Lib_IntVector_Intrinsics_vec128_store64_le(x0, x1) \ + (vst1q_u32((uint32_t*)(x0), (x1))) + +/* +#define Lib_IntVector_Intrinsics_vec128_load_be(x0) \ + ( Lib_IntVector_Intrinsics_vec128 l = vrev64q_u8(vld1q_u32((uint32_t*)(x0))); + +*/ + +#define Lib_IntVector_Intrinsics_vec128_load32_be(x0) \ + (vreinterpretq_u32_u8(vrev32q_u8(vreinterpretq_u8_u32(vld1q_u32((const uint32_t*)(x0)))))) + +#define Lib_IntVector_Intrinsics_vec128_load64_be(x0) \ + (vreinterpretq_u32_u8(vrev64q_u8(vreinterpretq_u8_u32(vld1q_u32((const uint32_t*)(x0)))))) + +/* +#define Lib_IntVector_Intrinsics_vec128_store_be(x0, x1) \ + (_mm_storeu_si128((__m128i*)(x0), _mm_shuffle_epi8(x1, _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15)))) +*/ + +#define Lib_IntVector_Intrinsics_vec128_store32_be(x0, x1) \ + (vst1q_u32((uint32_t*)(x0), (vreinterpretq_u32_u8(vrev32q_u8(vreinterpretq_u8_u32(x1)))))) + +#define Lib_IntVector_Intrinsics_vec128_store64_be(x0, x1) \ + (vst1q_u32((uint32_t*)(x0), (vreinterpretq_u32_u8(vrev64q_u8(vreinterpretq_u8_u32(x1)))))) + +#define Lib_IntVector_Intrinsics_vec128_insert8(x0, x1, x2) \ 
+ (vsetq_lane_u8(x1, x0, x2)) + +#define Lib_IntVector_Intrinsics_vec128_insert32(x0, x1, x2) \ + (vsetq_lane_u32(x1, x0, x2)) + +#define Lib_IntVector_Intrinsics_vec128_insert64(x0, x1, x2) \ + (vreinterpretq_u32_u64(vsetq_lane_u64(x1, vreinterpretq_u64_u32(x0), x2))) + +#define Lib_IntVector_Intrinsics_vec128_extract8(x0, x1) \ + (vgetq_lane_u8(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec128_extract32(x0, x1) \ + (vgetq_lane_u32(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec128_extract64(x0, x1) \ + (vgetq_lane_u64(vreinterpretq_u64_u32(x0), x1)) + +#define Lib_IntVector_Intrinsics_vec128_zero \ + (vdupq_n_u32(0)) + +#define Lib_IntVector_Intrinsics_vec128_add64(x0, x1) \ + (vreinterpretq_u32_u64(vaddq_u64(vreinterpretq_u64_u32(x0), vreinterpretq_u64_u32(x1)))) + +#define Lib_IntVector_Intrinsics_vec128_sub64(x0, x1) \ + (vreinterpretq_u32_u64(vsubq_u64(vreinterpretq_u64_u32(x0), vreinterpretq_u64_u32(x1)))) + +#define Lib_IntVector_Intrinsics_vec128_mul64(x0, x1) \ + (vreinterpretq_u32_u64(vmull_u32(vmovn_u64(vreinterpretq_u64_u32(x0)), vmovn_u64(vreinterpretq_u64_u32(x1))))) + +#define Lib_IntVector_Intrinsics_vec128_smul64(x0, x1) \ + (vreinterpretq_u32_u64(vmull_n_u32(vmovn_u64(vreinterpretq_u64_u32(x0)), (uint32_t)x1))) + +#define Lib_IntVector_Intrinsics_vec128_add32(x0, x1) \ + (vaddq_u32(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec128_sub32(x0, x1) \ + (vsubq_u32(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec128_mul32(x0, x1) \ + (vmulq_lane_u32(x0, x1)) + +#define Lib_IntVector_Intrinsics_vec128_smul32(x0, x1) \ + (vmulq_lane_u32(x0, vdupq_n_u32(x1))) + +#define Lib_IntVector_Intrinsics_vec128_load128(x) \ + ((uint32x4_t)(x)) + +#define Lib_IntVector_Intrinsics_vec128_load64(x) \ + (vreinterpretq_u32_u64(vdupq_n_u64(x))) /* hi lo */ + +#define Lib_IntVector_Intrinsics_vec128_load32(x) \ + (vdupq_n_u32(x)) /* hi lo */ + +static inline Lib_IntVector_Intrinsics_vec128 +Lib_IntVector_Intrinsics_vec128_load64s(uint64_t x1, uint64_t x2) +{ + const 
uint64_t a[2] = { x1, x2 }; + return vreinterpretq_u32_u64(vld1q_u64(a)); +} + +static inline Lib_IntVector_Intrinsics_vec128 +Lib_IntVector_Intrinsics_vec128_load32s(uint32_t x1, uint32_t x2, uint32_t x3, uint32_t x4) +{ + const uint32_t a[4] = { x1, x2, x3, x4 }; + return vld1q_u32(a); +} + +#define Lib_IntVector_Intrinsics_vec128_interleave_low32(x1, x2) \ + (vzip1q_u32(x1, x2)) + +#define Lib_IntVector_Intrinsics_vec128_interleave_high32(x1, x2) \ + (vzip2q_u32(x1, x2)) + +#define Lib_IntVector_Intrinsics_vec128_interleave_low64(x1, x2) \ + (vreinterpretq_u32_u64(vzip1q_u64(vreinterpretq_u64_u32(x1), vreinterpretq_u64_u32(x2)))) + +#define Lib_IntVector_Intrinsics_vec128_interleave_high64(x1, x2) \ + (vreinterpretq_u32_u64(vzip2q_u64(vreinterpretq_u64_u32(x1), vreinterpretq_u64_u32(x2)))) + +#endif /* HACL_CAN_COMPILE_VEC128 */ + +/* IBM z architecture */ +#elif defined(__s390x__) /* this flag is for GCC only */ + +#if defined(HACL_CAN_COMPILE_VEC128) + +#include <stdint.h> +#include <vecintrin.h> + +/* The main vector 128 type + * We can't use uint8_t, uint32_t, uint64_t... instead of unsigned char, + * unsigned int, unsigned long long: the compiler complains that the parameter + * combination is invalid. 
*/ +typedef unsigned char vector128_8 __attribute__((vector_size(16))); +typedef unsigned int vector128_32 __attribute__((vector_size(16))); +typedef unsigned long long vector128_64 __attribute__((vector_size(16))); + +typedef vector128_8 Lib_IntVector_Intrinsics_vec128; +typedef vector128_8 vector128; + +#define Lib_IntVector_Intrinsics_vec128_load32_le(x) \ + (vector128)((vector128_32)vec_revb(*((vector128_32*)(const uint8_t*)(x)))) + +#define Lib_IntVector_Intrinsics_vec128_load32_be(x) \ + (vector128)(*((vector128_32*)(const uint8_t*)(x))) + +#define Lib_IntVector_Intrinsics_vec128_load64_le(x) \ + (vector128)((vector128_64)vec_revb(*((vector128_64*)(const uint8_t*)(x)))) + +static inline void +Lib_IntVector_Intrinsics_vec128_store32_le(const uint8_t *x0, vector128 x1) +{ + *((vector128_32 *)x0) = vec_revb((vector128_32)x1); +} + +static inline void +Lib_IntVector_Intrinsics_vec128_store32_be(const uint8_t *x0, vector128 x1) +{ + *((vector128_32 *)x0) = (vector128_32)x1; +} + +static inline void +Lib_IntVector_Intrinsics_vec128_store64_le(const uint8_t *x0, vector128 x1) +{ + *((vector128_64 *)x0) = vec_revb((vector128_64)x1); +} + +#define Lib_IntVector_Intrinsics_vec128_add32(x0, x1) \ + ((vector128)((vector128_32)(((vector128_32)(x0)) + ((vector128_32)(x1))))) + +#define Lib_IntVector_Intrinsics_vec128_add64(x0, x1) \ + ((vector128)((vector128_64)(((vector128_64)(x0)) + ((vector128_64)(x1))))) + +#define Lib_IntVector_Intrinsics_vec128_and(x0, x1) \ + ((vector128)(vec_and((vector128)(x0), (vector128)(x1)))) + +#define Lib_IntVector_Intrinsics_vec128_eq32(x0, x1) \ + ((vector128)(vec_cmpeq(((vector128_32)(x0)), ((vector128_32)(x1))))) + +#define Lib_IntVector_Intrinsics_vec128_eq64(x0, x1) \ + ((vector128)(vec_cmpeq(((vector128_64)(x0)), ((vector128_64)(x1))))) + +#define Lib_IntVector_Intrinsics_vec128_extract32(x0, x1) \ + ((unsigned int)(vec_extract((vector128_32)(x0), x1))) + +#define Lib_IntVector_Intrinsics_vec128_extract64(x0, x1) \ + ((unsigned long 
long)(vec_extract((vector128_64)(x0), x1))) + +#define Lib_IntVector_Intrinsics_vec128_gt32(x0, x1) \ + ((vector128)((vector128_32)(((vector128_32)(x0)) > ((vector128_32)(x1))))) + +#define Lib_IntVector_Intrinsics_vec128_gt64(x0, x1) \ + ((vector128)((vector128_64)(((vector128_64)(x0)) > ((vector128_64)(x1))))) + +#define Lib_IntVector_Intrinsics_vec128_insert32(x0, x1, x2) \ + ((vector128)((vector128_32)vec_insert((unsigned int)(x1), (vector128_32)(x0), x2))) + +#define Lib_IntVector_Intrinsics_vec128_insert64(x0, x1, x2) \ + ((vector128)((vector128_64)vec_insert((unsigned long long)(x1), (vector128_64)(x0), x2))) + +#define Lib_IntVector_Intrinsics_vec128_interleave_high32(x0, x1) \ + ((vector128)((vector128_32)vec_mergel((vector128_32)(x0), (vector128_32)(x1)))) + +#define Lib_IntVector_Intrinsics_vec128_interleave_high64(x0, x1) \ + ((vector128)((vector128_64)vec_mergel((vector128_64)(x0), (vector128_64)(x1)))) + +#define Lib_IntVector_Intrinsics_vec128_interleave_low32(x0, x1) \ + ((vector128)((vector128_32)vec_mergeh((vector128_32)(x0), (vector128_32)(x1)))) + +#define Lib_IntVector_Intrinsics_vec128_interleave_low64(x0, x1) \ + ((vector128)((vector128_64)vec_mergeh((vector128_64)(x0), (vector128_64)(x1)))) + +#define Lib_IntVector_Intrinsics_vec128_load32(x) \ + ((vector128)((vector128_32){ (unsigned int)(x), (unsigned int)(x), \ + (unsigned int)(x), (unsigned int)(x) })) + +#define Lib_IntVector_Intrinsics_vec128_load32s(x0, x1, x2, x3) \ + ((vector128)((vector128_32){ (unsigned int)(x0), (unsigned int)(x1), (unsigned int)(x2), (unsigned int)(x3) })) + +#define Lib_IntVector_Intrinsics_vec128_load64(x) \ + ((vector128)((vector128_64)vec_load_pair((unsigned long long)(x), (unsigned long long)(x)))) + +#define Lib_IntVector_Intrinsics_vec128_lognot(x0) \ + ((vector128)(vec_xor((vector128)(x0), (vector128)vec_splat_u32(-1)))) + +#define Lib_IntVector_Intrinsics_vec128_mul64(x0, x1) \ + ((vector128)(vec_mulo((vector128_32)(x0), \ + (vector128_32)(x1)))) + 
+#define Lib_IntVector_Intrinsics_vec128_or(x0, x1) \ + ((vector128)(vec_or((vector128)(x0), (vector128)(x1)))) + +#define Lib_IntVector_Intrinsics_vec128_rotate_left32(x0, x1) \ + ((vector128)(vec_rli((vector128_32)(x0), (unsigned long)(x1)))) + +#define Lib_IntVector_Intrinsics_vec128_rotate_right32(x0, x1) \ + (Lib_IntVector_Intrinsics_vec128_rotate_left32(x0, (uint32_t)(32 - (x1)))) + +#define Lib_IntVector_Intrinsics_vec128_rotate_right_lanes32(x0, x1) \ + ((vector128)(vec_sld((vector128)(x0), (vector128)(x0), (x1 % 4) * 4))) + +#define Lib_IntVector_Intrinsics_vec128_shift_left64(x0, x1) \ + (((vector128)((vector128_64)vec_rli((vector128_64)(x0), (unsigned long)(x1)))) & \ + ((vector128)((vector128_64){ 0xffffffffffffffff << (x1), 0xffffffffffffffff << (x1) }))) + +#define Lib_IntVector_Intrinsics_vec128_shift_right64(x0, x1) \ + (((vector128)((vector128_64)vec_rli((vector128_64)(x0), (unsigned long)(64 - (x1))))) & \ + ((vector128)((vector128_64){ 0xffffffffffffffff >> (x1), 0xffffffffffffffff >> (x1) }))) + +#define Lib_IntVector_Intrinsics_vec128_shift_right32(x0, x1) \ + (((vector128)((vector128_32)vec_rli((vector128_32)(x0), (unsigned int)(32 - (x1))))) & \ + ((vector128)((vector128_32){ 0xffffffff >> (x1), 0xffffffff >> (x1), \ + 0xffffffff >> (x1), 0xffffffff >> (x1) }))) + +/* Doesn't work with vec_splat_u64 */ +#define Lib_IntVector_Intrinsics_vec128_smul64(x0, x1) \ + ((vector128)(Lib_IntVector_Intrinsics_vec128_mul64(x0, ((vector128_64){ (unsigned long long)(x1), (unsigned long long)(x1) })))) + +#define Lib_IntVector_Intrinsics_vec128_sub64(x0, x1) \ + ((vector128)((vector128_64)(x0) - (vector128_64)(x1))) + +static inline vector128 +Lib_IntVector_Intrinsics_vec128_xor(vector128 x0, vector128 x1) +{ + return ((vector128)(vec_xor((vector128)(x0), (vector128)(x1)))); +} + +#define Lib_IntVector_Intrinsics_vec128_zero \ + ((vector128){}) + +#endif /* HACL_CAN_COMPILE_VEC128 */ + +#elif defined(__powerpc64__) // PowerPC 64 - this flag is for GCC only 
+ +#if defined(HACL_CAN_COMPILE_VEC128) + +#include <altivec.h> +#include <string.h> // for memcpy +#include <stdint.h> + +// The main vector 128 type +// We can't use uint8_t, uint32_t, uint64_t... instead of unsigned char, +// unsigned int, unsigned long long: the compiler complains that the parameter +// combination is invalid. +typedef vector unsigned char vector128_8; +typedef vector unsigned int vector128_32; +typedef vector unsigned long long vector128_64; + +typedef vector128_8 Lib_IntVector_Intrinsics_vec128; +typedef vector128_8 vector128; + +#define Lib_IntVector_Intrinsics_vec128_load32_le(x) \ + ((vector128)((vector128_32)(vec_xl(0, (const unsigned int*)((const uint8_t*)(x)))))) + +#define Lib_IntVector_Intrinsics_vec128_load64_le(x) \ + ((vector128)((vector128_64)(vec_xl(0, (const unsigned long long*)((const uint8_t*)(x)))))) + +#define Lib_IntVector_Intrinsics_vec128_store32_le(x0, x1) \ + (vec_xst((vector128_32)(x1), 0, (unsigned int*)((uint8_t*)(x0)))) + +#define Lib_IntVector_Intrinsics_vec128_store64_le(x0, x1) \ + (vec_xst((vector128_64)(x1), 0, (unsigned long long*)((uint8_t*)(x0)))) + +#define Lib_IntVector_Intrinsics_vec128_add32(x0, x1) \ + ((vector128)((vector128_32)(((vector128_32)(x0)) + ((vector128_32)(x1))))) + +#define Lib_IntVector_Intrinsics_vec128_add64(x0, x1) \ + ((vector128)((vector128_64)(((vector128_64)(x0)) + ((vector128_64)(x1))))) + +#define Lib_IntVector_Intrinsics_vec128_and(x0, x1) \ + ((vector128)(vec_and((vector128)(x0), (vector128)(x1)))) + +#define Lib_IntVector_Intrinsics_vec128_eq32(x0, x1) \ + ((vector128)(vec_cmpeq(((vector128_32)(x0)), ((vector128_32)(x1))))) + +#define Lib_IntVector_Intrinsics_vec128_eq64(x0, x1) \ + ((vector128)(vec_cmpeq(((vector128_64)(x0)), ((vector128_64)(x1))))) + +#define Lib_IntVector_Intrinsics_vec128_extract32(x0, x1) \ + ((unsigned int)(vec_extract((vector128_32)(x0), x1))) + +#define Lib_IntVector_Intrinsics_vec128_extract64(x0, x1) \ + ((unsigned long 
long)(vec_extract((vector128_64)(x0), x1))) + +#define Lib_IntVector_Intrinsics_vec128_gt32(x0, x1) \ + ((vector128)((vector128_32)(((vector128_32)(x0)) > ((vector128_32)(x1))))) + +#define Lib_IntVector_Intrinsics_vec128_gt64(x0, x1) \ + ((vector128)((vector128_64)(((vector128_64)(x0)) > ((vector128_64)(x1))))) + +#define Lib_IntVector_Intrinsics_vec128_insert32(x0, x1, x2) \ + ((vector128)((vector128_32)vec_insert((unsigned int)(x1), (vector128_32)(x0), x2))) + +#define Lib_IntVector_Intrinsics_vec128_insert64(x0, x1, x2) \ + ((vector128)((vector128_64)vec_insert((unsigned long long)(x1), (vector128_64)(x0), x2))) + +#define Lib_IntVector_Intrinsics_vec128_interleave_high32(x0, x1) \ + ((vector128)((vector128_32)vec_mergel((vector128_32)(x0), (vector128_32)(x1)))) + +#define Lib_IntVector_Intrinsics_vec128_interleave_high64(x0, x1) \ + ((vector128)((vector128_64)vec_mergel((vector128_64)(x0), (vector128_64)(x1)))) + +#define Lib_IntVector_Intrinsics_vec128_interleave_low32(x0, x1) \ + ((vector128)((vector128_32)vec_mergeh((vector128_32)(x0), (vector128_32)(x1)))) + +#define Lib_IntVector_Intrinsics_vec128_interleave_low64(x0, x1) \ + ((vector128)((vector128_64)vec_mergeh((vector128_64)(x0), (vector128_64)(x1)))) + +#define Lib_IntVector_Intrinsics_vec128_load32(x) \ + ((vector128)((vector128_32){ (unsigned int)(x), (unsigned int)(x), \ + (unsigned int)(x), (unsigned int)(x) })) + +#define Lib_IntVector_Intrinsics_vec128_load32s(x0, x1, x2, x3) \ + ((vector128)((vector128_32){ (unsigned int)(x0), (unsigned int)(x1), (unsigned int)(x2), (unsigned int)(x3) })) + +#define Lib_IntVector_Intrinsics_vec128_load64(x) \ + ((vector128)((vector128_64){ (unsigned long long)(x), (unsigned long long)(x) })) + +#define Lib_IntVector_Intrinsics_vec128_lognot(x0) \ + ((vector128)(vec_xor((vector128)(x0), (vector128)vec_splat_u32(-1)))) + +#define Lib_IntVector_Intrinsics_vec128_mul64(x0, x1) \ + ((vector128)(vec_mule((vector128_32)(x0), \ + (vector128_32)(x1)))) + +#define 
Lib_IntVector_Intrinsics_vec128_or(x0, x1) \ + ((vector128)(vec_or((vector128)(x0), (vector128)(x1)))) + +#define Lib_IntVector_Intrinsics_vec128_rotate_left32(x0, x1) \ + ((vector128)(vec_rl((vector128_32)(x0), (vector128_32){ (unsigned int)(x1), (unsigned int)(x1), (unsigned int)(x1), (unsigned int)(x1) }))) + +#define Lib_IntVector_Intrinsics_vec128_rotate_right32(x0, x1) \ + (Lib_IntVector_Intrinsics_vec128_rotate_left32(x0, (uint32_t)(32 - (x1)))) + +#define Lib_IntVector_Intrinsics_vec128_rotate_right_lanes32(x0, x1) \ + ((vector128)(vec_sld((vector128)(x0), (vector128)(x0), ((4 - (x1)) % 4) * 4))) + +#define Lib_IntVector_Intrinsics_vec128_shift_left64(x0, x1) \ + ((vector128)((vector128_64)vec_sl((vector128_64)(x0), (vector128_64){ (unsigned long)(x1), (unsigned long)(x1) }))) + +#define Lib_IntVector_Intrinsics_vec128_shift_right64(x0, x1) \ + ((vector128)((vector128_64)vec_sr((vector128_64)(x0), (vector128_64){ (unsigned long)(x1), (unsigned long)(x1) }))) + +// Doesn't work with vec_splat_u64 +#define Lib_IntVector_Intrinsics_vec128_smul64(x0, x1) \ + ((vector128)(Lib_IntVector_Intrinsics_vec128_mul64(x0, ((vector128_64){ (unsigned long long)(x1), (unsigned long long)(x1) })))) + +#define Lib_IntVector_Intrinsics_vec128_sub64(x0, x1) \ + ((vector128)((vector128_64)(x0) - (vector128_64)(x1))) + +#define Lib_IntVector_Intrinsics_vec128_xor(x0, x1) \ + ((vector128)(vec_xor((vector128)(x0), (vector128)(x1)))) + +#define Lib_IntVector_Intrinsics_vec128_zero \ + ((vector128){}) + +#endif /* HACL_CAN_COMPILE_VEC128 */ + +#endif // PowerPC64 + +// DEBUGGING: +// If libintvector_debug.h exists, use it to debug the current implementations. +// Note that some flags must be enabled for the debugging to be effective: +// see libintvector_debug.h for more details. +#if defined(__has_include) +#if __has_include("libintvector_debug.h") +#include "libintvector_debug.h" +#endif +#endif + +#endif // __Vec_Intrin_H |