summaryrefslogtreecommitdiffstats
path: root/security/nss/lib/freebl/gcm-x86.c
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-07 19:33:14 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-07 19:33:14 +0000
commit36d22d82aa202bb199967e9512281e9a53db42c9 (patch)
tree105e8c98ddea1c1e4784a60a5a6410fa416be2de /security/nss/lib/freebl/gcm-x86.c
parentInitial commit. (diff)
downloadfirefox-esr-36d22d82aa202bb199967e9512281e9a53db42c9.tar.xz
firefox-esr-36d22d82aa202bb199967e9512281e9a53db42c9.zip
Adding upstream version 115.7.0esr.upstream/115.7.0esrupstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'security/nss/lib/freebl/gcm-x86.c')
-rw-r--r--security/nss/lib/freebl/gcm-x86.c127
1 files changed, 127 insertions, 0 deletions
diff --git a/security/nss/lib/freebl/gcm-x86.c b/security/nss/lib/freebl/gcm-x86.c
new file mode 100644
index 0000000000..e34d633943
--- /dev/null
+++ b/security/nss/lib/freebl/gcm-x86.c
@@ -0,0 +1,127 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifdef FREEBL_NO_DEPEND
+#include "stubs.h"
+#endif
+#include "gcm.h"
+#include "secerr.h"
+
+#include <wmmintrin.h> /* clmul */
+
+#define WRITE64(x, bytes) \
+ (bytes)[0] = (x) >> 56; \
+ (bytes)[1] = (x) >> 48; \
+ (bytes)[2] = (x) >> 40; \
+ (bytes)[3] = (x) >> 32; \
+ (bytes)[4] = (x) >> 24; \
+ (bytes)[5] = (x) >> 16; \
+ (bytes)[6] = (x) >> 8; \
+ (bytes)[7] = (x);
+
+SECStatus
+gcm_HashWrite_hw(gcmHashContext *ghash, unsigned char *outbuf)
+{
+ uint64_t tmp_out[2];
+ _mm_storeu_si128((__m128i *)tmp_out, ghash->x);
+ /* maxout must be larger than 16 byte (checked by the caller). */
+ WRITE64(tmp_out[0], outbuf + 8);
+ WRITE64(tmp_out[1], outbuf);
+ return SECSuccess;
+}
+
+SECStatus
+gcm_HashMult_hw(gcmHashContext *ghash, const unsigned char *buf,
+ unsigned int count)
+{
+ size_t i;
+ pre_align __m128i z_high post_align;
+ pre_align __m128i z_low post_align;
+ pre_align __m128i C post_align;
+ pre_align __m128i D post_align;
+ pre_align __m128i E post_align;
+ pre_align __m128i F post_align;
+ pre_align __m128i bin post_align;
+ pre_align __m128i Ci post_align;
+ pre_align __m128i tmp post_align;
+
+ for (i = 0; i < count; i++, buf += 16) {
+ bin = _mm_set_epi16(((uint16_t)buf[0] << 8) | buf[1],
+ ((uint16_t)buf[2] << 8) | buf[3],
+ ((uint16_t)buf[4] << 8) | buf[5],
+ ((uint16_t)buf[6] << 8) | buf[7],
+ ((uint16_t)buf[8] << 8) | buf[9],
+ ((uint16_t)buf[10] << 8) | buf[11],
+ ((uint16_t)buf[12] << 8) | buf[13],
+ ((uint16_t)buf[14] << 8) | buf[15]);
+ Ci = _mm_xor_si128(bin, ghash->x);
+
+ /* Do binary mult ghash->X = Ci * ghash->H. */
+ C = _mm_clmulepi64_si128(Ci, ghash->h, 0x00);
+ D = _mm_clmulepi64_si128(Ci, ghash->h, 0x11);
+ E = _mm_clmulepi64_si128(Ci, ghash->h, 0x01);
+ F = _mm_clmulepi64_si128(Ci, ghash->h, 0x10);
+ tmp = _mm_xor_si128(E, F);
+ z_high = _mm_xor_si128(tmp, _mm_slli_si128(D, 8));
+ z_high = _mm_unpackhi_epi64(z_high, D);
+ z_low = _mm_xor_si128(_mm_slli_si128(tmp, 8), C);
+ z_low = _mm_unpackhi_epi64(_mm_slli_si128(C, 8), z_low);
+
+ /* Shift one to the left (multiply by x) as gcm spec is stupid. */
+ C = _mm_slli_si128(z_low, 8);
+ E = _mm_srli_epi64(C, 63);
+ D = _mm_slli_si128(z_high, 8);
+ F = _mm_srli_epi64(D, 63);
+ /* Carry over */
+ C = _mm_srli_si128(z_low, 8);
+ D = _mm_srli_epi64(C, 63);
+ z_low = _mm_or_si128(_mm_slli_epi64(z_low, 1), E);
+ z_high = _mm_or_si128(_mm_or_si128(_mm_slli_epi64(z_high, 1), F), D);
+
+ /* Reduce */
+ C = _mm_slli_si128(z_low, 8);
+ /* D = z_low << 127 */
+ D = _mm_slli_epi64(C, 63);
+ /* E = z_low << 126 */
+ E = _mm_slli_epi64(C, 62);
+ /* F = z_low << 121 */
+ F = _mm_slli_epi64(C, 57);
+ /* z_low ^= (z_low << 127) ^ (z_low << 126) ^ (z_low << 121); */
+ z_low = _mm_xor_si128(_mm_xor_si128(_mm_xor_si128(z_low, D), E), F);
+ C = _mm_srli_si128(z_low, 8);
+ /* D = z_low >> 1 */
+ D = _mm_slli_epi64(C, 63);
+ D = _mm_or_si128(_mm_srli_epi64(z_low, 1), D);
+ /* E = z_low >> 2 */
+ E = _mm_slli_epi64(C, 62);
+ E = _mm_or_si128(_mm_srli_epi64(z_low, 2), E);
+ /* F = z_low >> 7 */
+ F = _mm_slli_epi64(C, 57);
+ F = _mm_or_si128(_mm_srli_epi64(z_low, 7), F);
+ /* ghash->x ^= z_low ^ (z_low >> 1) ^ (z_low >> 2) ^ (z_low >> 7); */
+ ghash->x = _mm_xor_si128(_mm_xor_si128(
+ _mm_xor_si128(_mm_xor_si128(z_high, z_low), D), E),
+ F);
+ }
+ return SECSuccess;
+}
+
+SECStatus
+gcm_HashInit_hw(gcmHashContext *ghash)
+{
+ ghash->ghash_mul = gcm_HashMult_hw;
+ ghash->x = _mm_setzero_si128();
+ /* MSVC requires __m64 to load epi64. */
+ ghash->h = _mm_set_epi32(ghash->h_high >> 32, (uint32_t)ghash->h_high,
+ ghash->h_low >> 32, (uint32_t)ghash->h_low);
+ ghash->hw = PR_TRUE;
+ return SECSuccess;
+}
+
+SECStatus
+gcm_HashZeroX_hw(gcmHashContext *ghash)
+{
+ ghash->x = _mm_setzero_si128();
+ return SECSuccess;
+}