Diffstat (limited to 'lib/accelerated/x86/x86-common.c')
-rw-r--r--  lib/accelerated/x86/x86-common.c  1228
1 files changed, 1228 insertions, 0 deletions
diff --git a/lib/accelerated/x86/x86-common.c b/lib/accelerated/x86/x86-common.c
new file mode 100644
index 0000000..d1a76c9
--- /dev/null
+++ b/lib/accelerated/x86/x86-common.c
@@ -0,0 +1,1228 @@
+/*
+ * Copyright (C) 2011-2018 Free Software Foundation, Inc.
+ * Copyright (C) 2018 Red Hat, Inc.
+ *
+ * Author: Nikos Mavrogiannopoulos
+ *
+ * This file is part of GnuTLS.
+ *
+ * The GnuTLS is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public License
+ * as published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program. If not, see <https://www.gnu.org/licenses/>
+ *
+ */
+
+/*
+ * The following code detects the available x86 CPU features (AES-NI, SSSE3,
+ * PCLMUL, SHA extensions, VIA PadLock) and registers the corresponding
+ * accelerated cipher and digest implementations.
+ */
+
+#include "errors.h"
+#include "gnutls_int.h"
+#include <gnutls/crypto.h>
+#include "errors.h"
+#include <aes-x86.h>
+#include <sha-x86.h>
+#include <x86-common.h>
+#ifdef HAVE_LIBNETTLE
+# include <nettle/aes.h> /* for key generation in 192 and 256 bits */
+# include <sha-padlock.h>
+#endif
+#include <aes-padlock.h>
+#ifdef HAVE_CPUID_H
+# include <cpuid.h>
+#else
+# define __get_cpuid(...) 0
+# define __get_cpuid_count(...) 0
+#endif
+
+/* GNUTLS_x86_cpuid_s[0]: leaf 1 EDX, [1]: leaf 1 ECX, [2]: leaf 7 EBX
+ * (filled in by read_cpuid_vals() below).
+ * This is a format compatible with openssl's CPUID detection.
+ */
+#if defined(__GNUC__)
+__attribute__((visibility("hidden")))
+#elif defined(__SUNPRO_C)
+__hidden
+#endif
+unsigned int GNUTLS_x86_cpuid_s[4];
+
+#ifndef bit_SHA
+# define bit_SHA (1<<29)
+#endif
+
+/* ecx */
+#ifndef bit_AVX512BITALG
+# define bit_AVX512BITALG 0x4000
+#endif
+
+#ifndef bit_PCLMUL
+# define bit_PCLMUL 0x2
+#endif
+
+#ifndef bit_SSSE3
+/* ecx */
+# define bit_SSSE3 0x0000200
+#endif
+
+#ifndef bit_AES
+# define bit_AES 0x2000000
+#endif
+
+#ifndef bit_AVX
+# define bit_AVX 0x10000000
+#endif
+
+#ifndef bit_AVX2
+# define bit_AVX2 0x00000020
+#endif
+
+#ifndef bit_AVX512F
+# define bit_AVX512F 0x00010000
+#endif
+
+#ifndef bit_AVX512IFMA
+# define bit_AVX512IFMA 0x00200000
+#endif
+
+#ifndef bit_AVX512BW
+# define bit_AVX512BW 0x40000000
+#endif
+
+#ifndef bit_AVX512VL
+# define bit_AVX512VL 0x80000000
+#endif
+
+#ifndef bit_OSXSAVE
+# define bit_OSXSAVE 0x8000000
+#endif
+
+#ifndef bit_MOVBE
+# define bit_MOVBE 0x00400000
+#endif
+
+#define bit_PADLOCK (0x3 << 6)
+#define bit_PADLOCK_PHE (0x3 << 10)
+#define bit_PADLOCK_PHE_SHA512 (0x3 << 25)
+
+/* Our internal bit-string for cpu capabilities. Should be set
+ * in GNUTLS_CPUID_OVERRIDE */
+#define EMPTY_SET 1
+#define INTEL_AES_NI (1<<1)
+#define INTEL_SSSE3 (1<<2)
+#define INTEL_PCLMUL (1<<3)
+#define INTEL_AVX (1<<4)
+#define INTEL_SHA (1<<5)
+#define PADLOCK (1<<20)
+#define PADLOCK_PHE (1<<21)
+#define PADLOCK_PHE_SHA512 (1<<22)
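+
+/* As an illustration of the flags above: a value of 0x1 (EMPTY_SET) disables
+ * all acceleration, 0x2 requests AES-NI only, and flags may be combined with
+ * OR, e.g. 0x6 for AES-NI together with SSSE3. */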
+
+#ifndef HAVE_GET_CPUID_COUNT
+static inline void
+get_cpuid_level7(unsigned int *eax, unsigned int *ebx,
+ unsigned int *ecx, unsigned int *edx)
+{
+ /* we avoid using __get_cpuid_count, because it is not available with gcc 4.8 */
+ if (__get_cpuid_max(7, 0) < 7)
+ return;
+
+ __cpuid_count(7, 0, *eax, *ebx, *ecx, *edx);
+ return;
+}
+#else
+# define get_cpuid_level7(a,b,c,d) __get_cpuid_count(7, 0, a, b, c, d)
+#endif
+
+static unsigned read_cpuid_vals(unsigned int vals[4])
+{
+ unsigned t1, t2, t3;
+ vals[0] = vals[1] = vals[2] = vals[3] = 0;
+
+ if (!__get_cpuid(1, &t1, &t2, &vals[1], &vals[0]))
+ return 0;
+	/* suppress AVX512; the original code enables it only conditionally
+	 * on certain CPUs */
+ vals[1] &= 0xfffff7ff;
+
+ get_cpuid_level7(&t1, &vals[2], &t2, &t3);
+
+ return 1;
+}
+
+/* Based on the example in "How to detect New Instruction support in
+ * the 4th generation Intel Core processor family":
+ * https://software.intel.com/en-us/articles/how-to-detect-new-instruction-support-in-the-4th-generation-intel-core-processor-family
+ */
+static unsigned check_4th_gen_intel_features(unsigned ecx)
+{
+ uint32_t xcr0;
+
+ if ((ecx & bit_OSXSAVE) != bit_OSXSAVE)
+ return 0;
+
+#if defined(_MSC_VER) && !defined(__clang__)
+ xcr0 = _xgetbv(0);
+#else
+ __asm__ ("xgetbv" : "=a" (xcr0) : "c" (0) : "%edx");
+#endif
+ /* Check if xmm and ymm state are enabled in XCR0. */
+ return (xcr0 & 6) == 6;
+}
+
+static void capabilities_to_intel_cpuid(unsigned capabilities)
+{
+ unsigned a[4];
+
+ if (capabilities & EMPTY_SET) {
+ return;
+ }
+
+ if (!read_cpuid_vals(a))
+ return;
+
+ if (capabilities & INTEL_AES_NI) {
+ if (a[1] & bit_AES) {
+ GNUTLS_x86_cpuid_s[1] |= bit_AES;
+ } else {
+ _gnutls_debug_log
+ ("AESNI acceleration requested but not available\n");
+ }
+ }
+
+ if (capabilities & INTEL_SSSE3) {
+ if (a[1] & bit_SSSE3) {
+ GNUTLS_x86_cpuid_s[1] |= bit_SSSE3;
+ } else {
+ _gnutls_debug_log
+ ("SSSE3 acceleration requested but not available\n");
+ }
+ }
+
+ if (capabilities & INTEL_AVX) {
+ if ((a[1] & bit_AVX) && (a[1] & bit_MOVBE) &&
+ check_4th_gen_intel_features(a[1])) {
+ GNUTLS_x86_cpuid_s[1] |= bit_AVX|bit_MOVBE;
+ } else {
+ _gnutls_debug_log
+ ("AVX acceleration requested but not available\n");
+ }
+ }
+
+ if (capabilities & INTEL_PCLMUL) {
+ if (a[1] & bit_PCLMUL) {
+ GNUTLS_x86_cpuid_s[1] |= bit_PCLMUL;
+ } else {
+ _gnutls_debug_log
+ ("PCLMUL acceleration requested but not available\n");
+ }
+ }
+
+ if (capabilities & INTEL_SHA) {
+ if (a[2] & bit_SHA) {
+ GNUTLS_x86_cpuid_s[2] |= bit_SHA;
+ } else {
+ _gnutls_debug_log
+ ("SHA acceleration requested but not available\n");
+ }
+ }
+}
+
+
+static unsigned check_optimized_aes(void)
+{
+ return (GNUTLS_x86_cpuid_s[1] & bit_AES);
+}
+
+static unsigned check_ssse3(void)
+{
+ return (GNUTLS_x86_cpuid_s[1] & bit_SSSE3);
+}
+
+static unsigned check_sha(void)
+{
+ return (GNUTLS_x86_cpuid_s[2] & bit_SHA);
+}
+
+#ifdef ASM_X86_64
+static unsigned check_avx_movbe(void)
+{
+ return (GNUTLS_x86_cpuid_s[1] & (bit_AVX|bit_MOVBE)) == (bit_AVX|bit_MOVBE);
+}
+
+static unsigned check_pclmul(void)
+{
+ return (GNUTLS_x86_cpuid_s[1] & bit_PCLMUL);
+}
+#endif
+
+#ifdef ENABLE_PADLOCK
+static unsigned capabilities_to_zhaoxin_edx(unsigned capabilities)
+{
+ unsigned a,b,c,t;
+
+ if (capabilities & EMPTY_SET) {
+ return 0;
+ }
+
+ if (!__get_cpuid(1, &t, &a, &b, &c))
+ return 0;
+ if (capabilities & PADLOCK) {
+ if (c & bit_PADLOCK) {
+ GNUTLS_x86_cpuid_s[2] |= bit_PADLOCK;
+ } else {
+ _gnutls_debug_log
+ ("Padlock acceleration requested but not available\n");
+ }
+ }
+
+ if (capabilities & PADLOCK_PHE) {
+ if (c & bit_PADLOCK_PHE) {
+ GNUTLS_x86_cpuid_s[2] |= bit_PADLOCK_PHE;
+ } else {
+ _gnutls_debug_log
+ ("Padlock-PHE acceleration requested but not available\n");
+ }
+ }
+
+ if (capabilities & PADLOCK_PHE_SHA512) {
+ if (c & bit_PADLOCK_PHE_SHA512) {
+ GNUTLS_x86_cpuid_s[2] |= bit_PADLOCK_PHE_SHA512;
+ } else {
+ _gnutls_debug_log
+ ("Padlock-PHE-SHA512 acceleration requested but not available\n");
+ }
+ }
+
+ return GNUTLS_x86_cpuid_s[2];
+}
+
+static int check_padlock(unsigned edx)
+{
+ return ((edx & bit_PADLOCK) == bit_PADLOCK);
+}
+
+static int check_phe(unsigned edx)
+{
+ return ((edx & bit_PADLOCK_PHE) == bit_PADLOCK_PHE);
+}
+
+/* We are actually checking for SHA512 */
+static int check_phe_sha512(unsigned edx)
+{
+ return ((edx & bit_PADLOCK_PHE_SHA512) == bit_PADLOCK_PHE_SHA512);
+}
+
+/* On some Zhaoxin CPUs, PCLMUL provides a faster acceleration path */
+static int check_fast_pclmul(void)
+{
+ unsigned int a,b,c,d;
+ unsigned int family,model;
+
+ if (!__get_cpuid(1, &a, &b, &c, &d))
+ return 0;
+
+ family = ((a >> 8) & 0x0F);
+ model = ((a >> 4) & 0x0F) + ((a >> 12) & 0xF0);
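+	/* Worked example (hypothetical signature value): EAX = 0x000307B0
+	 * gives family = (0x307B0 >> 8) & 0xF = 0x7 and
+	 * model = ((0x307B0 >> 4) & 0xF) + ((0x307B0 >> 12) & 0xF0)
+	 *       = 0xB + 0x30 = 0x3B, one of the models matched below. */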
+
+ if (((family == 0x6) && (model == 0xf || model == 0x19)) ||
+ ((family == 0x7) && (model == 0x1B || model == 0x3B)))
+ return 1;
+ else
+ return 0;
+}
+
+static int check_phe_partial(void)
+{
+ const char text[64] =
+ "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
+ "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa";
+ uint32_t iv[5] = { 0x67452301UL, 0xEFCDAB89UL,
+ 0x98BADCFEUL, 0x10325476UL, 0xC3D2E1F0UL
+ };
+
+ /* If EAX is set to -1 (this is the case with padlock_sha1_blocks), the
+ * xsha1 instruction takes a complete SHA-1 block (64 bytes), while it
+ * takes arbitrary length data otherwise. */
+ padlock_sha1_blocks(iv, text, 1);
+
+ if (iv[0] == 0xDA4968EBUL && iv[1] == 0x2E377C1FUL &&
+ iv[2] == 0x884E8F52UL && iv[3] == 0x83524BEBUL &&
+ iv[4] == 0xE74EBDBDUL)
+ return 1;
+ else
+ return 0;
+}
+
+static unsigned check_zhaoxin(void)
+{
+ unsigned int a, b, c, d;
+
+ if (!__get_cpuid(0, &a, &b, &c, &d))
+ return 0;
+
+	/* A Zhaoxin or VIA CPU was detected (vendor string
+	 * "CentaurHauls" or "  Shanghai  ") */
+	if ((memcmp(&b, "Cent", 4) == 0 &&
+	     memcmp(&d, "aurH", 4) == 0 &&
+	     memcmp(&c, "auls", 4) == 0) ||
+	    (memcmp(&b, "  Sh", 4) == 0 &&
+	     memcmp(&d, "angh", 4) == 0 && memcmp(&c, "ai  ", 4) == 0)) {
+ return 1;
+ }
+
+ return 0;
+}
+
+static
+void register_x86_padlock_crypto(unsigned capabilities)
+{
+ int ret, phe;
+ unsigned edx;
+
+ if (check_zhaoxin() == 0)
+ return;
+
+ memset(GNUTLS_x86_cpuid_s, 0, sizeof(GNUTLS_x86_cpuid_s));
+
+	if (capabilities == 0) {
+		if (!read_cpuid_vals(GNUTLS_x86_cpuid_s))
+			return;
+		edx = padlock_capability();
+	} else {
+ capabilities_to_intel_cpuid(capabilities);
+ edx = capabilities_to_zhaoxin_edx(capabilities);
+ }
+
+ if (check_ssse3()) {
+ _gnutls_debug_log("Zhaoxin SSSE3 was detected\n");
+
+ ret =
+ gnutls_crypto_single_cipher_register
+ (GNUTLS_CIPHER_AES_128_GCM, 90,
+ &_gnutls_aes_gcm_x86_ssse3, 0);
+ if (ret < 0) {
+ gnutls_assert();
+ }
+
+ ret =
+ gnutls_crypto_single_cipher_register
+ (GNUTLS_CIPHER_AES_192_GCM, 90,
+ &_gnutls_aes_gcm_x86_ssse3, 0);
+ if (ret < 0) {
+ gnutls_assert();
+ }
+
+ ret =
+ gnutls_crypto_single_cipher_register
+ (GNUTLS_CIPHER_AES_256_GCM, 90,
+ &_gnutls_aes_gcm_x86_ssse3, 0);
+ if (ret < 0) {
+ gnutls_assert();
+ }
+
+ ret =
+ gnutls_crypto_single_cipher_register
+ (GNUTLS_CIPHER_AES_128_CBC, 90, &_gnutls_aes_ssse3, 0);
+ if (ret < 0) {
+ gnutls_assert();
+ }
+
+ ret =
+ gnutls_crypto_single_cipher_register
+ (GNUTLS_CIPHER_AES_192_CBC, 90, &_gnutls_aes_ssse3, 0);
+ if (ret < 0) {
+ gnutls_assert();
+ }
+
+ ret =
+ gnutls_crypto_single_cipher_register
+ (GNUTLS_CIPHER_AES_256_CBC, 90, &_gnutls_aes_ssse3, 0);
+ if (ret < 0) {
+ gnutls_assert();
+ }
+ }
+
+ if (check_sha() || check_ssse3()) {
+ if (check_sha())
+ _gnutls_debug_log("Zhaoxin SHA was detected\n");
+
+ ret =
+ gnutls_crypto_single_digest_register(GNUTLS_DIG_SHA1,
+ 80,
+ &_gnutls_sha_x86_ssse3, 0);
+ if (ret < 0) {
+ gnutls_assert();
+ }
+
+ ret =
+ gnutls_crypto_single_digest_register(GNUTLS_DIG_SHA224,
+ 80,
+ &_gnutls_sha_x86_ssse3, 0);
+ if (ret < 0) {
+ gnutls_assert();
+ }
+
+ ret =
+ gnutls_crypto_single_digest_register(GNUTLS_DIG_SHA256,
+ 80,
+ &_gnutls_sha_x86_ssse3, 0);
+ if (ret < 0) {
+ gnutls_assert();
+ }
+
+
+ ret =
+ gnutls_crypto_single_mac_register(GNUTLS_MAC_SHA1,
+ 80,
+ &_gnutls_hmac_sha_x86_ssse3, 0);
+ if (ret < 0)
+ gnutls_assert();
+
+ ret =
+ gnutls_crypto_single_mac_register(GNUTLS_MAC_SHA224,
+ 80,
+ &_gnutls_hmac_sha_x86_ssse3, 0);
+ if (ret < 0)
+ gnutls_assert();
+
+ ret =
+ gnutls_crypto_single_mac_register(GNUTLS_MAC_SHA256,
+ 80,
+ &_gnutls_hmac_sha_x86_ssse3, 0);
+ if (ret < 0)
+ gnutls_assert();
+
+ ret =
+ gnutls_crypto_single_digest_register(GNUTLS_DIG_SHA384,
+ 80,
+ &_gnutls_sha_x86_ssse3, 0);
+ if (ret < 0)
+ gnutls_assert();
+
+ ret =
+ gnutls_crypto_single_digest_register(GNUTLS_DIG_SHA512,
+ 80,
+ &_gnutls_sha_x86_ssse3, 0);
+ if (ret < 0)
+ gnutls_assert();
+ ret =
+ gnutls_crypto_single_mac_register(GNUTLS_MAC_SHA384,
+ 80,
+ &_gnutls_hmac_sha_x86_ssse3, 0);
+ if (ret < 0)
+ gnutls_assert();
+
+ ret =
+ gnutls_crypto_single_mac_register(GNUTLS_MAC_SHA512,
+ 80,
+ &_gnutls_hmac_sha_x86_ssse3, 0);
+ if (ret < 0)
+ gnutls_assert();
+ }
+
+ if (check_optimized_aes()) {
+ _gnutls_debug_log("Zhaoxin AES accelerator was detected\n");
+ ret =
+ gnutls_crypto_single_cipher_register
+ (GNUTLS_CIPHER_AES_128_CBC, 80, &_gnutls_aesni_x86, 0);
+ if (ret < 0) {
+ gnutls_assert();
+ }
+
+ ret =
+ gnutls_crypto_single_cipher_register
+ (GNUTLS_CIPHER_AES_192_CBC, 80, &_gnutls_aesni_x86, 0);
+ if (ret < 0) {
+ gnutls_assert();
+ }
+
+ ret =
+ gnutls_crypto_single_cipher_register
+ (GNUTLS_CIPHER_AES_256_CBC, 80, &_gnutls_aesni_x86, 0);
+ if (ret < 0) {
+ gnutls_assert();
+ }
+
+ ret =
+ gnutls_crypto_single_cipher_register
+ (GNUTLS_CIPHER_AES_128_CCM, 80,
+ &_gnutls_aes_ccm_x86_aesni, 0);
+ if (ret < 0) {
+ gnutls_assert();
+ }
+
+ ret =
+ gnutls_crypto_single_cipher_register
+ (GNUTLS_CIPHER_AES_256_CCM, 80,
+ &_gnutls_aes_ccm_x86_aesni, 0);
+ if (ret < 0) {
+ gnutls_assert();
+ }
+
+ ret =
+ gnutls_crypto_single_cipher_register
+ (GNUTLS_CIPHER_AES_128_CCM_8, 80,
+ &_gnutls_aes_ccm_x86_aesni, 0);
+ if (ret < 0) {
+ gnutls_assert();
+ }
+
+ ret =
+ gnutls_crypto_single_cipher_register
+ (GNUTLS_CIPHER_AES_256_CCM_8, 80,
+ &_gnutls_aes_ccm_x86_aesni, 0);
+ if (ret < 0) {
+ gnutls_assert();
+ }
+
+ ret =
+ gnutls_crypto_single_cipher_register
+ (GNUTLS_CIPHER_AES_128_XTS, 80,
+ &_gnutls_aes_xts_x86_aesni, 0);
+ if (ret < 0) {
+ gnutls_assert();
+ }
+
+ ret =
+ gnutls_crypto_single_cipher_register
+ (GNUTLS_CIPHER_AES_256_XTS, 80,
+ &_gnutls_aes_xts_x86_aesni, 0);
+ if (ret < 0) {
+ gnutls_assert();
+ }
+
+#ifdef ASM_X86_64
+ if (check_pclmul()) {
+ /* register GCM ciphers */
+ _gnutls_debug_log
+ ("Zhaoxin GCM accelerator was detected\n");
+ if (check_avx_movbe() && check_fast_pclmul()) {
+ _gnutls_debug_log
+ ("Zhaoxin GCM accelerator (AVX) was detected\n");
+ ret =
+ gnutls_crypto_single_cipher_register
+ (GNUTLS_CIPHER_AES_128_GCM, 80,
+ &_gnutls_aes_gcm_pclmul_avx, 0);
+ if (ret < 0) {
+ gnutls_assert();
+ }
+
+ ret =
+ gnutls_crypto_single_cipher_register
+ (GNUTLS_CIPHER_AES_192_GCM, 80,
+ &_gnutls_aes_gcm_pclmul_avx, 0);
+ if (ret < 0) {
+ gnutls_assert();
+ }
+
+ ret =
+ gnutls_crypto_single_cipher_register
+ (GNUTLS_CIPHER_AES_256_GCM, 80,
+ &_gnutls_aes_gcm_pclmul_avx, 0);
+ if (ret < 0) {
+ gnutls_assert();
+ }
+ } else {
+ ret =
+ gnutls_crypto_single_cipher_register
+ (GNUTLS_CIPHER_AES_128_GCM, 80,
+ &_gnutls_aes_gcm_pclmul, 0);
+ if (ret < 0) {
+ gnutls_assert();
+ }
+
+ ret =
+ gnutls_crypto_single_cipher_register
+ (GNUTLS_CIPHER_AES_192_GCM, 80,
+ &_gnutls_aes_gcm_pclmul, 0);
+ if (ret < 0) {
+ gnutls_assert();
+ }
+
+ ret =
+ gnutls_crypto_single_cipher_register
+ (GNUTLS_CIPHER_AES_256_GCM, 80,
+ &_gnutls_aes_gcm_pclmul, 0);
+ if (ret < 0) {
+ gnutls_assert();
+ }
+ }
+ } else
+#endif
+ {
+ ret =
+ gnutls_crypto_single_cipher_register
+ (GNUTLS_CIPHER_AES_128_GCM, 80,
+ &_gnutls_aes_gcm_x86_aesni, 0);
+ if (ret < 0) {
+ gnutls_assert();
+ }
+
+ ret =
+ gnutls_crypto_single_cipher_register
+ (GNUTLS_CIPHER_AES_192_GCM, 80,
+ &_gnutls_aes_gcm_x86_aesni, 0);
+ if (ret < 0) {
+ gnutls_assert();
+ }
+
+ ret =
+ gnutls_crypto_single_cipher_register
+ (GNUTLS_CIPHER_AES_256_GCM, 80,
+ &_gnutls_aes_gcm_x86_aesni, 0);
+ if (ret < 0) {
+ gnutls_assert();
+ }
+ }
+ }
+
+ if (check_padlock(edx)) {
+ _gnutls_debug_log
+ ("Padlock AES accelerator was detected\n");
+ ret =
+ gnutls_crypto_single_cipher_register
+ (GNUTLS_CIPHER_AES_128_CBC, 80, &_gnutls_aes_padlock, 0);
+ if (ret < 0) {
+ gnutls_assert();
+ }
+
+ ret =
+ gnutls_crypto_single_cipher_register
+ (GNUTLS_CIPHER_AES_192_CBC, 80, &_gnutls_aes_padlock, 0);
+ if (ret < 0) {
+ gnutls_assert();
+ }
+
+ /* register GCM ciphers */
+ ret =
+ gnutls_crypto_single_cipher_register
+ (GNUTLS_CIPHER_AES_128_GCM, 90,
+ &_gnutls_aes_gcm_padlock, 0);
+ if (ret < 0) {
+ gnutls_assert();
+ }
+
+ ret =
+ gnutls_crypto_single_cipher_register
+ (GNUTLS_CIPHER_AES_256_CBC, 80, &_gnutls_aes_padlock, 0);
+ if (ret < 0) {
+ gnutls_assert();
+ }
+
+ ret =
+ gnutls_crypto_single_cipher_register
+ (GNUTLS_CIPHER_AES_256_GCM, 90,
+ &_gnutls_aes_gcm_padlock, 0);
+ if (ret < 0) {
+ gnutls_assert();
+ }
+ }
+
+	if (!check_optimized_aes() && !check_padlock(edx))
+ _gnutls_priority_update_non_aesni();
+
+#ifdef HAVE_LIBNETTLE
+ phe = check_phe(edx);
+
+ if (phe && check_phe_partial()) {
+ _gnutls_debug_log
+ ("Padlock SHA1 and SHA256 (partial) accelerator was detected\n");
+ if (check_phe_sha512(edx)) {
+ _gnutls_debug_log
+ ("Padlock SHA512 (partial) accelerator was detected\n");
+ ret =
+ gnutls_crypto_single_digest_register
+ (GNUTLS_DIG_SHA384, 80,
+ &_gnutls_sha_padlock, 0);
+ if (ret < 0) {
+ gnutls_assert();
+ }
+
+ ret =
+ gnutls_crypto_single_digest_register
+ (GNUTLS_DIG_SHA512, 80,
+ &_gnutls_sha_padlock, 0);
+ if (ret < 0) {
+ gnutls_assert();
+ }
+
+ ret =
+ gnutls_crypto_single_mac_register
+ (GNUTLS_MAC_SHA384, 80,
+ &_gnutls_hmac_sha_padlock, 0);
+ if (ret < 0) {
+ gnutls_assert();
+ }
+
+ ret =
+ gnutls_crypto_single_mac_register
+ (GNUTLS_MAC_SHA512, 80,
+ &_gnutls_hmac_sha_padlock, 0);
+ if (ret < 0) {
+ gnutls_assert();
+ }
+ }
+
+ ret =
+ gnutls_crypto_single_digest_register(GNUTLS_DIG_SHA1,
+ 90,
+ &_gnutls_sha_padlock, 0);
+ if (ret < 0) {
+ gnutls_assert();
+ }
+
+ ret =
+ gnutls_crypto_single_digest_register(GNUTLS_DIG_SHA224,
+ 90,
+ &_gnutls_sha_padlock, 0);
+ if (ret < 0) {
+ gnutls_assert();
+ }
+
+ ret =
+ gnutls_crypto_single_digest_register(GNUTLS_DIG_SHA256,
+ 90,
+ &_gnutls_sha_padlock, 0);
+ if (ret < 0) {
+ gnutls_assert();
+ }
+
+ ret =
+ gnutls_crypto_single_mac_register(GNUTLS_MAC_SHA1,
+ 90,
+ &_gnutls_hmac_sha_padlock, 0);
+ if (ret < 0) {
+ gnutls_assert();
+ }
+
+ /* we don't register MAC_SHA224 because it is not used by TLS */
+
+ ret =
+ gnutls_crypto_single_mac_register(GNUTLS_MAC_SHA256,
+ 90,
+ &_gnutls_hmac_sha_padlock, 0);
+ if (ret < 0) {
+ gnutls_assert();
+ }
+ } else if (phe) {
+ /* Original padlock PHE. Does not support incremental operations.
+ */
+ _gnutls_debug_log
+ ("Padlock SHA1 and SHA256 accelerator was detected\n");
+ ret =
+ gnutls_crypto_single_digest_register(GNUTLS_DIG_SHA1,
+ 90,
+ &_gnutls_sha_padlock_oneshot, 0);
+ if (ret < 0) {
+ gnutls_assert();
+ }
+
+ ret =
+ gnutls_crypto_single_digest_register(GNUTLS_DIG_SHA256,
+ 90,
+ &_gnutls_sha_padlock_oneshot, 0);
+ if (ret < 0) {
+ gnutls_assert();
+ }
+
+ ret =
+ gnutls_crypto_single_mac_register(GNUTLS_MAC_SHA1,
+ 90,
+ &_gnutls_hmac_sha_padlock_oneshot, 0);
+ if (ret < 0) {
+ gnutls_assert();
+ }
+
+ ret =
+ gnutls_crypto_single_mac_register(GNUTLS_MAC_SHA256,
+ 90,
+ &_gnutls_hmac_sha_padlock_oneshot, 0);
+ if (ret < 0) {
+ gnutls_assert();
+ }
+ }
+#endif
+
+ return;
+}
+#endif
+
+enum x86_cpu_vendor {
+ X86_CPU_VENDOR_OTHER,
+ X86_CPU_VENDOR_INTEL,
+ X86_CPU_VENDOR_AMD,
+};
+
+static enum x86_cpu_vendor check_x86_cpu_vendor(void)
+{
+ unsigned int a, b, c, d;
+
+ if (!__get_cpuid(0, &a, &b, &c, &d)) {
+ return X86_CPU_VENDOR_OTHER;
+ }
+
+ if (memcmp(&b, "Genu", 4) == 0 &&
+ memcmp(&d, "ineI", 4) == 0 &&
+ memcmp(&c, "ntel", 4) == 0) {
+ return X86_CPU_VENDOR_INTEL;
+ }
+
+ if (memcmp(&b, "Auth", 4) == 0 &&
+ memcmp(&d, "enti", 4) == 0 &&
+ memcmp(&c, "cAMD", 4) == 0) {
+ return X86_CPU_VENDOR_AMD;
+ }
+
+ return X86_CPU_VENDOR_OTHER;
+}
+
+static
+void register_x86_intel_crypto(unsigned capabilities)
+{
+ int ret;
+ enum x86_cpu_vendor vendor;
+
+ memset(GNUTLS_x86_cpuid_s, 0, sizeof(GNUTLS_x86_cpuid_s));
+
+ vendor = check_x86_cpu_vendor();
+ if (vendor == X86_CPU_VENDOR_OTHER) {
+ return;
+ }
+
+ if (capabilities == 0) {
+ if (!read_cpuid_vals(GNUTLS_x86_cpuid_s))
+ return;
+ if (!check_4th_gen_intel_features(GNUTLS_x86_cpuid_s[1])) {
+ GNUTLS_x86_cpuid_s[1] &= ~bit_AVX;
+
+ /* Clear AVX2 bits as well, according to what
+ * OpenSSL does. Should we clear
+ * bit_AVX512DQ, bit_AVX512PF, bit_AVX512ER,
+ * and bit_AVX512CD? */
+			GNUTLS_x86_cpuid_s[2] &= ~(bit_AVX2|
+						   bit_AVX512F|
+						   bit_AVX512IFMA|
+						   bit_AVX512BW|
+						   bit_AVX512VL);
+ }
+ } else {
+ capabilities_to_intel_cpuid(capabilities);
+ }
+
+ /* CRYPTOGAMS uses the (1 << 30) bit as an indicator of Intel CPUs */
+ if (vendor == X86_CPU_VENDOR_INTEL) {
+ GNUTLS_x86_cpuid_s[0] |= 1 << 30;
+ } else {
+ GNUTLS_x86_cpuid_s[0] &= ~(1 << 30);
+ }
+
+ if (check_ssse3()) {
+ _gnutls_debug_log("Intel SSSE3 was detected\n");
+
+ ret =
+ gnutls_crypto_single_cipher_register
+ (GNUTLS_CIPHER_AES_128_GCM, 90,
+ &_gnutls_aes_gcm_x86_ssse3, 0);
+ if (ret < 0) {
+ gnutls_assert();
+ }
+
+ ret =
+ gnutls_crypto_single_cipher_register
+ (GNUTLS_CIPHER_AES_192_GCM, 90,
+ &_gnutls_aes_gcm_x86_ssse3, 0);
+ if (ret < 0) {
+ gnutls_assert();
+ }
+
+ ret =
+ gnutls_crypto_single_cipher_register
+ (GNUTLS_CIPHER_AES_256_GCM, 90,
+ &_gnutls_aes_gcm_x86_ssse3, 0);
+ if (ret < 0) {
+ gnutls_assert();
+ }
+
+ ret =
+ gnutls_crypto_single_cipher_register
+ (GNUTLS_CIPHER_AES_128_CBC, 90, &_gnutls_aes_ssse3, 0);
+ if (ret < 0) {
+ gnutls_assert();
+ }
+
+ ret =
+ gnutls_crypto_single_cipher_register
+ (GNUTLS_CIPHER_AES_192_CBC, 90, &_gnutls_aes_ssse3, 0);
+ if (ret < 0) {
+ gnutls_assert();
+ }
+
+ ret =
+ gnutls_crypto_single_cipher_register
+ (GNUTLS_CIPHER_AES_256_CBC, 90, &_gnutls_aes_ssse3, 0);
+ if (ret < 0) {
+ gnutls_assert();
+ }
+ }
+
+ if (check_sha() || check_ssse3()) {
+ if (check_sha())
+ _gnutls_debug_log("Intel SHA was detected\n");
+
+ ret =
+ gnutls_crypto_single_digest_register(GNUTLS_DIG_SHA1,
+ 80,
+ &_gnutls_sha_x86_ssse3, 0);
+ if (ret < 0) {
+ gnutls_assert();
+ }
+
+ ret =
+ gnutls_crypto_single_digest_register(GNUTLS_DIG_SHA224,
+ 80,
+ &_gnutls_sha_x86_ssse3, 0);
+ if (ret < 0) {
+ gnutls_assert();
+ }
+
+ ret =
+ gnutls_crypto_single_digest_register(GNUTLS_DIG_SHA256,
+ 80,
+ &_gnutls_sha_x86_ssse3, 0);
+ if (ret < 0) {
+ gnutls_assert();
+ }
+
+
+ ret =
+ gnutls_crypto_single_mac_register(GNUTLS_MAC_SHA1,
+ 80,
+ &_gnutls_hmac_sha_x86_ssse3, 0);
+ if (ret < 0)
+ gnutls_assert();
+
+ ret =
+ gnutls_crypto_single_mac_register(GNUTLS_MAC_SHA224,
+ 80,
+ &_gnutls_hmac_sha_x86_ssse3, 0);
+ if (ret < 0)
+ gnutls_assert();
+
+ ret =
+ gnutls_crypto_single_mac_register(GNUTLS_MAC_SHA256,
+ 80,
+ &_gnutls_hmac_sha_x86_ssse3, 0);
+ if (ret < 0)
+ gnutls_assert();
+
+ ret =
+ gnutls_crypto_single_digest_register(GNUTLS_DIG_SHA384,
+ 80,
+ &_gnutls_sha_x86_ssse3, 0);
+ if (ret < 0)
+ gnutls_assert();
+
+ ret =
+ gnutls_crypto_single_digest_register(GNUTLS_DIG_SHA512,
+ 80,
+ &_gnutls_sha_x86_ssse3, 0);
+ if (ret < 0)
+ gnutls_assert();
+ ret =
+ gnutls_crypto_single_mac_register(GNUTLS_MAC_SHA384,
+ 80,
+ &_gnutls_hmac_sha_x86_ssse3, 0);
+ if (ret < 0)
+ gnutls_assert();
+
+ ret =
+ gnutls_crypto_single_mac_register(GNUTLS_MAC_SHA512,
+ 80,
+ &_gnutls_hmac_sha_x86_ssse3, 0);
+ if (ret < 0)
+ gnutls_assert();
+ }
+
+ if (check_optimized_aes()) {
+ _gnutls_debug_log("Intel AES accelerator was detected\n");
+ ret =
+ gnutls_crypto_single_cipher_register
+ (GNUTLS_CIPHER_AES_128_CBC, 80, &_gnutls_aesni_x86, 0);
+ if (ret < 0) {
+ gnutls_assert();
+ }
+
+ ret =
+ gnutls_crypto_single_cipher_register
+ (GNUTLS_CIPHER_AES_192_CBC, 80, &_gnutls_aesni_x86, 0);
+ if (ret < 0) {
+ gnutls_assert();
+ }
+
+ ret =
+ gnutls_crypto_single_cipher_register
+ (GNUTLS_CIPHER_AES_256_CBC, 80, &_gnutls_aesni_x86, 0);
+ if (ret < 0) {
+ gnutls_assert();
+ }
+
+ ret =
+ gnutls_crypto_single_cipher_register
+ (GNUTLS_CIPHER_AES_128_CCM, 80,
+ &_gnutls_aes_ccm_x86_aesni, 0);
+ if (ret < 0) {
+ gnutls_assert();
+ }
+
+ ret =
+ gnutls_crypto_single_cipher_register
+ (GNUTLS_CIPHER_AES_256_CCM, 80,
+ &_gnutls_aes_ccm_x86_aesni, 0);
+ if (ret < 0) {
+ gnutls_assert();
+ }
+
+ ret =
+ gnutls_crypto_single_cipher_register
+ (GNUTLS_CIPHER_AES_128_CCM_8, 80,
+ &_gnutls_aes_ccm_x86_aesni, 0);
+ if (ret < 0) {
+ gnutls_assert();
+ }
+
+ ret =
+ gnutls_crypto_single_cipher_register
+ (GNUTLS_CIPHER_AES_256_CCM_8, 80,
+ &_gnutls_aes_ccm_x86_aesni, 0);
+ if (ret < 0) {
+ gnutls_assert();
+ }
+
+ ret =
+ gnutls_crypto_single_cipher_register
+ (GNUTLS_CIPHER_AES_128_XTS, 80,
+ &_gnutls_aes_xts_x86_aesni, 0);
+ if (ret < 0) {
+ gnutls_assert();
+ }
+
+ ret =
+ gnutls_crypto_single_cipher_register
+ (GNUTLS_CIPHER_AES_256_XTS, 80,
+ &_gnutls_aes_xts_x86_aesni, 0);
+ if (ret < 0) {
+ gnutls_assert();
+ }
+
+#ifdef ASM_X86_64
+ if (check_pclmul()) {
+ /* register GCM ciphers */
+ if (check_avx_movbe()) {
+ _gnutls_debug_log
+ ("Intel GCM accelerator (AVX) was detected\n");
+ ret =
+ gnutls_crypto_single_cipher_register
+ (GNUTLS_CIPHER_AES_128_GCM, 80,
+ &_gnutls_aes_gcm_pclmul_avx, 0);
+ if (ret < 0) {
+ gnutls_assert();
+ }
+
+ ret =
+ gnutls_crypto_single_cipher_register
+ (GNUTLS_CIPHER_AES_192_GCM, 80,
+ &_gnutls_aes_gcm_pclmul_avx, 0);
+ if (ret < 0) {
+ gnutls_assert();
+ }
+
+ ret =
+ gnutls_crypto_single_cipher_register
+ (GNUTLS_CIPHER_AES_256_GCM, 80,
+ &_gnutls_aes_gcm_pclmul_avx, 0);
+ if (ret < 0) {
+ gnutls_assert();
+ }
+ } else {
+ _gnutls_debug_log
+ ("Intel GCM accelerator was detected\n");
+ ret =
+ gnutls_crypto_single_cipher_register
+ (GNUTLS_CIPHER_AES_128_GCM, 80,
+ &_gnutls_aes_gcm_pclmul, 0);
+ if (ret < 0) {
+ gnutls_assert();
+ }
+
+ ret =
+ gnutls_crypto_single_cipher_register
+ (GNUTLS_CIPHER_AES_192_GCM, 80,
+ &_gnutls_aes_gcm_pclmul, 0);
+ if (ret < 0) {
+ gnutls_assert();
+ }
+
+ ret =
+ gnutls_crypto_single_cipher_register
+ (GNUTLS_CIPHER_AES_256_GCM, 80,
+ &_gnutls_aes_gcm_pclmul, 0);
+ if (ret < 0) {
+ gnutls_assert();
+ }
+ }
+ } else
+#endif
+ {
+ ret =
+ gnutls_crypto_single_cipher_register
+ (GNUTLS_CIPHER_AES_128_GCM, 80,
+ &_gnutls_aes_gcm_x86_aesni, 0);
+ if (ret < 0) {
+ gnutls_assert();
+ }
+
+ ret =
+ gnutls_crypto_single_cipher_register
+ (GNUTLS_CIPHER_AES_192_GCM, 80,
+ &_gnutls_aes_gcm_x86_aesni, 0);
+ if (ret < 0) {
+ gnutls_assert();
+ }
+
+ ret =
+ gnutls_crypto_single_cipher_register
+ (GNUTLS_CIPHER_AES_256_GCM, 80,
+ &_gnutls_aes_gcm_x86_aesni, 0);
+ if (ret < 0) {
+ gnutls_assert();
+ }
+ }
+ } else {
+ _gnutls_priority_update_non_aesni();
+ }
+
+ return;
+}
+
+
+void register_x86_crypto(void)
+{
+ unsigned capabilities = 0;
+ char *p;
+ p = secure_getenv("GNUTLS_CPUID_OVERRIDE");
+ if (p) {
+ capabilities = strtol(p, NULL, 0);
+ }
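+	/* GNUTLS_CPUID_OVERRIDE is parsed with base 0, so decimal, octal and
+	 * hex forms are all accepted; for example, running an application with
+	 * GNUTLS_CPUID_OVERRIDE=0x22 in the environment requests AES-NI plus
+	 * the SHA extensions (see the capability flags defined above). */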
+
+ register_x86_intel_crypto(capabilities);
+#ifdef ENABLE_PADLOCK
+ register_x86_padlock_crypto(capabilities);
+#endif
+}
+