summaryrefslogtreecommitdiffstats
path: root/comm/third_party/libgcrypt/src/hwf-x86.c
diff options
context:
space:
mode:
Diffstat (limited to 'comm/third_party/libgcrypt/src/hwf-x86.c')
-rw-r--r--comm/third_party/libgcrypt/src/hwf-x86.c409
1 files changed, 409 insertions, 0 deletions
diff --git a/comm/third_party/libgcrypt/src/hwf-x86.c b/comm/third_party/libgcrypt/src/hwf-x86.c
new file mode 100644
index 0000000000..9a9ed6d354
--- /dev/null
+++ b/comm/third_party/libgcrypt/src/hwf-x86.c
@@ -0,0 +1,409 @@
+/* hwf-x86.c - Detect hardware features - x86 part
+ * Copyright (C) 2007, 2011, 2012 Free Software Foundation, Inc.
+ * Copyright (C) 2012 Jussi Kivilinna
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <config.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdarg.h>
+#include <unistd.h>
+
+#include "g10lib.h"
+#include "hwf-common.h"
+
+#if !defined (__i386__) && !defined (__x86_64__)
+# error Module build for wrong CPU.
+#endif
+
+/* We use the next macro to decide whether we can test for certain
+ features. */
+#undef HAS_X86_CPUID
+
+#if defined (__i386__) && SIZEOF_UNSIGNED_LONG == 4 && defined (__GNUC__)
+# define HAS_X86_CPUID 1
+
+#if _GCRY_GCC_VERSION >= 40700 /* 4.7 */
+# define FORCE_FUNC_FRAME_POINTER \
+ __attribute__ ((optimize("no-omit-frame-pointer")))
+#else
+# define FORCE_FUNC_FRAME_POINTER
+#endif
+
+static FORCE_FUNC_FRAME_POINTER int
+is_cpuid_available(void)
+{
+ int has_cpuid = 0;
+
+ /* Detect the CPUID feature by testing some undefined behaviour (16
+ vs 32 bit pushf/popf). */
+ asm volatile
+ ("pushf\n\t" /* Copy flags to EAX. */
+ "popl %%eax\n\t"
+ "movl %%eax, %%ecx\n\t" /* Save flags into ECX. */
+ "xorl $0x200000, %%eax\n\t" /* Toggle ID bit and copy it to the flags. */
+ "pushl %%eax\n\t"
+ "popf\n\t"
+ "pushf\n\t" /* Copy changed flags again to EAX. */
+ "popl %%eax\n\t"
+ "pushl %%ecx\n\t" /* Restore flags from ECX. */
+ "popf\n\t"
+ "xorl %%eax, %%ecx\n\t" /* Compare flags against saved flags. */
+ "jz .Lno_cpuid%=\n\t" /* Toggling did not work, thus no CPUID. */
+ "movl $1, %0\n" /* Worked. true -> HAS_CPUID. */
+ ".Lno_cpuid%=:\n\t"
+ : "+r" (has_cpuid)
+ :
+ : "%eax", "%ecx", "cc", "memory"
+ );
+
+ return has_cpuid;
+}
+
+static void
+get_cpuid(unsigned int in, unsigned int *eax, unsigned int *ebx,
+ unsigned int *ecx, unsigned int *edx)
+{
+ unsigned int regs[4];
+
+ asm volatile
+ ("movl %%ebx, %%edi\n\t" /* Save GOT register. */
+ "xorl %%ebx, %%ebx\n\t"
+ "cpuid\n\t"
+ "movl %%ebx, %1\n\t"
+ "movl %%edi, %%ebx\n\t" /* Restore GOT register. */
+ : "=a" (regs[0]), "=g" (regs[1]), "=c" (regs[2]), "=d" (regs[3])
+ : "0" (in), "2" (0), "3" (0)
+ : "cc", "edi"
+ );
+
+ if (eax)
+ *eax = regs[0];
+ if (ebx)
+ *ebx = regs[1];
+ if (ecx)
+ *ecx = regs[2];
+ if (edx)
+ *edx = regs[3];
+}
+
+#if defined(ENABLE_AVX_SUPPORT) || defined(ENABLE_AVX2_SUPPORT)
+static unsigned int
+get_xgetbv(void)
+{
+ unsigned int t_eax, t_edx;
+
+ asm volatile
+ ("xgetbv\n\t"
+ : "=a" (t_eax), "=d" (t_edx)
+ : "c" (0)
+ );
+
+ return t_eax;
+}
+#endif /* ENABLE_AVX_SUPPORT || ENABLE_AVX2_SUPPORT */
+
+#endif /* i386 && GNUC */
+
+
+#if defined (__x86_64__) && defined (__GNUC__)
+# define HAS_X86_CPUID 1
+
+static int
+is_cpuid_available(void)
+{
+ return 1;
+}
+
+static void
+get_cpuid(unsigned int in, unsigned int *eax, unsigned int *ebx,
+ unsigned int *ecx, unsigned int *edx)
+{
+ unsigned int regs[4];
+
+ asm volatile
+ ("cpuid\n\t"
+ : "=a" (regs[0]), "=b" (regs[1]), "=c" (regs[2]), "=d" (regs[3])
+ : "0" (in), "1" (0), "2" (0), "3" (0)
+ : "cc"
+ );
+
+ if (eax)
+ *eax = regs[0];
+ if (ebx)
+ *ebx = regs[1];
+ if (ecx)
+ *ecx = regs[2];
+ if (edx)
+ *edx = regs[3];
+}
+
+#if defined(ENABLE_AVX_SUPPORT) || defined(ENABLE_AVX2_SUPPORT)
+static unsigned int
+get_xgetbv(void)
+{
+ unsigned int t_eax, t_edx;
+
+ asm volatile
+ ("xgetbv\n\t"
+ : "=a" (t_eax), "=d" (t_edx)
+ : "c" (0)
+ );
+
+ return t_eax;
+}
+#endif /* ENABLE_AVX_SUPPORT || ENABLE_AVX2_SUPPORT */
+
+#endif /* x86-64 && GNUC */
+
+
+#ifdef HAS_X86_CPUID
+static unsigned int
+detect_x86_gnuc (void)
+{
+ union
+ {
+ char c[12+1];
+ unsigned int ui[3];
+ } vendor_id;
+ unsigned int features, features2;
+ unsigned int os_supports_avx_avx2_registers = 0;
+ unsigned int max_cpuid_level;
+ unsigned int fms, family, model;
+ unsigned int result = 0;
+ unsigned int avoid_vpgather = 0;
+
+ (void)os_supports_avx_avx2_registers;
+
+ if (!is_cpuid_available())
+ return 0;
+
+ get_cpuid(0, &max_cpuid_level, &vendor_id.ui[0], &vendor_id.ui[2],
+ &vendor_id.ui[1]);
+ vendor_id.c[12] = 0;
+
+ if (0)
+ ; /* Just to make "else if" and ifdef macros look pretty. */
+#ifdef ENABLE_PADLOCK_SUPPORT
+ else if (!strcmp (vendor_id.c, "CentaurHauls"))
+ {
+ /* This is a VIA CPU. Check what PadLock features we have. */
+
+ /* Check for extended centaur (EAX). */
+ get_cpuid(0xC0000000, &features, NULL, NULL, NULL);
+
+ /* Has extended centaur features? */
+ if (features > 0xC0000000)
+ {
+ /* Ask for the extended feature flags (EDX). */
+ get_cpuid(0xC0000001, NULL, NULL, NULL, &features);
+
+ /* Test bits 2 and 3 to see whether the RNG exists and is enabled. */
+ if ((features & 0x0C) == 0x0C)
+ result |= HWF_PADLOCK_RNG;
+
+ /* Test bits 6 and 7 to see whether the ACE exists and is enabled. */
+ if ((features & 0xC0) == 0xC0)
+ result |= HWF_PADLOCK_AES;
+
+ /* Test bits 10 and 11 to see whether the PHE exists and is
+ enabled. */
+ if ((features & 0xC00) == 0xC00)
+ result |= HWF_PADLOCK_SHA;
+
+ /* Test bits 12 and 13 to see whether the MONTMUL exists and is
+ enabled. */
+ if ((features & 0x3000) == 0x3000)
+ result |= HWF_PADLOCK_MMUL;
+ }
+ }
+#endif /*ENABLE_PADLOCK_SUPPORT*/
+ else if (!strcmp (vendor_id.c, "GenuineIntel"))
+ {
+ /* This is an Intel CPU. */
+ result |= HWF_INTEL_CPU;
+ }
+ else if (!strcmp (vendor_id.c, "AuthenticAMD"))
+ {
+ /* This is an AMD CPU. */
+ }
+
+ /* Detect Intel features, that might also be supported by other
+ vendors. */
+
+ /* Get CPU family/model/stepping (EAX) and Intel feature flags (ECX, EDX). */
+ get_cpuid(1, &fms, NULL, &features, &features2);
+
+ family = ((fms & 0xf00) >> 8) + ((fms & 0xff00000) >> 20);
+ model = ((fms & 0xf0) >> 4) + ((fms & 0xf0000) >> 12);
+
+ if ((result & HWF_INTEL_CPU) && family == 6)
+ {
+ /* These Intel Core processor models have SHLD/SHRD instruction that
+ * can do integer rotation faster actual ROL/ROR instructions. */
+ switch (model)
+ {
+ case 0x2A:
+ case 0x2D:
+ case 0x3A:
+ case 0x3C:
+ case 0x3F:
+ case 0x45:
+ case 0x46:
+ case 0x3D:
+ case 0x4F:
+ case 0x56:
+ case 0x47:
+ case 0x4E:
+ case 0x5E:
+ case 0x8E:
+ case 0x9E:
+ case 0x55:
+ case 0x66:
+ result |= HWF_INTEL_FAST_SHLD;
+ break;
+ }
+
+ /* These Intel Core processors that have AVX2 have slow VPGATHER and
+ * should be avoided for table-lookup use. */
+ switch (model)
+ {
+ case 0x3C:
+ case 0x3F:
+ case 0x45:
+ case 0x46:
+ /* Haswell */
+ avoid_vpgather |= 1;
+ break;
+ }
+ }
+ else
+ {
+ /* Avoid VPGATHER for non-Intel CPUs as testing is needed to
+ * make sure it is fast enough. */
+
+ avoid_vpgather |= 1;
+ }
+
+#ifdef ENABLE_FORCE_SOFT_HWFEATURES
+ /* Soft HW features mark functionality that is available on all systems
+ * but not feasible to use because of slow HW implementation. */
+
+ /* SHLD is faster at rotating register than actual ROR/ROL instructions
+ * on older Intel systems (~sandy-bridge era). However, SHLD is very
+ * slow on almost anything else and later Intel processors have faster
+ * ROR/ROL. Therefore in regular build HWF_INTEL_FAST_SHLD is enabled
+ * only for those Intel processors that benefit from the SHLD
+ * instruction. Enabled here unconditionally as requested. */
+ result |= HWF_INTEL_FAST_SHLD;
+
+ /* VPGATHER instructions are used for look-up table based
+ * implementations which require VPGATHER to be fast enough to beat
+ * regular parallelized look-up table implementations (see Twofish).
+ * So far, only Intel processors beginning with skylake have had
+ * VPGATHER fast enough to be enabled. AMD Zen3 comes close to
+ * being feasible, but not quite (where twofish-avx2 is few percent
+ * slower than twofish-3way). Enable VPGATHER here unconditionally
+ * as requested. */
+ avoid_vpgather = 0;
+#endif
+
+#ifdef ENABLE_PCLMUL_SUPPORT
+ /* Test bit 1 for PCLMUL. */
+ if (features & 0x00000002)
+ result |= HWF_INTEL_PCLMUL;
+#endif
+ /* Test bit 9 for SSSE3. */
+ if (features & 0x00000200)
+ result |= HWF_INTEL_SSSE3;
+ /* Test bit 19 for SSE4.1. */
+ if (features & 0x00080000)
+ result |= HWF_INTEL_SSE4_1;
+#ifdef ENABLE_AESNI_SUPPORT
+ /* Test bit 25 for AES-NI. */
+ if (features & 0x02000000)
+ result |= HWF_INTEL_AESNI;
+#endif /*ENABLE_AESNI_SUPPORT*/
+#if defined(ENABLE_AVX_SUPPORT) || defined(ENABLE_AVX2_SUPPORT)
+ /* Test bit 27 for OSXSAVE (required for AVX/AVX2). */
+ if (features & 0x08000000)
+ {
+ /* Check that OS has enabled both XMM and YMM state support. */
+ if ((get_xgetbv() & 0x6) == 0x6)
+ os_supports_avx_avx2_registers = 1;
+ }
+#endif
+#ifdef ENABLE_AVX_SUPPORT
+ /* Test bit 28 for AVX. */
+ if (features & 0x10000000)
+ if (os_supports_avx_avx2_registers)
+ result |= HWF_INTEL_AVX;
+#endif /*ENABLE_AVX_SUPPORT*/
+#ifdef ENABLE_DRNG_SUPPORT
+ /* Test bit 30 for RDRAND. */
+ if (features & 0x40000000)
+ result |= HWF_INTEL_RDRAND;
+#endif /*ENABLE_DRNG_SUPPORT*/
+
+ /* Test bit 4 of EDX for TSC. */
+ if (features2 & 0x00000010)
+ result |= HWF_INTEL_RDTSC;
+
+ /* Check additional Intel feature flags. Early Intel P5 processors report
+ * too high max_cpuid_level, so don't check level 7 if processor does not
+ * support SSE3 (as cpuid:7 contains only features for newer processors).
+ * Source: http://www.sandpile.org/x86/cpuid.htm */
+ if (max_cpuid_level >= 7 && (features & 0x00000001))
+ {
+ /* Get CPUID:7 contains further Intel feature flags. */
+ get_cpuid(7, NULL, &features, NULL, NULL);
+
+ /* Test bit 8 for BMI2. */
+ if (features & 0x00000100)
+ result |= HWF_INTEL_BMI2;
+
+#ifdef ENABLE_AVX2_SUPPORT
+ /* Test bit 5 for AVX2. */
+ if (features & 0x00000020)
+ if (os_supports_avx_avx2_registers)
+ result |= HWF_INTEL_AVX2;
+
+ if ((result & HWF_INTEL_AVX2) && !avoid_vpgather)
+ result |= HWF_INTEL_FAST_VPGATHER;
+#endif /*ENABLE_AVX_SUPPORT*/
+
+ /* Test bit 29 for SHA Extensions. */
+ if (features & (1 << 29))
+ result |= HWF_INTEL_SHAEXT;
+ }
+
+ return result;
+}
+#endif /* HAS_X86_CPUID */
+
+
+unsigned int
+_gcry_hwf_detect_x86 (void)
+{
+#if defined (HAS_X86_CPUID)
+ return detect_x86_gnuc ();
+#else
+ return 0;
+#endif
+}