summary refs log tree commit diff stats
path: root/mozglue/misc/SSE.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'mozglue/misc/SSE.cpp')
-rw-r--r-- mozglue/misc/SSE.cpp 202
1 files changed, 202 insertions, 0 deletions
diff --git a/mozglue/misc/SSE.cpp b/mozglue/misc/SSE.cpp
new file mode 100644
index 0000000000..ca0b4c3c86
--- /dev/null
+++ b/mozglue/misc/SSE.cpp
@@ -0,0 +1,202 @@
+/* vim: set shiftwidth=4 tabstop=8 autoindent cindent expandtab: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+/* compile-time and runtime tests for whether to use SSE instructions */
+
+#include "SSE.h"
+
+#ifdef HAVE_CPUID_H
+// cpuid.h is available on gcc 4.3 and higher on i386 and x86_64
+# include <cpuid.h>
+#elif defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_AMD64))
+// MSVC 2005 or newer on x86-32 or x86-64
+# include <intrin.h>
+#endif
+
+namespace {
+
+// SSE.h has parallel #ifs which declare MOZILLA_SSE_HAVE_CPUID_DETECTION.
+// We can't declare these functions in the header file, however, because
+// <intrin.h> conflicts with <windows.h> on MSVC 2005, and some files want to
+// include both SSE.h and <windows.h>.
+
+#ifdef HAVE_CPUID_H
+
+enum CPUIDRegister { eax = 0, ebx = 1, ecx = 2, edx = 3 };
+
+// Reports whether every bit of |bits| is set in output register |reg| of
+// CPUID leaf |level| (sub-leaf 0). Yields false when |level| is above the
+// highest leaf __get_cpuid_max reports for its range (basic or extended).
+static bool has_cpuid_bits(unsigned int level, CPUIDRegister reg,
+                           unsigned int bits) {
+  const unsigned highest = __get_cpuid_max(level & 0x80000000u, nullptr);
+  if (highest < level) {
+    return false;
+  }
+  unsigned int out[4];
+  __cpuid_count(level, 0, out[eax], out[ebx], out[ecx], out[edx]);
+  return (out[reg] & bits) == bits;
+}
+
+#elif defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_AMD64))
+
+enum CPUIDRegister { eax = 0, ebx = 1, ecx = 2, edx = 3 };
+
+// MSVC variant: true iff all of |bits| are set in register |reg| of CPUID
+// leaf |level|; false when the CPU does not report support for that leaf.
+static bool has_cpuid_bits(unsigned int level, CPUIDRegister reg,
+                           unsigned int bits) {
+  int info[4];
+  // EAX of leaf 0 (or 0x80000000) holds the highest leaf in that range.
+  __cpuid(info, level & 0x80000000u);
+  if (level > unsigned(info[0])) return false;
+  // __cpuid clears ECX before executing cpuid, so this is sub-leaf 0.
+  __cpuid(info, level);
+  return (unsigned(info[reg]) & bits) == bits;
+}
+
+#elif (defined(__GNUC__) || defined(__SUNPRO_CC)) && \
+ (defined(__i386) || defined(__x86_64__))
+
+enum CPUIDRegister { eax = 0, ebx = 1, ecx = 2, edx = 3 };  // result slots
+// moz_cpuid() runs CPUID leaf InfoType (sub-leaf 0) and fills CPUInfo[0..3].
+# ifdef __i386
+static void moz_cpuid(int CPUInfo[4], int InfoType) {
+ asm("xchg %esi, %ebx\n"  // park ebx in esi; cpuid overwrites ebx
+ "xor %ecx, %ecx\n" // ecx is the sub-leaf (we only ever need 0)
+ "cpuid\n"  // leaf number is taken from eax (InfoType)
+ "movl %eax, (%edi)\n"  // CPUInfo[0] = eax
+ "movl %ebx, 4(%edi)\n"  // CPUInfo[1] = ebx
+ "movl %ecx, 8(%edi)\n"  // CPUInfo[2] = ecx
+ "movl %edx, 12(%edi)\n"  // CPUInfo[3] = edx
+ "xchg %esi, %ebx\n"  // swap the parked ebx back
+ :  // no output operands; results are stored through edi
+ : "a"(InfoType), // %eax; NOTE(review): input-only, yet cpuid rewrites eax
+ "D"(CPUInfo) // %edi
+ : "%ecx", "%edx", "%esi");
+}
+# else
+static void moz_cpuid(int CPUInfo[4], int InfoType) {
+ asm("xchg %rsi, %rbx\n"  // park rbx in rsi; cpuid overwrites ebx
+ "xor %ecx, %ecx\n" // ecx is the sub-leaf (we only ever need 0)
+ "cpuid\n"  // leaf number is taken from eax (InfoType)
+ "movl %eax, (%rdi)\n"  // CPUInfo[0] = eax
+ "movl %ebx, 4(%rdi)\n"  // CPUInfo[1] = ebx
+ "movl %ecx, 8(%rdi)\n"  // CPUInfo[2] = ecx
+ "movl %edx, 12(%rdi)\n"  // CPUInfo[3] = edx
+ "xchg %rsi, %rbx\n"  // swap the parked rbx back
+ :  // no output operands; results are stored through rdi
+ : "a"(InfoType), // %eax; NOTE(review): input-only, yet cpuid rewrites eax
+ "D"(CPUInfo) // %rdi
+ : "%ecx", "%edx", "%rsi");
+}
+# endif
+
+// Inline-asm variant: true iff all of |bits| are set in register |reg| of
+// CPUID leaf |level|; false when the highest reported leaf is below |level|.
+static bool has_cpuid_bits(unsigned int level, CPUIDRegister reg,
+                           unsigned int bits) {
+  volatile int info[4];  // filled in by moz_cpuid's inline asm
+  moz_cpuid((int*)info, level & 0x80000000u);
+  if (level > unsigned(info[0])) return false;
+  moz_cpuid((int*)info, level);
+  return (unsigned(info[reg]) & bits) == bits;
+}
+
+#endif // end CPUID declarations
+
+} // namespace
+
+namespace mozilla {
+
+namespace sse_private {
+
+#if defined(MOZILLA_SSE_HAVE_CPUID_DETECTION)
+// Feature flags probed once via CPUID; omitted when presumed at build time.
+# if !defined(MOZILLA_PRESUME_MMX)
+bool mmx_enabled = has_cpuid_bits(1u, edx, (1u << 23));  // MMX
+# endif
+
+# if !defined(MOZILLA_PRESUME_SSE)
+bool sse_enabled = has_cpuid_bits(1u, edx, (1u << 25));  // SSE
+# endif
+
+# if !defined(MOZILLA_PRESUME_SSE2)
+bool sse2_enabled = has_cpuid_bits(1u, edx, (1u << 26));  // SSE2
+# endif
+
+# if !defined(MOZILLA_PRESUME_SSE3)
+bool sse3_enabled = has_cpuid_bits(1u, ecx, (1u << 0));  // SSE3
+# endif
+
+# if !defined(MOZILLA_PRESUME_SSSE3)
+bool ssse3_enabled = has_cpuid_bits(1u, ecx, (1u << 9));  // SSSE3
+# endif
+
+# if !defined(MOZILLA_PRESUME_SSE4A)  // SSE4A lives in extended leaf 0x80000001
+bool sse4a_enabled = has_cpuid_bits(0x80000001u, ecx, (1u << 6));
+# endif
+
+# if !defined(MOZILLA_PRESUME_SSE4_1)
+bool sse4_1_enabled = has_cpuid_bits(1u, ecx, (1u << 19));  // SSE4.1
+# endif
+
+# if !defined(MOZILLA_PRESUME_SSE4_2)
+bool sse4_2_enabled = has_cpuid_bits(1u, ecx, (1u << 20));  // SSE4.2
+# endif
+
+# if !defined(MOZILLA_PRESUME_FMA3)
+bool fma3_enabled = has_cpuid_bits(1u, ecx, (1u << 12));  // FMA
+# endif
+
+# if !defined(MOZILLA_PRESUME_AVX) || !defined(MOZILLA_PRESUME_AVX2)
+// AVX check (trivially true if presumed at build time): the CPU must report
+// XSAVE, OSXSAVE and AVX, and xgetbv(0) must show XMM and YMM state enabled.
+static bool has_avx() {
+# if defined(MOZILLA_PRESUME_AVX)
+  return true;
+# else
+  // CPUID leaf 1, ECX: bit 26 = XSAVE, bit 27 = OSXSAVE, bit 28 = AVX.
+  const unsigned kCpuBits = (1u << 26) | (1u << 27) | (1u << 28);
+  // xgetbv(0) result: bit 1 = XMM state, bit 2 = YMM state.
+  const unsigned kOsBits = (1u << 1) | (1u << 2);
+
+  if (!has_cpuid_bits(1u, ecx, kCpuBits)) {
+    return false;
+  }
+  return (xgetbv(0) & kOsBits) == kOsBits;
+# endif // MOZILLA_PRESUME_AVX
+}
+# endif // !MOZILLA_PRESUME_AVX || !MOZILLA_PRESUME_AVX2
+// AVX-family and AES flags, plus the constant-TSC flag, probed via CPUID.
+# if !defined(MOZILLA_PRESUME_AVX)
+bool avx_enabled = has_avx();  // CPU bits plus OS state, see has_avx()
+# endif
+
+# if !defined(MOZILLA_PRESUME_AVX2)
+bool avx2_enabled = has_avx() && has_cpuid_bits(7u, ebx, (1u << 5));  // AVX2
+# endif
+
+# if !defined(MOZILLA_PRESUME_AES)
+bool aes_enabled = has_cpuid_bits(1u, ecx, (1u << 25));  // AES-NI
+# endif
+// Extended leaf 0x80000007, EDX bit 8: invariant TSC.
+bool has_constant_tsc = has_cpuid_bits(0x80000007u, edx, (1u << 8));
+
+#endif
+
+} // namespace sse_private
+
+#ifdef HAVE_CPUID_H
+// Executes XGETBV for register index |xcr| and returns edx:eax as 64 bits.
+uint64_t xgetbv(uint32_t xcr) {  // ".byte 0x0f, 0x01, 0xd0" encodes xgetbv
+ uint32_t eax, edx;  // low and high halves of the result
+ __asm__(".byte 0x0f, 0x01, 0xd0" : "=a"(eax), "=d"(edx) : "c"(xcr));
+ return (uint64_t)(edx) << 32 | eax;  // edx forms the upper 32 bits
+}
+
+#endif
+
+} // namespace mozilla