summaryrefslogtreecommitdiffstats
path: root/mozglue/misc/SSE.h
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--mozglue/misc/SSE.h388
1 files changed, 388 insertions, 0 deletions
diff --git a/mozglue/misc/SSE.h b/mozglue/misc/SSE.h
new file mode 100644
index 0000000000..d7c7e4ae97
--- /dev/null
+++ b/mozglue/misc/SSE.h
@@ -0,0 +1,388 @@
+/* vim: set shiftwidth=2 tabstop=8 autoindent cindent expandtab: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+/* compile-time and runtime tests for whether to use SSE instructions */
+
+#ifndef mozilla_SSE_h_
+#define mozilla_SSE_h_
+
+// for definition of MFBT_DATA
+#include "mozilla/Types.h"
+
+/**
+ * The public interface of this header consists of a set of macros and
+ * functions for detecting x86 (Intel and AMD) CPU features.
+ *
+ * DETECTING ISA EXTENSIONS
+ * ========================
+ *
+ * This header provides the following functions for determining whether the
+ * current CPU supports a particular instruction set extension:
+ *
+ * mozilla::supports_mmx
+ * mozilla::supports_sse
+ * mozilla::supports_sse2
+ * mozilla::supports_sse3
+ * mozilla::supports_ssse3
+ * mozilla::supports_sse4a
+ * mozilla::supports_sse4_1
+ * mozilla::supports_sse4_2
+ * mozilla::supports_fma3
+ * mozilla::supports_avx
+ * mozilla::supports_avx2
+ * mozilla::supports_avxvnni
+ * mozilla::supports_aes
+ * mozilla::has_constant_tsc
+ *
+ * If you're writing code using inline assembly, you should guard it with a
+ * call to one of these functions. For instance:
+ *
+ * if (mozilla::supports_sse2()) {
+ * asm(" ... ");
+ * }
+ * else {
+ * ...
+ * }
+ *
+ * Note that these functions depend on cpuid intrinsics only available in gcc
+ * 4.3 or later and MSVC 8.0 (Visual C++ 2005) or later, so they return false
+ * in older compilers. (This could be fixed by replacing the code with inline
+ * assembly.)
+ *
+ *
+ * USING INTRINSICS
+ * ================
+ *
+ * This header also provides support for coding using CPU intrinsics.
+ *
+ * For each mozilla::supports_abc function, we define a MOZILLA_MAY_SUPPORT_ABC
+ * macro which indicates that the target/compiler combination we're using is
+ * compatible with the ABC extension. For instance, x86_64 with MSVC 2003 is
+ * compatible with SSE2 but not SSE3, since although there exist x86_64 CPUs
+ * with SSE3 support, MSVC 2003 only supports through SSE2.
+ *
+ * Until gcc fixes #pragma target [1] [2] or our x86 builds require SSE2,
+ * you'll need to separate code using intrinsics into a file separate from your
+ * regular code. Here's the recommended pattern:
+ *
+ * #ifdef MOZILLA_MAY_SUPPORT_ABC
+ * namespace mozilla {
+ * namespace ABC {
+ * void foo();
+ * }
+ * }
+ * #endif
+ *
+ * void foo() {
+ * #ifdef MOZILLA_MAY_SUPPORT_ABC
+ * if (mozilla::supports_abc()) {
+ * mozilla::ABC::foo(); // in a separate file
+ * return;
+ * }
+ * #endif
+ *
+ * foo_unvectorized();
+ * }
+ *
+ * You'll need to define mozilla::ABC::foo() in a separate file and add the
+ * -mabc flag when using gcc.
+ *
+ * [1] http://gcc.gnu.org/bugzilla/show_bug.cgi?id=39787
+ * [2] http://gcc.gnu.org/bugzilla/show_bug.cgi?id=41201
+ *
+ */
+
+#if defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))
+
+# ifdef __MMX__
+// It's ok to use MMX instructions based on the -march option (or
+// the default for x86_64 or for Intel Mac).
+# define MOZILLA_PRESUME_MMX 1
+# endif
+# ifdef __SSE__
+// It's ok to use SSE instructions based on the -march option (or
+// the default for x86_64 or for Intel Mac).
+# define MOZILLA_PRESUME_SSE 1
+# endif
+# ifdef __SSE2__
+// It's ok to use SSE2 instructions based on the -march option (or
+// the default for x86_64 or for Intel Mac).
+# define MOZILLA_PRESUME_SSE2 1
+# endif
+# ifdef __SSE3__
+// It's ok to use SSE3 instructions based on the -march option (or the
+// default for Intel Mac).
+# define MOZILLA_PRESUME_SSE3 1
+# endif
+# ifdef __SSSE3__
+// It's ok to use SSSE3 instructions based on the -march option.
+# define MOZILLA_PRESUME_SSSE3 1
+# endif
+# ifdef __SSE4A__
+// It's ok to use SSE4A instructions based on the -march option.
+# define MOZILLA_PRESUME_SSE4A 1
+# endif
+# ifdef __SSE4_1__
+// It's ok to use SSE4.1 instructions based on the -march option.
+# define MOZILLA_PRESUME_SSE4_1 1
+# endif
+# ifdef __SSE4_2__
+// It's ok to use SSE4.2 instructions based on the -march option.
+# define MOZILLA_PRESUME_SSE4_2 1
+# endif
+# ifdef __AVX__
+// It's ok to use AVX instructions based on the -march option.
+# define MOZILLA_PRESUME_AVX 1
+# endif
+# ifdef __AVX2__
+// It's ok to use AVX2 instructions based on the -march option.
+# define MOZILLA_PRESUME_AVX2 1
+# endif
+# ifdef __AVXVNNI__
+// It's ok to use AVX-VNNI instructions based on the -march option.
+# define MOZILLA_PRESUME_AVXVNNI 1
+# endif
+# ifdef __AES__
+// It's ok to use AES instructions based on the -march option.
+# define MOZILLA_PRESUME_AES 1
+# endif
+
+# ifdef HAVE_CPUID_H
+# define MOZILLA_SSE_HAVE_CPUID_DETECTION
+# endif
+
+#elif defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_AMD64))
+
+# define MOZILLA_SSE_HAVE_CPUID_DETECTION
+
+# if defined(_M_IX86_FP)
+
+# if _M_IX86_FP >= 1
+// It's ok to use SSE instructions based on the /arch option
+# define MOZILLA_PRESUME_SSE
+# endif
+# if _M_IX86_FP >= 2
+// It's ok to use SSE2 instructions based on the /arch option
+# define MOZILLA_PRESUME_SSE2
+# endif
+
+# elif defined(_M_AMD64)
+// MSVC for AMD64 doesn't support MMX, so don't presume it here.
+
+// SSE is always available on AMD64.
+# define MOZILLA_PRESUME_SSE
+// SSE2 is always available on AMD64.
+# define MOZILLA_PRESUME_SSE2
+# endif
+
+#elif defined(__SUNPRO_CC) && (defined(__i386) || defined(__x86_64__))
+// Sun Studio on x86 or amd64
+
+# define MOZILLA_SSE_HAVE_CPUID_DETECTION
+
+# if defined(__x86_64__)
+// MMX is always available on AMD64.
+# define MOZILLA_PRESUME_MMX
+// SSE is always available on AMD64.
+# define MOZILLA_PRESUME_SSE
+// SSE2 is always available on AMD64.
+# define MOZILLA_PRESUME_SSE2
+# endif
+
+#endif
+
+namespace mozilla {
+
+namespace sse_private {
+#if defined(MOZILLA_SSE_HAVE_CPUID_DETECTION)
+# if !defined(MOZILLA_PRESUME_MMX)
+extern bool MFBT_DATA mmx_enabled;
+# endif
+# if !defined(MOZILLA_PRESUME_SSE)
+extern bool MFBT_DATA sse_enabled;
+# endif
+# if !defined(MOZILLA_PRESUME_SSE2)
+extern bool MFBT_DATA sse2_enabled;
+# endif
+# if !defined(MOZILLA_PRESUME_SSE3)
+extern bool MFBT_DATA sse3_enabled;
+# endif
+# if !defined(MOZILLA_PRESUME_SSSE3)
+extern bool MFBT_DATA ssse3_enabled;
+# endif
+# if !defined(MOZILLA_PRESUME_SSE4A)
+extern bool MFBT_DATA sse4a_enabled;
+# endif
+# if !defined(MOZILLA_PRESUME_SSE4_1)
+extern bool MFBT_DATA sse4_1_enabled;
+# endif
+# if !defined(MOZILLA_PRESUME_SSE4_2)
+extern bool MFBT_DATA sse4_2_enabled;
+# endif
+# if !defined(MOZILLA_PRESUME_FMA3)
+extern bool MFBT_DATA fma3_enabled;
+# endif
+# if !defined(MOZILLA_PRESUME_AVX)
+extern bool MFBT_DATA avx_enabled;
+# endif
+# if !defined(MOZILLA_PRESUME_AVX2)
+extern bool MFBT_DATA avx2_enabled;
+# endif
+# if !defined(MOZILLA_PRESUME_AVXVNNI)
+extern bool MFBT_DATA avxvnni_enabled;
+# endif
+# if !defined(MOZILLA_PRESUME_AES)
+extern bool MFBT_DATA aes_enabled;
+# endif
+extern bool MFBT_DATA has_constant_tsc;
+
+#endif
+} // namespace sse_private
+
+#ifdef HAVE_CPUID_H
+MOZ_EXPORT uint64_t xgetbv(uint32_t xcr);
+#endif
+
+#if defined(MOZILLA_PRESUME_MMX)
+# define MOZILLA_MAY_SUPPORT_MMX 1
+inline bool supports_mmx() { return true; }
+#elif defined(MOZILLA_SSE_HAVE_CPUID_DETECTION)
+# if !(defined(_MSC_VER) && defined(_M_AMD64))
+// Define MOZILLA_MAY_SUPPORT_MMX only if we're not on MSVC for
+// AMD64, since that compiler doesn't support MMX.
+# define MOZILLA_MAY_SUPPORT_MMX 1
+# endif
+inline bool supports_mmx() { return sse_private::mmx_enabled; }
+#else
+inline bool supports_mmx() { return false; }
+#endif
+
+#if defined(MOZILLA_PRESUME_SSE)
+# define MOZILLA_MAY_SUPPORT_SSE 1
+inline bool supports_sse() { return true; }
+#elif defined(MOZILLA_SSE_HAVE_CPUID_DETECTION)
+# define MOZILLA_MAY_SUPPORT_SSE 1
+inline bool supports_sse() { return sse_private::sse_enabled; }
+#else
+inline bool supports_sse() { return false; }
+#endif
+
+#if defined(MOZILLA_PRESUME_SSE2)
+# define MOZILLA_MAY_SUPPORT_SSE2 1
+inline bool supports_sse2() { return true; }
+#elif defined(MOZILLA_SSE_HAVE_CPUID_DETECTION)
+# define MOZILLA_MAY_SUPPORT_SSE2 1
+inline bool supports_sse2() { return sse_private::sse2_enabled; }
+#else
+inline bool supports_sse2() { return false; }
+#endif
+
+#if defined(MOZILLA_PRESUME_SSE3)
+# define MOZILLA_MAY_SUPPORT_SSE3 1
+inline bool supports_sse3() { return true; }
+#elif defined(MOZILLA_SSE_HAVE_CPUID_DETECTION)
+# define MOZILLA_MAY_SUPPORT_SSE3 1
+inline bool supports_sse3() { return sse_private::sse3_enabled; }
+#else
+inline bool supports_sse3() { return false; }
+#endif
+
+#if defined(MOZILLA_PRESUME_SSSE3)
+# define MOZILLA_MAY_SUPPORT_SSSE3 1
+inline bool supports_ssse3() { return true; }
+#elif defined(MOZILLA_SSE_HAVE_CPUID_DETECTION)
+# define MOZILLA_MAY_SUPPORT_SSSE3 1
+inline bool supports_ssse3() { return sse_private::ssse3_enabled; }
+#else
+inline bool supports_ssse3() { return false; }
+#endif
+
+#if defined(MOZILLA_PRESUME_SSE4A)
+# define MOZILLA_MAY_SUPPORT_SSE4A 1
+inline bool supports_sse4a() { return true; }
+#elif defined(MOZILLA_SSE_HAVE_CPUID_DETECTION)
+# define MOZILLA_MAY_SUPPORT_SSE4A 1
+inline bool supports_sse4a() { return sse_private::sse4a_enabled; }
+#else
+inline bool supports_sse4a() { return false; }
+#endif
+
+#if defined(MOZILLA_PRESUME_SSE4_1)
+# define MOZILLA_MAY_SUPPORT_SSE4_1 1
+inline bool supports_sse4_1() { return true; }
+#elif defined(MOZILLA_SSE_HAVE_CPUID_DETECTION)
+# define MOZILLA_MAY_SUPPORT_SSE4_1 1
+inline bool supports_sse4_1() { return sse_private::sse4_1_enabled; }
+#else
+inline bool supports_sse4_1() { return false; }
+#endif
+
+#if defined(MOZILLA_PRESUME_SSE4_2)
+# define MOZILLA_MAY_SUPPORT_SSE4_2 1
+inline bool supports_sse4_2() { return true; }
+#elif defined(MOZILLA_SSE_HAVE_CPUID_DETECTION)
+# define MOZILLA_MAY_SUPPORT_SSE4_2 1
+inline bool supports_sse4_2() { return sse_private::sse4_2_enabled; }
+#else
+inline bool supports_sse4_2() { return false; }
+#endif
+
+#if defined(MOZILLA_PRESUME_FMA3)
+# define MOZILLA_MAY_SUPPORT_FMA3 1
+inline bool supports_fma3() { return true; }
+#elif defined(MOZILLA_SSE_HAVE_CPUID_DETECTION)
+# define MOZILLA_MAY_SUPPORT_FMA3 1
+inline bool supports_fma3() { return sse_private::fma3_enabled; }
+#else
+inline bool supports_fma3() { return false; }
+#endif
+
+#if defined(MOZILLA_PRESUME_AVX)
+# define MOZILLA_MAY_SUPPORT_AVX 1
+inline bool supports_avx() { return true; }
+#elif defined(MOZILLA_SSE_HAVE_CPUID_DETECTION)
+# define MOZILLA_MAY_SUPPORT_AVX 1
+inline bool supports_avx() { return sse_private::avx_enabled; }
+#else
+inline bool supports_avx() { return false; }
+#endif
+
+#if defined(MOZILLA_PRESUME_AVX2)
+# define MOZILLA_MAY_SUPPORT_AVX2 1
+inline bool supports_avx2() { return true; }
+#elif defined(MOZILLA_SSE_HAVE_CPUID_DETECTION)
+# define MOZILLA_MAY_SUPPORT_AVX2 1
+inline bool supports_avx2() { return sse_private::avx2_enabled; }
+#else
+inline bool supports_avx2() { return false; }
+#endif
+
+#if defined(MOZILLA_PRESUME_AVXVNNI)
+# define MOZILLA_MAY_SUPPORT_AVXVNNI 1
+inline bool supports_avxvnni() { return true; }
+#elif defined(MOZILLA_SSE_HAVE_CPUID_DETECTION)
+# define MOZILLA_MAY_SUPPORT_AVXVNNI 1
+inline bool supports_avxvnni() { return sse_private::avxvnni_enabled; }
+#else
+inline bool supports_avxvnni() { return false; }
+#endif
+
+#if defined(MOZILLA_PRESUME_AES)
+# define MOZILLA_MAY_SUPPORT_AES 1
+inline bool supports_aes() { return true; }
+#elif defined(MOZILLA_SSE_HAVE_CPUID_DETECTION)
+# define MOZILLA_MAY_SUPPORT_AES 1
+inline bool supports_aes() { return sse_private::aes_enabled; }
+#else
+inline bool supports_aes() { return false; }
+#endif
+
+#ifdef MOZILLA_SSE_HAVE_CPUID_DETECTION
+inline bool has_constant_tsc() { return sse_private::has_constant_tsc; }
+#else
+inline bool has_constant_tsc() { return false; }
+#endif
+
+} // namespace mozilla
+
+#endif /* !defined(mozilla_SSE_h_) */