summary | refs | log | tree | commit | diff | stats
path: root/spa/plugins/support/cpu-x86.c
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r-- spa/plugins/support/cpu-x86.c 204
1 files changed, 204 insertions, 0 deletions
diff --git a/spa/plugins/support/cpu-x86.c b/spa/plugins/support/cpu-x86.c
new file mode 100644
index 0000000..722f7c9
--- /dev/null
+++ b/spa/plugins/support/cpu-x86.c
@@ -0,0 +1,204 @@
+/* Spa
+ *
+ * Copyright © 2018 Wim Taymans
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#include <cpuid.h>
+/* Detect x86 CPU features through CPUID and store the result in impl.
+ *
+ * Reads basic leaf 1, leaf 7 subleaf 0 (only when max_level >= 7) and
+ * extended leaf 0x80000001 (only when leaf 0x80000000 reports it), and maps
+ * the feature bits onto SPA_CPU_FLAG_* values. The AVX-family flags and the
+ * AVX512 flag are cleared again when the xgetbv result does not show the
+ * matching register state as enabled.
+ *
+ * On the normal path impl->flags receives the final mask and
+ * impl->max_align is set to 64, 32, 16 or 8 depending on the widest SIMD
+ * family still flagged.
+ *
+ * Always returns 0. When CPUID reports no basic leaf >= 1 the function
+ * returns early and does not write impl->flags or impl->max_align.
+ */
+static int
+x86_init(struct impl *impl)
+{
+ uint32_t flags;
+
+ unsigned int vendor;
+ unsigned int model, family;
+ unsigned int max_level, ext_level, has_osxsave;
+ unsigned int eax, ebx, ecx, edx;
+
+ /* max_level is the highest basic CPUID leaf; vendor receives the value
+  * that is compared against the signature_*_ebx constants below. */
+ max_level = __get_cpuid_max(0, &vendor);
+ if (max_level < 1)
+ return 0;
+
+ __cpuid(1, eax, ebx, ecx, edx);
+ /* Base 4-bit model and family fields taken from the leaf 1 EAX value. */
+ model = (eax >> 4) & 0x0f;
+ family = (eax >> 8) & 0x0f;
+ /* For Intel and AMD the extended fields widen these values: the
+  * extended model becomes the high nibble of model for family 0x06 and
+  * 0x0f, and the extended family is added only for family 0x0f. */
+ if (vendor == signature_INTEL_ebx ||
+ vendor == signature_AMD_ebx) {
+ unsigned int extended_model, extended_family;
+
+ extended_model = (eax >> 12) & 0xf0;
+ extended_family = (eax >> 20) & 0xff;
+ if (family == 0x0f) {
+ family += extended_family;
+ model += extended_model;
+ } else if (family == 0x06)
+ model += extended_model;
+ }
+ (void)model; /* model is computed but not used further in this function */
+ /* Leaf 1 feature bits: ECX first, then EDX. */
+ flags = 0;
+ if (ecx & bit_SSE3)
+ flags |= SPA_CPU_FLAG_SSE3;
+ if (ecx & bit_SSSE3)
+ flags |= SPA_CPU_FLAG_SSSE3;
+ if (ecx & bit_SSE4_1)
+ flags |= SPA_CPU_FLAG_SSE41;
+ if (ecx & bit_SSE4_2)
+ flags |= SPA_CPU_FLAG_SSE42;
+ if (ecx & bit_AVX)
+ flags |= SPA_CPU_FLAG_AVX;
+ has_osxsave = ecx & bit_OSXSAVE; /* decides whether xgetbv is executed below */
+ if (ecx & bit_FMA)
+ flags |= SPA_CPU_FLAG_FMA3;
+ /* NOTE(review): cpuid.h appears to list bit_MMXEXT under extended leaf
+  * 0x80000001; confirm that testing it against leaf 1 EDX here is
+  * intended. The extended leaf is checked again further down. */
+ if (edx & bit_CMOV)
+ flags |= SPA_CPU_FLAG_CMOV;
+ if (edx & bit_MMX)
+ flags |= SPA_CPU_FLAG_MMX;
+ if (edx & bit_MMXEXT)
+ flags |= SPA_CPU_FLAG_MMXEXT;
+ if (edx & bit_SSE)
+ flags |= SPA_CPU_FLAG_SSE;
+ if (edx & bit_SSE2)
+ flags |= SPA_CPU_FLAG_SSE2;
+
+ /* Leaf 7 subleaf 0: EBX is tested for BMI1, AVX2, BMI2 and AVX512. */
+ if (max_level >= 7) {
+ __cpuid_count(7, 0, eax, ebx, ecx, edx);
+
+ if (ebx & bit_BMI)
+ flags |= SPA_CPU_FLAG_BMI1;
+ if (ebx & bit_AVX2)
+ flags |= SPA_CPU_FLAG_AVX2;
+ if (ebx & bit_BMI2)
+ flags |= SPA_CPU_FLAG_BMI2;
+#define AVX512_BITS (bit_AVX512F | bit_AVX512DQ | bit_AVX512CD | bit_AVX512BW | bit_AVX512VL)
+ if ((ebx & AVX512_BITS) == AVX512_BITS) /* every listed subset must be present */
+ flags |= SPA_CPU_FLAG_AVX512;
+ }
+
+ /* Check cpuid level of extended features. The EAX result of leaf
+  * 0x80000000 is stored in ext_level and compared against the leaf
+  * number that is queried next. */
+ __cpuid (0x80000000, ext_level, ebx, ecx, edx);
+
+ if (ext_level >= 0x80000001) {
+ __cpuid (0x80000001, eax, ebx, ecx, edx);
+
+ if (edx & bit_3DNOW)
+ flags |= SPA_CPU_FLAG_3DNOW;
+ if (edx & bit_3DNOWP)
+ flags |= SPA_CPU_FLAG_3DNOWEXT;
+ if (edx & bit_MMX)
+ flags |= SPA_CPU_FLAG_MMX;
+ if (edx & bit_MMXEXT)
+ flags |= SPA_CPU_FLAG_MMXEXT;
+ if (ecx & bit_FMA4)
+ flags |= SPA_CPU_FLAG_FMA4;
+ if (ecx & bit_XOP)
+ flags |= SPA_CPU_FLAG_XOP;
+ }
+
+ /* Get XCR_XFEATURE_ENABLED_MASK register with xgetbv. The XSTATE_*
+  * values name the individual bits tested in the low 32 bits of the
+  * result. */
+#define XCR_XFEATURE_ENABLED_MASK 0x0
+#define XSTATE_FP 0x1
+#define XSTATE_SSE 0x2
+#define XSTATE_YMM 0x4
+#define XSTATE_OPMASK 0x20
+#define XSTATE_ZMM 0x40
+#define XSTATE_HI_ZMM 0x80
+ /* Bits that must all be set for AVX (SSE + YMM) and for AVX512F
+  * (additionally OPMASK, ZMM and HI_ZMM). */
+#define XCR_AVX_ENABLED_MASK \
+ (XSTATE_SSE | XSTATE_YMM)
+#define XCR_AVX512F_ENABLED_MASK \
+ (XSTATE_SSE | XSTATE_YMM | XSTATE_OPMASK | XSTATE_ZMM | XSTATE_HI_ZMM)
+ /* The raw opcode bytes are only executed when CPUID reported OSXSAVE.
+  * Otherwise eax is zeroed, so both checks below fail and the dependent
+  * flags are cleared. */
+ if (has_osxsave)
+ asm (".byte 0x0f; .byte 0x01; .byte 0xd0"
+ : "=a" (eax), "=d" (edx)
+ : "c" (XCR_XFEATURE_ENABLED_MASK));
+ else
+ eax = 0;
+
+ /* Check if AVX registers are supported. */
+ if ((eax & XCR_AVX_ENABLED_MASK) != XCR_AVX_ENABLED_MASK) {
+ flags &= ~(SPA_CPU_FLAG_AVX |
+ SPA_CPU_FLAG_AVX2 |
+ SPA_CPU_FLAG_FMA3 |
+ SPA_CPU_FLAG_FMA4 |
+ SPA_CPU_FLAG_XOP);
+ }
+
+ /* Check if AVX512F registers are supported. */
+ if ((eax & XCR_AVX512F_ENABLED_MASK) != XCR_AVX512F_ENABLED_MASK) {
+ flags &= ~SPA_CPU_FLAG_AVX512;
+ }
+ /* Choose max_align from the widest SIMD family still flagged: 64 for
+  * AVX512, 32 for the AVX/FMA/XOP group, 16 for the SSE group, else 8. */
+ if (flags & SPA_CPU_FLAG_AVX512)
+ impl->max_align = 64;
+ else if (flags & (SPA_CPU_FLAG_AVX2 |
+ SPA_CPU_FLAG_AVX |
+ SPA_CPU_FLAG_XOP |
+ SPA_CPU_FLAG_FMA4 |
+ SPA_CPU_FLAG_FMA3))
+ impl->max_align = 32;
+ else if (flags & (SPA_CPU_FLAG_AESNI | /* NOTE(review): AESNI is never set in this function */
+ SPA_CPU_FLAG_SSE42 |
+ SPA_CPU_FLAG_SSE41 |
+ SPA_CPU_FLAG_SSSE3 |
+ SPA_CPU_FLAG_SSE3 |
+ SPA_CPU_FLAG_SSE2 |
+ SPA_CPU_FLAG_SSE))
+ impl->max_align = 16;
+ else
+ impl->max_align = 8;
+
+ impl->flags = flags;
+
+ return 0;
+}
+
+#if defined(HAVE_SSE)
+#include <xmmintrin.h>
+#endif
+
+/* Switch the denormal-handling bits of MXCSR on or off.
+ *
+ * object is used as the struct impl of this plugin. When the build has
+ * HAVE_SSE and impl->flags contains SPA_CPU_FLAG_SSE, the 0x8040 bits of
+ * MXCSR (bit 15 flush-to-zero and bit 6 denormals-are-zero) are set when
+ * enable is true and cleared otherwise, and the new state is logged at
+ * debug level. Without the SSE flag nothing is modified.
+ *
+ * Returns 0 when built with HAVE_SSE, -ENOTSUP otherwise.
+ */
+static int x86_zero_denormals(void *object, bool enable)
+{
+#if defined(HAVE_SSE)
+	struct impl *cpu = object;
+	const unsigned int denormal_bits = 0x8040;
+	unsigned int csr;
+
+	/* Nothing to do on CPUs that were not detected as SSE capable. */
+	if (!(cpu->flags & SPA_CPU_FLAG_SSE))
+		return 0;
+
+	csr = _mm_getcsr();
+	csr = enable ? (csr | denormal_bits) : (csr & ~denormal_bits);
+	_mm_setcsr(csr);
+
+	spa_log_debug(cpu->log, "%p: zero-denormals:%s",
+			cpu, enable ? "on" : "off");
+	return 0;
+#else
+	return -ENOTSUP;
+#endif
+}