diff options
Diffstat (limited to 'spa/plugins/support/cpu-x86.c')
-rw-r--r-- | spa/plugins/support/cpu-x86.c | 204 |
1 files changed, 204 insertions, 0 deletions
diff --git a/spa/plugins/support/cpu-x86.c b/spa/plugins/support/cpu-x86.c new file mode 100644 index 0000000..722f7c9 --- /dev/null +++ b/spa/plugins/support/cpu-x86.c @@ -0,0 +1,204 @@ +/* Spa + * + * Copyright © 2018 Wim Taymans + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include <cpuid.h> + +static int +x86_init(struct impl *impl) +{ + uint32_t flags; + + unsigned int vendor; + unsigned int model, family; + unsigned int max_level, ext_level, has_osxsave; + unsigned int eax, ebx, ecx, edx; + + + max_level = __get_cpuid_max(0, &vendor); + if (max_level < 1) + return 0; + + __cpuid(1, eax, ebx, ecx, edx); + + model = (eax >> 4) & 0x0f; + family = (eax >> 8) & 0x0f; + + if (vendor == signature_INTEL_ebx || + vendor == signature_AMD_ebx) { + unsigned int extended_model, extended_family; + + extended_model = (eax >> 12) & 0xf0; + extended_family = (eax >> 20) & 0xff; + if (family == 0x0f) { + family += extended_family; + model += extended_model; + } else if (family == 0x06) + model += extended_model; + } + (void)model; + + flags = 0; + if (ecx & bit_SSE3) + flags |= SPA_CPU_FLAG_SSE3; + if (ecx & bit_SSSE3) + flags |= SPA_CPU_FLAG_SSSE3; + if (ecx & bit_SSE4_1) + flags |= SPA_CPU_FLAG_SSE41; + if (ecx & bit_SSE4_2) + flags |= SPA_CPU_FLAG_SSE42; + if (ecx & bit_AVX) + flags |= SPA_CPU_FLAG_AVX; + has_osxsave = ecx & bit_OSXSAVE; + if (ecx & bit_FMA) + flags |= SPA_CPU_FLAG_FMA3; + + if (edx & bit_CMOV) + flags |= SPA_CPU_FLAG_CMOV; + if (edx & bit_MMX) + flags |= SPA_CPU_FLAG_MMX; + if (edx & bit_MMXEXT) + flags |= SPA_CPU_FLAG_MMXEXT; + if (edx & bit_SSE) + flags |= SPA_CPU_FLAG_SSE; + if (edx & bit_SSE2) + flags |= SPA_CPU_FLAG_SSE2; + + + if (max_level >= 7) { + __cpuid_count(7, 0, eax, ebx, ecx, edx); + + if (ebx & bit_BMI) + flags |= SPA_CPU_FLAG_BMI1; + if (ebx & bit_AVX2) + flags |= SPA_CPU_FLAG_AVX2; + if (ebx & bit_BMI2) + flags |= SPA_CPU_FLAG_BMI2; +#define AVX512_BITS (bit_AVX512F | bit_AVX512DQ | bit_AVX512CD | bit_AVX512BW | bit_AVX512VL) + if ((ebx & AVX512_BITS) == AVX512_BITS) + flags |= SPA_CPU_FLAG_AVX512; + } + + /* Check cpuid level of extended features. */ + __cpuid (0x80000000, ext_level, ebx, ecx, edx); + + if (ext_level >= 0x80000001) { + __cpuid (0x80000001, eax, ebx, ecx, edx); + + if (edx & bit_3DNOW) + flags |= SPA_CPU_FLAG_3DNOW; + if (edx & bit_3DNOWP) + flags |= SPA_CPU_FLAG_3DNOWEXT; + if (edx & bit_MMX) + flags |= SPA_CPU_FLAG_MMX; + if (edx & bit_MMXEXT) + flags |= SPA_CPU_FLAG_MMXEXT; + if (ecx & bit_FMA4) + flags |= SPA_CPU_FLAG_FMA4; + if (ecx & bit_XOP) + flags |= SPA_CPU_FLAG_XOP; + } + + /* Get XCR_XFEATURE_ENABLED_MASK register with xgetbv. */ +#define XCR_XFEATURE_ENABLED_MASK 0x0 +#define XSTATE_FP 0x1 +#define XSTATE_SSE 0x2 +#define XSTATE_YMM 0x4 +#define XSTATE_OPMASK 0x20 +#define XSTATE_ZMM 0x40 +#define XSTATE_HI_ZMM 0x80 + +#define XCR_AVX_ENABLED_MASK \ + (XSTATE_SSE | XSTATE_YMM) +#define XCR_AVX512F_ENABLED_MASK \ + (XSTATE_SSE | XSTATE_YMM | XSTATE_OPMASK | XSTATE_ZMM | XSTATE_HI_ZMM) + + if (has_osxsave) + asm (".byte 0x0f; .byte 0x01; .byte 0xd0" + : "=a" (eax), "=d" (edx) + : "c" (XCR_XFEATURE_ENABLED_MASK)); + else + eax = 0; + + /* Check if AVX registers are supported. */ + if ((eax & XCR_AVX_ENABLED_MASK) != XCR_AVX_ENABLED_MASK) { + flags &= ~(SPA_CPU_FLAG_AVX | + SPA_CPU_FLAG_AVX2 | + SPA_CPU_FLAG_FMA3 | + SPA_CPU_FLAG_FMA4 | + SPA_CPU_FLAG_XOP); + } + + /* Check if AVX512F registers are supported. */ + if ((eax & XCR_AVX512F_ENABLED_MASK) != XCR_AVX512F_ENABLED_MASK) { + flags &= ~SPA_CPU_FLAG_AVX512; + } + + if (flags & SPA_CPU_FLAG_AVX512) + impl->max_align = 64; + else if (flags & (SPA_CPU_FLAG_AVX2 | + SPA_CPU_FLAG_AVX | + SPA_CPU_FLAG_XOP | + SPA_CPU_FLAG_FMA4 | + SPA_CPU_FLAG_FMA3)) + impl->max_align = 32; + else if (flags & (SPA_CPU_FLAG_AESNI | + SPA_CPU_FLAG_SSE42 | + SPA_CPU_FLAG_SSE41 | + SPA_CPU_FLAG_SSSE3 | + SPA_CPU_FLAG_SSE3 | + SPA_CPU_FLAG_SSE2 | + SPA_CPU_FLAG_SSE)) + impl->max_align = 16; + else + impl->max_align = 8; + + impl->flags = flags; + + return 0; +} + +#if defined(HAVE_SSE) +#include <xmmintrin.h> +#endif + +static int x86_zero_denormals(void *object, bool enable) +{ +#if defined(HAVE_SSE) + struct impl *impl = object; + if (impl->flags & SPA_CPU_FLAG_SSE) { + unsigned int mxcsr; + mxcsr = _mm_getcsr(); + if (enable) + mxcsr |= 0x8040; + else + mxcsr &= ~0x8040; + _mm_setcsr(mxcsr); + spa_log_debug(impl->log, "%p: zero-denormals:%s", + impl, enable ? "on" : "off"); + } + return 0; +#else + return -ENOTSUP; +#endif +} |