From d8bbc7858622b6d9c278469aab701ca0b609cddf Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Wed, 15 May 2024 05:35:49 +0200 Subject: Merging upstream version 126.0. Signed-off-by: Daniel Baumann --- third_party/dav1d/src/arm/cpu.c | 137 ++++++++++++++++++++++++++++++++-------- 1 file changed, 109 insertions(+), 28 deletions(-) (limited to 'third_party/dav1d/src/arm/cpu.c') diff --git a/third_party/dav1d/src/arm/cpu.c b/third_party/dav1d/src/arm/cpu.c index b7a0d3adbc..d9b1751a6a 100644 --- a/third_party/dav1d/src/arm/cpu.c +++ b/third_party/dav1d/src/arm/cpu.c @@ -31,22 +31,95 @@ #include "src/arm/cpu.h" -#if defined(__ARM_NEON) || defined(__APPLE__) || defined(_WIN32) || ARCH_AARCH64 -// NEON is always available; runtime tests are not needed. -#elif defined(HAVE_GETAUXVAL) && ARCH_ARM +#if defined(HAVE_GETAUXVAL) || defined(HAVE_ELF_AUX_INFO) #include +#if ARCH_AARCH64 + +#define HWCAP_AARCH64_ASIMDDP (1 << 20) +#define HWCAP_AARCH64_SVE (1 << 22) +#define HWCAP2_AARCH64_SVE2 (1 << 1) +#define HWCAP2_AARCH64_I8MM (1 << 13) + +COLD unsigned dav1d_get_cpu_flags_arm(void) { +#ifdef HAVE_GETAUXVAL + unsigned long hw_cap = getauxval(AT_HWCAP); + unsigned long hw_cap2 = getauxval(AT_HWCAP2); +#else + unsigned long hw_cap = 0; + unsigned long hw_cap2 = 0; + elf_aux_info(AT_HWCAP, &hw_cap, sizeof(hw_cap)); + elf_aux_info(AT_HWCAP2, &hw_cap2, sizeof(hw_cap2)); +#endif + + unsigned flags = DAV1D_ARM_CPU_FLAG_NEON; + flags |= (hw_cap & HWCAP_AARCH64_ASIMDDP) ? DAV1D_ARM_CPU_FLAG_DOTPROD : 0; + flags |= (hw_cap2 & HWCAP2_AARCH64_I8MM) ? DAV1D_ARM_CPU_FLAG_I8MM : 0; + flags |= (hw_cap & HWCAP_AARCH64_SVE) ? DAV1D_ARM_CPU_FLAG_SVE : 0; + flags |= (hw_cap2 & HWCAP2_AARCH64_SVE2) ? 
DAV1D_ARM_CPU_FLAG_SVE2 : 0; + return flags; +} +#else /* !ARCH_AARCH64 */ + #ifndef HWCAP_ARM_NEON -#define HWCAP_ARM_NEON (1 << 12) +#define HWCAP_ARM_NEON (1 << 12) #endif -#define NEON_HWCAP HWCAP_ARM_NEON +#define HWCAP_ARM_ASIMDDP (1 << 24) +#define HWCAP_ARM_I8MM (1 << 27) -#elif defined(HAVE_ELF_AUX_INFO) && ARCH_ARM -#include +COLD unsigned dav1d_get_cpu_flags_arm(void) { +#ifdef HAVE_GETAUXVAL + unsigned long hw_cap = getauxval(AT_HWCAP); +#else + unsigned long hw_cap = 0; + elf_aux_info(AT_HWCAP, &hw_cap, sizeof(hw_cap)); +#endif + + unsigned flags = (hw_cap & HWCAP_ARM_NEON) ? DAV1D_ARM_CPU_FLAG_NEON : 0; + flags |= (hw_cap & HWCAP_ARM_ASIMDDP) ? DAV1D_ARM_CPU_FLAG_DOTPROD : 0; + flags |= (hw_cap & HWCAP_ARM_I8MM) ? DAV1D_ARM_CPU_FLAG_I8MM : 0; + return flags; +} +#endif /* ARCH_AARCH64 */ + +#elif defined(__APPLE__) +#include + +static int have_feature(const char *feature) { + int supported = 0; + size_t size = sizeof(supported); + if (sysctlbyname(feature, &supported, &size, NULL, 0) != 0) { + return 0; + } + return supported; +} + +COLD unsigned dav1d_get_cpu_flags_arm(void) { + unsigned flags = DAV1D_ARM_CPU_FLAG_NEON; + if (have_feature("hw.optional.arm.FEAT_DotProd")) + flags |= DAV1D_ARM_CPU_FLAG_DOTPROD; + if (have_feature("hw.optional.arm.FEAT_I8MM")) + flags |= DAV1D_ARM_CPU_FLAG_I8MM; + /* No SVE and SVE2 feature detection available on Apple platforms. */ + return flags; +} + +#elif defined(_WIN32) +#include -#define NEON_HWCAP HWCAP_NEON +COLD unsigned dav1d_get_cpu_flags_arm(void) { + unsigned flags = DAV1D_ARM_CPU_FLAG_NEON; +#ifdef PF_ARM_V82_DP_INSTRUCTIONS_AVAILABLE + if (IsProcessorFeaturePresent(PF_ARM_V82_DP_INSTRUCTIONS_AVAILABLE)) + flags |= DAV1D_ARM_CPU_FLAG_DOTPROD; +#endif + /* No I8MM or SVE feature detection available on Windows at the time of + * writing. 
*/ + return flags; +} #elif defined(__ANDROID__) +#include #include #include @@ -58,18 +131,25 @@ static unsigned parse_proc_cpuinfo(const char *flag) { char line_buffer[120]; const char *line; + size_t flaglen = strlen(flag); while ((line = fgets(line_buffer, sizeof(line_buffer), file))) { - if (strstr(line, flag)) { - fclose(file); - return 1; + // check all occurrences as whole words + const char *found = line; + while ((found = strstr(found, flag))) { + if ((found == line_buffer || !isgraph(found[-1])) && + (isspace(found[flaglen]) || feof(file))) { + fclose(file); + return 1; + } + found += flaglen; } // if line is incomplete seek back to avoid splitting the search // string into two buffers - if (!strchr(line, '\n') && strlen(line) > strlen(flag)) { + if (!strchr(line, '\n') && strlen(line) > flaglen) { // use fseek since the 64 bit fseeko is only available since // Android API level 24 and meson defines _FILE_OFFSET_BITS // by default 64 - if (fseek(file, -strlen(flag), SEEK_CUR)) + if (fseek(file, -flaglen, SEEK_CUR)) break; } } @@ -78,22 +158,23 @@ static unsigned parse_proc_cpuinfo(const char *flag) { return 0; } -#endif COLD unsigned dav1d_get_cpu_flags_arm(void) { - unsigned flags = 0; -#if defined(__ARM_NEON) || defined(__APPLE__) || defined(_WIN32) || ARCH_AARCH64 - flags |= DAV1D_ARM_CPU_FLAG_NEON; -#elif defined(HAVE_GETAUXVAL) && ARCH_ARM - unsigned long hw_cap = getauxval(AT_HWCAP); - flags |= (hw_cap & NEON_HWCAP) ? DAV1D_ARM_CPU_FLAG_NEON : 0; -#elif defined(HAVE_ELF_AUX_INFO) && ARCH_ARM - unsigned long hw_cap = 0; - elf_aux_info(AT_HWCAP, &hw_cap, sizeof(hw_cap)); - flags |= (hw_cap & NEON_HWCAP) ? DAV1D_ARM_CPU_FLAG_NEON : 0; -#elif defined(__ANDROID__) - flags |= parse_proc_cpuinfo("neon") ? DAV1D_ARM_CPU_FLAG_NEON : 0; -#endif - + unsigned flags = parse_proc_cpuinfo("neon") ? DAV1D_ARM_CPU_FLAG_NEON : 0; + flags |= parse_proc_cpuinfo("asimd") ? DAV1D_ARM_CPU_FLAG_NEON : 0; + flags |= parse_proc_cpuinfo("asimddp") ? 
DAV1D_ARM_CPU_FLAG_DOTPROD : 0; + flags |= parse_proc_cpuinfo("i8mm") ? DAV1D_ARM_CPU_FLAG_I8MM : 0; +#if ARCH_AARCH64 + flags |= parse_proc_cpuinfo("sve") ? DAV1D_ARM_CPU_FLAG_SVE : 0; + flags |= parse_proc_cpuinfo("sve2") ? DAV1D_ARM_CPU_FLAG_SVE2 : 0; +#endif /* ARCH_AARCH64 */ return flags; } + +#else /* Unsupported OS */ + +COLD unsigned dav1d_get_cpu_flags_arm(void) { + return 0; +} + +#endif -- cgit v1.2.3