/* LIBGIMP - The GIMP Library
 * Copyright (C) 1995-1997 Peter Mattis and Spencer Kimball
 *
 * This library is free software: you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 3 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library.  If not, see
 * <https://www.gnu.org/licenses/>.
 */

/*
 * x86 bits Copyright (C) Manish Singh
 */

/*
 * PPC CPU acceleration detection was taken from DirectFB but seems to be
 * originating from mpeg2dec with the following copyright:
 *
 * Copyright (C) 1999-2001 Aaron Holtzman
 */

#include "config.h"

#include <string.h>
#include <signal.h>
#include <setjmp.h>

#include <glib.h>

#include "gimpcpuaccel.h"


/**
 * SECTION: gimpcpuaccel
 * @title: gimpcpuaccel
 * @short_description: Functions to query and configure CPU acceleration.
 *
 * Functions to query and configure CPU acceleration.
 **/


static GimpCpuAccelFlags  cpu_accel (void) G_GNUC_CONST;


/* Global switch consulted by gimp_cpu_accel_get_support(); when FALSE
 * no acceleration is reported regardless of what the CPU offers.
 */
static gboolean  use_cpu_accel = TRUE;


/**
 * gimp_cpu_accel_get_support:
 *
 * Query for CPU acceleration support.
 *
 * Return value: #GimpCpuAccelFlags as supported by the CPU, or
 *               %GIMP_CPU_ACCEL_NONE if the use of CPU acceleration
 *               has been turned off with gimp_cpu_accel_set_use().
 *
 * Since: 2.4
 */
GimpCpuAccelFlags
gimp_cpu_accel_get_support (void)
{
  return use_cpu_accel ? cpu_accel () : GIMP_CPU_ACCEL_NONE;
}

/**
 * gimp_cpu_accel_set_use:
 * @use:  whether to use CPU acceleration features or not
 *
 * This function is for internal use only.
 *
 * Since: 2.4
 */
void
gimp_cpu_accel_set_use (gboolean use)
{
  /* normalize any non-zero value to TRUE */
  use_cpu_accel = use ? TRUE : FALSE;
}


#if defined(ARCH_X86) && defined(USE_MMX) && defined(__GNUC__)

#define HAVE_ACCEL 1


/* CPU vendors distinguished by arch_get_vendor(). */
typedef enum
{
  ARCH_X86_VENDOR_NONE,
  ARCH_X86_VENDOR_INTEL,
  ARCH_X86_VENDOR_AMD,
  ARCH_X86_VENDOR_CENTAUR,
  ARCH_X86_VENDOR_CYRIX,
  ARCH_X86_VENDOR_NSC,
  ARCH_X86_VENDOR_TRANSMETA,
  ARCH_X86_VENDOR_NEXGEN,
  ARCH_X86_VENDOR_RISE,
  ARCH_X86_VENDOR_UMC,
  ARCH_X86_VENDOR_SIS,
  ARCH_X86_VENDOR_HYGON,
  ARCH_X86_VENDOR_UNKNOWN    = 0xff
} X86Vendor;

/* Feature bits tested against the EDX value returned by cpuid:
 * the INTEL ones after function 1, the vendor specific ones after
 * function 0x80000001.
 */
enum
{
  ARCH_X86_INTEL_FEATURE_MMX      = 1 << 23,
  ARCH_X86_INTEL_FEATURE_XMM      = 1 << 25,
  ARCH_X86_INTEL_FEATURE_XMM2     = 1 << 26,

  ARCH_X86_AMD_FEATURE_MMXEXT     = 1 << 22,
  ARCH_X86_AMD_FEATURE_3DNOW      = 1 << 31,

  ARCH_X86_CENTAUR_FEATURE_MMX    = 1 << 23,
  ARCH_X86_CENTAUR_FEATURE_MMXEXT = 1 << 24,
  ARCH_X86_CENTAUR_FEATURE_3DNOW  = 1 << 31,

  ARCH_X86_CYRIX_FEATURE_MMX      = 1 << 23,
  ARCH_X86_CYRIX_FEATURE_MMXEXT   = 1 << 24
};

/* Feature bits tested against the ECX value returned by cpuid
 * function 1.
 */
enum
{
  ARCH_X86_INTEL_FEATURE_PNI      = 1 << 0,
  ARCH_X86_INTEL_FEATURE_SSSE3    = 1 << 9,
  ARCH_X86_INTEL_FEATURE_SSE4_1   = 1 << 19,
  ARCH_X86_INTEL_FEATURE_SSE4_2   = 1 << 20,
  ARCH_X86_INTEL_FEATURE_AVX      = 1 << 28
};

/* cpuid (op, eax, ebx, ecx, edx):
 *
 * Executes the cpuid instruction with @op in EAX and stores the four
 * result registers in the given lvalues.  In 32 bit PIC builds the
 * value of EBX is preserved around the instruction by parking it in
 * ESI, and the EBX result is handed back through ESI.
 */
#if !defined(ARCH_X86_64) && (defined(PIC) || defined(__PIC__))
#define cpuid(op,eax,ebx,ecx,edx)  \
  __asm__ ("movl %%ebx, %%esi\n\t" \
           "cpuid\n\t"             \
           "xchgl %%ebx,%%esi"     \
           : "=a" (eax),           \
             "=S" (ebx),           \
             "=c" (ecx),           \
             "=d" (edx)            \
           : "0" (op))
#else
#define cpuid(op,eax,ebx,ecx,edx)  \
  __asm__ ("cpuid"                 \
           : "=a" (eax),           \
             "=b" (ebx),           \
             "=c" (ecx),           \
             "=d" (edx)            \
           : "0" (op))
#endif


/* arch_get_vendor:
 *
 * Identifies the CPU vendor from the 12 character string returned by
 * cpuid function 0.
 *
 * Returns ARCH_X86_VENDOR_NONE if cpuid is not available (ia32 only)
 * or reports 0 as its highest supported function, and
 * ARCH_X86_VENDOR_UNKNOWN if the vendor string is not recognized.
 * On x86-64 only AMD, Hygon and Intel are distinguished.
 */
static X86Vendor
arch_get_vendor (void)
{
  guint32 eax, ebx, ecx, edx;

  union
  {
    gchar idaschar[16];
    int   idasint[4];
  } id;

#ifndef ARCH_X86_64
  /* Only need to check this on ia32 */

  /* Try to toggle bit 0x200000 of EFLAGS; if the bit reads back
   * unchanged the cpuid instruction is not available.  The original
   * flags are restored by the final popfl.
   */
  __asm__ ("pushfl\n\t"
           "pushfl\n\t"
           "popl %0\n\t"
           "movl %0,%1\n\t"
           "xorl $0x200000,%0\n\t"
           "pushl %0\n\t"
           "popfl\n\t"
           "pushfl\n\t"
           "popl %0\n\t"
           "popfl"
           : "=a" (eax),
             "=c" (ecx)
           :
           : "cc");

  if (eax == ecx)
    return ARCH_X86_VENDOR_NONE;
#endif

  cpuid (0, eax, ebx, ecx, edx);

  if (eax == 0)
    return ARCH_X86_VENDOR_NONE;

  /* the vendor string is spread over EBX, EDX, ECX, in that order */
  id.idasint[0] = ebx;
  id.idasint[1] = edx;
  id.idasint[2] = ecx;

  id.idaschar[12] = '\0';

#ifdef ARCH_X86_64
  if (strcmp (id.idaschar, "AuthenticAMD") == 0)
    return ARCH_X86_VENDOR_AMD;
  else if (strcmp (id.idaschar, "HygonGenuine") == 0)
    return ARCH_X86_VENDOR_HYGON;
  else if (strcmp (id.idaschar, "GenuineIntel") == 0)
    return ARCH_X86_VENDOR_INTEL;
#else
  if (strcmp (id.idaschar, "GenuineIntel") == 0)
    return ARCH_X86_VENDOR_INTEL;
  else if (strcmp (id.idaschar, "AuthenticAMD") == 0)
    return ARCH_X86_VENDOR_AMD;
  else if (strcmp (id.idaschar, "HygonGenuine") == 0)
    return ARCH_X86_VENDOR_HYGON;
  else if (strcmp (id.idaschar, "CentaurHauls") == 0)
    return ARCH_X86_VENDOR_CENTAUR;
  else if (strcmp (id.idaschar, "CyrixInstead") == 0)
    return ARCH_X86_VENDOR_CYRIX;
  else if (strcmp (id.idaschar, "Geode by NSC") == 0)
    return ARCH_X86_VENDOR_NSC;
  else if (strcmp (id.idaschar, "GenuineTMx86") == 0 ||
           strcmp (id.idaschar, "TransmetaCPU") == 0)
    return ARCH_X86_VENDOR_TRANSMETA;
  else if (strcmp (id.idaschar, "NexGenDriven") == 0)
    return ARCH_X86_VENDOR_NEXGEN;
  else if (strcmp (id.idaschar, "RiseRiseRise") == 0)
    return ARCH_X86_VENDOR_RISE;
  else if (strcmp (id.idaschar, "UMC UMC UMC ") == 0)
    return ARCH_X86_VENDOR_UMC;
  else if (strcmp (id.idaschar, "SiS SiS SiS ") == 0)
    return ARCH_X86_VENDOR_SIS;
#endif

  return ARCH_X86_VENDOR_UNKNOWN;
}

/* arch_accel_intel:
 *
 * Translates the feature bits of cpuid function 1 into
 * GIMP_CPU_ACCEL_X86_* flags.  Returns 0 if the MMX bit is not set;
 * the SSE family is only looked at when built with USE_SSE.
 *
 * NOTE(review): AVX is reported from the CPUID bit alone; whether the
 * operating system enabled the AVX register state is not checked here.
 */
static guint32
arch_accel_intel (void)
{
  guint32 caps = 0;

#ifdef USE_MMX
  {
    guint32 eax, ebx, ecx, edx;

    cpuid (1, eax, ebx, ecx, edx);

    if ((edx & ARCH_X86_INTEL_FEATURE_MMX) == 0)
      return 0;

    caps = GIMP_CPU_ACCEL_X86_MMX;

#ifdef USE_SSE
    if (edx & ARCH_X86_INTEL_FEATURE_XMM)
      caps |= GIMP_CPU_ACCEL_X86_SSE | GIMP_CPU_ACCEL_X86_MMXEXT;

    if (edx & ARCH_X86_INTEL_FEATURE_XMM2)
      caps |= GIMP_CPU_ACCEL_X86_SSE2;

    if (ecx & ARCH_X86_INTEL_FEATURE_PNI)
      caps |= GIMP_CPU_ACCEL_X86_SSE3;

    if (ecx & ARCH_X86_INTEL_FEATURE_SSSE3)
      caps |= GIMP_CPU_ACCEL_X86_SSSE3;

    if (ecx & ARCH_X86_INTEL_FEATURE_SSE4_1)
      caps |= GIMP_CPU_ACCEL_X86_SSE4_1;

    if (ecx & ARCH_X86_INTEL_FEATURE_SSE4_2)
      caps |= GIMP_CPU_ACCEL_X86_SSE4_2;

    if (ecx & ARCH_X86_INTEL_FEATURE_AVX)
      caps |= GIMP_CPU_ACCEL_X86_AVX;
#endif /* USE_SSE */
  }
#endif /* USE_MMX */

  return caps;
}

/* arch_accel_amd:
 *
 * Starts from the flags found by arch_accel_intel() and, if cpuid
 * function 0x80000001 is supported, adds 3DNow! and MMXEXT from its
 * EDX bits (only when built with USE_SSE).
 */
static guint32
arch_accel_amd (void)
{
  guint32 caps;

  caps = arch_accel_intel ();

#ifdef USE_MMX
  {
    guint32 eax, ebx, ecx, edx;

    /* function 0x80000000 returns the highest extended function in EAX */
    cpuid (0x80000000, eax, ebx, ecx, edx);

    if (eax < 0x80000001)
      return caps;

#ifdef USE_SSE
    cpuid (0x80000001, eax, ebx, ecx, edx);

    if (edx & ARCH_X86_AMD_FEATURE_3DNOW)
      caps |= GIMP_CPU_ACCEL_X86_3DNOW;

    if (edx & ARCH_X86_AMD_FEATURE_MMXEXT)
      caps |= GIMP_CPU_ACCEL_X86_MMXEXT;
#endif /* USE_SSE */
  }
#endif /* USE_MMX */

  return caps;
}

/* arch_accel_centaur:
 *
 * Starts from the flags found by arch_accel_intel() and, if cpuid
 * function 0x80000001 is supported, adds MMX (and with USE_SSE also
 * 3DNow! and MMXEXT) from its EDX bits.
 */
static guint32
arch_accel_centaur (void)
{
  guint32 caps;

  caps = arch_accel_intel ();

#ifdef USE_MMX
  {
    guint32 eax, ebx, ecx, edx;

    cpuid (0x80000000, eax, ebx, ecx, edx);

    if (eax < 0x80000001)
      return caps;

    cpuid (0x80000001, eax, ebx, ecx, edx);

    if (edx & ARCH_X86_CENTAUR_FEATURE_MMX)
      caps |= GIMP_CPU_ACCEL_X86_MMX;

#ifdef USE_SSE
    if (edx & ARCH_X86_CENTAUR_FEATURE_3DNOW)
      caps |= GIMP_CPU_ACCEL_X86_3DNOW;

    if (edx & ARCH_X86_CENTAUR_FEATURE_MMXEXT)
      caps |= GIMP_CPU_ACCEL_X86_MMXEXT;
#endif /* USE_SSE */
  }
#endif /* USE_MMX */

  return caps;
}

/* arch_accel_cyrix:
 *
 * Starts from the flags found by arch_accel_intel().  Function
 * 0x80000001 is only queried when cpuid function 0 reports exactly 2
 * in EAX; its EDX bits then add MMX and (with USE_SSE) MMXEXT.
 */
static guint32
arch_accel_cyrix (void)
{
  guint32 caps;

  caps = arch_accel_intel ();

#ifdef USE_MMX
  {
    guint32 eax, ebx, ecx, edx;

    cpuid (0, eax, ebx, ecx, edx);

    if (eax != 2)
      return caps;

    cpuid (0x80000001, eax, ebx, ecx, edx);

    if (edx & ARCH_X86_CYRIX_FEATURE_MMX)
      caps |= GIMP_CPU_ACCEL_X86_MMX;

#ifdef USE_SSE
    if (edx & ARCH_X86_CYRIX_FEATURE_MMXEXT)
      caps |= GIMP_CPU_ACCEL_X86_MMXEXT;
#endif /* USE_SSE */
  }
#endif /* USE_MMX */

  return caps;
}

#ifdef USE_SSE

/* jump target used to get out of sigill_handler() */
static jmp_buf sigill_return;

/* SIGILL handler that is installed only while the SSE probe runs;
 * it jumps back into arch_accel_sse_os_support().
 */
static void
sigill_handler (gint n)
{
  longjmp (sigill_return, 1);
}

/* arch_accel_sse_os_support:
 *
 * Executes a single SSE instruction with a temporary SIGILL handler
 * installed.  Returns TRUE if the instruction ran, FALSE if it raised
 * SIGILL.  In both cases the SIGILL disposition is SIG_DFL on return.
 */
static gboolean
arch_accel_sse_os_support (void)
{
  if (setjmp (sigill_return))
    {
      /* We arrive here from sigill_handler().  Uninstall the handler,
       * otherwise a later SIGILL anywhere in the process would jump
       * into this function's long gone stack frame.
       */
      signal (SIGILL, SIG_DFL);

      return FALSE;
    }

  signal (SIGILL, sigill_handler);
  __asm__ __volatile__ ("xorps %xmm0, %xmm0");
  signal (SIGILL, SIG_DFL);

  return TRUE;
}

#endif /* USE_SSE */

/* arch_accel (x86):
 *
 * Detects the CPU vendor, collects the capability flags with the
 * matching vendor routine (unknown vendors are probed the Intel way)
 * and finally drops the SSE family if the SSE probe faults.
 */
static guint32
arch_accel (void)
{
  guint32   caps;
  X86Vendor vendor;

  vendor = arch_get_vendor ();

  switch (vendor)
    {
    case ARCH_X86_VENDOR_NONE:
      caps = 0;
      break;

    case ARCH_X86_VENDOR_AMD:
    case ARCH_X86_VENDOR_HYGON:
      caps = arch_accel_amd ();
      break;

    case ARCH_X86_VENDOR_CENTAUR:
      caps = arch_accel_centaur ();
      break;

    case ARCH_X86_VENDOR_CYRIX:
    case ARCH_X86_VENDOR_NSC:
      caps = arch_accel_cyrix ();
      break;

      /* check for what Intel speced, even if UNKNOWN */
    default:
      caps = arch_accel_intel ();
      break;
    }

#ifdef USE_SSE
  /* If even a plain SSE instruction faults, none of the later
   * extensions that operate on the same (or wider) vector registers
   * can be used either, so clear the whole family.  MMX, MMXEXT and
   * 3DNow! are left untouched.
   */
  if ((caps & GIMP_CPU_ACCEL_X86_SSE) && ! arch_accel_sse_os_support ())
    caps &= ~(GIMP_CPU_ACCEL_X86_SSE    |
              GIMP_CPU_ACCEL_X86_SSE2   |
              GIMP_CPU_ACCEL_X86_SSE3   |
              GIMP_CPU_ACCEL_X86_SSSE3  |
              GIMP_CPU_ACCEL_X86_SSE4_1 |
              GIMP_CPU_ACCEL_X86_SSE4_2 |
              GIMP_CPU_ACCEL_X86_AVX);
#endif

  return caps;
}

#endif /* ARCH_X86 && USE_MMX && __GNUC__ */


#if defined(ARCH_PPC) && defined (USE_ALTIVEC)

#if defined(HAVE_ALTIVEC_SYSCTL)

#include <sys/sysctl.h>

#define HAVE_ACCEL 1

/* arch_accel (PPC, sysctl variant):
 *
 * Queries the { CTL_HW, HW_VECTORUNIT } sysctl and reports AltiVec if
 * the call succeeds and yields a non-zero value.
 */
static guint32
arch_accel (void)
{
  gint     sels[2] = { CTL_HW, HW_VECTORUNIT };
  gboolean has_vu  = FALSE;
  gsize    length  = sizeof(has_vu);
  gint     err;

  err = sysctl (sels, 2, &has_vu, &length, NULL, 0);

  if (err == 0 && has_vu)
    return GIMP_CPU_ACCEL_PPC_ALTIVEC;

  return 0;
}

#elif defined(__GNUC__)

#define HAVE_ACCEL 1

/* jump target and "probe in progress" flag shared between
 * arch_accel() and sigill_handler()
 */
static          sigjmp_buf   jmpbuf;
static volatile sig_atomic_t canjump = 0;

/* SIGILL handler for the AltiVec probe.  A SIGILL that arrives while
 * no probe is in progress is re-raised with the default disposition.
 */
static void
sigill_handler (gint sig)
{
  if (!canjump)
    {
      signal (sig, SIG_DFL);
      raise (sig);
    }

  canjump = 0;
  siglongjmp (jmpbuf, 1);
}

/* arch_accel (PPC, probing variant):
 *
 * Executes a vector instruction with a temporary SIGILL handler
 * installed.  Returns GIMP_CPU_ACCEL_PPC_ALTIVEC if it ran, 0 if it
 * raised SIGILL.  The SIGILL disposition is SIG_DFL on return.
 */
static guint32
arch_accel (void)
{
  signal (SIGILL, sigill_handler);

  if (sigsetjmp (jmpbuf, 1))
    {
      /* arrived here from sigill_handler(): the probe faulted */
      signal (SIGILL, SIG_DFL);
      return 0;
    }

  canjump = 1;

  /* __asm__ / __volatile__ spelling, as in the x86 code above, so
   * that this also builds in strict ISO C mode.
   */
  __asm__ __volatile__ ("mtspr 256, %0\n\t"
                        "vand %%v0, %%v0, %%v0"
                        :
                        : "r" (-1));

  signal (SIGILL, SIG_DFL);

  return GIMP_CPU_ACCEL_PPC_ALTIVEC;
}
#endif /* __GNUC__ */

#endif /* ARCH_PPC && USE_ALTIVEC */


/* cpu_accel:
 *
 * Returns the capability flags of the CPU.  The architecture specific
 * arch_accel() is run on the first call and its result is cached in a
 * static variable, with ~0U marking "not probed yet".  Without any
 * arch_accel() implementation GIMP_CPU_ACCEL_NONE is returned.
 */
static GimpCpuAccelFlags
cpu_accel (void)
{
#ifdef HAVE_ACCEL
  static guint32 accel = ~0U;

  if (accel != ~0U)
    return accel;

  accel = arch_accel ();

  return (GimpCpuAccelFlags) accel;

#else /* !HAVE_ACCEL */
  return GIMP_CPU_ACCEL_NONE;
#endif
}