summaryrefslogtreecommitdiffstats
path: root/libgimpbase/gimpcpuaccel.c
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--libgimpbase/gimpcpuaccel.c531
1 files changed, 531 insertions, 0 deletions
diff --git a/libgimpbase/gimpcpuaccel.c b/libgimpbase/gimpcpuaccel.c
new file mode 100644
index 0000000..86a2d49
--- /dev/null
+++ b/libgimpbase/gimpcpuaccel.c
@@ -0,0 +1,531 @@
+/* LIBGIMP - The GIMP Library
+ * Copyright (C) 1995-1997 Peter Mattis and Spencer Kimball
+ *
+ * This library is free software: you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 3 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see
+ * <https://www.gnu.org/licenses/>.
+ */
+
+/*
+ * x86 bits Copyright (C) Manish Singh <yosh@gimp.org>
+ */
+
+/*
+ * PPC CPU acceleration detection was taken from DirectFB but seems to be
+ * originating from mpeg2dec with the following copyright:
+ *
+ * Copyright (C) 1999-2001 Aaron Holtzman <aholtzma@ess.engr.uvic.ca>
+ */
+
+#include "config.h"
+
+#include <string.h>
+#include <signal.h>
+#include <setjmp.h>
+
+#include <glib.h>
+
+#include "gimpcpuaccel.h"
+
+
+/**
+ * SECTION: gimpcpuaccel
+ * @title: gimpcpuaccel
+ * @short_description: Functions to query and configure CPU acceleration.
+ *
+ * Functions to query and configure CPU acceleration.
+ **/
+
+
+static GimpCpuAccelFlags cpu_accel (void) G_GNUC_CONST;
+
+
+static gboolean use_cpu_accel = TRUE;
+
+
+/**
+ * gimp_cpu_accel_get_support:
+ *
+ * Query for CPU acceleration support.
+ *
+ * Return value: #GimpCpuAccelFlags as supported by the CPU.
+ *
+ * Since: 2.4
+ */
+GimpCpuAccelFlags
+gimp_cpu_accel_get_support (void)
+{
+ return use_cpu_accel ? cpu_accel () : GIMP_CPU_ACCEL_NONE;
+}
+
+/**
+ * gimp_cpu_accel_set_use:
+ * @use: whether to use CPU acceleration features or not
+ *
+ * This function is for internal use only.
+ *
+ * Since: 2.4
+ */
+void
+gimp_cpu_accel_set_use (gboolean use)
+{
+ use_cpu_accel = use ? TRUE : FALSE;
+}
+
+
+#if defined(ARCH_X86) && defined(USE_MMX) && defined(__GNUC__)
+
+#define HAVE_ACCEL 1
+
+
+typedef enum
+{
+ ARCH_X86_VENDOR_NONE,
+ ARCH_X86_VENDOR_INTEL,
+ ARCH_X86_VENDOR_AMD,
+ ARCH_X86_VENDOR_CENTAUR,
+ ARCH_X86_VENDOR_CYRIX,
+ ARCH_X86_VENDOR_NSC,
+ ARCH_X86_VENDOR_TRANSMETA,
+ ARCH_X86_VENDOR_NEXGEN,
+ ARCH_X86_VENDOR_RISE,
+ ARCH_X86_VENDOR_UMC,
+ ARCH_X86_VENDOR_SIS,
+ ARCH_X86_VENDOR_HYGON,
+ ARCH_X86_VENDOR_UNKNOWN = 0xff
+} X86Vendor;
+
+enum
+{
+ ARCH_X86_INTEL_FEATURE_MMX = 1 << 23,
+ ARCH_X86_INTEL_FEATURE_XMM = 1 << 25,
+ ARCH_X86_INTEL_FEATURE_XMM2 = 1 << 26,
+
+ ARCH_X86_AMD_FEATURE_MMXEXT = 1 << 22,
+ ARCH_X86_AMD_FEATURE_3DNOW = 1 << 31,
+
+ ARCH_X86_CENTAUR_FEATURE_MMX = 1 << 23,
+ ARCH_X86_CENTAUR_FEATURE_MMXEXT = 1 << 24,
+ ARCH_X86_CENTAUR_FEATURE_3DNOW = 1 << 31,
+
+ ARCH_X86_CYRIX_FEATURE_MMX = 1 << 23,
+ ARCH_X86_CYRIX_FEATURE_MMXEXT = 1 << 24
+};
+
+enum
+{
+ ARCH_X86_INTEL_FEATURE_PNI = 1 << 0,
+ ARCH_X86_INTEL_FEATURE_SSSE3 = 1 << 9,
+ ARCH_X86_INTEL_FEATURE_SSE4_1 = 1 << 19,
+ ARCH_X86_INTEL_FEATURE_SSE4_2 = 1 << 20,
+ ARCH_X86_INTEL_FEATURE_AVX = 1 << 28
+};
+
+#if !defined(ARCH_X86_64) && (defined(PIC) || defined(__PIC__))
+#define cpuid(op,eax,ebx,ecx,edx) \
+ __asm__ ("movl %%ebx, %%esi\n\t" \
+ "cpuid\n\t" \
+ "xchgl %%ebx,%%esi" \
+ : "=a" (eax), \
+ "=S" (ebx), \
+ "=c" (ecx), \
+ "=d" (edx) \
+ : "0" (op))
+#else
+#define cpuid(op,eax,ebx,ecx,edx) \
+ __asm__ ("cpuid" \
+ : "=a" (eax), \
+ "=b" (ebx), \
+ "=c" (ecx), \
+ "=d" (edx) \
+ : "0" (op))
+#endif
+
+
+static X86Vendor
+arch_get_vendor (void)
+{
+ guint32 eax, ebx, ecx, edx;
+ union{
+ gchar idaschar[16];
+ int idasint[4];
+ }id;
+
+#ifndef ARCH_X86_64
+ /* Only need to check this on ia32 */
+ __asm__ ("pushfl\n\t"
+ "pushfl\n\t"
+ "popl %0\n\t"
+ "movl %0,%1\n\t"
+ "xorl $0x200000,%0\n\t"
+ "pushl %0\n\t"
+ "popfl\n\t"
+ "pushfl\n\t"
+ "popl %0\n\t"
+ "popfl"
+ : "=a" (eax),
+ "=c" (ecx)
+ :
+ : "cc");
+
+ if (eax == ecx)
+ return ARCH_X86_VENDOR_NONE;
+#endif
+
+ cpuid (0, eax, ebx, ecx, edx);
+
+ if (eax == 0)
+ return ARCH_X86_VENDOR_NONE;
+
+ id.idasint[0] = ebx;
+ id.idasint[1] = edx;
+ id.idasint[2] = ecx;
+
+ id.idaschar[12] = '\0';
+
+#ifdef ARCH_X86_64
+ if (strcmp (id.idaschar, "AuthenticAMD") == 0)
+ return ARCH_X86_VENDOR_AMD;
+ else if (strcmp (id.idaschar, "HygonGenuine") == 0)
+ return ARCH_X86_VENDOR_HYGON;
+ else if (strcmp (id.idaschar, "GenuineIntel") == 0)
+ return ARCH_X86_VENDOR_INTEL;
+#else
+ if (strcmp (id.idaschar, "GenuineIntel") == 0)
+ return ARCH_X86_VENDOR_INTEL;
+ else if (strcmp (id.idaschar, "AuthenticAMD") == 0)
+ return ARCH_X86_VENDOR_AMD;
+ else if (strcmp (id.idaschar, "HygonGenuine") == 0)
+ return ARCH_X86_VENDOR_HYGON;
+ else if (strcmp (id.idaschar, "CentaurHauls") == 0)
+ return ARCH_X86_VENDOR_CENTAUR;
+ else if (strcmp (id.idaschar, "CyrixInstead") == 0)
+ return ARCH_X86_VENDOR_CYRIX;
+ else if (strcmp (id.idaschar, "Geode by NSC") == 0)
+ return ARCH_X86_VENDOR_NSC;
+ else if (strcmp (id.idaschar, "GenuineTMx86") == 0 ||
+ strcmp (id.idaschar, "TransmetaCPU") == 0)
+ return ARCH_X86_VENDOR_TRANSMETA;
+ else if (strcmp (id.idaschar, "NexGenDriven") == 0)
+ return ARCH_X86_VENDOR_NEXGEN;
+ else if (strcmp (id.idaschar, "RiseRiseRise") == 0)
+ return ARCH_X86_VENDOR_RISE;
+ else if (strcmp (id.idaschar, "UMC UMC UMC ") == 0)
+ return ARCH_X86_VENDOR_UMC;
+ else if (strcmp (id.idaschar, "SiS SiS SiS ") == 0)
+ return ARCH_X86_VENDOR_SIS;
+#endif
+
+ return ARCH_X86_VENDOR_UNKNOWN;
+}
+
+static guint32
+arch_accel_intel (void)
+{
+ guint32 caps = 0;
+
+#ifdef USE_MMX
+ {
+ guint32 eax, ebx, ecx, edx;
+
+ cpuid (1, eax, ebx, ecx, edx);
+
+ if ((edx & ARCH_X86_INTEL_FEATURE_MMX) == 0)
+ return 0;
+
+ caps = GIMP_CPU_ACCEL_X86_MMX;
+
+#ifdef USE_SSE
+ if (edx & ARCH_X86_INTEL_FEATURE_XMM)
+ caps |= GIMP_CPU_ACCEL_X86_SSE | GIMP_CPU_ACCEL_X86_MMXEXT;
+
+ if (edx & ARCH_X86_INTEL_FEATURE_XMM2)
+ caps |= GIMP_CPU_ACCEL_X86_SSE2;
+
+ if (ecx & ARCH_X86_INTEL_FEATURE_PNI)
+ caps |= GIMP_CPU_ACCEL_X86_SSE3;
+
+ if (ecx & ARCH_X86_INTEL_FEATURE_SSSE3)
+ caps |= GIMP_CPU_ACCEL_X86_SSSE3;
+
+ if (ecx & ARCH_X86_INTEL_FEATURE_SSE4_1)
+ caps |= GIMP_CPU_ACCEL_X86_SSE4_1;
+
+ if (ecx & ARCH_X86_INTEL_FEATURE_SSE4_2)
+ caps |= GIMP_CPU_ACCEL_X86_SSE4_2;
+
+ if (ecx & ARCH_X86_INTEL_FEATURE_AVX)
+ caps |= GIMP_CPU_ACCEL_X86_AVX;
+#endif /* USE_SSE */
+ }
+#endif /* USE_MMX */
+
+ return caps;
+}
+
+static guint32
+arch_accel_amd (void)
+{
+ guint32 caps;
+
+ caps = arch_accel_intel ();
+
+#ifdef USE_MMX
+ {
+ guint32 eax, ebx, ecx, edx;
+
+ cpuid (0x80000000, eax, ebx, ecx, edx);
+
+ if (eax < 0x80000001)
+ return caps;
+
+#ifdef USE_SSE
+ cpuid (0x80000001, eax, ebx, ecx, edx);
+
+ if (edx & ARCH_X86_AMD_FEATURE_3DNOW)
+ caps |= GIMP_CPU_ACCEL_X86_3DNOW;
+
+ if (edx & ARCH_X86_AMD_FEATURE_MMXEXT)
+ caps |= GIMP_CPU_ACCEL_X86_MMXEXT;
+#endif /* USE_SSE */
+ }
+#endif /* USE_MMX */
+
+ return caps;
+}
+
+static guint32
+arch_accel_centaur (void)
+{
+ guint32 caps;
+
+ caps = arch_accel_intel ();
+
+#ifdef USE_MMX
+ {
+ guint32 eax, ebx, ecx, edx;
+
+ cpuid (0x80000000, eax, ebx, ecx, edx);
+
+ if (eax < 0x80000001)
+ return caps;
+
+ cpuid (0x80000001, eax, ebx, ecx, edx);
+
+ if (edx & ARCH_X86_CENTAUR_FEATURE_MMX)
+ caps |= GIMP_CPU_ACCEL_X86_MMX;
+
+#ifdef USE_SSE
+ if (edx & ARCH_X86_CENTAUR_FEATURE_3DNOW)
+ caps |= GIMP_CPU_ACCEL_X86_3DNOW;
+
+ if (edx & ARCH_X86_CENTAUR_FEATURE_MMXEXT)
+ caps |= GIMP_CPU_ACCEL_X86_MMXEXT;
+#endif /* USE_SSE */
+ }
+#endif /* USE_MMX */
+
+ return caps;
+}
+
+static guint32
+arch_accel_cyrix (void)
+{
+ guint32 caps;
+
+ caps = arch_accel_intel ();
+
+#ifdef USE_MMX
+ {
+ guint32 eax, ebx, ecx, edx;
+
+ cpuid (0, eax, ebx, ecx, edx);
+
+ if (eax != 2)
+ return caps;
+
+ cpuid (0x80000001, eax, ebx, ecx, edx);
+
+ if (edx & ARCH_X86_CYRIX_FEATURE_MMX)
+ caps |= GIMP_CPU_ACCEL_X86_MMX;
+
+#ifdef USE_SSE
+ if (edx & ARCH_X86_CYRIX_FEATURE_MMXEXT)
+ caps |= GIMP_CPU_ACCEL_X86_MMXEXT;
+#endif /* USE_SSE */
+ }
+#endif /* USE_MMX */
+
+ return caps;
+}
+
+#ifdef USE_SSE
+static jmp_buf sigill_return;
+
+static void
+sigill_handler (gint n)
+{
+ longjmp (sigill_return, 1);
+}
+
+static gboolean
+arch_accel_sse_os_support (void)
+{
+ if (setjmp (sigill_return))
+ {
+ return FALSE;
+ }
+ else
+ {
+ signal (SIGILL, sigill_handler);
+ __asm__ __volatile__ ("xorps %xmm0, %xmm0");
+ signal (SIGILL, SIG_DFL);
+ }
+
+ return TRUE;
+}
+#endif /* USE_SSE */
+
+static guint32
+arch_accel (void)
+{
+ guint32 caps;
+ X86Vendor vendor;
+
+ vendor = arch_get_vendor ();
+
+ switch (vendor)
+ {
+ case ARCH_X86_VENDOR_NONE:
+ caps = 0;
+ break;
+
+ case ARCH_X86_VENDOR_AMD:
+ case ARCH_X86_VENDOR_HYGON:
+ caps = arch_accel_amd ();
+ break;
+
+ case ARCH_X86_VENDOR_CENTAUR:
+ caps = arch_accel_centaur ();
+ break;
+
+ case ARCH_X86_VENDOR_CYRIX:
+ case ARCH_X86_VENDOR_NSC:
+ caps = arch_accel_cyrix ();
+ break;
+
+ /* check for what Intel speced, even if UNKNOWN */
+ default:
+ caps = arch_accel_intel ();
+ break;
+ }
+
+#ifdef USE_SSE
+ if ((caps & GIMP_CPU_ACCEL_X86_SSE) && !arch_accel_sse_os_support ())
+ caps &= ~(GIMP_CPU_ACCEL_X86_SSE | GIMP_CPU_ACCEL_X86_SSE2);
+#endif
+
+ return caps;
+}
+
+#endif /* ARCH_X86 && USE_MMX && __GNUC__ */
+
+
+#if defined(ARCH_PPC) && defined (USE_ALTIVEC)
+
+#if defined(HAVE_ALTIVEC_SYSCTL)
+
+#include <sys/sysctl.h>
+
+#define HAVE_ACCEL 1
+
+static guint32
+arch_accel (void)
+{
+ gint sels[2] = { CTL_HW, HW_VECTORUNIT };
+ gboolean has_vu = FALSE;
+ gsize length = sizeof(has_vu);
+ gint err;
+
+ err = sysctl (sels, 2, &has_vu, &length, NULL, 0);
+
+ if (err == 0 && has_vu)
+ return GIMP_CPU_ACCEL_PPC_ALTIVEC;
+
+ return 0;
+}
+
+#elif defined(__GNUC__)
+
+#define HAVE_ACCEL 1
+
+static sigjmp_buf jmpbuf;
+static volatile sig_atomic_t canjump = 0;
+
+static void
+sigill_handler (gint sig)
+{
+ if (!canjump)
+ {
+ signal (sig, SIG_DFL);
+ raise (sig);
+ }
+
+ canjump = 0;
+ siglongjmp (jmpbuf, 1);
+}
+
+static guint32
+arch_accel (void)
+{
+ signal (SIGILL, sigill_handler);
+
+ if (sigsetjmp (jmpbuf, 1))
+ {
+ signal (SIGILL, SIG_DFL);
+ return 0;
+ }
+
+ canjump = 1;
+
+ asm volatile ("mtspr 256, %0\n\t"
+ "vand %%v0, %%v0, %%v0"
+ :
+ : "r" (-1));
+
+ signal (SIGILL, SIG_DFL);
+
+ return GIMP_CPU_ACCEL_PPC_ALTIVEC;
+}
+#endif /* __GNUC__ */
+
+#endif /* ARCH_PPC && USE_ALTIVEC */
+
+
+static GimpCpuAccelFlags
+cpu_accel (void)
+{
+#ifdef HAVE_ACCEL
+ static guint32 accel = ~0U;
+
+ if (accel != ~0U)
+ return accel;
+
+ accel = arch_accel ();
+
+ return (GimpCpuAccelFlags) accel;
+
+#else /* !HAVE_ACCEL */
+ return GIMP_CPU_ACCEL_NONE;
+#endif
+}