summaryrefslogtreecommitdiffstats
path: root/security/nss/lib/freebl/mpi/mpcpucache.c
diff options
context:
space:
mode:
Diffstat (limited to 'security/nss/lib/freebl/mpi/mpcpucache.c')
-rw-r--r--security/nss/lib/freebl/mpi/mpcpucache.c788
1 files changed, 788 insertions, 0 deletions
diff --git a/security/nss/lib/freebl/mpi/mpcpucache.c b/security/nss/lib/freebl/mpi/mpcpucache.c
new file mode 100644
index 0000000000..f09ff34469
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/mpcpucache.c
@@ -0,0 +1,788 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "mpi.h"
+#include "prtypes.h"
+
+/*
+ * This file implements a single function: s_mpi_getProcessorLineSize();
+ * s_mpi_getProcessorLineSize() returns the size in bytes of the cache line
+ * if a cache exists, or zero if there is no cache. If more than one
+ * cache line exists, it should return the smallest line size (which is
+ * usually the L1 cache).
+ *
+ * mp_modexp uses this information to make sure that private key information
+ * isn't being leaked through the cache.
+ *
+ * Currently the file returns good data for most modern x86 processors, and
+ * reasonable data on 64-bit ppc processors. All other processors are assumed
+ * to have a cache line size of 32 bytes.
+ *
+ */
+
+#if defined(i386) || defined(__i386) || defined(__X86__) || defined(_M_IX86) || defined(__x86_64__) || defined(__x86_64) || defined(_M_AMD64)
+/* X86 processors have special instructions that tell us about the cache */
+#include "string.h"
+
+#if defined(__x86_64__) || defined(__x86_64) || defined(_M_AMD64)
+#define AMD_64 1
+#endif
+
+/* Generic CPUID function */
+#if defined(AMD_64)
+
+#if defined(__GNUC__)
+
+void
+freebl_cpuid(unsigned long op, unsigned long *eax,
+ unsigned long *ebx, unsigned long *ecx,
+ unsigned long *edx)
+{
+ __asm__("xor %%ecx, %%ecx\n\t"
+ "cpuid\n\t"
+ : "=a"(*eax),
+ "=b"(*ebx),
+ "=c"(*ecx),
+ "=d"(*edx)
+ : "0"(op));
+}
+
+#elif defined(_MSC_VER)
+
+#include <intrin.h>
+
+void
+freebl_cpuid(unsigned long op, unsigned long *eax,
+ unsigned long *ebx, unsigned long *ecx,
+ unsigned long *edx)
+{
+ int intrinsic_out[4];
+
+ __cpuid(intrinsic_out, op);
+ *eax = intrinsic_out[0];
+ *ebx = intrinsic_out[1];
+ *ecx = intrinsic_out[2];
+ *edx = intrinsic_out[3];
+}
+
+#endif
+
+#else /* !defined(AMD_64) */
+
+/* x86 */
+
+#if defined(__GNUC__)
+void
+freebl_cpuid(unsigned long op, unsigned long *eax,
+ unsigned long *ebx, unsigned long *ecx,
+ unsigned long *edx)
+{
+ /* Some older processors don't fill the ecx register with cpuid, so clobber it
+ * before calling cpuid, so that there's no risk of picking random bits that
+ * erroneously indicate that absent CPU features are present.
+ * Also, GCC isn't smart enough to save the ebx PIC register on its own
+ * in this case, so do it by hand. Use edi to store ebx and pass the
+ * value returned in ebx from cpuid through edi. */
+ __asm__("xor %%ecx, %%ecx\n\t"
+ "mov %%ebx,%%edi\n\t"
+ "cpuid\n\t"
+ "xchgl %%ebx,%%edi\n\t"
+ : "=a"(*eax),
+ "=D"(*ebx),
+ "=c"(*ecx),
+ "=d"(*edx)
+ : "0"(op));
+}
+
+/*
+ * try flipping a processor flag to determine CPU type
+ */
+static unsigned long
+changeFlag(unsigned long flag)
+{
+ unsigned long changedFlags, originalFlags;
+ __asm__("pushfl\n\t" /* get the flags */
+ "popl %0\n\t"
+ "movl %0,%1\n\t" /* save the original flags */
+ "xorl %2,%0\n\t" /* flip the bit */
+ "pushl %0\n\t" /* set the flags */
+ "popfl\n\t"
+ "pushfl\n\t" /* get the flags again (for return) */
+ "popl %0\n\t"
+ "pushl %1\n\t" /* restore the original flags */
+ "popfl\n\t"
+ : "=r"(changedFlags),
+ "=r"(originalFlags),
+ "=r"(flag)
+ : "2"(flag));
+ return changedFlags ^ originalFlags;
+}
+
+#elif defined(_MSC_VER)
+
+/*
+ * windows versions of the above assembler
+ */
+#define wcpuid __asm __emit 0fh __asm __emit 0a2h
+void
+freebl_cpuid(unsigned long op, unsigned long *Reax,
+ unsigned long *Rebx, unsigned long *Recx, unsigned long *Redx)
+{
+ unsigned long Leax, Lebx, Lecx, Ledx;
+ __asm {
+ pushad
+ xor ecx,ecx
+ mov eax,op
+ wcpuid
+ mov Leax,eax
+ mov Lebx,ebx
+ mov Lecx,ecx
+ mov Ledx,edx
+ popad
+ }
+ *Reax = Leax;
+ *Rebx = Lebx;
+ *Recx = Lecx;
+ *Redx = Ledx;
+}
+
+static unsigned long
+changeFlag(unsigned long flag)
+{
+ unsigned long changedFlags, originalFlags;
+ __asm {
+ push eax
+ push ebx
+ pushfd /* get the flags */
+ pop eax
+ push eax /* save the flags on the stack */
+ mov originalFlags,eax /* save the original flags */
+ mov ebx,flag
+ xor eax,ebx /* flip the bit */
+ push eax /* set the flags */
+ popfd
+ pushfd /* get the flags again (for return) */
+ pop eax
+ popfd /* restore the original flags */
+ mov changedFlags,eax
+ pop ebx
+ pop eax
+ }
+ return changedFlags ^ originalFlags;
+}
+#endif
+
+#endif
+
+#if !defined(AMD_64)
+#define AC_FLAG 0x40000
+#define ID_FLAG 0x200000
+
+/* 386 processors can't flip the AC_FLAG, intel AP Note AP-485 */
+static int
+is386()
+{
+ return changeFlag(AC_FLAG) == 0;
+}
+
+/* 486 processors can't flip the ID_FLAG, intel AP Note AP-485 */
+static int
+is486()
+{
+ return changeFlag(ID_FLAG) == 0;
+}
+#endif
+
+/*
+ * table for Intel Cache.
+ * See Intel Application Note AP-485 for more information
+ */
+
+typedef unsigned char CacheTypeEntry;
+
+typedef enum {
+ Cache_NONE = 0,
+ Cache_UNKNOWN = 1,
+ Cache_TLB = 2,
+ Cache_TLBi = 3,
+ Cache_TLBd = 4,
+ Cache_Trace = 5,
+ Cache_L1 = 6,
+ Cache_L1i = 7,
+ Cache_L1d = 8,
+ Cache_L2 = 9,
+ Cache_L2i = 10,
+ Cache_L2d = 11,
+ Cache_L3 = 12,
+ Cache_L3i = 13,
+ Cache_L3d = 14
+} CacheType;
+
+struct _cache {
+ CacheTypeEntry type;
+ unsigned char lineSize;
+};
+static const struct _cache CacheMap[256] = {
+ /* 00 */ { Cache_NONE, 0 },
+ /* 01 */ { Cache_TLBi, 0 },
+ /* 02 */ { Cache_TLBi, 0 },
+ /* 03 */ { Cache_TLBd, 0 },
+ /* 04 */ {
+ Cache_TLBd,
+ },
+ /* 05 */ { Cache_UNKNOWN, 0 },
+ /* 06 */ { Cache_L1i, 32 },
+ /* 07 */ { Cache_UNKNOWN, 0 },
+ /* 08 */ { Cache_L1i, 32 },
+ /* 09 */ { Cache_UNKNOWN, 0 },
+ /* 0a */ { Cache_L1d, 32 },
+ /* 0b */ { Cache_UNKNOWN, 0 },
+ /* 0c */ { Cache_L1d, 32 },
+ /* 0d */ { Cache_UNKNOWN, 0 },
+ /* 0e */ { Cache_UNKNOWN, 0 },
+ /* 0f */ { Cache_UNKNOWN, 0 },
+ /* 10 */ { Cache_UNKNOWN, 0 },
+ /* 11 */ { Cache_UNKNOWN, 0 },
+ /* 12 */ { Cache_UNKNOWN, 0 },
+ /* 13 */ { Cache_UNKNOWN, 0 },
+ /* 14 */ { Cache_UNKNOWN, 0 },
+ /* 15 */ { Cache_UNKNOWN, 0 },
+ /* 16 */ { Cache_UNKNOWN, 0 },
+ /* 17 */ { Cache_UNKNOWN, 0 },
+ /* 18 */ { Cache_UNKNOWN, 0 },
+ /* 19 */ { Cache_UNKNOWN, 0 },
+ /* 1a */ { Cache_UNKNOWN, 0 },
+ /* 1b */ { Cache_UNKNOWN, 0 },
+ /* 1c */ { Cache_UNKNOWN, 0 },
+ /* 1d */ { Cache_UNKNOWN, 0 },
+ /* 1e */ { Cache_UNKNOWN, 0 },
+ /* 1f */ { Cache_UNKNOWN, 0 },
+ /* 20 */ { Cache_UNKNOWN, 0 },
+ /* 21 */ { Cache_UNKNOWN, 0 },
+ /* 22 */ { Cache_L3, 64 },
+ /* 23 */ { Cache_L3, 64 },
+ /* 24 */ { Cache_UNKNOWN, 0 },
+ /* 25 */ { Cache_L3, 64 },
+ /* 26 */ { Cache_UNKNOWN, 0 },
+ /* 27 */ { Cache_UNKNOWN, 0 },
+ /* 28 */ { Cache_UNKNOWN, 0 },
+ /* 29 */ { Cache_L3, 64 },
+ /* 2a */ { Cache_UNKNOWN, 0 },
+ /* 2b */ { Cache_UNKNOWN, 0 },
+ /* 2c */ { Cache_L1d, 64 },
+ /* 2d */ { Cache_UNKNOWN, 0 },
+ /* 2e */ { Cache_UNKNOWN, 0 },
+ /* 2f */ { Cache_UNKNOWN, 0 },
+ /* 30 */ { Cache_L1i, 64 },
+ /* 31 */ { Cache_UNKNOWN, 0 },
+ /* 32 */ { Cache_UNKNOWN, 0 },
+ /* 33 */ { Cache_UNKNOWN, 0 },
+ /* 34 */ { Cache_UNKNOWN, 0 },
+ /* 35 */ { Cache_UNKNOWN, 0 },
+ /* 36 */ { Cache_UNKNOWN, 0 },
+ /* 37 */ { Cache_UNKNOWN, 0 },
+ /* 38 */ { Cache_UNKNOWN, 0 },
+ /* 39 */ { Cache_L2, 64 },
+ /* 3a */ { Cache_UNKNOWN, 0 },
+ /* 3b */ { Cache_L2, 64 },
+ /* 3c */ { Cache_L2, 64 },
+ /* 3d */ { Cache_UNKNOWN, 0 },
+ /* 3e */ { Cache_UNKNOWN, 0 },
+ /* 3f */ { Cache_UNKNOWN, 0 },
+ /* 40 */ { Cache_L2, 0 },
+ /* 41 */ { Cache_L2, 32 },
+ /* 42 */ { Cache_L2, 32 },
+ /* 43 */ { Cache_L2, 32 },
+ /* 44 */ { Cache_L2, 32 },
+ /* 45 */ { Cache_L2, 32 },
+ /* 46 */ { Cache_UNKNOWN, 0 },
+ /* 47 */ { Cache_UNKNOWN, 0 },
+ /* 48 */ { Cache_UNKNOWN, 0 },
+ /* 49 */ { Cache_UNKNOWN, 0 },
+ /* 4a */ { Cache_UNKNOWN, 0 },
+ /* 4b */ { Cache_UNKNOWN, 0 },
+ /* 4c */ { Cache_UNKNOWN, 0 },
+ /* 4d */ { Cache_UNKNOWN, 0 },
+ /* 4e */ { Cache_UNKNOWN, 0 },
+ /* 4f */ { Cache_UNKNOWN, 0 },
+ /* 50 */ { Cache_TLBi, 0 },
+ /* 51 */ { Cache_TLBi, 0 },
+ /* 52 */ { Cache_TLBi, 0 },
+ /* 53 */ { Cache_UNKNOWN, 0 },
+ /* 54 */ { Cache_UNKNOWN, 0 },
+ /* 55 */ { Cache_UNKNOWN, 0 },
+ /* 56 */ { Cache_UNKNOWN, 0 },
+ /* 57 */ { Cache_UNKNOWN, 0 },
+ /* 58 */ { Cache_UNKNOWN, 0 },
+ /* 59 */ { Cache_UNKNOWN, 0 },
+ /* 5a */ { Cache_UNKNOWN, 0 },
+ /* 5b */ { Cache_TLBd, 0 },
+ /* 5c */ { Cache_TLBd, 0 },
+ /* 5d */ { Cache_TLBd, 0 },
+ /* 5e */ { Cache_UNKNOWN, 0 },
+ /* 5f */ { Cache_UNKNOWN, 0 },
+ /* 60 */ { Cache_UNKNOWN, 0 },
+ /* 61 */ { Cache_UNKNOWN, 0 },
+ /* 62 */ { Cache_UNKNOWN, 0 },
+ /* 63 */ { Cache_UNKNOWN, 0 },
+ /* 64 */ { Cache_UNKNOWN, 0 },
+ /* 65 */ { Cache_UNKNOWN, 0 },
+ /* 66 */ { Cache_L1d, 64 },
+ /* 67 */ { Cache_L1d, 64 },
+ /* 68 */ { Cache_L1d, 64 },
+ /* 69 */ { Cache_UNKNOWN, 0 },
+ /* 6a */ { Cache_UNKNOWN, 0 },
+ /* 6b */ { Cache_UNKNOWN, 0 },
+ /* 6c */ { Cache_UNKNOWN, 0 },
+ /* 6d */ { Cache_UNKNOWN, 0 },
+ /* 6e */ { Cache_UNKNOWN, 0 },
+ /* 6f */ { Cache_UNKNOWN, 0 },
+ /* 70 */ { Cache_Trace, 1 },
+ /* 71 */ { Cache_Trace, 1 },
+ /* 72 */ { Cache_Trace, 1 },
+ /* 73 */ { Cache_UNKNOWN, 0 },
+ /* 74 */ { Cache_UNKNOWN, 0 },
+ /* 75 */ { Cache_UNKNOWN, 0 },
+ /* 76 */ { Cache_UNKNOWN, 0 },
+ /* 77 */ { Cache_UNKNOWN, 0 },
+ /* 78 */ { Cache_UNKNOWN, 0 },
+ /* 79 */ { Cache_L2, 64 },
+ /* 7a */ { Cache_L2, 64 },
+ /* 7b */ { Cache_L2, 64 },
+ /* 7c */ { Cache_L2, 64 },
+ /* 7d */ { Cache_UNKNOWN, 0 },
+ /* 7e */ { Cache_UNKNOWN, 0 },
+ /* 7f */ { Cache_UNKNOWN, 0 },
+ /* 80 */ { Cache_UNKNOWN, 0 },
+ /* 81 */ { Cache_UNKNOWN, 0 },
+ /* 82 */ { Cache_L2, 32 },
+ /* 83 */ { Cache_L2, 32 },
+ /* 84 */ { Cache_L2, 32 },
+ /* 85 */ { Cache_L2, 32 },
+ /* 86 */ { Cache_L2, 64 },
+ /* 87 */ { Cache_L2, 64 },
+ /* 88 */ { Cache_UNKNOWN, 0 },
+ /* 89 */ { Cache_UNKNOWN, 0 },
+ /* 8a */ { Cache_UNKNOWN, 0 },
+ /* 8b */ { Cache_UNKNOWN, 0 },
+ /* 8c */ { Cache_UNKNOWN, 0 },
+ /* 8d */ { Cache_UNKNOWN, 0 },
+ /* 8e */ { Cache_UNKNOWN, 0 },
+ /* 8f */ { Cache_UNKNOWN, 0 },
+ /* 90 */ { Cache_UNKNOWN, 0 },
+ /* 91 */ { Cache_UNKNOWN, 0 },
+ /* 92 */ { Cache_UNKNOWN, 0 },
+ /* 93 */ { Cache_UNKNOWN, 0 },
+ /* 94 */ { Cache_UNKNOWN, 0 },
+ /* 95 */ { Cache_UNKNOWN, 0 },
+ /* 96 */ { Cache_UNKNOWN, 0 },
+ /* 97 */ { Cache_UNKNOWN, 0 },
+ /* 98 */ { Cache_UNKNOWN, 0 },
+ /* 99 */ { Cache_UNKNOWN, 0 },
+ /* 9a */ { Cache_UNKNOWN, 0 },
+ /* 9b */ { Cache_UNKNOWN, 0 },
+ /* 9c */ { Cache_UNKNOWN, 0 },
+ /* 9d */ { Cache_UNKNOWN, 0 },
+ /* 9e */ { Cache_UNKNOWN, 0 },
+ /* 9f */ { Cache_UNKNOWN, 0 },
+ /* a0 */ { Cache_UNKNOWN, 0 },
+ /* a1 */ { Cache_UNKNOWN, 0 },
+ /* a2 */ { Cache_UNKNOWN, 0 },
+ /* a3 */ { Cache_UNKNOWN, 0 },
+ /* a4 */ { Cache_UNKNOWN, 0 },
+ /* a5 */ { Cache_UNKNOWN, 0 },
+ /* a6 */ { Cache_UNKNOWN, 0 },
+ /* a7 */ { Cache_UNKNOWN, 0 },
+ /* a8 */ { Cache_UNKNOWN, 0 },
+ /* a9 */ { Cache_UNKNOWN, 0 },
+ /* aa */ { Cache_UNKNOWN, 0 },
+ /* ab */ { Cache_UNKNOWN, 0 },
+ /* ac */ { Cache_UNKNOWN, 0 },
+ /* ad */ { Cache_UNKNOWN, 0 },
+ /* ae */ { Cache_UNKNOWN, 0 },
+ /* af */ { Cache_UNKNOWN, 0 },
+ /* b0 */ { Cache_TLBi, 0 },
+ /* b1 */ { Cache_UNKNOWN, 0 },
+ /* b2 */ { Cache_UNKNOWN, 0 },
+ /* b3 */ { Cache_TLBd, 0 },
+ /* b4 */ { Cache_UNKNOWN, 0 },
+ /* b5 */ { Cache_UNKNOWN, 0 },
+ /* b6 */ { Cache_UNKNOWN, 0 },
+ /* b7 */ { Cache_UNKNOWN, 0 },
+ /* b8 */ { Cache_UNKNOWN, 0 },
+ /* b9 */ { Cache_UNKNOWN, 0 },
+ /* ba */ { Cache_UNKNOWN, 0 },
+ /* bb */ { Cache_UNKNOWN, 0 },
+ /* bc */ { Cache_UNKNOWN, 0 },
+ /* bd */ { Cache_UNKNOWN, 0 },
+ /* be */ { Cache_UNKNOWN, 0 },
+ /* bf */ { Cache_UNKNOWN, 0 },
+ /* c0 */ { Cache_UNKNOWN, 0 },
+ /* c1 */ { Cache_UNKNOWN, 0 },
+ /* c2 */ { Cache_UNKNOWN, 0 },
+ /* c3 */ { Cache_UNKNOWN, 0 },
+ /* c4 */ { Cache_UNKNOWN, 0 },
+ /* c5 */ { Cache_UNKNOWN, 0 },
+ /* c6 */ { Cache_UNKNOWN, 0 },
+ /* c7 */ { Cache_UNKNOWN, 0 },
+ /* c8 */ { Cache_UNKNOWN, 0 },
+ /* c9 */ { Cache_UNKNOWN, 0 },
+ /* ca */ { Cache_UNKNOWN, 0 },
+ /* cb */ { Cache_UNKNOWN, 0 },
+ /* cc */ { Cache_UNKNOWN, 0 },
+ /* cd */ { Cache_UNKNOWN, 0 },
+ /* ce */ { Cache_UNKNOWN, 0 },
+ /* cf */ { Cache_UNKNOWN, 0 },
+ /* d0 */ { Cache_UNKNOWN, 0 },
+ /* d1 */ { Cache_UNKNOWN, 0 },
+ /* d2 */ { Cache_UNKNOWN, 0 },
+ /* d3 */ { Cache_UNKNOWN, 0 },
+ /* d4 */ { Cache_UNKNOWN, 0 },
+ /* d5 */ { Cache_UNKNOWN, 0 },
+ /* d6 */ { Cache_UNKNOWN, 0 },
+ /* d7 */ { Cache_UNKNOWN, 0 },
+ /* d8 */ { Cache_UNKNOWN, 0 },
+ /* d9 */ { Cache_UNKNOWN, 0 },
+ /* da */ { Cache_UNKNOWN, 0 },
+ /* db */ { Cache_UNKNOWN, 0 },
+ /* dc */ { Cache_UNKNOWN, 0 },
+ /* dd */ { Cache_UNKNOWN, 0 },
+ /* de */ { Cache_UNKNOWN, 0 },
+ /* df */ { Cache_UNKNOWN, 0 },
+ /* e0 */ { Cache_UNKNOWN, 0 },
+ /* e1 */ { Cache_UNKNOWN, 0 },
+ /* e2 */ { Cache_UNKNOWN, 0 },
+ /* e3 */ { Cache_UNKNOWN, 0 },
+ /* e4 */ { Cache_UNKNOWN, 0 },
+ /* e5 */ { Cache_UNKNOWN, 0 },
+ /* e6 */ { Cache_UNKNOWN, 0 },
+ /* e7 */ { Cache_UNKNOWN, 0 },
+ /* e8 */ { Cache_UNKNOWN, 0 },
+ /* e9 */ { Cache_UNKNOWN, 0 },
+ /* ea */ { Cache_UNKNOWN, 0 },
+ /* eb */ { Cache_UNKNOWN, 0 },
+ /* ec */ { Cache_UNKNOWN, 0 },
+ /* ed */ { Cache_UNKNOWN, 0 },
+ /* ee */ { Cache_UNKNOWN, 0 },
+ /* ef */ { Cache_UNKNOWN, 0 },
+ /* f0 */ { Cache_UNKNOWN, 0 },
+ /* f1 */ { Cache_UNKNOWN, 0 },
+ /* f2 */ { Cache_UNKNOWN, 0 },
+ /* f3 */ { Cache_UNKNOWN, 0 },
+ /* f4 */ { Cache_UNKNOWN, 0 },
+ /* f5 */ { Cache_UNKNOWN, 0 },
+ /* f6 */ { Cache_UNKNOWN, 0 },
+ /* f7 */ { Cache_UNKNOWN, 0 },
+ /* f8 */ { Cache_UNKNOWN, 0 },
+ /* f9 */ { Cache_UNKNOWN, 0 },
+ /* fa */ { Cache_UNKNOWN, 0 },
+ /* fb */ { Cache_UNKNOWN, 0 },
+ /* fc */ { Cache_UNKNOWN, 0 },
+ /* fd */ { Cache_UNKNOWN, 0 },
+ /* fe */ { Cache_UNKNOWN, 0 },
+ /* ff */ { Cache_UNKNOWN, 0 }
+};
+
+/*
+ * use the above table to determine the CacheEntryLineSize.
+ */
+static void
+getIntelCacheEntryLineSize(unsigned long val, int *level,
+ unsigned long *lineSize)
+{
+ CacheType type;
+
+ type = CacheMap[val].type;
+ /* only interested in data caches */
+ /* NOTE val = 0x40 is a special value that means no L2 or L3 cache.
+ * this data check has the side effect of rejecting that entry. If
+ * that wasn't the case, we could have to reject it explicitly */
+ if (CacheMap[val].lineSize == 0) {
+ return;
+ }
+ /* look at the caches, skip types we aren't interested in.
+ * if we already have a value for a lower level cache, skip the
+ * current entry */
+ if ((type == Cache_L1) || (type == Cache_L1d)) {
+ *level = 1;
+ *lineSize = CacheMap[val].lineSize;
+ } else if ((*level >= 2) && ((type == Cache_L2) || (type == Cache_L2d))) {
+ *level = 2;
+ *lineSize = CacheMap[val].lineSize;
+ } else if ((*level >= 3) && ((type == Cache_L3) || (type == Cache_L3d))) {
+ *level = 3;
+ *lineSize = CacheMap[val].lineSize;
+ }
+ return;
+}
+
+static void
+getIntelRegisterCacheLineSize(unsigned long val,
+ int *level, unsigned long *lineSize)
+{
+ getIntelCacheEntryLineSize(val >> 24 & 0xff, level, lineSize);
+ getIntelCacheEntryLineSize(val >> 16 & 0xff, level, lineSize);
+ getIntelCacheEntryLineSize(val >> 8 & 0xff, level, lineSize);
+ getIntelCacheEntryLineSize(val & 0xff, level, lineSize);
+}
+
+/*
+ * returns '0' if no recognized cache is found, or if the cache
+ * information is supported by this processor
+ */
+static unsigned long
+getIntelCacheLineSize(int cpuidLevel)
+{
+ int level = 4;
+ unsigned long lineSize = 0;
+ unsigned long eax, ebx, ecx, edx;
+ int repeat, count;
+
+ if (cpuidLevel < 2) {
+ return 0;
+ }
+
+ /* command '2' of the cpuid is intel's cache info call. Each byte of the
+ * 4 registers contain a potential descriptor for the cache. The CacheMap
+ * table maps the cache entry with the processor cache. Register 'al'
+ * contains a count value that cpuid '2' needs to be called in order to
+ * find all the cache descriptors. Only registers with the high bit set
+ * to 'zero' have valid descriptors. This code loops through all the
+ * required calls to cpuid '2' and passes any valid descriptors it finds
+ * to the getIntelRegisterCacheLineSize code, which breaks the registers
+ * down into their component descriptors. In the end the lineSize of the
+ * lowest level cache data cache is returned. */
+ freebl_cpuid(2, &eax, &ebx, &ecx, &edx);
+ repeat = eax & 0xf;
+ for (count = 0; count < repeat; count++) {
+ if ((eax & 0x80000000) == 0) {
+ getIntelRegisterCacheLineSize(eax & 0xffffff00, &level, &lineSize);
+ }
+ if ((ebx & 0x80000000) == 0) {
+ getIntelRegisterCacheLineSize(ebx, &level, &lineSize);
+ }
+ if ((ecx & 0x80000000) == 0) {
+ getIntelRegisterCacheLineSize(ecx, &level, &lineSize);
+ }
+ if ((edx & 0x80000000) == 0) {
+ getIntelRegisterCacheLineSize(edx, &level, &lineSize);
+ }
+ if (count + 1 != repeat) {
+ freebl_cpuid(2, &eax, &ebx, &ecx, &edx);
+ }
+ }
+ return lineSize;
+}
+
+/*
+ * returns '0' if the cache info is not supported by this processor.
+ * This is based on the AMD extended cache commands for cpuid.
+ * (see "AMD Processor Recognition Application Note" Publication 20734).
+ * Some other processors use the identical scheme.
+ * (see "Processor Recognition, Transmeta Corporation").
+ */
+static unsigned long
+getOtherCacheLineSize(unsigned long cpuidLevel)
+{
+ unsigned long lineSize = 0;
+ unsigned long eax, ebx, ecx, edx;
+
+ /* get the Extended CPUID level */
+ freebl_cpuid(0x80000000, &eax, &ebx, &ecx, &edx);
+ cpuidLevel = eax;
+
+ if (cpuidLevel >= 0x80000005) {
+ freebl_cpuid(0x80000005, &eax, &ebx, &ecx, &edx);
+ lineSize = ecx & 0xff; /* line Size, L1 Data Cache */
+ }
+ return lineSize;
+}
+
+static const char *const manMap[] = {
+#define INTEL 0
+ "GenuineIntel",
+#define AMD 1
+ "AuthenticAMD",
+#define CYRIX 2
+ "CyrixInstead",
+#define CENTAUR 2
+ "CentaurHauls",
+#define NEXGEN 3
+ "NexGenDriven",
+#define TRANSMETA 4
+ "GenuineTMx86",
+#define RISE 5
+ "RiseRiseRise",
+#define UMC 6
+ "UMC UMC UMC ",
+#define SIS 7
+ "Sis Sis Sis ",
+#define NATIONAL 8
+ "Geode by NSC",
+};
+
+static const int n_manufacturers = sizeof(manMap) / sizeof(manMap[0]);
+
+#define MAN_UNKNOWN 9
+
+#if !defined(AMD_64)
+#define SSE2_FLAG (1 << 26)
+unsigned long
+s_mpi_is_sse2()
+{
+ unsigned long eax, ebx, ecx, edx;
+
+ if (is386() || is486()) {
+ return 0;
+ }
+ freebl_cpuid(0, &eax, &ebx, &ecx, &edx);
+
+ /* has no SSE2 extensions */
+ if (eax == 0) {
+ return 0;
+ }
+
+ freebl_cpuid(1, &eax, &ebx, &ecx, &edx);
+ return (edx & SSE2_FLAG) == SSE2_FLAG;
+}
+#endif
+
+unsigned long
+s_mpi_getProcessorLineSize()
+{
+ unsigned long eax, ebx, ecx, edx;
+ PRUint32 cpuid[3];
+ unsigned long cpuidLevel;
+ unsigned long cacheLineSize = 0;
+ int manufacturer = MAN_UNKNOWN;
+ int i;
+ char string[13];
+
+#if !defined(AMD_64)
+ if (is386()) {
+ return 0; /* 386 had no cache */
+ }
+ if (is486()) {
+ return 32; /* really? need more info */
+ }
+#endif
+
+ /* Pentium, cpuid command is available */
+ freebl_cpuid(0, &eax, &ebx, &ecx, &edx);
+ cpuidLevel = eax;
+ /* string holds the CPU's manufacturer ID string - a twelve
+ * character ASCII string stored in ebx, edx, ecx, and
+ * the 32-bit extended feature flags are in edx, ecx.
+ */
+ cpuid[0] = ebx;
+ cpuid[1] = ecx;
+ cpuid[2] = edx;
+ memcpy(string, cpuid, sizeof(cpuid));
+ string[12] = 0;
+
+ manufacturer = MAN_UNKNOWN;
+ for (i = 0; i < n_manufacturers; i++) {
+ if (strcmp(manMap[i], string) == 0) {
+ manufacturer = i;
+ }
+ }
+
+ if (manufacturer == INTEL) {
+ cacheLineSize = getIntelCacheLineSize(cpuidLevel);
+ } else {
+ cacheLineSize = getOtherCacheLineSize(cpuidLevel);
+ }
+ /* doesn't support cache info based on cpuid. This means
+ * an old pentium class processor, which have cache lines of
+ * 32. If we learn differently, we can use a switch based on
+ * the Manufacturer id */
+ if (cacheLineSize == 0) {
+ cacheLineSize = 32;
+ }
+ return cacheLineSize;
+}
+#define MPI_GET_PROCESSOR_LINE_SIZE_DEFINED 1
+#endif
+
+#if defined(__ppc64__)
+/*
+ * Sigh, The PPC has some really nice features to help us determine cache
+ * size, since it had lots of direct control functions to do so. The POWER
+ * processor even has an instruction to do this, but it was dropped in
+ * PowerPC. Unfortunately most of them are not available in user mode.
+ *
+ * The dcbz function would be a great way to determine cache line size except
+ * 1) it only works on write-back memory (it throws an exception otherwise),
+ * and 2) because so many mac programs 'knew' the processor cache size was
+ * 32 bytes, they used this instruction as a fast 'zero 32 bytes'. Now the new
+ * G5 processor has 128 byte cache, but dcbz only clears 32 bytes to keep
+ * these programs happy. dcbzl work if 64 bit instructions are supported.
+ * If you know 64 bit instructions are supported, and that stack is
+ * write-back, you can use this code.
+ */
+#include "memory.h"
+
+/* clear the cache line that contains 'array' */
+static inline void
+dcbzl(char *array)
+{
+ register char *a asm("r2") = array;
+ __asm__ __volatile__("dcbzl %0,0"
+ : "=r"(a)
+ : "0"(a));
+}
+
+#define PPC_DO_ALIGN(x, y) ((char *)((((long long)(x)) + ((y)-1)) & ~((y)-1)))
+
+#define PPC_MAX_LINE_SIZE 256
+unsigned long
+s_mpi_getProcessorLineSize()
+{
+ char testArray[2 * PPC_MAX_LINE_SIZE + 1];
+ char *test;
+ int i;
+
+ /* align the array on a maximum line size boundary, so we
+ * know we are starting to clear from the first address */
+ test = PPC_DO_ALIGN(testArray, PPC_MAX_LINE_SIZE);
+ /* set all the values to 1's */
+ memset(test, 0xff, PPC_MAX_LINE_SIZE);
+ /* clear one cache block starting at 'test' */
+ dcbzl(test);
+
+ /* find the size of the cleared area, that's our block size */
+ for (i = PPC_MAX_LINE_SIZE; i != 0; i = i / 2) {
+ if (test[i - 1] == 0) {
+ return i;
+ }
+ }
+ return 0;
+}
+
+#define MPI_GET_PROCESSOR_LINE_SIZE_DEFINED 1
+#endif
+
+/*
+ * put other processor and platform specific cache code here
+ * return the smallest cache line size in bytes on the processor
+ * (usually the L1 cache). If the OS has a call, this would be
+ * a greate place to put it.
+ *
+ * If there is no cache, return 0;
+ *
+ * define MPI_GET_PROCESSOR_LINE_SIZE_DEFINED so the generic functions
+ * below aren't compiled.
+ *
+ */
+
+/* If no way to get the processor cache line size has been defined, assume
+ * it's 32 bytes (most common value, does not significantly impact performance)
+ */
+#ifndef MPI_GET_PROCESSOR_LINE_SIZE_DEFINED
+unsigned long
+s_mpi_getProcessorLineSize()
+{
+ return 32;
+}
+#endif