diff options
Diffstat (limited to 'security/nss/lib/freebl/mpi/mpcpucache.c')
-rw-r--r-- | security/nss/lib/freebl/mpi/mpcpucache.c | 788 |
1 files changed, 788 insertions, 0 deletions
diff --git a/security/nss/lib/freebl/mpi/mpcpucache.c b/security/nss/lib/freebl/mpi/mpcpucache.c new file mode 100644 index 0000000000..ddc21ec1cb --- /dev/null +++ b/security/nss/lib/freebl/mpi/mpcpucache.c @@ -0,0 +1,788 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "mpi.h" +#include "prtypes.h" + +/* + * This file implements a single function: s_mpi_getProcessorLineSize(); + * s_mpi_getProcessorLineSize() returns the size in bytes of the cache line + * if a cache exists, or zero if there is no cache. If more than one + * cache line exists, it should return the smallest line size (which is + * usually the L1 cache). + * + * mp_modexp uses this information to make sure that private key information + * isn't being leaked through the cache. + * + * Currently the file returns good data for most modern x86 processors, and + * reasonable data on 64-bit ppc processors. All other processors are assumed + * to have a cache line size of 32 bytes. + * + */ + +#if defined(i386) || defined(__i386) || defined(__X86__) || defined(_M_IX86) || defined(__x86_64__) || defined(__x86_64) || defined(_M_AMD64) +/* X86 processors have special instructions that tell us about the cache */ +#include "string.h" + +#if defined(__x86_64__) || defined(__x86_64) || defined(_M_AMD64) +#define AMD_64 1 +#endif + +/* Generic CPUID function */ +#if defined(AMD_64) + +#if defined(__GNUC__) + +void +freebl_cpuid(unsigned long op, unsigned long *eax, + unsigned long *ebx, unsigned long *ecx, + unsigned long *edx) +{ + __asm__("xor %%ecx, %%ecx\n\t" + "cpuid\n\t" + : "=a"(*eax), + "=b"(*ebx), + "=c"(*ecx), + "=d"(*edx) + : "0"(op)); +} + +#elif defined(_MSC_VER) + +#include <intrin.h> + +void +freebl_cpuid(unsigned long op, unsigned long *eax, + unsigned long *ebx, unsigned long *ecx, + unsigned long *edx) +{ + int intrinsic_out[4]; + + __cpuid(intrinsic_out, op); + *eax = intrinsic_out[0]; + *ebx = intrinsic_out[1]; + *ecx = intrinsic_out[2]; + *edx = intrinsic_out[3]; +} + +#endif + +#else /* !defined(AMD_64) */ + +/* x86 */ + +#if defined(__GNUC__) +void +freebl_cpuid(unsigned long op, unsigned long *eax, + unsigned long *ebx, unsigned long *ecx, + unsigned long *edx) +{ + /* Some older processors don't fill the ecx register with cpuid, so clobber it + * before calling cpuid, so that there's no risk of picking random bits that + * erroneously indicate that absent CPU features are present. + * Also, GCC isn't smart enough to save the ebx PIC register on its own + * in this case, so do it by hand. Use edi to store ebx and pass the + * value returned in ebx from cpuid through edi. */ + __asm__("xor %%ecx, %%ecx\n\t" + "mov %%ebx,%%edi\n\t" + "cpuid\n\t" + "xchgl %%ebx,%%edi\n\t" + : "=a"(*eax), + "=D"(*ebx), + "=c"(*ecx), + "=d"(*edx) + : "0"(op)); +} + +/* + * try flipping a processor flag to determine CPU type + */ +static unsigned long +changeFlag(unsigned long flag) +{ + unsigned long changedFlags, originalFlags; + __asm__("pushfl\n\t" /* get the flags */ + "popl %0\n\t" + "movl %0,%1\n\t" /* save the original flags */ + "xorl %2,%0\n\t" /* flip the bit */ + "pushl %0\n\t" /* set the flags */ + "popfl\n\t" + "pushfl\n\t" /* get the flags again (for return) */ + "popl %0\n\t" + "pushl %1\n\t" /* restore the original flags */ + "popfl\n\t" + : "=r"(changedFlags), + "=r"(originalFlags), + "=r"(flag) + : "2"(flag)); + return changedFlags ^ originalFlags; +} + +#elif defined(_MSC_VER) + +/* + * windows versions of the above assembler + */ +#define wcpuid __asm __emit 0fh __asm __emit 0a2h +void +freebl_cpuid(unsigned long op, unsigned long *Reax, + unsigned long *Rebx, unsigned long *Recx, unsigned long *Redx) +{ + unsigned long Leax, Lebx, Lecx, Ledx; + __asm { + pushad + xor ecx,ecx + mov eax,op + wcpuid + mov Leax,eax + mov Lebx,ebx + mov Lecx,ecx + mov Ledx,edx + popad + } + *Reax = Leax; + *Rebx = Lebx; + *Recx = Lecx; + *Redx = Ledx; +} + +static unsigned long +changeFlag(unsigned long flag) +{ + unsigned long changedFlags, originalFlags; + __asm { + push eax + push ebx + pushfd /* get the flags */ + pop eax + push eax /* save the flags on the stack */ + mov originalFlags,eax /* save the original flags */ + mov ebx,flag + xor eax,ebx /* flip the bit */ + push eax /* set the flags */ + popfd + pushfd /* get the flags again (for return) */ + pop eax + popfd /* restore the original flags */ + mov changedFlags,eax + pop ebx + pop eax + } + return changedFlags ^ originalFlags; +} +#endif + +#endif + +#if !defined(AMD_64) +#define AC_FLAG 0x40000 +#define ID_FLAG 0x200000 + +/* 386 processors can't flip the AC_FLAG, intel AP Note AP-485 */ +static int +is386() +{ + return changeFlag(AC_FLAG) == 0; +} + +/* 486 processors can't flip the ID_FLAG, intel AP Note AP-485 */ +static int +is486() +{ + return changeFlag(ID_FLAG) == 0; +} +#endif + +/* + * table for Intel Cache. + * See Intel Application Note AP-485 for more information + */ + +typedef unsigned char CacheTypeEntry; + +typedef enum { + Cache_NONE = 0, + Cache_UNKNOWN = 1, + Cache_TLB = 2, + Cache_TLBi = 3, + Cache_TLBd = 4, + Cache_Trace = 5, + Cache_L1 = 6, + Cache_L1i = 7, + Cache_L1d = 8, + Cache_L2 = 9, + Cache_L2i = 10, + Cache_L2d = 11, + Cache_L3 = 12, + Cache_L3i = 13, + Cache_L3d = 14 +} CacheType; + +struct _cache { + CacheTypeEntry type; + unsigned char lineSize; +}; +static const struct _cache CacheMap[256] = { + /* 00 */ { Cache_NONE, 0 }, + /* 01 */ { Cache_TLBi, 0 }, + /* 02 */ { Cache_TLBi, 0 }, + /* 03 */ { Cache_TLBd, 0 }, + /* 04 */ { + Cache_TLBd, + }, + /* 05 */ { Cache_UNKNOWN, 0 }, + /* 06 */ { Cache_L1i, 32 }, + /* 07 */ { Cache_UNKNOWN, 0 }, + /* 08 */ { Cache_L1i, 32 }, + /* 09 */ { Cache_UNKNOWN, 0 }, + /* 0a */ { Cache_L1d, 32 }, + /* 0b */ { Cache_UNKNOWN, 0 }, + /* 0c */ { Cache_L1d, 32 }, + /* 0d */ { Cache_UNKNOWN, 0 }, + /* 0e */ { Cache_UNKNOWN, 0 }, + /* 0f */ { Cache_UNKNOWN, 0 }, + /* 10 */ { Cache_UNKNOWN, 0 }, + /* 11 */ { Cache_UNKNOWN, 0 }, + /* 12 */ { Cache_UNKNOWN, 0 }, + /* 13 */ { Cache_UNKNOWN, 0 }, + /* 14 */ { Cache_UNKNOWN, 0 }, + /* 15 */ { Cache_UNKNOWN, 0 }, + /* 16 */ { Cache_UNKNOWN, 0 }, + /* 17 */ { Cache_UNKNOWN, 0 }, + /* 18 */ { Cache_UNKNOWN, 0 }, + /* 19 */ { Cache_UNKNOWN, 0 }, + /* 1a */ { Cache_UNKNOWN, 0 }, + /* 1b */ { Cache_UNKNOWN, 0 }, + /* 1c */ { Cache_UNKNOWN, 0 }, + /* 1d */ { Cache_UNKNOWN, 0 }, + /* 1e */ { Cache_UNKNOWN, 0 }, + /* 1f */ { Cache_UNKNOWN, 0 }, + /* 20 */ { Cache_UNKNOWN, 0 }, + /* 21 */ { Cache_UNKNOWN, 0 }, + /* 22 */ { Cache_L3, 64 }, + /* 23 */ { Cache_L3, 64 }, + /* 24 */ { Cache_UNKNOWN, 0 }, + /* 25 */ { Cache_L3, 64 }, + /* 26 */ { Cache_UNKNOWN, 0 }, + /* 27 */ { Cache_UNKNOWN, 0 }, + /* 28 */ { Cache_UNKNOWN, 0 }, + /* 29 */ { Cache_L3, 64 }, + /* 2a */ { Cache_UNKNOWN, 0 }, + /* 2b */ { Cache_UNKNOWN, 0 }, + /* 2c */ { Cache_L1d, 64 }, + /* 2d */ { Cache_UNKNOWN, 0 }, + /* 2e */ { Cache_UNKNOWN, 0 }, + /* 2f */ { Cache_UNKNOWN, 0 }, + /* 30 */ { Cache_L1i, 64 }, + /* 31 */ { Cache_UNKNOWN, 0 }, + /* 32 */ { Cache_UNKNOWN, 0 }, + /* 33 */ { Cache_UNKNOWN, 0 }, + /* 34 */ { Cache_UNKNOWN, 0 }, + /* 35 */ { Cache_UNKNOWN, 0 }, + /* 36 */ { Cache_UNKNOWN, 0 }, + /* 37 */ { Cache_UNKNOWN, 0 }, + /* 38 */ { Cache_UNKNOWN, 0 }, + /* 39 */ { Cache_L2, 64 }, + /* 3a */ { Cache_UNKNOWN, 0 }, + /* 3b */ { Cache_L2, 64 }, + /* 3c */ { Cache_L2, 64 }, + /* 3d */ { Cache_UNKNOWN, 0 }, + /* 3e */ { Cache_UNKNOWN, 0 }, + /* 3f */ { Cache_UNKNOWN, 0 }, + /* 40 */ { Cache_L2, 0 }, + /* 41 */ { Cache_L2, 32 }, + /* 42 */ { Cache_L2, 32 }, + /* 43 */ { Cache_L2, 32 }, + /* 44 */ { Cache_L2, 32 }, + /* 45 */ { Cache_L2, 32 }, + /* 46 */ { Cache_UNKNOWN, 0 }, + /* 47 */ { Cache_UNKNOWN, 0 }, + /* 48 */ { Cache_UNKNOWN, 0 }, + /* 49 */ { Cache_UNKNOWN, 0 }, + /* 4a */ { Cache_UNKNOWN, 0 }, + /* 4b */ { Cache_UNKNOWN, 0 }, + /* 4c */ { Cache_UNKNOWN, 0 }, + /* 4d */ { Cache_UNKNOWN, 0 }, + /* 4e */ { Cache_UNKNOWN, 0 }, + /* 4f */ { Cache_UNKNOWN, 0 }, + /* 50 */ { Cache_TLBi, 0 }, + /* 51 */ { Cache_TLBi, 0 }, + /* 52 */ { Cache_TLBi, 0 }, + /* 53 */ { Cache_UNKNOWN, 0 }, + /* 54 */ { Cache_UNKNOWN, 0 }, + /* 55 */ { Cache_UNKNOWN, 0 }, + /* 56 */ { Cache_UNKNOWN, 0 }, + /* 57 */ { Cache_UNKNOWN, 0 }, + /* 58 */ { Cache_UNKNOWN, 0 }, + /* 59 */ { Cache_UNKNOWN, 0 }, + /* 5a */ { Cache_UNKNOWN, 0 }, + /* 5b */ { Cache_TLBd, 0 }, + /* 5c */ { Cache_TLBd, 0 }, + /* 5d */ { Cache_TLBd, 0 }, + /* 5e */ { Cache_UNKNOWN, 0 }, + /* 5f */ { Cache_UNKNOWN, 0 }, + /* 60 */ { Cache_UNKNOWN, 0 }, + /* 61 */ { Cache_UNKNOWN, 0 }, + /* 62 */ { Cache_UNKNOWN, 0 }, + /* 63 */ { Cache_UNKNOWN, 0 }, + /* 64 */ { Cache_UNKNOWN, 0 }, + /* 65 */ { Cache_UNKNOWN, 0 }, + /* 66 */ { Cache_L1d, 64 }, + /* 67 */ { Cache_L1d, 64 }, + /* 68 */ { Cache_L1d, 64 }, + /* 69 */ { Cache_UNKNOWN, 0 }, + /* 6a */ { Cache_UNKNOWN, 0 }, + /* 6b */ { Cache_UNKNOWN, 0 }, + /* 6c */ { Cache_UNKNOWN, 0 }, + /* 6d */ { Cache_UNKNOWN, 0 }, + /* 6e */ { Cache_UNKNOWN, 0 }, + /* 6f */ { Cache_UNKNOWN, 0 }, + /* 70 */ { Cache_Trace, 1 }, + /* 71 */ { Cache_Trace, 1 }, + /* 72 */ { Cache_Trace, 1 }, + /* 73 */ { Cache_UNKNOWN, 0 }, + /* 74 */ { Cache_UNKNOWN, 0 }, + /* 75 */ { Cache_UNKNOWN, 0 }, + /* 76 */ { Cache_UNKNOWN, 0 }, + /* 77 */ { Cache_UNKNOWN, 0 }, + /* 78 */ { Cache_UNKNOWN, 0 }, + /* 79 */ { Cache_L2, 64 }, + /* 7a */ { Cache_L2, 64 }, + /* 7b */ { Cache_L2, 64 }, + /* 7c */ { Cache_L2, 64 }, + /* 7d */ { Cache_UNKNOWN, 0 }, + /* 7e */ { Cache_UNKNOWN, 0 }, + /* 7f */ { Cache_UNKNOWN, 0 }, + /* 80 */ { Cache_UNKNOWN, 0 }, + /* 81 */ { Cache_UNKNOWN, 0 }, + /* 82 */ { Cache_L2, 32 }, + /* 83 */ { Cache_L2, 32 }, + /* 84 */ { Cache_L2, 32 }, + /* 85 */ { Cache_L2, 32 }, + /* 86 */ { Cache_L2, 64 }, + /* 87 */ { Cache_L2, 64 }, + /* 88 */ { Cache_UNKNOWN, 0 }, + /* 89 */ { Cache_UNKNOWN, 0 }, + /* 8a */ { Cache_UNKNOWN, 0 }, + /* 8b */ { Cache_UNKNOWN, 0 }, + /* 8c */ { Cache_UNKNOWN, 0 }, + /* 8d */ { Cache_UNKNOWN, 0 }, + /* 8e */ { Cache_UNKNOWN, 0 }, + /* 8f */ { Cache_UNKNOWN, 0 }, + /* 90 */ { Cache_UNKNOWN, 0 }, + /* 91 */ { Cache_UNKNOWN, 0 }, + /* 92 */ { Cache_UNKNOWN, 0 }, + /* 93 */ { Cache_UNKNOWN, 0 }, + /* 94 */ { Cache_UNKNOWN, 0 }, + /* 95 */ { Cache_UNKNOWN, 0 }, + /* 96 */ { Cache_UNKNOWN, 0 }, + /* 97 */ { Cache_UNKNOWN, 0 }, + /* 98 */ { Cache_UNKNOWN, 0 }, + /* 99 */ { Cache_UNKNOWN, 0 }, + /* 9a */ { Cache_UNKNOWN, 0 }, + /* 9b */ { Cache_UNKNOWN, 0 }, + /* 9c */ { Cache_UNKNOWN, 0 }, + /* 9d */ { Cache_UNKNOWN, 0 }, + /* 9e */ { Cache_UNKNOWN, 0 }, + /* 9f */ { Cache_UNKNOWN, 0 }, + /* a0 */ { Cache_UNKNOWN, 0 }, + /* a1 */ { Cache_UNKNOWN, 0 }, + /* a2 */ { Cache_UNKNOWN, 0 }, + /* a3 */ { Cache_UNKNOWN, 0 }, + /* a4 */ { Cache_UNKNOWN, 0 }, + /* a5 */ { Cache_UNKNOWN, 0 }, + /* a6 */ { Cache_UNKNOWN, 0 }, + /* a7 */ { Cache_UNKNOWN, 0 }, + /* a8 */ { Cache_UNKNOWN, 0 }, + /* a9 */ { Cache_UNKNOWN, 0 }, + /* aa */ { Cache_UNKNOWN, 0 }, + /* ab */ { Cache_UNKNOWN, 0 }, + /* ac */ { Cache_UNKNOWN, 0 }, + /* ad */ { Cache_UNKNOWN, 0 }, + /* ae */ { Cache_UNKNOWN, 0 }, + /* af */ { Cache_UNKNOWN, 0 }, + /* b0 */ { Cache_TLBi, 0 }, + /* b1 */ { Cache_UNKNOWN, 0 }, + /* b2 */ { Cache_UNKNOWN, 0 }, + /* b3 */ { Cache_TLBd, 0 }, + /* b4 */ { Cache_UNKNOWN, 0 }, + /* b5 */ { Cache_UNKNOWN, 0 }, + /* b6 */ { Cache_UNKNOWN, 0 }, + /* b7 */ { Cache_UNKNOWN, 0 }, + /* b8 */ { Cache_UNKNOWN, 0 }, + /* b9 */ { Cache_UNKNOWN, 0 }, + /* ba */ { Cache_UNKNOWN, 0 }, + /* bb */ { Cache_UNKNOWN, 0 }, + /* bc */ { Cache_UNKNOWN, 0 }, + /* bd */ { Cache_UNKNOWN, 0 }, + /* be */ { Cache_UNKNOWN, 0 }, + /* bf */ { Cache_UNKNOWN, 0 }, + /* c0 */ { Cache_UNKNOWN, 0 }, + /* c1 */ { Cache_UNKNOWN, 0 }, + /* c2 */ { Cache_UNKNOWN, 0 }, + /* c3 */ { Cache_UNKNOWN, 0 }, + /* c4 */ { Cache_UNKNOWN, 0 }, + /* c5 */ { Cache_UNKNOWN, 0 }, + /* c6 */ { Cache_UNKNOWN, 0 }, + /* c7 */ { Cache_UNKNOWN, 0 }, + /* c8 */ { Cache_UNKNOWN, 0 }, + /* c9 */ { Cache_UNKNOWN, 0 }, + /* ca */ { Cache_UNKNOWN, 0 }, + /* cb */ { Cache_UNKNOWN, 0 }, + /* cc */ { Cache_UNKNOWN, 0 }, + /* cd */ { Cache_UNKNOWN, 0 }, + /* ce */ { Cache_UNKNOWN, 0 }, + /* cf */ { Cache_UNKNOWN, 0 }, + /* d0 */ { Cache_UNKNOWN, 0 }, + /* d1 */ { Cache_UNKNOWN, 0 }, + /* d2 */ { Cache_UNKNOWN, 0 }, + /* d3 */ { Cache_UNKNOWN, 0 }, + /* d4 */ { Cache_UNKNOWN, 0 }, + /* d5 */ { Cache_UNKNOWN, 0 }, + /* d6 */ { Cache_UNKNOWN, 0 }, + /* d7 */ { Cache_UNKNOWN, 0 }, + /* d8 */ { Cache_UNKNOWN, 0 }, + /* d9 */ { Cache_UNKNOWN, 0 }, + /* da */ { Cache_UNKNOWN, 0 }, + /* db */ { Cache_UNKNOWN, 0 }, + /* dc */ { Cache_UNKNOWN, 0 }, + /* dd */ { Cache_UNKNOWN, 0 }, + /* de */ { Cache_UNKNOWN, 0 }, + /* df */ { Cache_UNKNOWN, 0 }, + /* e0 */ { Cache_UNKNOWN, 0 }, + /* e1 */ { Cache_UNKNOWN, 0 }, + /* e2 */ { Cache_UNKNOWN, 0 }, + /* e3 */ { Cache_UNKNOWN, 0 }, + /* e4 */ { Cache_UNKNOWN, 0 }, + /* e5 */ { Cache_UNKNOWN, 0 }, + /* e6 */ { Cache_UNKNOWN, 0 }, + /* e7 */ { Cache_UNKNOWN, 0 }, + /* e8 */ { Cache_UNKNOWN, 0 }, + /* e9 */ { Cache_UNKNOWN, 0 }, + /* ea */ { Cache_UNKNOWN, 0 }, + /* eb */ { Cache_UNKNOWN, 0 }, + /* ec */ { Cache_UNKNOWN, 0 }, + /* ed */ { Cache_UNKNOWN, 0 }, + /* ee */ { Cache_UNKNOWN, 0 }, + /* ef */ { Cache_UNKNOWN, 0 }, + /* f0 */ { Cache_UNKNOWN, 0 }, + /* f1 */ { Cache_UNKNOWN, 0 }, + /* f2 */ { Cache_UNKNOWN, 0 }, + /* f3 */ { Cache_UNKNOWN, 0 }, + /* f4 */ { Cache_UNKNOWN, 0 }, + /* f5 */ { Cache_UNKNOWN, 0 }, + /* f6 */ { Cache_UNKNOWN, 0 }, + /* f7 */ { Cache_UNKNOWN, 0 }, + /* f8 */ { Cache_UNKNOWN, 0 }, + /* f9 */ { Cache_UNKNOWN, 0 }, + /* fa */ { Cache_UNKNOWN, 0 }, + /* fb */ { Cache_UNKNOWN, 0 }, + /* fc */ { Cache_UNKNOWN, 0 }, + /* fd */ { Cache_UNKNOWN, 0 }, + /* fe */ { Cache_UNKNOWN, 0 }, + /* ff */ { Cache_UNKNOWN, 0 } +}; + +/* + * use the above table to determine the CacheEntryLineSize. + */ +static void +getIntelCacheEntryLineSize(unsigned long val, int *level, + unsigned long *lineSize) +{ + CacheType type; + + type = CacheMap[val].type; + /* only interested in data caches */ + /* NOTE val = 0x40 is a special value that means no L2 or L3 cache. + * this data check has the side effect of rejecting that entry. If + * that wasn't the case, we could have to reject it explicitly */ + if (CacheMap[val].lineSize == 0) { + return; + } + /* look at the caches, skip types we aren't interested in. + * if we already have a value for a lower level cache, skip the + * current entry */ + if ((type == Cache_L1) || (type == Cache_L1d)) { + *level = 1; + *lineSize = CacheMap[val].lineSize; + } else if ((*level >= 2) && ((type == Cache_L2) || (type == Cache_L2d))) { + *level = 2; + *lineSize = CacheMap[val].lineSize; + } else if ((*level >= 3) && ((type == Cache_L3) || (type == Cache_L3d))) { + *level = 3; + *lineSize = CacheMap[val].lineSize; + } + return; +} + +static void +getIntelRegisterCacheLineSize(unsigned long val, + int *level, unsigned long *lineSize) +{ + getIntelCacheEntryLineSize(val >> 24 & 0xff, level, lineSize); + getIntelCacheEntryLineSize(val >> 16 & 0xff, level, lineSize); + getIntelCacheEntryLineSize(val >> 8 & 0xff, level, lineSize); + getIntelCacheEntryLineSize(val & 0xff, level, lineSize); +} + +/* + * returns '0' if no recognized cache is found, or if the cache + * information is supported by this processor + */ +static unsigned long +getIntelCacheLineSize(int cpuidLevel) +{ + int level = 4; + unsigned long lineSize = 0; + unsigned long eax, ebx, ecx, edx; + int repeat, count; + + if (cpuidLevel < 2) { + return 0; + } + + /* command '2' of the cpuid is intel's cache info call. Each byte of the + * 4 registers contain a potential descriptor for the cache. The CacheMap + * table maps the cache entry with the processor cache. Register 'al' + * contains a count value that cpuid '2' needs to be called in order to + * find all the cache descriptors. Only registers with the high bit set + * to 'zero' have valid descriptors. This code loops through all the + * required calls to cpuid '2' and passes any valid descriptors it finds + * to the getIntelRegisterCacheLineSize code, which breaks the registers + * down into their component descriptors. In the end the lineSize of the + * lowest level cache data cache is returned. */ + freebl_cpuid(2, &eax, &ebx, &ecx, &edx); + repeat = eax & 0xf; + for (count = 0; count < repeat; count++) { + if ((eax & 0x80000000) == 0) { + getIntelRegisterCacheLineSize(eax & 0xffffff00, &level, &lineSize); + } + if ((ebx & 0x80000000) == 0) { + getIntelRegisterCacheLineSize(ebx, &level, &lineSize); + } + if ((ecx & 0x80000000) == 0) { + getIntelRegisterCacheLineSize(ecx, &level, &lineSize); + } + if ((edx & 0x80000000) == 0) { + getIntelRegisterCacheLineSize(edx, &level, &lineSize); + } + if (count + 1 != repeat) { + freebl_cpuid(2, &eax, &ebx, &ecx, &edx); + } + } + return lineSize; +} + +/* + * returns '0' if the cache info is not supported by this processor. + * This is based on the AMD extended cache commands for cpuid. + * (see "AMD Processor Recognition Application Note" Publication 20734). + * Some other processors use the identical scheme. + * (see "Processor Recognition, Transmeta Corporation"). + */ +static unsigned long +getOtherCacheLineSize(unsigned long cpuidLevel) +{ + unsigned long lineSize = 0; + unsigned long eax, ebx, ecx, edx; + + /* get the Extended CPUID level */ + freebl_cpuid(0x80000000, &eax, &ebx, &ecx, &edx); + cpuidLevel = eax; + + if (cpuidLevel >= 0x80000005) { + freebl_cpuid(0x80000005, &eax, &ebx, &ecx, &edx); + lineSize = ecx & 0xff; /* line Size, L1 Data Cache */ + } + return lineSize; +} + +static const char *const manMap[] = { +#define INTEL 0 + "GenuineIntel", +#define AMD 1 + "AuthenticAMD", +#define CYRIX 2 + "CyrixInstead", +#define CENTAUR 2 + "CentaurHauls", +#define NEXGEN 3 + "NexGenDriven", +#define TRANSMETA 4 + "GenuineTMx86", +#define RISE 5 + "RiseRiseRise", +#define UMC 6 + "UMC UMC UMC ", +#define SIS 7 + "Sis Sis Sis ", +#define NATIONAL 8 + "Geode by NSC", +}; + +static const int n_manufacturers = sizeof(manMap) / sizeof(manMap[0]); + +#define MAN_UNKNOWN 9 + +#if !defined(AMD_64) +#define SSE2_FLAG (1 << 26) +unsigned long +s_mpi_is_sse2() +{ + unsigned long eax, ebx, ecx, edx; + + if (is386() || is486()) { + return 0; + } + freebl_cpuid(0, &eax, &ebx, &ecx, &edx); + + /* has no SSE2 extensions */ + if (eax == 0) { + return 0; + } + + freebl_cpuid(1, &eax, &ebx, &ecx, &edx); + return (edx & SSE2_FLAG) == SSE2_FLAG; +} +#endif + +unsigned long +s_mpi_getProcessorLineSize() +{ + unsigned long eax, ebx, ecx, edx; + PRUint32 cpuid[3]; + unsigned long cpuidLevel; + unsigned long cacheLineSize = 0; + int manufacturer = MAN_UNKNOWN; + int i; + char string[13]; + +#if !defined(AMD_64) + if (is386()) { + return 0; /* 386 had no cache */ + } + if (is486()) { + return 32; /* really? need more info */ + } +#endif + + /* Pentium, cpuid command is available */ + freebl_cpuid(0, &eax, &ebx, &ecx, &edx); + cpuidLevel = eax; + /* string holds the CPU's manufacturer ID string - a twelve + * character ASCII string stored in ebx, edx, ecx, and + * the 32-bit extended feature flags are in edx, ecx. + */ + cpuid[0] = ebx; + cpuid[1] = ecx; + cpuid[2] = edx; + memcpy(string, cpuid, sizeof(cpuid)); + string[12] = 0; + + manufacturer = MAN_UNKNOWN; + for (i = 0; i < n_manufacturers; i++) { + if (strcmp(manMap[i], string) == 0) { + manufacturer = i; + } + } + + if (manufacturer == INTEL) { + cacheLineSize = getIntelCacheLineSize(cpuidLevel); + } else { + cacheLineSize = getOtherCacheLineSize(cpuidLevel); + } + /* doesn't support cache info based on cpuid. This means + * an old pentium class processor, which have cache lines of + * 32. If we learn differently, we can use a switch based on + * the Manufacturer id */ + if (cacheLineSize == 0) { + cacheLineSize = 32; + } + return cacheLineSize; +} +#define MPI_GET_PROCESSOR_LINE_SIZE_DEFINED 1 +#endif + +#if defined(__ppc64__) +/* + * Sigh, The PPC has some really nice features to help us determine cache + * size, since it had lots of direct control functions to do so. The POWER + * processor even has an instruction to do this, but it was dropped in + * PowerPC. Unfortunately most of them are not available in user mode. + * + * The dcbz function would be a great way to determine cache line size except + * 1) it only works on write-back memory (it throws an exception otherwise), + * and 2) because so many mac programs 'knew' the processor cache size was + * 32 bytes, they used this instruction as a fast 'zero 32 bytes'. Now the new + * G5 processor has 128 byte cache, but dcbz only clears 32 bytes to keep + * these programs happy. dcbzl work if 64 bit instructions are supported. + * If you know 64 bit instructions are supported, and that stack is + * write-back, you can use this code. + */ +#include "memory.h" + +/* clear the cache line that contains 'array' */ +static inline void +dcbzl(char *array) +{ + __asm__("dcbzl %0, %1" + : /*no result*/ + : "b%"(array), "r"(0) + : "memory"); +} + +#define PPC_DO_ALIGN(x, y) ((char *)((((long long)(x)) + ((y)-1)) & ~((y)-1))) + +#define PPC_MAX_LINE_SIZE 256 +unsigned long +s_mpi_getProcessorLineSize() +{ + char testArray[2 * PPC_MAX_LINE_SIZE + 1]; + char *test; + int i; + + /* align the array on a maximum line size boundary, so we + * know we are starting to clear from the first address */ + test = PPC_DO_ALIGN(testArray, PPC_MAX_LINE_SIZE); + /* set all the values to 1's */ + memset(test, 0xff, PPC_MAX_LINE_SIZE); + /* clear one cache block starting at 'test' */ + dcbzl(test); + + /* find the size of the cleared area, that's our block size */ + for (i = PPC_MAX_LINE_SIZE; i != 0; i = i / 2) { + if (test[i - 1] == 0) { + return i; + } + } + return 0; +} + +#define MPI_GET_PROCESSOR_LINE_SIZE_DEFINED 1 +#endif + +/* + * put other processor and platform specific cache code here + * return the smallest cache line size in bytes on the processor + * (usually the L1 cache). If the OS has a call, this would be + * a greate place to put it. + * + * If there is no cache, return 0; + * + * define MPI_GET_PROCESSOR_LINE_SIZE_DEFINED so the generic functions + * below aren't compiled. + * + */ + +/* If no way to get the processor cache line size has been defined, assume + * it's 32 bytes (most common value, does not significantly impact performance) + */ +#ifndef MPI_GET_PROCESSOR_LINE_SIZE_DEFINED +unsigned long +s_mpi_getProcessorLineSize() +{ + return 32; +} +#endif |