andre@0: /* This Source Code Form is subject to the terms of the Mozilla Public andre@0: * License, v. 2.0. If a copy of the MPL was not distributed with this andre@0: * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ andre@0: andre@0: #include "mpi.h" andre@0: andre@0: /* andre@0: * This file implements a single function: s_mpi_getProcessorLineSize(); andre@0: * s_mpi_getProcessorLineSize() returns the size in bytes of the cache line andre@0: * if a cache exists, or zero if there is no cache. If more than one andre@0: * cache line exists, it should return the smallest line size (which is andre@0: * usually the L1 cache). andre@0: * andre@0: * mp_modexp uses this information to make sure that private key information andre@0: * isn't being leaked through the cache. andre@0: * andre@0: * Currently the file returns good data for most modern x86 processors, and andre@0: * reasonable data on 64-bit ppc processors. All other processors are assumed andre@0: * to have a cache line size of 32 bytes unless modified by target.mk. andre@0: * andre@0: */ andre@0: andre@0: #if defined(i386) || defined(__i386) || defined(__X86__) || defined (_M_IX86) || defined(__x86_64__) || defined(__x86_64) || defined(_M_AMD64) andre@0: /* X86 processors have special instructions that tell us about the cache */ andre@0: #include "string.h" andre@0: andre@0: #if defined(__x86_64__) || defined(__x86_64) || defined(_M_AMD64) andre@0: #define AMD_64 1 andre@0: #endif andre@0: andre@0: /* Generic CPUID function */ andre@0: #if defined(AMD_64) andre@0: andre@0: #if defined(__GNUC__) andre@0: andre@0: void freebl_cpuid(unsigned long op, unsigned long *eax, andre@0: unsigned long *ebx, unsigned long *ecx, andre@0: unsigned long *edx) andre@0: { andre@0: __asm__("cpuid\n\t" andre@0: : "=a" (*eax), andre@0: "=b" (*ebx), andre@0: "=c" (*ecx), andre@0: "=d" (*edx) andre@0: : "0" (op)); andre@0: } andre@0: andre@0: #elif defined(_MSC_VER) andre@0: andre@0: #include andre@0: andre@0: void freebl_cpuid(unsigned long op, unsigned long *eax, andre@0: unsigned long *ebx, unsigned long *ecx, andre@0: unsigned long *edx) andre@0: { andre@0: int intrinsic_out[4]; andre@0: andre@0: __cpuid(intrinsic_out, op); andre@0: *eax = intrinsic_out[0]; andre@0: *ebx = intrinsic_out[1]; andre@0: *ecx = intrinsic_out[2]; andre@0: *edx = intrinsic_out[3]; andre@0: } andre@0: andre@0: #endif andre@0: andre@0: #else /* !defined(AMD_64) */ andre@0: andre@0: /* x86 */ andre@0: andre@0: #if defined(__GNUC__) andre@0: void freebl_cpuid(unsigned long op, unsigned long *eax, andre@0: unsigned long *ebx, unsigned long *ecx, andre@0: unsigned long *edx) andre@0: { andre@0: /* sigh GCC isn't smart enough to save the ebx PIC register on it's own andre@0: * in this case, so do it by hand. Use edi to store ebx and pass the andre@0: * value returned in ebx from cpuid through edi. */ andre@0: __asm__("mov %%ebx,%%edi\n\t" andre@0: "cpuid\n\t" andre@0: "xchgl %%ebx,%%edi\n\t" andre@0: : "=a" (*eax), andre@0: "=D" (*ebx), andre@0: "=c" (*ecx), andre@0: "=d" (*edx) andre@0: : "0" (op)); andre@0: } andre@0: andre@0: /* andre@0: * try flipping a processor flag to determine CPU type andre@0: */ andre@0: static unsigned long changeFlag(unsigned long flag) andre@0: { andre@0: unsigned long changedFlags, originalFlags; andre@0: __asm__("pushfl\n\t" /* get the flags */ andre@0: "popl %0\n\t" andre@0: "movl %0,%1\n\t" /* save the original flags */ andre@0: "xorl %2,%0\n\t" /* flip the bit */ andre@0: "pushl %0\n\t" /* set the flags */ andre@0: "popfl\n\t" andre@0: "pushfl\n\t" /* get the flags again (for return) */ andre@0: "popl %0\n\t" andre@0: "pushl %1\n\t" /* restore the original flags */ andre@0: "popfl\n\t" andre@0: : "=r" (changedFlags), andre@0: "=r" (originalFlags), andre@0: "=r" (flag) andre@0: : "2" (flag)); andre@0: return changedFlags ^ originalFlags; andre@0: } andre@0: andre@0: #elif defined(_MSC_VER) andre@0: andre@0: /* andre@0: * windows versions of the above assembler andre@0: */ andre@0: #define wcpuid __asm __emit 0fh __asm __emit 0a2h andre@0: void freebl_cpuid(unsigned long op, unsigned long *Reax, andre@0: unsigned long *Rebx, unsigned long *Recx, unsigned long *Redx) andre@0: { andre@0: unsigned long Leax, Lebx, Lecx, Ledx; andre@0: __asm { andre@0: pushad andre@0: mov eax,op andre@0: wcpuid andre@0: mov Leax,eax andre@0: mov Lebx,ebx andre@0: mov Lecx,ecx andre@0: mov Ledx,edx andre@0: popad andre@0: } andre@0: *Reax = Leax; andre@0: *Rebx = Lebx; andre@0: *Recx = Lecx; andre@0: *Redx = Ledx; andre@0: } andre@0: andre@0: static unsigned long changeFlag(unsigned long flag) andre@0: { andre@0: unsigned long changedFlags, originalFlags; andre@0: __asm { andre@0: push eax andre@0: push ebx andre@0: pushfd /* get the flags */ andre@0: pop eax andre@0: push eax /* save the flags on the stack */ andre@0: mov originalFlags,eax /* save the original flags */ andre@0: mov ebx,flag andre@0: xor eax,ebx /* flip the bit */ andre@0: push eax /* set the flags */ andre@0: popfd andre@0: pushfd /* get the flags again (for return) */ andre@0: pop eax andre@0: popfd /* restore the original flags */ andre@0: mov changedFlags,eax andre@0: pop ebx andre@0: pop eax andre@0: } andre@0: return changedFlags ^ originalFlags; andre@0: } andre@0: #endif andre@0: andre@0: #endif andre@0: andre@0: #if !defined(AMD_64) andre@0: #define AC_FLAG 0x40000 andre@0: #define ID_FLAG 0x200000 andre@0: andre@0: /* 386 processors can't flip the AC_FLAG, intel AP Note AP-485 */ andre@0: static int is386() andre@0: { andre@0: return changeFlag(AC_FLAG) == 0; andre@0: } andre@0: andre@0: /* 486 processors can't flip the ID_FLAG, intel AP Note AP-485 */ andre@0: static int is486() andre@0: { andre@0: return changeFlag(ID_FLAG) == 0; andre@0: } andre@0: #endif andre@0: andre@0: andre@0: /* andre@0: * table for Intel Cache. andre@0: * See Intel Application Note AP-485 for more information andre@0: */ andre@0: andre@0: typedef unsigned char CacheTypeEntry; andre@0: andre@0: typedef enum { andre@0: Cache_NONE = 0, andre@0: Cache_UNKNOWN = 1, andre@0: Cache_TLB = 2, andre@0: Cache_TLBi = 3, andre@0: Cache_TLBd = 4, andre@0: Cache_Trace = 5, andre@0: Cache_L1 = 6, andre@0: Cache_L1i = 7, andre@0: Cache_L1d = 8, andre@0: Cache_L2 = 9 , andre@0: Cache_L2i = 10 , andre@0: Cache_L2d = 11 , andre@0: Cache_L3 = 12 , andre@0: Cache_L3i = 13, andre@0: Cache_L3d = 14 andre@0: } CacheType; andre@0: andre@0: struct _cache { andre@0: CacheTypeEntry type; andre@0: unsigned char lineSize; andre@0: }; andre@0: static const struct _cache CacheMap[256] = { andre@0: /* 00 */ {Cache_NONE, 0 }, andre@0: /* 01 */ {Cache_TLBi, 0 }, andre@0: /* 02 */ {Cache_TLBi, 0 }, andre@0: /* 03 */ {Cache_TLBd, 0 }, andre@0: /* 04 */ {Cache_TLBd, }, andre@0: /* 05 */ {Cache_UNKNOWN, 0 }, andre@0: /* 06 */ {Cache_L1i, 32 }, andre@0: /* 07 */ {Cache_UNKNOWN, 0 }, andre@0: /* 08 */ {Cache_L1i, 32 }, andre@0: /* 09 */ {Cache_UNKNOWN, 0 }, andre@0: /* 0a */ {Cache_L1d, 32 }, andre@0: /* 0b */ {Cache_UNKNOWN, 0 }, andre@0: /* 0c */ {Cache_L1d, 32 }, andre@0: /* 0d */ {Cache_UNKNOWN, 0 }, andre@0: /* 0e */ {Cache_UNKNOWN, 0 }, andre@0: /* 0f */ {Cache_UNKNOWN, 0 }, andre@0: /* 10 */ {Cache_UNKNOWN, 0 }, andre@0: /* 11 */ {Cache_UNKNOWN, 0 }, andre@0: /* 12 */ {Cache_UNKNOWN, 0 }, andre@0: /* 13 */ {Cache_UNKNOWN, 0 }, andre@0: /* 14 */ {Cache_UNKNOWN, 0 }, andre@0: /* 15 */ {Cache_UNKNOWN, 0 }, andre@0: /* 16 */ {Cache_UNKNOWN, 0 }, andre@0: /* 17 */ {Cache_UNKNOWN, 0 }, andre@0: /* 18 */ {Cache_UNKNOWN, 0 }, andre@0: /* 19 */ {Cache_UNKNOWN, 0 }, andre@0: /* 1a */ {Cache_UNKNOWN, 0 }, andre@0: /* 1b */ {Cache_UNKNOWN, 0 }, andre@0: /* 1c */ {Cache_UNKNOWN, 0 }, andre@0: /* 1d */ {Cache_UNKNOWN, 0 }, andre@0: /* 1e */ {Cache_UNKNOWN, 0 }, andre@0: /* 1f */ {Cache_UNKNOWN, 0 }, andre@0: /* 20 */ {Cache_UNKNOWN, 0 }, andre@0: /* 21 */ {Cache_UNKNOWN, 0 }, andre@0: /* 22 */ {Cache_L3, 64 }, andre@0: /* 23 */ {Cache_L3, 64 }, andre@0: /* 24 */ {Cache_UNKNOWN, 0 }, andre@0: /* 25 */ {Cache_L3, 64 }, andre@0: /* 26 */ {Cache_UNKNOWN, 0 }, andre@0: /* 27 */ {Cache_UNKNOWN, 0 }, andre@0: /* 28 */ {Cache_UNKNOWN, 0 }, andre@0: /* 29 */ {Cache_L3, 64 }, andre@0: /* 2a */ {Cache_UNKNOWN, 0 }, andre@0: /* 2b */ {Cache_UNKNOWN, 0 }, andre@0: /* 2c */ {Cache_L1d, 64 }, andre@0: /* 2d */ {Cache_UNKNOWN, 0 }, andre@0: /* 2e */ {Cache_UNKNOWN, 0 }, andre@0: /* 2f */ {Cache_UNKNOWN, 0 }, andre@0: /* 30 */ {Cache_L1i, 64 }, andre@0: /* 31 */ {Cache_UNKNOWN, 0 }, andre@0: /* 32 */ {Cache_UNKNOWN, 0 }, andre@0: /* 33 */ {Cache_UNKNOWN, 0 }, andre@0: /* 34 */ {Cache_UNKNOWN, 0 }, andre@0: /* 35 */ {Cache_UNKNOWN, 0 }, andre@0: /* 36 */ {Cache_UNKNOWN, 0 }, andre@0: /* 37 */ {Cache_UNKNOWN, 0 }, andre@0: /* 38 */ {Cache_UNKNOWN, 0 }, andre@0: /* 39 */ {Cache_L2, 64 }, andre@0: /* 3a */ {Cache_UNKNOWN, 0 }, andre@0: /* 3b */ {Cache_L2, 64 }, andre@0: /* 3c */ {Cache_L2, 64 }, andre@0: /* 3d */ {Cache_UNKNOWN, 0 }, andre@0: /* 3e */ {Cache_UNKNOWN, 0 }, andre@0: /* 3f */ {Cache_UNKNOWN, 0 }, andre@0: /* 40 */ {Cache_L2, 0 }, andre@0: /* 41 */ {Cache_L2, 32 }, andre@0: /* 42 */ {Cache_L2, 32 }, andre@0: /* 43 */ {Cache_L2, 32 }, andre@0: /* 44 */ {Cache_L2, 32 }, andre@0: /* 45 */ {Cache_L2, 32 }, andre@0: /* 46 */ {Cache_UNKNOWN, 0 }, andre@0: /* 47 */ {Cache_UNKNOWN, 0 }, andre@0: /* 48 */ {Cache_UNKNOWN, 0 }, andre@0: /* 49 */ {Cache_UNKNOWN, 0 }, andre@0: /* 4a */ {Cache_UNKNOWN, 0 }, andre@0: /* 4b */ {Cache_UNKNOWN, 0 }, andre@0: /* 4c */ {Cache_UNKNOWN, 0 }, andre@0: /* 4d */ {Cache_UNKNOWN, 0 }, andre@0: /* 4e */ {Cache_UNKNOWN, 0 }, andre@0: /* 4f */ {Cache_UNKNOWN, 0 }, andre@0: /* 50 */ {Cache_TLBi, 0 }, andre@0: /* 51 */ {Cache_TLBi, 0 }, andre@0: /* 52 */ {Cache_TLBi, 0 }, andre@0: /* 53 */ {Cache_UNKNOWN, 0 }, andre@0: /* 54 */ {Cache_UNKNOWN, 0 }, andre@0: /* 55 */ {Cache_UNKNOWN, 0 }, andre@0: /* 56 */ {Cache_UNKNOWN, 0 }, andre@0: /* 57 */ {Cache_UNKNOWN, 0 }, andre@0: /* 58 */ {Cache_UNKNOWN, 0 }, andre@0: /* 59 */ {Cache_UNKNOWN, 0 }, andre@0: /* 5a */ {Cache_UNKNOWN, 0 }, andre@0: /* 5b */ {Cache_TLBd, 0 }, andre@0: /* 5c */ {Cache_TLBd, 0 }, andre@0: /* 5d */ {Cache_TLBd, 0 }, andre@0: /* 5e */ {Cache_UNKNOWN, 0 }, andre@0: /* 5f */ {Cache_UNKNOWN, 0 }, andre@0: /* 60 */ {Cache_UNKNOWN, 0 }, andre@0: /* 61 */ {Cache_UNKNOWN, 0 }, andre@0: /* 62 */ {Cache_UNKNOWN, 0 }, andre@0: /* 63 */ {Cache_UNKNOWN, 0 }, andre@0: /* 64 */ {Cache_UNKNOWN, 0 }, andre@0: /* 65 */ {Cache_UNKNOWN, 0 }, andre@0: /* 66 */ {Cache_L1d, 64 }, andre@0: /* 67 */ {Cache_L1d, 64 }, andre@0: /* 68 */ {Cache_L1d, 64 }, andre@0: /* 69 */ {Cache_UNKNOWN, 0 }, andre@0: /* 6a */ {Cache_UNKNOWN, 0 }, andre@0: /* 6b */ {Cache_UNKNOWN, 0 }, andre@0: /* 6c */ {Cache_UNKNOWN, 0 }, andre@0: /* 6d */ {Cache_UNKNOWN, 0 }, andre@0: /* 6e */ {Cache_UNKNOWN, 0 }, andre@0: /* 6f */ {Cache_UNKNOWN, 0 }, andre@0: /* 70 */ {Cache_Trace, 1 }, andre@0: /* 71 */ {Cache_Trace, 1 }, andre@0: /* 72 */ {Cache_Trace, 1 }, andre@0: /* 73 */ {Cache_UNKNOWN, 0 }, andre@0: /* 74 */ {Cache_UNKNOWN, 0 }, andre@0: /* 75 */ {Cache_UNKNOWN, 0 }, andre@0: /* 76 */ {Cache_UNKNOWN, 0 }, andre@0: /* 77 */ {Cache_UNKNOWN, 0 }, andre@0: /* 78 */ {Cache_UNKNOWN, 0 }, andre@0: /* 79 */ {Cache_L2, 64 }, andre@0: /* 7a */ {Cache_L2, 64 }, andre@0: /* 7b */ {Cache_L2, 64 }, andre@0: /* 7c */ {Cache_L2, 64 }, andre@0: /* 7d */ {Cache_UNKNOWN, 0 }, andre@0: /* 7e */ {Cache_UNKNOWN, 0 }, andre@0: /* 7f */ {Cache_UNKNOWN, 0 }, andre@0: /* 80 */ {Cache_UNKNOWN, 0 }, andre@0: /* 81 */ {Cache_UNKNOWN, 0 }, andre@0: /* 82 */ {Cache_L2, 32 }, andre@0: /* 83 */ {Cache_L2, 32 }, andre@0: /* 84 */ {Cache_L2, 32 }, andre@0: /* 85 */ {Cache_L2, 32 }, andre@0: /* 86 */ {Cache_L2, 64 }, andre@0: /* 87 */ {Cache_L2, 64 }, andre@0: /* 88 */ {Cache_UNKNOWN, 0 }, andre@0: /* 89 */ {Cache_UNKNOWN, 0 }, andre@0: /* 8a */ {Cache_UNKNOWN, 0 }, andre@0: /* 8b */ {Cache_UNKNOWN, 0 }, andre@0: /* 8c */ {Cache_UNKNOWN, 0 }, andre@0: /* 8d */ {Cache_UNKNOWN, 0 }, andre@0: /* 8e */ {Cache_UNKNOWN, 0 }, andre@0: /* 8f */ {Cache_UNKNOWN, 0 }, andre@0: /* 90 */ {Cache_UNKNOWN, 0 }, andre@0: /* 91 */ {Cache_UNKNOWN, 0 }, andre@0: /* 92 */ {Cache_UNKNOWN, 0 }, andre@0: /* 93 */ {Cache_UNKNOWN, 0 }, andre@0: /* 94 */ {Cache_UNKNOWN, 0 }, andre@0: /* 95 */ {Cache_UNKNOWN, 0 }, andre@0: /* 96 */ {Cache_UNKNOWN, 0 }, andre@0: /* 97 */ {Cache_UNKNOWN, 0 }, andre@0: /* 98 */ {Cache_UNKNOWN, 0 }, andre@0: /* 99 */ {Cache_UNKNOWN, 0 }, andre@0: /* 9a */ {Cache_UNKNOWN, 0 }, andre@0: /* 9b */ {Cache_UNKNOWN, 0 }, andre@0: /* 9c */ {Cache_UNKNOWN, 0 }, andre@0: /* 9d */ {Cache_UNKNOWN, 0 }, andre@0: /* 9e */ {Cache_UNKNOWN, 0 }, andre@0: /* 9f */ {Cache_UNKNOWN, 0 }, andre@0: /* a0 */ {Cache_UNKNOWN, 0 }, andre@0: /* a1 */ {Cache_UNKNOWN, 0 }, andre@0: /* a2 */ {Cache_UNKNOWN, 0 }, andre@0: /* a3 */ {Cache_UNKNOWN, 0 }, andre@0: /* a4 */ {Cache_UNKNOWN, 0 }, andre@0: /* a5 */ {Cache_UNKNOWN, 0 }, andre@0: /* a6 */ {Cache_UNKNOWN, 0 }, andre@0: /* a7 */ {Cache_UNKNOWN, 0 }, andre@0: /* a8 */ {Cache_UNKNOWN, 0 }, andre@0: /* a9 */ {Cache_UNKNOWN, 0 }, andre@0: /* aa */ {Cache_UNKNOWN, 0 }, andre@0: /* ab */ {Cache_UNKNOWN, 0 }, andre@0: /* ac */ {Cache_UNKNOWN, 0 }, andre@0: /* ad */ {Cache_UNKNOWN, 0 }, andre@0: /* ae */ {Cache_UNKNOWN, 0 }, andre@0: /* af */ {Cache_UNKNOWN, 0 }, andre@0: /* b0 */ {Cache_TLBi, 0 }, andre@0: /* b1 */ {Cache_UNKNOWN, 0 }, andre@0: /* b2 */ {Cache_UNKNOWN, 0 }, andre@0: /* b3 */ {Cache_TLBd, 0 }, andre@0: /* b4 */ {Cache_UNKNOWN, 0 }, andre@0: /* b5 */ {Cache_UNKNOWN, 0 }, andre@0: /* b6 */ {Cache_UNKNOWN, 0 }, andre@0: /* b7 */ {Cache_UNKNOWN, 0 }, andre@0: /* b8 */ {Cache_UNKNOWN, 0 }, andre@0: /* b9 */ {Cache_UNKNOWN, 0 }, andre@0: /* ba */ {Cache_UNKNOWN, 0 }, andre@0: /* bb */ {Cache_UNKNOWN, 0 }, andre@0: /* bc */ {Cache_UNKNOWN, 0 }, andre@0: /* bd */ {Cache_UNKNOWN, 0 }, andre@0: /* be */ {Cache_UNKNOWN, 0 }, andre@0: /* bf */ {Cache_UNKNOWN, 0 }, andre@0: /* c0 */ {Cache_UNKNOWN, 0 }, andre@0: /* c1 */ {Cache_UNKNOWN, 0 }, andre@0: /* c2 */ {Cache_UNKNOWN, 0 }, andre@0: /* c3 */ {Cache_UNKNOWN, 0 }, andre@0: /* c4 */ {Cache_UNKNOWN, 0 }, andre@0: /* c5 */ {Cache_UNKNOWN, 0 }, andre@0: /* c6 */ {Cache_UNKNOWN, 0 }, andre@0: /* c7 */ {Cache_UNKNOWN, 0 }, andre@0: /* c8 */ {Cache_UNKNOWN, 0 }, andre@0: /* c9 */ {Cache_UNKNOWN, 0 }, andre@0: /* ca */ {Cache_UNKNOWN, 0 }, andre@0: /* cb */ {Cache_UNKNOWN, 0 }, andre@0: /* cc */ {Cache_UNKNOWN, 0 }, andre@0: /* cd */ {Cache_UNKNOWN, 0 }, andre@0: /* ce */ {Cache_UNKNOWN, 0 }, andre@0: /* cf */ {Cache_UNKNOWN, 0 }, andre@0: /* d0 */ {Cache_UNKNOWN, 0 }, andre@0: /* d1 */ {Cache_UNKNOWN, 0 }, andre@0: /* d2 */ {Cache_UNKNOWN, 0 }, andre@0: /* d3 */ {Cache_UNKNOWN, 0 }, andre@0: /* d4 */ {Cache_UNKNOWN, 0 }, andre@0: /* d5 */ {Cache_UNKNOWN, 0 }, andre@0: /* d6 */ {Cache_UNKNOWN, 0 }, andre@0: /* d7 */ {Cache_UNKNOWN, 0 }, andre@0: /* d8 */ {Cache_UNKNOWN, 0 }, andre@0: /* d9 */ {Cache_UNKNOWN, 0 }, andre@0: /* da */ {Cache_UNKNOWN, 0 }, andre@0: /* db */ {Cache_UNKNOWN, 0 }, andre@0: /* dc */ {Cache_UNKNOWN, 0 }, andre@0: /* dd */ {Cache_UNKNOWN, 0 }, andre@0: /* de */ {Cache_UNKNOWN, 0 }, andre@0: /* df */ {Cache_UNKNOWN, 0 }, andre@0: /* e0 */ {Cache_UNKNOWN, 0 }, andre@0: /* e1 */ {Cache_UNKNOWN, 0 }, andre@0: /* e2 */ {Cache_UNKNOWN, 0 }, andre@0: /* e3 */ {Cache_UNKNOWN, 0 }, andre@0: /* e4 */ {Cache_UNKNOWN, 0 }, andre@0: /* e5 */ {Cache_UNKNOWN, 0 }, andre@0: /* e6 */ {Cache_UNKNOWN, 0 }, andre@0: /* e7 */ {Cache_UNKNOWN, 0 }, andre@0: /* e8 */ {Cache_UNKNOWN, 0 }, andre@0: /* e9 */ {Cache_UNKNOWN, 0 }, andre@0: /* ea */ {Cache_UNKNOWN, 0 }, andre@0: /* eb */ {Cache_UNKNOWN, 0 }, andre@0: /* ec */ {Cache_UNKNOWN, 0 }, andre@0: /* ed */ {Cache_UNKNOWN, 0 }, andre@0: /* ee */ {Cache_UNKNOWN, 0 }, andre@0: /* ef */ {Cache_UNKNOWN, 0 }, andre@0: /* f0 */ {Cache_UNKNOWN, 0 }, andre@0: /* f1 */ {Cache_UNKNOWN, 0 }, andre@0: /* f2 */ {Cache_UNKNOWN, 0 }, andre@0: /* f3 */ {Cache_UNKNOWN, 0 }, andre@0: /* f4 */ {Cache_UNKNOWN, 0 }, andre@0: /* f5 */ {Cache_UNKNOWN, 0 }, andre@0: /* f6 */ {Cache_UNKNOWN, 0 }, andre@0: /* f7 */ {Cache_UNKNOWN, 0 }, andre@0: /* f8 */ {Cache_UNKNOWN, 0 }, andre@0: /* f9 */ {Cache_UNKNOWN, 0 }, andre@0: /* fa */ {Cache_UNKNOWN, 0 }, andre@0: /* fb */ {Cache_UNKNOWN, 0 }, andre@0: /* fc */ {Cache_UNKNOWN, 0 }, andre@0: /* fd */ {Cache_UNKNOWN, 0 }, andre@0: /* fe */ {Cache_UNKNOWN, 0 }, andre@0: /* ff */ {Cache_UNKNOWN, 0 } andre@0: }; andre@0: andre@0: andre@0: /* andre@0: * use the above table to determine the CacheEntryLineSize. andre@0: */ andre@0: static void andre@0: getIntelCacheEntryLineSize(unsigned long val, int *level, andre@0: unsigned long *lineSize) andre@0: { andre@0: CacheType type; andre@0: andre@0: type = CacheMap[val].type; andre@0: /* only interested in data caches */ andre@0: /* NOTE val = 0x40 is a special value that means no L2 or L3 cache. andre@0: * this data check has the side effect of rejecting that entry. If andre@0: * that wasn't the case, we could have to reject it explicitly */ andre@0: if (CacheMap[val].lineSize == 0) { andre@0: return; andre@0: } andre@0: /* look at the caches, skip types we aren't interested in. andre@0: * if we already have a value for a lower level cache, skip the andre@0: * current entry */ andre@0: if ((type == Cache_L1)|| (type == Cache_L1d)) { andre@0: *level = 1; andre@0: *lineSize = CacheMap[val].lineSize; andre@0: } else if ((*level >= 2) && ((type == Cache_L2) || (type == Cache_L2d))) { andre@0: *level = 2; andre@0: *lineSize = CacheMap[val].lineSize; andre@0: } else if ((*level >= 3) && ((type == Cache_L3) || (type == Cache_L3d))) { andre@0: *level = 3; andre@0: *lineSize = CacheMap[val].lineSize; andre@0: } andre@0: return; andre@0: } andre@0: andre@0: andre@0: static void andre@0: getIntelRegisterCacheLineSize(unsigned long val, andre@0: int *level, unsigned long *lineSize) andre@0: { andre@0: getIntelCacheEntryLineSize(val >> 24 & 0xff, level, lineSize); andre@0: getIntelCacheEntryLineSize(val >> 16 & 0xff, level, lineSize); andre@0: getIntelCacheEntryLineSize(val >> 8 & 0xff, level, lineSize); andre@0: getIntelCacheEntryLineSize(val & 0xff, level, lineSize); andre@0: } andre@0: andre@0: /* andre@0: * returns '0' if no recognized cache is found, or if the cache andre@0: * information is supported by this processor andre@0: */ andre@0: static unsigned long andre@0: getIntelCacheLineSize(int cpuidLevel) andre@0: { andre@0: int level = 4; andre@0: unsigned long lineSize = 0; andre@0: unsigned long eax, ebx, ecx, edx; andre@0: int repeat, count; andre@0: andre@0: if (cpuidLevel < 2) { andre@0: return 0; andre@0: } andre@0: andre@0: /* command '2' of the cpuid is intel's cache info call. Each byte of the andre@0: * 4 registers contain a potential descriptor for the cache. The CacheMap andre@0: * table maps the cache entry with the processor cache. Register 'al' andre@0: * contains a count value that cpuid '2' needs to be called in order to andre@0: * find all the cache descriptors. Only registers with the high bit set andre@0: * to 'zero' have valid descriptors. This code loops through all the andre@0: * required calls to cpuid '2' and passes any valid descriptors it finds andre@0: * to the getIntelRegisterCacheLineSize code, which breaks the registers andre@0: * down into their component descriptors. In the end the lineSize of the andre@0: * lowest level cache data cache is returned. */ andre@0: freebl_cpuid(2, &eax, &ebx, &ecx, &edx); andre@0: repeat = eax & 0xf; andre@0: for (count = 0; count < repeat; count++) { andre@0: if ((eax & 0x80000000) == 0) { andre@0: getIntelRegisterCacheLineSize(eax & 0xffffff00, &level, &lineSize); andre@0: } andre@0: if ((ebx & 0x80000000) == 0) { andre@0: getIntelRegisterCacheLineSize(ebx, &level, &lineSize); andre@0: } andre@0: if ((ecx & 0x80000000) == 0) { andre@0: getIntelRegisterCacheLineSize(ecx, &level, &lineSize); andre@0: } andre@0: if ((edx & 0x80000000) == 0) { andre@0: getIntelRegisterCacheLineSize(edx, &level, &lineSize); andre@0: } andre@0: if (count+1 != repeat) { andre@0: freebl_cpuid(2, &eax, &ebx, &ecx, &edx); andre@0: } andre@0: } andre@0: return lineSize; andre@0: } andre@0: andre@0: /* andre@0: * returns '0' if the cache info is not supported by this processor. andre@0: * This is based on the AMD extended cache commands for cpuid. andre@0: * (see "AMD Processor Recognition Application Note" Publication 20734). andre@0: * Some other processors use the identical scheme. andre@0: * (see "Processor Recognition, Transmeta Corporation"). andre@0: */ andre@0: static unsigned long andre@0: getOtherCacheLineSize(unsigned long cpuidLevel) andre@0: { andre@0: unsigned long lineSize = 0; andre@0: unsigned long eax, ebx, ecx, edx; andre@0: andre@0: /* get the Extended CPUID level */ andre@0: freebl_cpuid(0x80000000, &eax, &ebx, &ecx, &edx); andre@0: cpuidLevel = eax; andre@0: andre@0: if (cpuidLevel >= 0x80000005) { andre@0: freebl_cpuid(0x80000005, &eax, &ebx, &ecx, &edx); andre@0: lineSize = ecx & 0xff; /* line Size, L1 Data Cache */ andre@0: } andre@0: return lineSize; andre@0: } andre@0: andre@0: static const char * const manMap[] = { andre@0: #define INTEL 0 andre@0: "GenuineIntel", andre@0: #define AMD 1 andre@0: "AuthenticAMD", andre@0: #define CYRIX 2 andre@0: "CyrixInstead", andre@0: #define CENTAUR 2 andre@0: "CentaurHauls", andre@0: #define NEXGEN 3 andre@0: "NexGenDriven", andre@0: #define TRANSMETA 4 andre@0: "GenuineTMx86", andre@0: #define RISE 5 andre@0: "RiseRiseRise", andre@0: #define UMC 6 andre@0: "UMC UMC UMC ", andre@0: #define SIS 7 andre@0: "Sis Sis Sis ", andre@0: #define NATIONAL 8 andre@0: "Geode by NSC", andre@0: }; andre@0: andre@0: static const int n_manufacturers = sizeof(manMap)/sizeof(manMap[0]); andre@0: andre@0: andre@0: #define MAN_UNKNOWN 9 andre@0: andre@0: #if !defined(AMD_64) andre@0: #define SSE2_FLAG (1<<26) andre@0: unsigned long andre@0: s_mpi_is_sse2() andre@0: { andre@0: unsigned long eax, ebx, ecx, edx; andre@0: int manufacturer = MAN_UNKNOWN; andre@0: int i; andre@0: char string[13]; andre@0: andre@0: if (is386() || is486()) { andre@0: return 0; andre@0: } andre@0: freebl_cpuid(0, &eax, &ebx, &ecx, &edx); andre@0: /* string holds the CPU's manufacturer ID string - a twelve andre@0: * character ASCII string stored in ebx, edx, ecx, and andre@0: * the 32-bit extended feature flags are in edx, ecx. andre@0: */ andre@0: *(int *)string = ebx; andre@0: *(int *)&string[4] = (int)edx; andre@0: *(int *)&string[8] = (int)ecx; andre@0: string[12] = 0; andre@0: andre@0: /* has no SSE2 extensions */ andre@0: if (eax == 0) { andre@0: return 0; andre@0: } andre@0: andre@0: for (i=0; i < n_manufacturers; i++) { andre@0: if ( strcmp(manMap[i],string) == 0) { andre@0: manufacturer = i; andre@0: break; andre@0: } andre@0: } andre@0: andre@0: freebl_cpuid(1,&eax,&ebx,&ecx,&edx); andre@0: return (edx & SSE2_FLAG) == SSE2_FLAG; andre@0: } andre@0: #endif andre@0: andre@0: unsigned long andre@0: s_mpi_getProcessorLineSize() andre@0: { andre@0: unsigned long eax, ebx, ecx, edx; andre@0: unsigned long cpuidLevel; andre@0: unsigned long cacheLineSize = 0; andre@0: int manufacturer = MAN_UNKNOWN; andre@0: int i; andre@0: char string[65]; andre@0: andre@0: #if !defined(AMD_64) andre@0: if (is386()) { andre@0: return 0; /* 386 had no cache */ andre@0: } if (is486()) { andre@0: return 32; /* really? need more info */ andre@0: } andre@0: #endif andre@0: andre@0: /* Pentium, cpuid command is available */ andre@0: freebl_cpuid(0, &eax, &ebx, &ecx, &edx); andre@0: cpuidLevel = eax; andre@0: /* string holds the CPU's manufacturer ID string - a twelve andre@0: * character ASCII string stored in ebx, edx, ecx, and andre@0: * the 32-bit extended feature flags are in edx, ecx. andre@0: */ andre@0: *(int *)string = ebx; andre@0: *(int *)&string[4] = (int)edx; andre@0: *(int *)&string[8] = (int)ecx; andre@0: string[12] = 0; andre@0: andre@0: manufacturer = MAN_UNKNOWN; andre@0: for (i=0; i < n_manufacturers; i++) { andre@0: if ( strcmp(manMap[i],string) == 0) { andre@0: manufacturer = i; andre@0: } andre@0: } andre@0: andre@0: if (manufacturer == INTEL) { andre@0: cacheLineSize = getIntelCacheLineSize(cpuidLevel); andre@0: } else { andre@0: cacheLineSize = getOtherCacheLineSize(cpuidLevel); andre@0: } andre@0: /* doesn't support cache info based on cpuid. This means andre@0: * an old pentium class processor, which have cache lines of andre@0: * 32. If we learn differently, we can use a switch based on andre@0: * the Manufacturer id */ andre@0: if (cacheLineSize == 0) { andre@0: cacheLineSize = 32; andre@0: } andre@0: return cacheLineSize; andre@0: } andre@0: #define MPI_GET_PROCESSOR_LINE_SIZE_DEFINED 1 andre@0: #endif andre@0: andre@0: #if defined(__ppc64__) andre@0: /* andre@0: * Sigh, The PPC has some really nice features to help us determine cache andre@0: * size, since it had lots of direct control functions to do so. The POWER andre@0: * processor even has an instruction to do this, but it was dropped in andre@0: * PowerPC. Unfortunately most of them are not available in user mode. andre@0: * andre@0: * The dcbz function would be a great way to determine cache line size except andre@0: * 1) it only works on write-back memory (it throws an exception otherwise), andre@0: * and 2) because so many mac programs 'knew' the processor cache size was andre@0: * 32 bytes, they used this instruction as a fast 'zero 32 bytes'. Now the new andre@0: * G5 processor has 128 byte cache, but dcbz only clears 32 bytes to keep andre@0: * these programs happy. dcbzl work if 64 bit instructions are supported. andre@0: * If you know 64 bit instructions are supported, and that stack is andre@0: * write-back, you can use this code. andre@0: */ andre@0: #include "memory.h" andre@0: andre@0: /* clear the cache line that contains 'array' */ andre@0: static inline void dcbzl(char *array) andre@0: { andre@0: register char *a asm("r2") = array; andre@0: __asm__ __volatile__( "dcbzl %0,r0" : "=r" (a): "0"(a) ); andre@0: } andre@0: andre@0: andre@0: #define PPC_DO_ALIGN(x,y) ((char *)\ andre@0: ((((long long) (x))+((y)-1))&~((y)-1))) andre@0: andre@0: #define PPC_MAX_LINE_SIZE 256 andre@0: unsigned long andre@0: s_mpi_getProcessorLineSize() andre@0: { andre@0: char testArray[2*PPC_MAX_LINE_SIZE+1]; andre@0: char *test; andre@0: int i; andre@0: andre@0: /* align the array on a maximum line size boundary, so we andre@0: * know we are starting to clear from the first address */ andre@0: test = PPC_DO_ALIGN(testArray, PPC_MAX_LINE_SIZE); andre@0: /* set all the values to 1's */ andre@0: memset(test, 0xff, PPC_MAX_LINE_SIZE); andre@0: /* clear one cache block starting at 'test' */ andre@0: dcbzl(test); andre@0: andre@0: /* find the size of the cleared area, that's our block size */ andre@0: for (i=PPC_MAX_LINE_SIZE; i != 0; i = i/2) { andre@0: if (test[i-1] == 0) { andre@0: return i; andre@0: } andre@0: } andre@0: return 0; andre@0: } andre@0: andre@0: #define MPI_GET_PROCESSOR_LINE_SIZE_DEFINED 1 andre@0: #endif andre@0: andre@0: andre@0: /* andre@0: * put other processor and platform specific cache code here andre@0: * return the smallest cache line size in bytes on the processor andre@0: * (usually the L1 cache). If the OS has a call, this would be andre@0: * a greate place to put it. andre@0: * andre@0: * If there is no cache, return 0; andre@0: * andre@0: * define MPI_GET_PROCESSOR_LINE_SIZE_DEFINED so the generic functions andre@0: * below aren't compiled. andre@0: * andre@0: */ andre@0: andre@0: andre@0: /* target.mk can define MPI_CACHE_LINE_SIZE if it's common for the family or andre@0: * OS */ andre@0: #if defined(MPI_CACHE_LINE_SIZE) && !defined(MPI_GET_PROCESSOR_LINE_SIZE_DEFINED) andre@0: andre@0: unsigned long andre@0: s_mpi_getProcessorLineSize() andre@0: { andre@0: return MPI_CACHE_LINE_SIZE; andre@0: } andre@0: #define MPI_GET_PROCESSOR_LINE_SIZE_DEFINED 1 andre@0: #endif andre@0: andre@0: andre@0: /* If no way to get the processor cache line size has been defined, assume andre@0: * it's 32 bytes (most common value, does not significantly impact performance) andre@0: */ andre@0: #ifndef MPI_GET_PROCESSOR_LINE_SIZE_DEFINED andre@0: unsigned long andre@0: s_mpi_getProcessorLineSize() andre@0: { andre@0: return 32; andre@0: } andre@0: #endif andre@0: andre@0: #ifdef TEST_IT andre@0: #include andre@0: andre@0: main() andre@0: { andre@0: printf("line size = %d\n", s_mpi_getProcessorLineSize()); andre@0: } andre@0: #endif