diff nss/lib/freebl/mpi/mpcpucache.c @ 0:1e5118fa0cb1

This is NSS with a Cmake Buildsyste To compile a static NSS library for Windows we've used the Chromium-NSS fork and added a Cmake buildsystem to compile it statically for Windows. See README.chromium for chromium changes and README.trustbridge for our modifications.
author Andre Heinecke <andre.heinecke@intevation.de>
date Mon, 28 Jul 2014 10:47:06 +0200
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/nss/lib/freebl/mpi/mpcpucache.c	Mon Jul 28 10:47:06 2014 +0200
@@ -0,0 +1,813 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "mpi.h"
+
+/*
+ * This file implements a single function: s_mpi_getProcessorLineSize();
+ * s_mpi_getProcessorLineSize() returns the size in bytes of the cache line
+ * if a cache exists, or zero if there is no cache. If more than one
+ * cache line exists, it should return the smallest line size (which is 
+ * usually the L1 cache).
+ *
+ * mp_modexp uses this information to make sure that private key information
+ * isn't being leaked through the cache.
+ *
+ * Currently the file returns good data for most modern x86 processors, and
+ * reasonable data on 64-bit ppc processors. All other processors are assumed
+ * to have a cache line size of 32 bytes unless modified by target.mk.
+ * 
+ */
+
+#if defined(i386) || defined(__i386) || defined(__X86__) || defined (_M_IX86) || defined(__x86_64__) || defined(__x86_64) || defined(_M_AMD64)
+/* X86 processors have special instructions that tell us about the cache */
+#include "string.h"
+
+#if defined(__x86_64__) || defined(__x86_64) || defined(_M_AMD64)
+#define AMD_64 1
+#endif
+
+/* Generic CPUID function */
+#if defined(AMD_64)
+
+#if defined(__GNUC__)
+
+void freebl_cpuid(unsigned long op, unsigned long *eax, 
+	                 unsigned long *ebx, unsigned long *ecx, 
+                         unsigned long *edx)
+{
+	__asm__("cpuid\n\t"
+		: "=a" (*eax),
+		  "=b" (*ebx),
+		  "=c" (*ecx),
+		  "=d" (*edx)
+		: "0" (op));
+}
+
+#elif defined(_MSC_VER)
+
+#include <intrin.h>
+
+void freebl_cpuid(unsigned long op, unsigned long *eax, 
+           unsigned long *ebx, unsigned long *ecx, 
+           unsigned long *edx)
+{
+    int intrinsic_out[4];
+
+    __cpuid(intrinsic_out, op);
+    *eax = intrinsic_out[0];
+    *ebx = intrinsic_out[1];
+    *ecx = intrinsic_out[2];
+    *edx = intrinsic_out[3];
+}
+
+#endif
+
+#else /* !defined(AMD_64) */
+
+/* x86 */
+
+#if defined(__GNUC__)
+void freebl_cpuid(unsigned long op, unsigned long *eax, 
+	                 unsigned long *ebx, unsigned long *ecx, 
+                         unsigned long *edx)
+{
+/* sigh GCC isn't smart enough to save the ebx PIC register on it's own
+ * in this case, so do it by hand. Use edi to store ebx and pass the
+ * value returned in ebx from cpuid through edi. */
+	__asm__("mov %%ebx,%%edi\n\t"
+		  "cpuid\n\t"
+		  "xchgl %%ebx,%%edi\n\t"
+		: "=a" (*eax),
+		  "=D" (*ebx),
+		  "=c" (*ecx),
+		  "=d" (*edx)
+		: "0" (op));
+}
+
+/*
+ * try flipping a processor flag to determine CPU type
+ */
+static unsigned long changeFlag(unsigned long flag)
+{
+	unsigned long changedFlags, originalFlags;
+	__asm__("pushfl\n\t"            /* get the flags */
+	        "popl %0\n\t"
+	        "movl %0,%1\n\t"	/* save the original flags */
+	        "xorl %2,%0\n\t" 	/* flip the bit */
+		"pushl %0\n\t"  	/* set the flags */
+	        "popfl\n\t"
+		"pushfl\n\t"		/* get the flags again (for return) */
+		"popl %0\n\t"
+		"pushl %1\n\t"		/* restore the original flags */
+		 "popfl\n\t"
+		: "=r" (changedFlags),
+		  "=r" (originalFlags),
+		  "=r" (flag)
+		: "2" (flag));
+	return changedFlags ^ originalFlags;
+}
+
+#elif defined(_MSC_VER)
+
+/*
+ * windows versions of the above assembler
+ */
+#define wcpuid __asm __emit 0fh __asm __emit 0a2h
+void freebl_cpuid(unsigned long op,    unsigned long *Reax, 
+    unsigned long *Rebx, unsigned long *Recx, unsigned long *Redx)
+{
+        unsigned long  Leax, Lebx, Lecx, Ledx;
+        __asm {
+        pushad
+        mov     eax,op
+        wcpuid
+        mov     Leax,eax
+        mov     Lebx,ebx
+        mov     Lecx,ecx
+        mov     Ledx,edx
+        popad
+        }
+        *Reax = Leax;
+        *Rebx = Lebx;
+        *Recx = Lecx;
+        *Redx = Ledx;
+}
+
+static unsigned long changeFlag(unsigned long flag)
+{
+	unsigned long changedFlags, originalFlags;
+	__asm {
+		push eax
+		push ebx
+		pushfd 	                /* get the flags */
+	        pop  eax
+		push eax		/* save the flags on the stack */
+	        mov  originalFlags,eax  /* save the original flags */
+		mov  ebx,flag
+	        xor  eax,ebx            /* flip the bit */
+		push eax                /* set the flags */
+	        popfd
+		pushfd                  /* get the flags again (for return) */
+		pop  eax	
+		popfd                   /* restore the original flags */
+		mov changedFlags,eax
+		pop ebx
+		pop eax
+	}
+	return changedFlags ^ originalFlags;
+}
+#endif
+
+#endif
+
+#if !defined(AMD_64)
+#define AC_FLAG 0x40000
+#define ID_FLAG 0x200000
+
+/* 386 processors can't flip the AC_FLAG, intel AP Note AP-485 */
+static int is386()
+{
+    return changeFlag(AC_FLAG) == 0;
+}
+
+/* 486 processors can't flip the ID_FLAG, intel AP Note AP-485 */
+static int is486()
+{
+    return changeFlag(ID_FLAG) == 0;
+}
+#endif
+
+
+/*
+ * table for Intel Cache.
+ * See Intel Application Note AP-485 for more information 
+ */
+
+typedef unsigned char CacheTypeEntry;
+
+typedef enum {
+    Cache_NONE    = 0,
+    Cache_UNKNOWN = 1,
+    Cache_TLB     = 2,
+    Cache_TLBi    = 3,
+    Cache_TLBd    = 4,
+    Cache_Trace   = 5,
+    Cache_L1      = 6,
+    Cache_L1i     = 7,
+    Cache_L1d     = 8,
+    Cache_L2      = 9 ,
+    Cache_L2i     = 10 ,
+    Cache_L2d     = 11 ,
+    Cache_L3      = 12 ,
+    Cache_L3i     = 13,
+    Cache_L3d     = 14
+} CacheType;
+
+struct _cache {
+    CacheTypeEntry type;
+    unsigned char lineSize;
+};
+static const struct _cache CacheMap[256] = {
+/* 00 */ {Cache_NONE,    0   },
+/* 01 */ {Cache_TLBi,    0   },
+/* 02 */ {Cache_TLBi,    0   },
+/* 03 */ {Cache_TLBd,    0   },
+/* 04 */ {Cache_TLBd,        },
+/* 05 */ {Cache_UNKNOWN, 0   },
+/* 06 */ {Cache_L1i,     32  },
+/* 07 */ {Cache_UNKNOWN, 0   },
+/* 08 */ {Cache_L1i,     32  },
+/* 09 */ {Cache_UNKNOWN, 0   },
+/* 0a */ {Cache_L1d,     32  },
+/* 0b */ {Cache_UNKNOWN, 0   },
+/* 0c */ {Cache_L1d,     32  },
+/* 0d */ {Cache_UNKNOWN, 0   },
+/* 0e */ {Cache_UNKNOWN, 0   },
+/* 0f */ {Cache_UNKNOWN, 0   },
+/* 10 */ {Cache_UNKNOWN, 0   },
+/* 11 */ {Cache_UNKNOWN, 0   },
+/* 12 */ {Cache_UNKNOWN, 0   },
+/* 13 */ {Cache_UNKNOWN, 0   },
+/* 14 */ {Cache_UNKNOWN, 0   },
+/* 15 */ {Cache_UNKNOWN, 0   },
+/* 16 */ {Cache_UNKNOWN, 0   },
+/* 17 */ {Cache_UNKNOWN, 0   },
+/* 18 */ {Cache_UNKNOWN, 0   },
+/* 19 */ {Cache_UNKNOWN, 0   },
+/* 1a */ {Cache_UNKNOWN, 0   },
+/* 1b */ {Cache_UNKNOWN, 0   },
+/* 1c */ {Cache_UNKNOWN, 0   },
+/* 1d */ {Cache_UNKNOWN, 0   },
+/* 1e */ {Cache_UNKNOWN, 0   },
+/* 1f */ {Cache_UNKNOWN, 0   },
+/* 20 */ {Cache_UNKNOWN, 0   },
+/* 21 */ {Cache_UNKNOWN, 0   },
+/* 22 */ {Cache_L3,      64  },
+/* 23 */ {Cache_L3,      64  },
+/* 24 */ {Cache_UNKNOWN, 0   },
+/* 25 */ {Cache_L3,      64  },
+/* 26 */ {Cache_UNKNOWN, 0   },
+/* 27 */ {Cache_UNKNOWN, 0   },
+/* 28 */ {Cache_UNKNOWN, 0   },
+/* 29 */ {Cache_L3,      64  },
+/* 2a */ {Cache_UNKNOWN, 0   },
+/* 2b */ {Cache_UNKNOWN, 0   },
+/* 2c */ {Cache_L1d,     64  },
+/* 2d */ {Cache_UNKNOWN, 0   },
+/* 2e */ {Cache_UNKNOWN, 0   },
+/* 2f */ {Cache_UNKNOWN, 0   },
+/* 30 */ {Cache_L1i,     64  },
+/* 31 */ {Cache_UNKNOWN, 0   },
+/* 32 */ {Cache_UNKNOWN, 0   },
+/* 33 */ {Cache_UNKNOWN, 0   },
+/* 34 */ {Cache_UNKNOWN, 0   },
+/* 35 */ {Cache_UNKNOWN, 0   },
+/* 36 */ {Cache_UNKNOWN, 0   },
+/* 37 */ {Cache_UNKNOWN, 0   },
+/* 38 */ {Cache_UNKNOWN, 0   },
+/* 39 */ {Cache_L2,      64  },
+/* 3a */ {Cache_UNKNOWN, 0   },
+/* 3b */ {Cache_L2,      64  },
+/* 3c */ {Cache_L2,      64  },
+/* 3d */ {Cache_UNKNOWN, 0   },
+/* 3e */ {Cache_UNKNOWN, 0   },
+/* 3f */ {Cache_UNKNOWN, 0   },
+/* 40 */ {Cache_L2,      0   },
+/* 41 */ {Cache_L2,      32  },
+/* 42 */ {Cache_L2,      32  },
+/* 43 */ {Cache_L2,      32  },
+/* 44 */ {Cache_L2,      32  },
+/* 45 */ {Cache_L2,      32  },
+/* 46 */ {Cache_UNKNOWN, 0   },
+/* 47 */ {Cache_UNKNOWN, 0   },
+/* 48 */ {Cache_UNKNOWN, 0   },
+/* 49 */ {Cache_UNKNOWN, 0   },
+/* 4a */ {Cache_UNKNOWN, 0   },
+/* 4b */ {Cache_UNKNOWN, 0   },
+/* 4c */ {Cache_UNKNOWN, 0   },
+/* 4d */ {Cache_UNKNOWN, 0   },
+/* 4e */ {Cache_UNKNOWN, 0   },
+/* 4f */ {Cache_UNKNOWN, 0   },
+/* 50 */ {Cache_TLBi,    0   },
+/* 51 */ {Cache_TLBi,    0   },
+/* 52 */ {Cache_TLBi,    0   },
+/* 53 */ {Cache_UNKNOWN, 0   },
+/* 54 */ {Cache_UNKNOWN, 0   },
+/* 55 */ {Cache_UNKNOWN, 0   },
+/* 56 */ {Cache_UNKNOWN, 0   },
+/* 57 */ {Cache_UNKNOWN, 0   },
+/* 58 */ {Cache_UNKNOWN, 0   },
+/* 59 */ {Cache_UNKNOWN, 0   },
+/* 5a */ {Cache_UNKNOWN, 0   },
+/* 5b */ {Cache_TLBd,    0   },
+/* 5c */ {Cache_TLBd,    0   },
+/* 5d */ {Cache_TLBd,    0   },
+/* 5e */ {Cache_UNKNOWN, 0   },
+/* 5f */ {Cache_UNKNOWN, 0   },
+/* 60 */ {Cache_UNKNOWN, 0   },
+/* 61 */ {Cache_UNKNOWN, 0   },
+/* 62 */ {Cache_UNKNOWN, 0   },
+/* 63 */ {Cache_UNKNOWN, 0   },
+/* 64 */ {Cache_UNKNOWN, 0   },
+/* 65 */ {Cache_UNKNOWN, 0   },
+/* 66 */ {Cache_L1d,     64  },
+/* 67 */ {Cache_L1d,     64  },
+/* 68 */ {Cache_L1d,     64  },
+/* 69 */ {Cache_UNKNOWN, 0   },
+/* 6a */ {Cache_UNKNOWN, 0   },
+/* 6b */ {Cache_UNKNOWN, 0   },
+/* 6c */ {Cache_UNKNOWN, 0   },
+/* 6d */ {Cache_UNKNOWN, 0   },
+/* 6e */ {Cache_UNKNOWN, 0   },
+/* 6f */ {Cache_UNKNOWN, 0   },
+/* 70 */ {Cache_Trace,   1   },
+/* 71 */ {Cache_Trace,   1   },
+/* 72 */ {Cache_Trace,   1   },
+/* 73 */ {Cache_UNKNOWN, 0   },
+/* 74 */ {Cache_UNKNOWN, 0   },
+/* 75 */ {Cache_UNKNOWN, 0   },
+/* 76 */ {Cache_UNKNOWN, 0   },
+/* 77 */ {Cache_UNKNOWN, 0   },
+/* 78 */ {Cache_UNKNOWN, 0   },
+/* 79 */ {Cache_L2,      64  },
+/* 7a */ {Cache_L2,      64  },
+/* 7b */ {Cache_L2,      64  },
+/* 7c */ {Cache_L2,      64  },
+/* 7d */ {Cache_UNKNOWN, 0   },
+/* 7e */ {Cache_UNKNOWN, 0   },
+/* 7f */ {Cache_UNKNOWN, 0   },
+/* 80 */ {Cache_UNKNOWN, 0   },
+/* 81 */ {Cache_UNKNOWN, 0   },
+/* 82 */ {Cache_L2,      32  },
+/* 83 */ {Cache_L2,      32  },
+/* 84 */ {Cache_L2,      32  },
+/* 85 */ {Cache_L2,      32  },
+/* 86 */ {Cache_L2,      64  },
+/* 87 */ {Cache_L2,      64  },
+/* 88 */ {Cache_UNKNOWN, 0   },
+/* 89 */ {Cache_UNKNOWN, 0   },
+/* 8a */ {Cache_UNKNOWN, 0   },
+/* 8b */ {Cache_UNKNOWN, 0   },
+/* 8c */ {Cache_UNKNOWN, 0   },
+/* 8d */ {Cache_UNKNOWN, 0   },
+/* 8e */ {Cache_UNKNOWN, 0   },
+/* 8f */ {Cache_UNKNOWN, 0   },
+/* 90 */ {Cache_UNKNOWN, 0   },
+/* 91 */ {Cache_UNKNOWN, 0   },
+/* 92 */ {Cache_UNKNOWN, 0   },
+/* 93 */ {Cache_UNKNOWN, 0   },
+/* 94 */ {Cache_UNKNOWN, 0   },
+/* 95 */ {Cache_UNKNOWN, 0   },
+/* 96 */ {Cache_UNKNOWN, 0   },
+/* 97 */ {Cache_UNKNOWN, 0   },
+/* 98 */ {Cache_UNKNOWN, 0   },
+/* 99 */ {Cache_UNKNOWN, 0   },
+/* 9a */ {Cache_UNKNOWN, 0   },
+/* 9b */ {Cache_UNKNOWN, 0   },
+/* 9c */ {Cache_UNKNOWN, 0   },
+/* 9d */ {Cache_UNKNOWN, 0   },
+/* 9e */ {Cache_UNKNOWN, 0   },
+/* 9f */ {Cache_UNKNOWN, 0   },
+/* a0 */ {Cache_UNKNOWN, 0   },
+/* a1 */ {Cache_UNKNOWN, 0   },
+/* a2 */ {Cache_UNKNOWN, 0   },
+/* a3 */ {Cache_UNKNOWN, 0   },
+/* a4 */ {Cache_UNKNOWN, 0   },
+/* a5 */ {Cache_UNKNOWN, 0   },
+/* a6 */ {Cache_UNKNOWN, 0   },
+/* a7 */ {Cache_UNKNOWN, 0   },
+/* a8 */ {Cache_UNKNOWN, 0   },
+/* a9 */ {Cache_UNKNOWN, 0   },
+/* aa */ {Cache_UNKNOWN, 0   },
+/* ab */ {Cache_UNKNOWN, 0   },
+/* ac */ {Cache_UNKNOWN, 0   },
+/* ad */ {Cache_UNKNOWN, 0   },
+/* ae */ {Cache_UNKNOWN, 0   },
+/* af */ {Cache_UNKNOWN, 0   },
+/* b0 */ {Cache_TLBi,    0   },
+/* b1 */ {Cache_UNKNOWN, 0   },
+/* b2 */ {Cache_UNKNOWN, 0   },
+/* b3 */ {Cache_TLBd,    0   },
+/* b4 */ {Cache_UNKNOWN, 0   },
+/* b5 */ {Cache_UNKNOWN, 0   },
+/* b6 */ {Cache_UNKNOWN, 0   },
+/* b7 */ {Cache_UNKNOWN, 0   },
+/* b8 */ {Cache_UNKNOWN, 0   },
+/* b9 */ {Cache_UNKNOWN, 0   },
+/* ba */ {Cache_UNKNOWN, 0   },
+/* bb */ {Cache_UNKNOWN, 0   },
+/* bc */ {Cache_UNKNOWN, 0   },
+/* bd */ {Cache_UNKNOWN, 0   },
+/* be */ {Cache_UNKNOWN, 0   },
+/* bf */ {Cache_UNKNOWN, 0   },
+/* c0 */ {Cache_UNKNOWN, 0   },
+/* c1 */ {Cache_UNKNOWN, 0   },
+/* c2 */ {Cache_UNKNOWN, 0   },
+/* c3 */ {Cache_UNKNOWN, 0   },
+/* c4 */ {Cache_UNKNOWN, 0   },
+/* c5 */ {Cache_UNKNOWN, 0   },
+/* c6 */ {Cache_UNKNOWN, 0   },
+/* c7 */ {Cache_UNKNOWN, 0   },
+/* c8 */ {Cache_UNKNOWN, 0   },
+/* c9 */ {Cache_UNKNOWN, 0   },
+/* ca */ {Cache_UNKNOWN, 0   },
+/* cb */ {Cache_UNKNOWN, 0   },
+/* cc */ {Cache_UNKNOWN, 0   },
+/* cd */ {Cache_UNKNOWN, 0   },
+/* ce */ {Cache_UNKNOWN, 0   },
+/* cf */ {Cache_UNKNOWN, 0   },
+/* d0 */ {Cache_UNKNOWN, 0   },
+/* d1 */ {Cache_UNKNOWN, 0   },
+/* d2 */ {Cache_UNKNOWN, 0   },
+/* d3 */ {Cache_UNKNOWN, 0   },
+/* d4 */ {Cache_UNKNOWN, 0   },
+/* d5 */ {Cache_UNKNOWN, 0   },
+/* d6 */ {Cache_UNKNOWN, 0   },
+/* d7 */ {Cache_UNKNOWN, 0   },
+/* d8 */ {Cache_UNKNOWN, 0   },
+/* d9 */ {Cache_UNKNOWN, 0   },
+/* da */ {Cache_UNKNOWN, 0   },
+/* db */ {Cache_UNKNOWN, 0   },
+/* dc */ {Cache_UNKNOWN, 0   },
+/* dd */ {Cache_UNKNOWN, 0   },
+/* de */ {Cache_UNKNOWN, 0   },
+/* df */ {Cache_UNKNOWN, 0   },
+/* e0 */ {Cache_UNKNOWN, 0   },
+/* e1 */ {Cache_UNKNOWN, 0   },
+/* e2 */ {Cache_UNKNOWN, 0   },
+/* e3 */ {Cache_UNKNOWN, 0   },
+/* e4 */ {Cache_UNKNOWN, 0   },
+/* e5 */ {Cache_UNKNOWN, 0   },
+/* e6 */ {Cache_UNKNOWN, 0   },
+/* e7 */ {Cache_UNKNOWN, 0   },
+/* e8 */ {Cache_UNKNOWN, 0   },
+/* e9 */ {Cache_UNKNOWN, 0   },
+/* ea */ {Cache_UNKNOWN, 0   },
+/* eb */ {Cache_UNKNOWN, 0   },
+/* ec */ {Cache_UNKNOWN, 0   },
+/* ed */ {Cache_UNKNOWN, 0   },
+/* ee */ {Cache_UNKNOWN, 0   },
+/* ef */ {Cache_UNKNOWN, 0   },
+/* f0 */ {Cache_UNKNOWN, 0   },
+/* f1 */ {Cache_UNKNOWN, 0   },
+/* f2 */ {Cache_UNKNOWN, 0   },
+/* f3 */ {Cache_UNKNOWN, 0   },
+/* f4 */ {Cache_UNKNOWN, 0   },
+/* f5 */ {Cache_UNKNOWN, 0   },
+/* f6 */ {Cache_UNKNOWN, 0   },
+/* f7 */ {Cache_UNKNOWN, 0   },
+/* f8 */ {Cache_UNKNOWN, 0   },
+/* f9 */ {Cache_UNKNOWN, 0   },
+/* fa */ {Cache_UNKNOWN, 0   },
+/* fb */ {Cache_UNKNOWN, 0   },
+/* fc */ {Cache_UNKNOWN, 0   },
+/* fd */ {Cache_UNKNOWN, 0   },
+/* fe */ {Cache_UNKNOWN, 0   },
+/* ff */ {Cache_UNKNOWN, 0   }
+};
+
+
+/*
+ * use the above table to determine the CacheEntryLineSize.
+ */
+static void
+getIntelCacheEntryLineSize(unsigned long val, int *level, 
+						unsigned long *lineSize)
+{
+    CacheType type;
+
+    type = CacheMap[val].type;
+    /* only interested in data caches */
+    /* NOTE val = 0x40 is a special value that means no L2 or L3 cache.
+     * this data check has the side effect of rejecting that entry. If
+     * that wasn't the case, we could have to reject it explicitly */
+    if (CacheMap[val].lineSize == 0) {
+	return;
+    }
+    /* look at the caches, skip types we aren't interested in.
+     * if we already have a value for a lower level cache, skip the
+     * current entry */
+    if ((type == Cache_L1)|| (type == Cache_L1d)) {
+	*level = 1;
+	*lineSize = CacheMap[val].lineSize;
+    } else if ((*level >= 2) && ((type == Cache_L2) || (type == Cache_L2d))) {
+	*level = 2;
+	*lineSize = CacheMap[val].lineSize;
+    } else if ((*level >= 3) && ((type == Cache_L3) || (type == Cache_L3d))) {
+	*level = 3;
+	*lineSize = CacheMap[val].lineSize;
+    }
+    return;
+}
+
+
+static void
+getIntelRegisterCacheLineSize(unsigned long val, 
+			int *level, unsigned long *lineSize)
+{
+    getIntelCacheEntryLineSize(val >> 24 & 0xff, level, lineSize);
+    getIntelCacheEntryLineSize(val >> 16 & 0xff, level, lineSize);
+    getIntelCacheEntryLineSize(val >> 8 & 0xff, level, lineSize);
+    getIntelCacheEntryLineSize(val & 0xff, level, lineSize);
+}
+
+/*
+ * returns '0' if no recognized cache is found, or if the cache
+ * information is supported by this processor 
+ */
+static unsigned long
+getIntelCacheLineSize(int cpuidLevel)
+{
+    int level = 4;
+    unsigned long lineSize = 0;
+    unsigned long eax, ebx, ecx, edx;
+    int repeat, count;
+
+    if (cpuidLevel < 2) {
+	return 0;
+    }
+
+    /* command '2' of the cpuid is intel's cache info call. Each byte of the
+     * 4 registers contain a potential descriptor for the cache. The CacheMap	
+     * table maps the cache entry with the processor cache. Register 'al'
+     * contains a count value that cpuid '2' needs to be called in order to 
+     * find all the cache descriptors. Only registers with the high bit set
+     * to 'zero' have valid descriptors. This code loops through all the
+     * required calls to cpuid '2' and passes any valid descriptors it finds
+     * to the getIntelRegisterCacheLineSize code, which breaks the registers
+     * down into their component descriptors. In the end the lineSize of the
+     * lowest level cache data cache is returned. */
+    freebl_cpuid(2, &eax, &ebx, &ecx, &edx);
+    repeat = eax & 0xf;
+    for (count = 0; count < repeat; count++) {
+	if ((eax & 0x80000000) == 0) {
+	    getIntelRegisterCacheLineSize(eax & 0xffffff00, &level, &lineSize);
+	}
+	if ((ebx & 0x80000000) == 0) {
+	    getIntelRegisterCacheLineSize(ebx, &level, &lineSize);
+	}
+	if ((ecx & 0x80000000) == 0) {
+	    getIntelRegisterCacheLineSize(ecx, &level, &lineSize);
+	}
+	if ((edx & 0x80000000) == 0) {
+	    getIntelRegisterCacheLineSize(edx, &level, &lineSize);
+	}
+	if (count+1 != repeat) {
+	    freebl_cpuid(2, &eax, &ebx, &ecx, &edx);
+	}
+    }
+    return lineSize;
+}
+
+/*
+ * returns '0' if the cache info is not supported by this processor.
+ * This is based on the AMD extended cache commands for cpuid. 
+ * (see "AMD Processor Recognition Application Note" Publication 20734).
+ * Some other processors use the identical scheme.
+ * (see "Processor Recognition, Transmeta Corporation").
+ */
+static unsigned long
+getOtherCacheLineSize(unsigned long cpuidLevel)
+{
+    unsigned long lineSize = 0;
+    unsigned long eax, ebx, ecx, edx;
+
+    /* get the Extended CPUID level */
+    freebl_cpuid(0x80000000, &eax, &ebx, &ecx, &edx);
+    cpuidLevel = eax;
+
+    if (cpuidLevel >= 0x80000005) {
+	freebl_cpuid(0x80000005, &eax, &ebx, &ecx, &edx);
+	lineSize = ecx & 0xff; /* line Size, L1 Data Cache */
+    }
+    return lineSize;
+}
+
+static const char * const manMap[] = {
+#define INTEL     0
+    "GenuineIntel",
+#define AMD       1
+    "AuthenticAMD",
+#define CYRIX     2
+    "CyrixInstead",
+#define CENTAUR   2
+    "CentaurHauls",
+#define NEXGEN    3
+    "NexGenDriven",
+#define TRANSMETA 4
+    "GenuineTMx86",
+#define RISE      5
+    "RiseRiseRise",
+#define UMC       6
+    "UMC UMC UMC ",
+#define SIS       7
+    "Sis Sis Sis ",
+#define NATIONAL  8
+    "Geode by NSC",
+};
+
+static const int n_manufacturers = sizeof(manMap)/sizeof(manMap[0]);
+
+
+#define MAN_UNKNOWN 9
+
+#if !defined(AMD_64)
+#define SSE2_FLAG (1<<26)
+unsigned long
+s_mpi_is_sse2()
+{
+    unsigned long eax, ebx, ecx, edx;
+    int manufacturer = MAN_UNKNOWN;
+    int i;
+    char string[13];
+
+    if (is386() || is486()) {
+	return 0;
+    }
+    freebl_cpuid(0, &eax, &ebx, &ecx, &edx);
+    /* string holds the CPU's manufacturer ID string - a twelve
+     * character ASCII string stored in ebx, edx, ecx, and
+     * the 32-bit extended feature flags are in edx, ecx.
+     */
+    *(int *)string = ebx;
+    *(int *)&string[4] = (int)edx;
+    *(int *)&string[8] = (int)ecx;
+    string[12] = 0;
+
+    /* has no SSE2 extensions */
+    if (eax == 0) {
+	return 0;
+    }
+
+    for (i=0; i < n_manufacturers; i++) {
+	if ( strcmp(manMap[i],string) == 0) {
+	    manufacturer = i;
+	    break;
+	}
+    }
+
+    freebl_cpuid(1,&eax,&ebx,&ecx,&edx);
+    return (edx & SSE2_FLAG) == SSE2_FLAG;
+}
+#endif
+
+unsigned long
+s_mpi_getProcessorLineSize()
+{
+    unsigned long eax, ebx, ecx, edx;
+    unsigned long cpuidLevel;
+    unsigned long cacheLineSize = 0;
+    int manufacturer = MAN_UNKNOWN;
+    int i;
+    char string[65];
+
+#if !defined(AMD_64)
+    if (is386()) {
+	return 0; /* 386 had no cache */
+    } if (is486()) {
+	return 32; /* really? need more info */
+    }
+#endif
+
+    /* Pentium, cpuid command is available */
+    freebl_cpuid(0, &eax, &ebx, &ecx, &edx);
+    cpuidLevel = eax;
+    /* string holds the CPU's manufacturer ID string - a twelve
+     * character ASCII string stored in ebx, edx, ecx, and
+     * the 32-bit extended feature flags are in edx, ecx.
+     */
+    *(int *)string = ebx;
+    *(int *)&string[4] = (int)edx;
+    *(int *)&string[8] = (int)ecx;
+    string[12] = 0;
+
+    manufacturer = MAN_UNKNOWN;
+    for (i=0; i < n_manufacturers; i++) {
+	if ( strcmp(manMap[i],string) == 0) {
+	    manufacturer = i;
+	}
+    }
+
+    if (manufacturer == INTEL) {
+	cacheLineSize = getIntelCacheLineSize(cpuidLevel);
+    } else {
+	cacheLineSize = getOtherCacheLineSize(cpuidLevel);
+    }
+    /* doesn't support cache info based on cpuid. This means
+     * an old pentium class processor, which have cache lines of
+     * 32. If we learn differently, we can use a switch based on
+     * the Manufacturer id  */
+    if (cacheLineSize == 0) {
+	cacheLineSize = 32;
+    }
+    return cacheLineSize;
+}
+#define MPI_GET_PROCESSOR_LINE_SIZE_DEFINED 1
+#endif
+
+#if defined(__ppc64__) 
+/*
+ *  Sigh, The PPC has some really nice features to help us determine cache
+ *  size, since it had lots of direct control functions to do so. The POWER
+ *  processor even has an instruction to do this, but it was dropped in
+ *  PowerPC. Unfortunately most of them are not available in user mode.
+ *
+ *  The dcbz function would be a great way to determine cache line size except
+ *  1) it only works on write-back memory (it throws an exception otherwise), 
+ *  and 2) because so many mac programs 'knew' the processor cache size was
+ *  32 bytes, they used this instruction as a fast 'zero 32 bytes'. Now the new
+ *  G5 processor has 128 byte cache, but dcbz only clears 32 bytes to keep
+ *  these programs happy. dcbzl work if 64 bit instructions are supported.
+ *  If you know 64 bit instructions are supported, and that stack is 
+ *  write-back, you can use this code.
+ */
+#include "memory.h"
+
+/* clear the cache line that contains 'array' */
+static inline void dcbzl(char *array)
+{
+	register char *a asm("r2") = array;
+	__asm__ __volatile__( "dcbzl %0,r0" : "=r" (a): "0"(a) );
+}
+
+
+#define PPC_DO_ALIGN(x,y) ((char *)\
+			((((long long) (x))+((y)-1))&~((y)-1)))
+
+#define PPC_MAX_LINE_SIZE 256
+unsigned long
+s_mpi_getProcessorLineSize()
+{
+    char testArray[2*PPC_MAX_LINE_SIZE+1];
+    char *test;
+    int i;
+
+    /* align the array on a maximum line size boundary, so we
+     * know we are starting to clear from the first address */
+    test = PPC_DO_ALIGN(testArray, PPC_MAX_LINE_SIZE); 
+    /* set all the values to 1's */
+    memset(test, 0xff, PPC_MAX_LINE_SIZE);
+    /* clear one cache block starting at 'test' */
+    dcbzl(test);
+
+    /* find the size of the cleared area, that's our block size */
+    for (i=PPC_MAX_LINE_SIZE; i != 0; i = i/2) {
+	if (test[i-1] == 0) {
+	    return i;
+	}
+    }
+    return 0;
+}
+
+#define MPI_GET_PROCESSOR_LINE_SIZE_DEFINED 1
+#endif
+
+
+/*
+ * put other processor and platform specific cache code here
+ * return the smallest cache line size in bytes on the processor 
+ * (usually the L1 cache). If the OS has a call, this would be
+ * a greate place to put it.
+ *
+ * If there is no cache, return 0;
+ * 
+ * define MPI_GET_PROCESSOR_LINE_SIZE_DEFINED so the generic functions
+ * below aren't compiled.
+ *
+ */
+
+
+/* target.mk can define MPI_CACHE_LINE_SIZE if it's common for the family or 
+ * OS */
+#if defined(MPI_CACHE_LINE_SIZE) && !defined(MPI_GET_PROCESSOR_LINE_SIZE_DEFINED)
+
+unsigned long
+s_mpi_getProcessorLineSize()
+{
+   return MPI_CACHE_LINE_SIZE;
+}
+#define MPI_GET_PROCESSOR_LINE_SIZE_DEFINED 1
+#endif
+
+
+/* If no way to get the processor cache line size has been defined, assume
+ * it's 32 bytes (most common value, does not significantly impact performance)
+ */ 
+#ifndef MPI_GET_PROCESSOR_LINE_SIZE_DEFINED
+unsigned long
+s_mpi_getProcessorLineSize()
+{
+   return 32;
+}
+#endif
+
+#ifdef TEST_IT
+#include <stdio.h>
+
+main()
+{
+    printf("line size = %d\n", s_mpi_getProcessorLineSize());
+} 
+#endif
This site is hosted by Intevation GmbH (Datenschutzerklärung und Impressum | Privacy Policy and Imprint)