Mercurial > trustbridge > nss-cmake-static
comparison nss/lib/freebl/mpi/mpcpucache.c @ 0:1e5118fa0cb1
This is NSS with a CMake build system
To compile a static NSS library for Windows we've used the
Chromium-NSS fork and added a Cmake buildsystem to compile
it statically for Windows. See README.chromium for chromium
changes and README.trustbridge for our modifications.
author | Andre Heinecke <andre.heinecke@intevation.de> |
---|---|
date | Mon, 28 Jul 2014 10:47:06 +0200 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:1e5118fa0cb1 |
---|---|
1 /* This Source Code Form is subject to the terms of the Mozilla Public | |
2 * License, v. 2.0. If a copy of the MPL was not distributed with this | |
3 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ | |
4 | |
5 #include "mpi.h" | |
6 | |
7 /* | |
8 * This file implements a single function: s_mpi_getProcessorLineSize(); | |
9 * s_mpi_getProcessorLineSize() returns the size in bytes of the cache line | |
10 * if a cache exists, or zero if there is no cache. If more than one | |
11 * cache line exists, it should return the smallest line size (which is | |
12 * usually the L1 cache). | |
13 * | |
14 * mp_modexp uses this information to make sure that private key information | |
15 * isn't being leaked through the cache. | |
16 * | |
17 * Currently the file returns good data for most modern x86 processors, and | |
18 * reasonable data on 64-bit ppc processors. All other processors are assumed | |
19 * to have a cache line size of 32 bytes unless modified by target.mk. | |
20 * | |
21 */ | |
22 | |
23 #if defined(i386) || defined(__i386) || defined(__X86__) || defined (_M_IX86) || defined(__x86_64__) || defined(__x86_64) || defined(_M_AMD64) | |
24 /* X86 processors have special instructions that tell us about the cache */ | |
25 #include "string.h" | |
26 | |
27 #if defined(__x86_64__) || defined(__x86_64) || defined(_M_AMD64) | |
28 #define AMD_64 1 | |
29 #endif | |
30 | |
31 /* Generic CPUID function */ | |
32 #if defined(AMD_64) | |
33 | |
34 #if defined(__GNUC__) | |
35 | |
/*
 * Execute the CPUID instruction (x86-64, GCC flavour).
 *
 *   op   - value loaded into eax before cpuid is executed
 *   eax, ebx, ecx, edx - receive the register values cpuid leaves behind
 *
 * The asm statement is marked volatile: callers issue the same query several
 * times in a row (see the cpuid '2' loop), and without volatile the compiler
 * is allowed to treat identical asm statements as interchangeable and merge
 * or hoist them.
 */
void freebl_cpuid(unsigned long op, unsigned long *eax,
                  unsigned long *ebx, unsigned long *ecx,
                  unsigned long *edx)
{
    __asm__ __volatile__("cpuid\n\t"
                         : "=a" (*eax),
                           "=b" (*ebx),
                           "=c" (*ecx),
                           "=d" (*edx)
                         : "0" (op));
}
47 | |
48 #elif defined(_MSC_VER) | |
49 | |
50 #include <intrin.h> | |
51 | |
/*
 * CPUID wrapper for x86-64 when building with MSVC.
 *
 * Passes 'op' to the __cpuid compiler intrinsic and copies the four values
 * the intrinsic stores in intrinsic_out[0..3] out through eax, ebx, ecx and
 * edx, in that order.
 */
void freebl_cpuid(unsigned long op, unsigned long *eax,
                  unsigned long *ebx, unsigned long *ecx,
                  unsigned long *edx)
{
    int intrinsic_out[4];

    __cpuid(intrinsic_out, op);
    *eax = intrinsic_out[0];
    *ebx = intrinsic_out[1];
    *ecx = intrinsic_out[2];
    *edx = intrinsic_out[3];
}
64 | |
65 #endif | |
66 | |
67 #else /* !defined(AMD_64) */ | |
68 | |
69 /* x86 */ | |
70 | |
71 #if defined(__GNUC__) | |
/*
 * Execute the CPUID instruction (32-bit x86, GCC flavour).
 *
 *   op   - value loaded into eax before cpuid is executed
 *   eax, ebx, ecx, edx - receive the register values cpuid leaves behind
 *
 * The asm statement is marked volatile so that repeated identical queries
 * (see the cpuid '2' loop) are each really executed instead of being merged
 * or hoisted by the optimizer.
 */
void freebl_cpuid(unsigned long op, unsigned long *eax,
                  unsigned long *ebx, unsigned long *ecx,
                  unsigned long *edx)
{
    /* sigh GCC isn't smart enough to save the ebx PIC register on its own
     * in this case, so do it by hand. Use edi to store ebx and pass the
     * value returned in ebx from cpuid through edi. */
    __asm__ __volatile__("mov %%ebx,%%edi\n\t"
                         "cpuid\n\t"
                         "xchgl %%ebx,%%edi\n\t"
                         : "=a" (*eax),
                           "=D" (*ebx),
                           "=c" (*ecx),
                           "=d" (*edx)
                         : "0" (op));
}
88 | |
/*
 * try flipping a processor flag to determine CPU type
 *
 * Reads the flags register, XORs it with 'flag', writes the result back,
 * reads the flags register again, and finally restores the original value.
 *
 * Returns the bits that differ between the original flags and the flags
 * read back after the write; the callers below treat a result of zero as
 * "the processor would not flip that flag".
 */
static unsigned long changeFlag(unsigned long flag)
{
    unsigned long changedFlags, originalFlags;
    __asm__("pushfl\n\t"            /* get the flags */
            "popl %0\n\t"
            "movl %0,%1\n\t"        /* save the original flags */
            "xorl %2,%0\n\t"        /* flip the bit */
            "pushl %0\n\t"          /* set the flags */
            "popfl\n\t"
            "pushfl\n\t"            /* get the flags again (for return) */
            "popl %0\n\t"
            "pushl %1\n\t"          /* restore the original flags */
            "popfl\n\t"
            : "=r" (changedFlags),
              "=r" (originalFlags),
              "=r" (flag)
            : "2" (flag));
    return changedFlags ^ originalFlags;
}
111 | |
112 #elif defined(_MSC_VER) | |
113 | |
/*
 * windows versions of the above assembler
 */
/* wcpuid emits the two raw bytes 0fh, 0a2h into the instruction stream; it is
 * used below in the place where the cpuid instruction is wanted. */
#define wcpuid __asm __emit 0fh __asm __emit 0a2h
/*
 * CPUID wrapper for 32-bit x86 when building with MSVC.
 *
 * Loads 'op' into eax, executes wcpuid and hands the resulting eax, ebx, ecx
 * and edx values back through Reax, Rebx, Recx and Redx. All general
 * registers are saved and restored around the sequence (pushad/popad), so the
 * results are first parked in locals and copied out afterwards in C.
 */
void freebl_cpuid(unsigned long op, unsigned long *Reax,
                  unsigned long *Rebx, unsigned long *Recx, unsigned long *Redx)
{
    unsigned long Leax, Lebx, Lecx, Ledx;
    __asm {
        pushad
        mov eax,op
        wcpuid
        mov Leax,eax
        mov Lebx,ebx
        mov Lecx,ecx
        mov Ledx,edx
        popad
    }
    *Reax = Leax;
    *Rebx = Lebx;
    *Recx = Lecx;
    *Redx = Ledx;
}
137 | |
/*
 * MSVC version of changeFlag(): try to flip the bits given in 'flag' in the
 * processor flags register.
 *
 * The original flags are kept both in originalFlags and on the stack; the
 * copy on the stack is what the final popfd restores. eax and ebx are saved
 * on entry and restored on exit.
 *
 * Returns the bits that differ between the original flags and the flags read
 * back after the attempted change.
 */
static unsigned long changeFlag(unsigned long flag)
{
    unsigned long changedFlags, originalFlags;
    __asm {
        push eax
        push ebx
        pushfd                      /* get the flags */
        pop eax
        push eax                    /* save the flags on the stack */
        mov originalFlags,eax       /* save the original flags */
        mov ebx,flag
        xor eax,ebx                 /* flip the bit */
        push eax                    /* set the flags */
        popfd
        pushfd                      /* get the flags again (for return) */
        pop eax
        popfd                       /* restore the original flags */
        mov changedFlags,eax
        pop ebx
        pop eax
    }
    return changedFlags ^ originalFlags;
}
161 #endif | |
162 | |
163 #endif | |
164 | |
165 #if !defined(AMD_64) | |
166 #define AC_FLAG 0x40000 | |
167 #define ID_FLAG 0x200000 | |
168 | |
169 /* 386 processors can't flip the AC_FLAG, intel AP Note AP-485 */ | |
170 static int is386() | |
171 { | |
172 return changeFlag(AC_FLAG) == 0; | |
173 } | |
174 | |
175 /* 486 processors can't flip the ID_FLAG, intel AP Note AP-485 */ | |
176 static int is486() | |
177 { | |
178 return changeFlag(ID_FLAG) == 0; | |
179 } | |
180 #endif | |
181 | |
182 | |
/*
 * table for Intel Cache.
 * See Intel Application Note AP-485 for more information
 *
 * CacheMap is indexed by a one-byte cache descriptor value (0x00 - 0xff).
 * Each entry gives the kind of structure the descriptor stands for and its
 * line size. A lineSize of 0 makes the lookup code ignore the entry; this
 * includes 0x40, which is typed Cache_L2 but deliberately carries no size.
 */

/* storage type for the 'type' field; holds CacheType values in one byte */
typedef unsigned char CacheTypeEntry;

typedef enum {
    Cache_NONE    = 0,
    Cache_UNKNOWN = 1,
    Cache_TLB     = 2,
    Cache_TLBi    = 3,
    Cache_TLBd    = 4,
    Cache_Trace   = 5,
    Cache_L1      = 6,
    Cache_L1i     = 7,
    Cache_L1d     = 8,
    Cache_L2      = 9,
    Cache_L2i     = 10,
    Cache_L2d     = 11,
    Cache_L3      = 12,
    Cache_L3i     = 13,
    Cache_L3d     = 14
} CacheType;

struct _cache {
    CacheTypeEntry type;        /* one of the CacheType values */
    unsigned char lineSize;     /* line size; 0 means "ignore this entry" */
};
static const struct _cache CacheMap[256] = {
/* 00 */ {Cache_NONE,    0   },
/* 01 */ {Cache_TLBi,    0   },
/* 02 */ {Cache_TLBi,    0   },
/* 03 */ {Cache_TLBd,    0   },
/* 04 */ {Cache_TLBd,    0   },
/* 05 */ {Cache_UNKNOWN, 0   },
/* 06 */ {Cache_L1i,     32  },
/* 07 */ {Cache_UNKNOWN, 0   },
/* 08 */ {Cache_L1i,     32  },
/* 09 */ {Cache_UNKNOWN, 0   },
/* 0a */ {Cache_L1d,     32  },
/* 0b */ {Cache_UNKNOWN, 0   },
/* 0c */ {Cache_L1d,     32  },
/* 0d */ {Cache_UNKNOWN, 0   },
/* 0e */ {Cache_UNKNOWN, 0   },
/* 0f */ {Cache_UNKNOWN, 0   },
/* 10 */ {Cache_UNKNOWN, 0   },
/* 11 */ {Cache_UNKNOWN, 0   },
/* 12 */ {Cache_UNKNOWN, 0   },
/* 13 */ {Cache_UNKNOWN, 0   },
/* 14 */ {Cache_UNKNOWN, 0   },
/* 15 */ {Cache_UNKNOWN, 0   },
/* 16 */ {Cache_UNKNOWN, 0   },
/* 17 */ {Cache_UNKNOWN, 0   },
/* 18 */ {Cache_UNKNOWN, 0   },
/* 19 */ {Cache_UNKNOWN, 0   },
/* 1a */ {Cache_UNKNOWN, 0   },
/* 1b */ {Cache_UNKNOWN, 0   },
/* 1c */ {Cache_UNKNOWN, 0   },
/* 1d */ {Cache_UNKNOWN, 0   },
/* 1e */ {Cache_UNKNOWN, 0   },
/* 1f */ {Cache_UNKNOWN, 0   },
/* 20 */ {Cache_UNKNOWN, 0   },
/* 21 */ {Cache_UNKNOWN, 0   },
/* 22 */ {Cache_L3,      64  },
/* 23 */ {Cache_L3,      64  },
/* 24 */ {Cache_UNKNOWN, 0   },
/* 25 */ {Cache_L3,      64  },
/* 26 */ {Cache_UNKNOWN, 0   },
/* 27 */ {Cache_UNKNOWN, 0   },
/* 28 */ {Cache_UNKNOWN, 0   },
/* 29 */ {Cache_L3,      64  },
/* 2a */ {Cache_UNKNOWN, 0   },
/* 2b */ {Cache_UNKNOWN, 0   },
/* 2c */ {Cache_L1d,     64  },
/* 2d */ {Cache_UNKNOWN, 0   },
/* 2e */ {Cache_UNKNOWN, 0   },
/* 2f */ {Cache_UNKNOWN, 0   },
/* 30 */ {Cache_L1i,     64  },
/* 31 */ {Cache_UNKNOWN, 0   },
/* 32 */ {Cache_UNKNOWN, 0   },
/* 33 */ {Cache_UNKNOWN, 0   },
/* 34 */ {Cache_UNKNOWN, 0   },
/* 35 */ {Cache_UNKNOWN, 0   },
/* 36 */ {Cache_UNKNOWN, 0   },
/* 37 */ {Cache_UNKNOWN, 0   },
/* 38 */ {Cache_UNKNOWN, 0   },
/* 39 */ {Cache_L2,      64  },
/* 3a */ {Cache_UNKNOWN, 0   },
/* 3b */ {Cache_L2,      64  },
/* 3c */ {Cache_L2,      64  },
/* 3d */ {Cache_UNKNOWN, 0   },
/* 3e */ {Cache_UNKNOWN, 0   },
/* 3f */ {Cache_UNKNOWN, 0   },
/* 40 */ {Cache_L2,      0   },
/* 41 */ {Cache_L2,      32  },
/* 42 */ {Cache_L2,      32  },
/* 43 */ {Cache_L2,      32  },
/* 44 */ {Cache_L2,      32  },
/* 45 */ {Cache_L2,      32  },
/* 46 */ {Cache_UNKNOWN, 0   },
/* 47 */ {Cache_UNKNOWN, 0   },
/* 48 */ {Cache_UNKNOWN, 0   },
/* 49 */ {Cache_UNKNOWN, 0   },
/* 4a */ {Cache_UNKNOWN, 0   },
/* 4b */ {Cache_UNKNOWN, 0   },
/* 4c */ {Cache_UNKNOWN, 0   },
/* 4d */ {Cache_UNKNOWN, 0   },
/* 4e */ {Cache_UNKNOWN, 0   },
/* 4f */ {Cache_UNKNOWN, 0   },
/* 50 */ {Cache_TLBi,    0   },
/* 51 */ {Cache_TLBi,    0   },
/* 52 */ {Cache_TLBi,    0   },
/* 53 */ {Cache_UNKNOWN, 0   },
/* 54 */ {Cache_UNKNOWN, 0   },
/* 55 */ {Cache_UNKNOWN, 0   },
/* 56 */ {Cache_UNKNOWN, 0   },
/* 57 */ {Cache_UNKNOWN, 0   },
/* 58 */ {Cache_UNKNOWN, 0   },
/* 59 */ {Cache_UNKNOWN, 0   },
/* 5a */ {Cache_UNKNOWN, 0   },
/* 5b */ {Cache_TLBd,    0   },
/* 5c */ {Cache_TLBd,    0   },
/* 5d */ {Cache_TLBd,    0   },
/* 5e */ {Cache_UNKNOWN, 0   },
/* 5f */ {Cache_UNKNOWN, 0   },
/* 60 */ {Cache_UNKNOWN, 0   },
/* 61 */ {Cache_UNKNOWN, 0   },
/* 62 */ {Cache_UNKNOWN, 0   },
/* 63 */ {Cache_UNKNOWN, 0   },
/* 64 */ {Cache_UNKNOWN, 0   },
/* 65 */ {Cache_UNKNOWN, 0   },
/* 66 */ {Cache_L1d,     64  },
/* 67 */ {Cache_L1d,     64  },
/* 68 */ {Cache_L1d,     64  },
/* 69 */ {Cache_UNKNOWN, 0   },
/* 6a */ {Cache_UNKNOWN, 0   },
/* 6b */ {Cache_UNKNOWN, 0   },
/* 6c */ {Cache_UNKNOWN, 0   },
/* 6d */ {Cache_UNKNOWN, 0   },
/* 6e */ {Cache_UNKNOWN, 0   },
/* 6f */ {Cache_UNKNOWN, 0   },
/* 70 */ {Cache_Trace,   1   },
/* 71 */ {Cache_Trace,   1   },
/* 72 */ {Cache_Trace,   1   },
/* 73 */ {Cache_UNKNOWN, 0   },
/* 74 */ {Cache_UNKNOWN, 0   },
/* 75 */ {Cache_UNKNOWN, 0   },
/* 76 */ {Cache_UNKNOWN, 0   },
/* 77 */ {Cache_UNKNOWN, 0   },
/* 78 */ {Cache_UNKNOWN, 0   },
/* 79 */ {Cache_L2,      64  },
/* 7a */ {Cache_L2,      64  },
/* 7b */ {Cache_L2,      64  },
/* 7c */ {Cache_L2,      64  },
/* 7d */ {Cache_UNKNOWN, 0   },
/* 7e */ {Cache_UNKNOWN, 0   },
/* 7f */ {Cache_UNKNOWN, 0   },
/* 80 */ {Cache_UNKNOWN, 0   },
/* 81 */ {Cache_UNKNOWN, 0   },
/* 82 */ {Cache_L2,      32  },
/* 83 */ {Cache_L2,      32  },
/* 84 */ {Cache_L2,      32  },
/* 85 */ {Cache_L2,      32  },
/* 86 */ {Cache_L2,      64  },
/* 87 */ {Cache_L2,      64  },
/* 88 */ {Cache_UNKNOWN, 0   },
/* 89 */ {Cache_UNKNOWN, 0   },
/* 8a */ {Cache_UNKNOWN, 0   },
/* 8b */ {Cache_UNKNOWN, 0   },
/* 8c */ {Cache_UNKNOWN, 0   },
/* 8d */ {Cache_UNKNOWN, 0   },
/* 8e */ {Cache_UNKNOWN, 0   },
/* 8f */ {Cache_UNKNOWN, 0   },
/* 90 */ {Cache_UNKNOWN, 0   },
/* 91 */ {Cache_UNKNOWN, 0   },
/* 92 */ {Cache_UNKNOWN, 0   },
/* 93 */ {Cache_UNKNOWN, 0   },
/* 94 */ {Cache_UNKNOWN, 0   },
/* 95 */ {Cache_UNKNOWN, 0   },
/* 96 */ {Cache_UNKNOWN, 0   },
/* 97 */ {Cache_UNKNOWN, 0   },
/* 98 */ {Cache_UNKNOWN, 0   },
/* 99 */ {Cache_UNKNOWN, 0   },
/* 9a */ {Cache_UNKNOWN, 0   },
/* 9b */ {Cache_UNKNOWN, 0   },
/* 9c */ {Cache_UNKNOWN, 0   },
/* 9d */ {Cache_UNKNOWN, 0   },
/* 9e */ {Cache_UNKNOWN, 0   },
/* 9f */ {Cache_UNKNOWN, 0   },
/* a0 */ {Cache_UNKNOWN, 0   },
/* a1 */ {Cache_UNKNOWN, 0   },
/* a2 */ {Cache_UNKNOWN, 0   },
/* a3 */ {Cache_UNKNOWN, 0   },
/* a4 */ {Cache_UNKNOWN, 0   },
/* a5 */ {Cache_UNKNOWN, 0   },
/* a6 */ {Cache_UNKNOWN, 0   },
/* a7 */ {Cache_UNKNOWN, 0   },
/* a8 */ {Cache_UNKNOWN, 0   },
/* a9 */ {Cache_UNKNOWN, 0   },
/* aa */ {Cache_UNKNOWN, 0   },
/* ab */ {Cache_UNKNOWN, 0   },
/* ac */ {Cache_UNKNOWN, 0   },
/* ad */ {Cache_UNKNOWN, 0   },
/* ae */ {Cache_UNKNOWN, 0   },
/* af */ {Cache_UNKNOWN, 0   },
/* b0 */ {Cache_TLBi,    0   },
/* b1 */ {Cache_UNKNOWN, 0   },
/* b2 */ {Cache_UNKNOWN, 0   },
/* b3 */ {Cache_TLBd,    0   },
/* b4 */ {Cache_UNKNOWN, 0   },
/* b5 */ {Cache_UNKNOWN, 0   },
/* b6 */ {Cache_UNKNOWN, 0   },
/* b7 */ {Cache_UNKNOWN, 0   },
/* b8 */ {Cache_UNKNOWN, 0   },
/* b9 */ {Cache_UNKNOWN, 0   },
/* ba */ {Cache_UNKNOWN, 0   },
/* bb */ {Cache_UNKNOWN, 0   },
/* bc */ {Cache_UNKNOWN, 0   },
/* bd */ {Cache_UNKNOWN, 0   },
/* be */ {Cache_UNKNOWN, 0   },
/* bf */ {Cache_UNKNOWN, 0   },
/* c0 */ {Cache_UNKNOWN, 0   },
/* c1 */ {Cache_UNKNOWN, 0   },
/* c2 */ {Cache_UNKNOWN, 0   },
/* c3 */ {Cache_UNKNOWN, 0   },
/* c4 */ {Cache_UNKNOWN, 0   },
/* c5 */ {Cache_UNKNOWN, 0   },
/* c6 */ {Cache_UNKNOWN, 0   },
/* c7 */ {Cache_UNKNOWN, 0   },
/* c8 */ {Cache_UNKNOWN, 0   },
/* c9 */ {Cache_UNKNOWN, 0   },
/* ca */ {Cache_UNKNOWN, 0   },
/* cb */ {Cache_UNKNOWN, 0   },
/* cc */ {Cache_UNKNOWN, 0   },
/* cd */ {Cache_UNKNOWN, 0   },
/* ce */ {Cache_UNKNOWN, 0   },
/* cf */ {Cache_UNKNOWN, 0   },
/* d0 */ {Cache_UNKNOWN, 0   },
/* d1 */ {Cache_UNKNOWN, 0   },
/* d2 */ {Cache_UNKNOWN, 0   },
/* d3 */ {Cache_UNKNOWN, 0   },
/* d4 */ {Cache_UNKNOWN, 0   },
/* d5 */ {Cache_UNKNOWN, 0   },
/* d6 */ {Cache_UNKNOWN, 0   },
/* d7 */ {Cache_UNKNOWN, 0   },
/* d8 */ {Cache_UNKNOWN, 0   },
/* d9 */ {Cache_UNKNOWN, 0   },
/* da */ {Cache_UNKNOWN, 0   },
/* db */ {Cache_UNKNOWN, 0   },
/* dc */ {Cache_UNKNOWN, 0   },
/* dd */ {Cache_UNKNOWN, 0   },
/* de */ {Cache_UNKNOWN, 0   },
/* df */ {Cache_UNKNOWN, 0   },
/* e0 */ {Cache_UNKNOWN, 0   },
/* e1 */ {Cache_UNKNOWN, 0   },
/* e2 */ {Cache_UNKNOWN, 0   },
/* e3 */ {Cache_UNKNOWN, 0   },
/* e4 */ {Cache_UNKNOWN, 0   },
/* e5 */ {Cache_UNKNOWN, 0   },
/* e6 */ {Cache_UNKNOWN, 0   },
/* e7 */ {Cache_UNKNOWN, 0   },
/* e8 */ {Cache_UNKNOWN, 0   },
/* e9 */ {Cache_UNKNOWN, 0   },
/* ea */ {Cache_UNKNOWN, 0   },
/* eb */ {Cache_UNKNOWN, 0   },
/* ec */ {Cache_UNKNOWN, 0   },
/* ed */ {Cache_UNKNOWN, 0   },
/* ee */ {Cache_UNKNOWN, 0   },
/* ef */ {Cache_UNKNOWN, 0   },
/* f0 */ {Cache_UNKNOWN, 0   },
/* f1 */ {Cache_UNKNOWN, 0   },
/* f2 */ {Cache_UNKNOWN, 0   },
/* f3 */ {Cache_UNKNOWN, 0   },
/* f4 */ {Cache_UNKNOWN, 0   },
/* f5 */ {Cache_UNKNOWN, 0   },
/* f6 */ {Cache_UNKNOWN, 0   },
/* f7 */ {Cache_UNKNOWN, 0   },
/* f8 */ {Cache_UNKNOWN, 0   },
/* f9 */ {Cache_UNKNOWN, 0   },
/* fa */ {Cache_UNKNOWN, 0   },
/* fb */ {Cache_UNKNOWN, 0   },
/* fc */ {Cache_UNKNOWN, 0   },
/* fd */ {Cache_UNKNOWN, 0   },
/* fe */ {Cache_UNKNOWN, 0   },
/* ff */ {Cache_UNKNOWN, 0   }
};
470 | |
471 | |
472 /* | |
473 * use the above table to determine the CacheEntryLineSize. | |
474 */ | |
475 static void | |
476 getIntelCacheEntryLineSize(unsigned long val, int *level, | |
477 unsigned long *lineSize) | |
478 { | |
479 CacheType type; | |
480 | |
481 type = CacheMap[val].type; | |
482 /* only interested in data caches */ | |
483 /* NOTE val = 0x40 is a special value that means no L2 or L3 cache. | |
484 * this data check has the side effect of rejecting that entry. If | |
485 * that wasn't the case, we could have to reject it explicitly */ | |
486 if (CacheMap[val].lineSize == 0) { | |
487 return; | |
488 } | |
489 /* look at the caches, skip types we aren't interested in. | |
490 * if we already have a value for a lower level cache, skip the | |
491 * current entry */ | |
492 if ((type == Cache_L1)|| (type == Cache_L1d)) { | |
493 *level = 1; | |
494 *lineSize = CacheMap[val].lineSize; | |
495 } else if ((*level >= 2) && ((type == Cache_L2) || (type == Cache_L2d))) { | |
496 *level = 2; | |
497 *lineSize = CacheMap[val].lineSize; | |
498 } else if ((*level >= 3) && ((type == Cache_L3) || (type == Cache_L3d))) { | |
499 *level = 3; | |
500 *lineSize = CacheMap[val].lineSize; | |
501 } | |
502 return; | |
503 } | |
504 | |
505 | |
/*
 * Split a 32-bit register value into its four descriptor bytes and feed
 * each of them, most significant byte first, to
 * getIntelCacheEntryLineSize().
 */
static void
getIntelRegisterCacheLineSize(unsigned long val,
                              int *level, unsigned long *lineSize)
{
    int shift;

    for (shift = 24; shift >= 0; shift -= 8) {
        getIntelCacheEntryLineSize((val >> shift) & 0xff, level, lineSize);
    }
}
515 | |
/*
 * returns '0' if no recognized cache is found, or if the cache
 * information is not supported by this processor (cpuidLevel < 2).
 *
 *   cpuidLevel - highest standard cpuid command the processor reports
 */
static unsigned long
getIntelCacheLineSize(int cpuidLevel)
{
    /* start above every level the descriptor lookup knows about (1..3),
     * so the first usable data cache found is accepted */
    int level = 4;
    unsigned long lineSize = 0;
    unsigned long eax, ebx, ecx, edx;
    int repeat, count;

    if (cpuidLevel < 2) {
        return 0;
    }

    /* command '2' of the cpuid is intel's cache info call. Each byte of the
     * 4 registers contain a potential descriptor for the cache. The CacheMap
     * table maps the cache entry with the processor cache. Register 'al'
     * contains a count value that cpuid '2' needs to be called in order to
     * find all the cache descriptors. Only registers with the high bit set
     * to 'zero' have valid descriptors. This code loops through all the
     * required calls to cpuid '2' and passes any valid descriptors it finds
     * to the getIntelRegisterCacheLineSize code, which breaks the registers
     * down into their component descriptors. In the end the lineSize of the
     * lowest level cache data cache is returned. */
    freebl_cpuid(2, &eax, &ebx, &ecx, &edx);
    /* 'al' is the whole low byte of eax, so keep all eight bits of it */
    repeat = eax & 0xff;
    for (count = 0; count < repeat; count++) {
        if ((eax & 0x80000000) == 0) {
            /* the low byte of eax is the count, not a descriptor */
            getIntelRegisterCacheLineSize(eax & 0xffffff00, &level, &lineSize);
        }
        if ((ebx & 0x80000000) == 0) {
            getIntelRegisterCacheLineSize(ebx, &level, &lineSize);
        }
        if ((ecx & 0x80000000) == 0) {
            getIntelRegisterCacheLineSize(ecx, &level, &lineSize);
        }
        if ((edx & 0x80000000) == 0) {
            getIntelRegisterCacheLineSize(edx, &level, &lineSize);
        }
        if (count+1 != repeat) {
            freebl_cpuid(2, &eax, &ebx, &ecx, &edx);
        }
    }
    return lineSize;
}
563 | |
/*
 * Cache line size lookup for non-Intel processors, using the AMD extended
 * cache commands for cpuid
 * (see "AMD Processor Recognition Application Note" Publication 20734).
 * Some other processors use the identical scheme
 * (see "Processor Recognition, Transmeta Corporation").
 *
 * The cpuidLevel passed in is not consulted; it is overwritten with the
 * extended CPUID level queried here.
 *
 * returns '0' if the cache info is not supported by this processor.
 */
static unsigned long
getOtherCacheLineSize(unsigned long cpuidLevel)
{
    unsigned long eax, ebx, ecx, edx;

    /* ask for the highest extended CPUID command available */
    freebl_cpuid(0x80000000, &eax, &ebx, &ecx, &edx);
    cpuidLevel = eax;

    if (cpuidLevel < 0x80000005) {
        return 0;
    }

    freebl_cpuid(0x80000005, &eax, &ebx, &ecx, &edx);
    return ecx & 0xff; /* line Size, L1 Data Cache */
}
587 | |
/*
 * CPU manufacturer ID strings, as compared against the twelve character
 * string built from the cpuid '0' registers. Each #define names the index
 * of the string that follows it, so the constants must stay in step with
 * the array order. MAN_UNKNOWN is one past the last valid index.
 */
static const char * const manMap[] = {
#define INTEL     0
    "GenuineIntel",
#define AMD       1
    "AuthenticAMD",
#define CYRIX     2
    "CyrixInstead",
#define CENTAUR   3
    "CentaurHauls",
#define NEXGEN    4
    "NexGenDriven",
#define TRANSMETA 5
    "GenuineTMx86",
#define RISE      6
    "RiseRiseRise",
#define UMC       7
    "UMC UMC UMC ",
#define SIS       8
    "Sis Sis Sis ",
#define NATIONAL  9
    "Geode by NSC",
};

static const int n_manufacturers = sizeof(manMap)/sizeof(manMap[0]);


#define MAN_UNKNOWN 10
615 | |
616 #if !defined(AMD_64) | |
617 #define SSE2_FLAG (1<<26) | |
618 unsigned long | |
619 s_mpi_is_sse2() | |
620 { | |
621 unsigned long eax, ebx, ecx, edx; | |
622 int manufacturer = MAN_UNKNOWN; | |
623 int i; | |
624 char string[13]; | |
625 | |
626 if (is386() || is486()) { | |
627 return 0; | |
628 } | |
629 freebl_cpuid(0, &eax, &ebx, &ecx, &edx); | |
630 /* string holds the CPU's manufacturer ID string - a twelve | |
631 * character ASCII string stored in ebx, edx, ecx, and | |
632 * the 32-bit extended feature flags are in edx, ecx. | |
633 */ | |
634 *(int *)string = ebx; | |
635 *(int *)&string[4] = (int)edx; | |
636 *(int *)&string[8] = (int)ecx; | |
637 string[12] = 0; | |
638 | |
639 /* has no SSE2 extensions */ | |
640 if (eax == 0) { | |
641 return 0; | |
642 } | |
643 | |
644 for (i=0; i < n_manufacturers; i++) { | |
645 if ( strcmp(manMap[i],string) == 0) { | |
646 manufacturer = i; | |
647 break; | |
648 } | |
649 } | |
650 | |
651 freebl_cpuid(1,&eax,&ebx,&ecx,&edx); | |
652 return (edx & SSE2_FLAG) == SSE2_FLAG; | |
653 } | |
654 #endif | |
655 | |
656 unsigned long | |
657 s_mpi_getProcessorLineSize() | |
658 { | |
659 unsigned long eax, ebx, ecx, edx; | |
660 unsigned long cpuidLevel; | |
661 unsigned long cacheLineSize = 0; | |
662 int manufacturer = MAN_UNKNOWN; | |
663 int i; | |
664 char string[65]; | |
665 | |
666 #if !defined(AMD_64) | |
667 if (is386()) { | |
668 return 0; /* 386 had no cache */ | |
669 } if (is486()) { | |
670 return 32; /* really? need more info */ | |
671 } | |
672 #endif | |
673 | |
674 /* Pentium, cpuid command is available */ | |
675 freebl_cpuid(0, &eax, &ebx, &ecx, &edx); | |
676 cpuidLevel = eax; | |
677 /* string holds the CPU's manufacturer ID string - a twelve | |
678 * character ASCII string stored in ebx, edx, ecx, and | |
679 * the 32-bit extended feature flags are in edx, ecx. | |
680 */ | |
681 *(int *)string = ebx; | |
682 *(int *)&string[4] = (int)edx; | |
683 *(int *)&string[8] = (int)ecx; | |
684 string[12] = 0; | |
685 | |
686 manufacturer = MAN_UNKNOWN; | |
687 for (i=0; i < n_manufacturers; i++) { | |
688 if ( strcmp(manMap[i],string) == 0) { | |
689 manufacturer = i; | |
690 } | |
691 } | |
692 | |
693 if (manufacturer == INTEL) { | |
694 cacheLineSize = getIntelCacheLineSize(cpuidLevel); | |
695 } else { | |
696 cacheLineSize = getOtherCacheLineSize(cpuidLevel); | |
697 } | |
698 /* doesn't support cache info based on cpuid. This means | |
699 * an old pentium class processor, which have cache lines of | |
700 * 32. If we learn differently, we can use a switch based on | |
701 * the Manufacturer id */ | |
702 if (cacheLineSize == 0) { | |
703 cacheLineSize = 32; | |
704 } | |
705 return cacheLineSize; | |
706 } | |
707 #define MPI_GET_PROCESSOR_LINE_SIZE_DEFINED 1 | |
708 #endif | |
709 | |
710 #if defined(__ppc64__) | |
711 /* | |
712 * Sigh, The PPC has some really nice features to help us determine cache | |
713 * size, since it had lots of direct control functions to do so. The POWER | |
714 * processor even has an instruction to do this, but it was dropped in | |
715 * PowerPC. Unfortunately most of them are not available in user mode. | |
716 * | |
717 * The dcbz function would be a great way to determine cache line size except | |
718 * 1) it only works on write-back memory (it throws an exception otherwise), | |
719 * and 2) because so many mac programs 'knew' the processor cache size was | |
720 * 32 bytes, they used this instruction as a fast 'zero 32 bytes'. Now the new | |
721 * G5 processor has 128 byte cache, but dcbz only clears 32 bytes to keep | |
722 * these programs happy. dcbzl works if 64 bit instructions are supported. | |
723 * If you know 64 bit instructions are supported, and that stack is | |
724 * write-back, you can use this code. | |
725 */ | |
726 #include "memory.h" | |
727 | |
/* clear the cache line that contains 'array'
 *
 * 'array' is copied into a register variable and handed to the dcbzl
 * instruction as an operand; the function returns nothing, its only effect
 * is whatever dcbzl does to memory.
 */
static inline void dcbzl(char *array)
{
    register char *a asm("r2") = array;
    __asm__ __volatile__( "dcbzl %0,r0" : "=r" (a): "0"(a) );
}
734 | |
735 | |
/* round the address 'x' up to the next multiple of 'y' */
#define PPC_DO_ALIGN(x,y) ((char *)\
                ((((long long) (x))+((y)-1))&~((y)-1)))

/* largest cache line size the probe below can detect */
#define PPC_MAX_LINE_SIZE 256

/*
 * ppc64 implementation: measure the cache line size by filling an aligned
 * buffer with 0xff bytes, clearing one cache block with dcbzl and checking
 * how far the zeroes reach.
 *
 * Returns the detected size in bytes, or 0 if nothing was cleared.
 */
unsigned long
s_mpi_getProcessorLineSize()
{
    char scratch[2*PPC_MAX_LINE_SIZE+1];
    char *probe;
    int span;

    /* start on a maximum-line-size boundary inside the scratch buffer, so
     * the cleared block is known to begin at 'probe' */
    probe = PPC_DO_ALIGN(scratch, PPC_MAX_LINE_SIZE);
    /* fill with non-zero bytes, then clear the block starting at 'probe' */
    memset(probe, 0xff, PPC_MAX_LINE_SIZE);
    dcbzl(probe);

    /* the largest power of two whose last byte got zeroed is the size */
    span = PPC_MAX_LINE_SIZE;
    while (span != 0) {
        if (probe[span-1] == 0) {
            return span;
        }
        span = span/2;
    }
    return 0;
}
763 | |
764 #define MPI_GET_PROCESSOR_LINE_SIZE_DEFINED 1 | |
765 #endif | |
766 | |
767 | |
768 /* | |
769 * put other processor and platform specific cache code here | |
770 * return the smallest cache line size in bytes on the processor | |
771 * (usually the L1 cache). If the OS has a call, this would be | |
772 * a great place to put it. | |
773 * | |
774 * If there is no cache, return 0; | |
775 * | |
776 * define MPI_GET_PROCESSOR_LINE_SIZE_DEFINED so the generic functions | |
777 * below aren't compiled. | |
778 * | |
779 */ | |
780 | |
781 | |
782 /* target.mk can define MPI_CACHE_LINE_SIZE if it's common for the family or | |
783 * OS */ | |
784 #if defined(MPI_CACHE_LINE_SIZE) && !defined(MPI_GET_PROCESSOR_LINE_SIZE_DEFINED) | |
785 | |
786 unsigned long | |
787 s_mpi_getProcessorLineSize() | |
788 { | |
789 return MPI_CACHE_LINE_SIZE; | |
790 } | |
791 #define MPI_GET_PROCESSOR_LINE_SIZE_DEFINED 1 | |
792 #endif | |
793 | |
794 | |
795 /* If no way to get the processor cache line size has been defined, assume | |
796 * it's 32 bytes (most common value, does not significantly impact performance) | |
797 */ | |
798 #ifndef MPI_GET_PROCESSOR_LINE_SIZE_DEFINED | |
/*
 * Generic fallback: no processor specific probe and no build-time value are
 * available, so report the assumed line size of 32 bytes.
 */
unsigned long
s_mpi_getProcessorLineSize()
{
    const unsigned long assumedLineSize = 32;

    return assumedLineSize;
}
804 #endif | |
805 | |
806 #ifdef TEST_IT | |
807 #include <stdio.h> | |
808 | |
/*
 * Stand-alone test driver (built only when TEST_IT is defined): print the
 * line size reported by s_mpi_getProcessorLineSize().
 */
int main(void)
{
    /* the function returns unsigned long, so print it with %lu */
    printf("line size = %lu\n", s_mpi_getProcessorLineSize());
    return 0;
}
813 #endif |