comparison nss/lib/freebl/mpi/mpcpucache.c @ 0:1e5118fa0cb1

This is NSS with a CMake build system. To compile a static NSS library for Windows we've used the Chromium-NSS fork and added a CMake build system to compile it statically for Windows. See README.chromium for the Chromium changes and README.trustbridge for our modifications.
author Andre Heinecke <andre.heinecke@intevation.de>
date Mon, 28 Jul 2014 10:47:06 +0200
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:1e5118fa0cb1
1 /* This Source Code Form is subject to the terms of the Mozilla Public
2 * License, v. 2.0. If a copy of the MPL was not distributed with this
3 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
4
5 #include "mpi.h"
6
7 /*
8 * This file implements a single function: s_mpi_getProcessorLineSize();
9 * s_mpi_getProcessorLineSize() returns the size in bytes of the cache line
10 * if a cache exists, or zero if there is no cache. If more than one
11 * cache line exists, it should return the smallest line size (which is
12 * usually the L1 cache).
13 *
14 * mp_modexp uses this information to make sure that private key information
15 * isn't being leaked through the cache.
16 *
17 * Currently the file returns good data for most modern x86 processors, and
18 * reasonable data on 64-bit ppc processors. All other processors are assumed
19 * to have a cache line size of 32 bytes unless modified by target.mk.
20 *
21 */
22
23 #if defined(i386) || defined(__i386) || defined(__X86__) || defined (_M_IX86) || defined(__x86_64__) || defined(__x86_64) || defined(_M_AMD64)
24 /* X86 processors have special instructions that tell us about the cache */
25 #include "string.h"
26
27 #if defined(__x86_64__) || defined(__x86_64) || defined(_M_AMD64)
28 #define AMD_64 1
29 #endif
30
31 /* Generic CPUID function */
32 #if defined(AMD_64)
33
34 #if defined(__GNUC__)
35
/*
 * Execute the CPUID instruction (x86-64 build, GCC-style inline assembly).
 *
 * op                 - value placed in eax before CPUID runs (the leaf number)
 * eax, ebx, ecx, edx - receive the contents of the four registers after CPUID
 *
 * NOTE(review): ecx is not given an input value here, so this helper is only
 * suitable for leaves that do not take a sub-leaf index in ecx.
 */
void freebl_cpuid(unsigned long op, unsigned long *eax,
                  unsigned long *ebx, unsigned long *ecx,
                  unsigned long *edx)
{
    __asm__("cpuid\n\t"
            : "=a" (*eax),      /* outputs are written straight through */
              "=b" (*ebx),      /* the caller's pointers */
              "=c" (*ecx),
              "=d" (*edx)
            : "0" (op));        /* input shares operand 0's register (eax) */
}
47
48 #elif defined(_MSC_VER)
49
50 #include <intrin.h>
51
/*
 * Execute the CPUID instruction (x86-64 build, MSVC intrinsic).
 *
 * op                 - CPUID leaf to query
 * eax, ebx, ecx, edx - receive the four values the intrinsic reports, in
 *                      that order
 */
void freebl_cpuid(unsigned long op, unsigned long *eax,
                  unsigned long *ebx, unsigned long *ecx,
                  unsigned long *edx)
{
    /* __cpuid() fills a four-element int array: eax, ebx, ecx, edx */
    int regs[4];

    __cpuid(regs, op);

    *eax = regs[0];
    *ebx = regs[1];
    *ecx = regs[2];
    *edx = regs[3];
}
64
65 #endif
66
67 #else /* !defined(AMD_64) */
68
69 /* x86 */
70
71 #if defined(__GNUC__)
/*
 * Execute the CPUID instruction (32-bit x86 build, GCC-style inline assembly).
 *
 * op                 - value placed in eax before CPUID runs (the leaf number)
 * eax, ebx, ecx, edx - receive the contents of the four registers after CPUID
 */
void freebl_cpuid(unsigned long op, unsigned long *eax,
                  unsigned long *ebx, unsigned long *ecx,
                  unsigned long *edx)
{
    /* GCC does not save the ebx PIC register on its own in this case, so
     * do it by hand. Use edi to hold the original ebx across the cpuid
     * instruction; the exchange afterwards restores ebx and leaves the
     * value cpuid produced in edi, which is what the "=D" output reads. */
    __asm__("mov %%ebx,%%edi\n\t"
            "cpuid\n\t"
            "xchgl %%ebx,%%edi\n\t"
            : "=a" (*eax),
              "=D" (*ebx),
              "=c" (*ecx),
              "=d" (*edx)
            : "0" (op));        /* input shares operand 0's register (eax) */
}
88
/*
 * try flipping a processor flag to determine CPU type
 *
 * flag - mask of the EFLAGS bit(s) to toggle.
 *
 * Returns the set of flag bits that actually changed (new flags XOR the
 * original flags); a result of zero means the processor did not let the
 * requested bit be flipped. The original flags are written back before
 * the function returns.
 */
static unsigned long changeFlag(unsigned long flag)
{
    unsigned long changedFlags, originalFlags;
    __asm__("pushfl\n\t"            /* get the flags */
            "popl %0\n\t"
            "movl %0,%1\n\t"        /* save the original flags */
            "xorl %2,%0\n\t"        /* flip the bit */
            "pushl %0\n\t"          /* set the flags */
            "popfl\n\t"
            "pushfl\n\t"            /* get the flags again (for return) */
            "popl %0\n\t"
            "pushl %1\n\t"          /* restore the original flags */
            "popfl\n\t"
            : "=r" (changedFlags),
              "=r" (originalFlags),
              "=r" (flag)
            : "2" (flag));          /* flag is tied to output operand 2 */
    return changedFlags ^ originalFlags;
}
111
112 #elif defined(_MSC_VER)
113
/*
 * windows versions of the above assembler
 */
/* wcpuid emits the two opcode bytes 0F A2 (the CPUID instruction) by hand */
#define wcpuid __asm __emit 0fh __asm __emit 0a2h
/*
 * Execute the CPUID instruction (32-bit x86 build, MSVC inline assembly).
 *
 * op                     - value loaded into eax before CPUID runs
 * Reax, Rebx, Recx, Redx - receive the contents of eax, ebx, ecx and edx
 *                          after CPUID
 */
void freebl_cpuid(unsigned long op, unsigned long *Reax,
                  unsigned long *Rebx, unsigned long *Recx, unsigned long *Redx)
{
    /* results are staged in locals because the asm block restores every
     * general-purpose register with popad before it ends */
    unsigned long Leax, Lebx, Lecx, Ledx;
    __asm {
        pushad
        mov eax,op
        wcpuid
        mov Leax,eax
        mov Lebx,ebx
        mov Lecx,ecx
        mov Ledx,edx
        popad
    }
    *Reax = Leax;
    *Rebx = Lebx;
    *Recx = Lecx;
    *Redx = Ledx;
}
137
/*
 * Try to toggle the EFLAGS bit(s) selected by 'flag' (MSVC inline assembly).
 *
 * Returns the flag bits that actually changed (flags read back XOR the
 * original flags); zero means the processor did not let the bit be flipped.
 * The original flags, kept on the stack during the test, are restored
 * before returning, as are eax and ebx.
 */
static unsigned long changeFlag(unsigned long flag)
{
    unsigned long changedFlags, originalFlags;
    __asm {
        push eax
        push ebx
        pushfd                  /* get the flags */
        pop eax
        push eax                /* save the flags on the stack */
        mov originalFlags,eax   /* save the original flags */
        mov ebx,flag
        xor eax,ebx             /* flip the bit */
        push eax                /* set the flags */
        popfd
        pushfd                  /* get the flags again (for return) */
        pop eax
        popfd                   /* restore the original flags */
        mov changedFlags,eax
        pop ebx
        pop eax
    }
    return changedFlags ^ originalFlags;
}
161 #endif
162
163 #endif
164
165 #if !defined(AMD_64)
166 #define AC_FLAG 0x40000
167 #define ID_FLAG 0x200000
168
169 /* 386 processors can't flip the AC_FLAG, intel AP Note AP-485 */
170 static int is386()
171 {
172 return changeFlag(AC_FLAG) == 0;
173 }
174
175 /* 486 processors can't flip the ID_FLAG, intel AP Note AP-485 */
176 static int is486()
177 {
178 return changeFlag(ID_FLAG) == 0;
179 }
180 #endif
181
182
183 /*
184 * table for Intel Cache.
185 * See Intel Application Note AP-485 for more information
186 */
187
/* storage type used for a CacheType value inside the table (one byte) */
typedef unsigned char CacheTypeEntry;

/* kinds of structure a cache descriptor byte can describe */
typedef enum {
    Cache_NONE    = 0,
    Cache_UNKNOWN = 1,
    Cache_TLB     = 2,
    Cache_TLBi    = 3,
    Cache_TLBd    = 4,
    Cache_Trace   = 5,
    Cache_L1      = 6,
    Cache_L1i     = 7,
    Cache_L1d     = 8,
    Cache_L2      = 9,
    Cache_L2i     = 10,
    Cache_L2d     = 11,
    Cache_L3      = 12,
    Cache_L3i     = 13,
    Cache_L3d     = 14
} CacheType;

/* one table row: what the descriptor describes and its line size in bytes.
 * A lineSize of 0 marks a row that carries no usable line size. */
struct _cache {
    CacheTypeEntry type;
    unsigned char lineSize;
};

/* indexed by descriptor byte value (0x00 - 0xff); every row names both
 * fields explicitly so no entry depends on implicit zero initialisation */
static const struct _cache CacheMap[256] = {
    /* 00 */ { Cache_NONE,    0  },
    /* 01 */ { Cache_TLBi,    0  },
    /* 02 */ { Cache_TLBi,    0  },
    /* 03 */ { Cache_TLBd,    0  },
    /* 04 */ { Cache_TLBd,    0  },
    /* 05 */ { Cache_UNKNOWN, 0  },
    /* 06 */ { Cache_L1i,     32 },
    /* 07 */ { Cache_UNKNOWN, 0  },
    /* 08 */ { Cache_L1i,     32 },
    /* 09 */ { Cache_UNKNOWN, 0  },
    /* 0a */ { Cache_L1d,     32 },
    /* 0b */ { Cache_UNKNOWN, 0  },
    /* 0c */ { Cache_L1d,     32 },
    /* 0d */ { Cache_UNKNOWN, 0  },
    /* 0e */ { Cache_UNKNOWN, 0  },
    /* 0f */ { Cache_UNKNOWN, 0  },
    /* 10 */ { Cache_UNKNOWN, 0  },
    /* 11 */ { Cache_UNKNOWN, 0  },
    /* 12 */ { Cache_UNKNOWN, 0  },
    /* 13 */ { Cache_UNKNOWN, 0  },
    /* 14 */ { Cache_UNKNOWN, 0  },
    /* 15 */ { Cache_UNKNOWN, 0  },
    /* 16 */ { Cache_UNKNOWN, 0  },
    /* 17 */ { Cache_UNKNOWN, 0  },
    /* 18 */ { Cache_UNKNOWN, 0  },
    /* 19 */ { Cache_UNKNOWN, 0  },
    /* 1a */ { Cache_UNKNOWN, 0  },
    /* 1b */ { Cache_UNKNOWN, 0  },
    /* 1c */ { Cache_UNKNOWN, 0  },
    /* 1d */ { Cache_UNKNOWN, 0  },
    /* 1e */ { Cache_UNKNOWN, 0  },
    /* 1f */ { Cache_UNKNOWN, 0  },
    /* 20 */ { Cache_UNKNOWN, 0  },
    /* 21 */ { Cache_UNKNOWN, 0  },
    /* 22 */ { Cache_L3,      64 },
    /* 23 */ { Cache_L3,      64 },
    /* 24 */ { Cache_UNKNOWN, 0  },
    /* 25 */ { Cache_L3,      64 },
    /* 26 */ { Cache_UNKNOWN, 0  },
    /* 27 */ { Cache_UNKNOWN, 0  },
    /* 28 */ { Cache_UNKNOWN, 0  },
    /* 29 */ { Cache_L3,      64 },
    /* 2a */ { Cache_UNKNOWN, 0  },
    /* 2b */ { Cache_UNKNOWN, 0  },
    /* 2c */ { Cache_L1d,     64 },
    /* 2d */ { Cache_UNKNOWN, 0  },
    /* 2e */ { Cache_UNKNOWN, 0  },
    /* 2f */ { Cache_UNKNOWN, 0  },
    /* 30 */ { Cache_L1i,     64 },
    /* 31 */ { Cache_UNKNOWN, 0  },
    /* 32 */ { Cache_UNKNOWN, 0  },
    /* 33 */ { Cache_UNKNOWN, 0  },
    /* 34 */ { Cache_UNKNOWN, 0  },
    /* 35 */ { Cache_UNKNOWN, 0  },
    /* 36 */ { Cache_UNKNOWN, 0  },
    /* 37 */ { Cache_UNKNOWN, 0  },
    /* 38 */ { Cache_UNKNOWN, 0  },
    /* 39 */ { Cache_L2,      64 },
    /* 3a */ { Cache_UNKNOWN, 0  },
    /* 3b */ { Cache_L2,      64 },
    /* 3c */ { Cache_L2,      64 },
    /* 3d */ { Cache_UNKNOWN, 0  },
    /* 3e */ { Cache_UNKNOWN, 0  },
    /* 3f */ { Cache_UNKNOWN, 0  },
    /* 40 */ { Cache_L2,      0  },
    /* 41 */ { Cache_L2,      32 },
    /* 42 */ { Cache_L2,      32 },
    /* 43 */ { Cache_L2,      32 },
    /* 44 */ { Cache_L2,      32 },
    /* 45 */ { Cache_L2,      32 },
    /* 46 */ { Cache_UNKNOWN, 0  },
    /* 47 */ { Cache_UNKNOWN, 0  },
    /* 48 */ { Cache_UNKNOWN, 0  },
    /* 49 */ { Cache_UNKNOWN, 0  },
    /* 4a */ { Cache_UNKNOWN, 0  },
    /* 4b */ { Cache_UNKNOWN, 0  },
    /* 4c */ { Cache_UNKNOWN, 0  },
    /* 4d */ { Cache_UNKNOWN, 0  },
    /* 4e */ { Cache_UNKNOWN, 0  },
    /* 4f */ { Cache_UNKNOWN, 0  },
    /* 50 */ { Cache_TLBi,    0  },
    /* 51 */ { Cache_TLBi,    0  },
    /* 52 */ { Cache_TLBi,    0  },
    /* 53 */ { Cache_UNKNOWN, 0  },
    /* 54 */ { Cache_UNKNOWN, 0  },
    /* 55 */ { Cache_UNKNOWN, 0  },
    /* 56 */ { Cache_UNKNOWN, 0  },
    /* 57 */ { Cache_UNKNOWN, 0  },
    /* 58 */ { Cache_UNKNOWN, 0  },
    /* 59 */ { Cache_UNKNOWN, 0  },
    /* 5a */ { Cache_UNKNOWN, 0  },
    /* 5b */ { Cache_TLBd,    0  },
    /* 5c */ { Cache_TLBd,    0  },
    /* 5d */ { Cache_TLBd,    0  },
    /* 5e */ { Cache_UNKNOWN, 0  },
    /* 5f */ { Cache_UNKNOWN, 0  },
    /* 60 */ { Cache_UNKNOWN, 0  },
    /* 61 */ { Cache_UNKNOWN, 0  },
    /* 62 */ { Cache_UNKNOWN, 0  },
    /* 63 */ { Cache_UNKNOWN, 0  },
    /* 64 */ { Cache_UNKNOWN, 0  },
    /* 65 */ { Cache_UNKNOWN, 0  },
    /* 66 */ { Cache_L1d,     64 },
    /* 67 */ { Cache_L1d,     64 },
    /* 68 */ { Cache_L1d,     64 },
    /* 69 */ { Cache_UNKNOWN, 0  },
    /* 6a */ { Cache_UNKNOWN, 0  },
    /* 6b */ { Cache_UNKNOWN, 0  },
    /* 6c */ { Cache_UNKNOWN, 0  },
    /* 6d */ { Cache_UNKNOWN, 0  },
    /* 6e */ { Cache_UNKNOWN, 0  },
    /* 6f */ { Cache_UNKNOWN, 0  },
    /* 70 */ { Cache_Trace,   1  },
    /* 71 */ { Cache_Trace,   1  },
    /* 72 */ { Cache_Trace,   1  },
    /* 73 */ { Cache_UNKNOWN, 0  },
    /* 74 */ { Cache_UNKNOWN, 0  },
    /* 75 */ { Cache_UNKNOWN, 0  },
    /* 76 */ { Cache_UNKNOWN, 0  },
    /* 77 */ { Cache_UNKNOWN, 0  },
    /* 78 */ { Cache_UNKNOWN, 0  },
    /* 79 */ { Cache_L2,      64 },
    /* 7a */ { Cache_L2,      64 },
    /* 7b */ { Cache_L2,      64 },
    /* 7c */ { Cache_L2,      64 },
    /* 7d */ { Cache_UNKNOWN, 0  },
    /* 7e */ { Cache_UNKNOWN, 0  },
    /* 7f */ { Cache_UNKNOWN, 0  },
    /* 80 */ { Cache_UNKNOWN, 0  },
    /* 81 */ { Cache_UNKNOWN, 0  },
    /* 82 */ { Cache_L2,      32 },
    /* 83 */ { Cache_L2,      32 },
    /* 84 */ { Cache_L2,      32 },
    /* 85 */ { Cache_L2,      32 },
    /* 86 */ { Cache_L2,      64 },
    /* 87 */ { Cache_L2,      64 },
    /* 88 */ { Cache_UNKNOWN, 0  },
    /* 89 */ { Cache_UNKNOWN, 0  },
    /* 8a */ { Cache_UNKNOWN, 0  },
    /* 8b */ { Cache_UNKNOWN, 0  },
    /* 8c */ { Cache_UNKNOWN, 0  },
    /* 8d */ { Cache_UNKNOWN, 0  },
    /* 8e */ { Cache_UNKNOWN, 0  },
    /* 8f */ { Cache_UNKNOWN, 0  },
    /* 90 */ { Cache_UNKNOWN, 0  },
    /* 91 */ { Cache_UNKNOWN, 0  },
    /* 92 */ { Cache_UNKNOWN, 0  },
    /* 93 */ { Cache_UNKNOWN, 0  },
    /* 94 */ { Cache_UNKNOWN, 0  },
    /* 95 */ { Cache_UNKNOWN, 0  },
    /* 96 */ { Cache_UNKNOWN, 0  },
    /* 97 */ { Cache_UNKNOWN, 0  },
    /* 98 */ { Cache_UNKNOWN, 0  },
    /* 99 */ { Cache_UNKNOWN, 0  },
    /* 9a */ { Cache_UNKNOWN, 0  },
    /* 9b */ { Cache_UNKNOWN, 0  },
    /* 9c */ { Cache_UNKNOWN, 0  },
    /* 9d */ { Cache_UNKNOWN, 0  },
    /* 9e */ { Cache_UNKNOWN, 0  },
    /* 9f */ { Cache_UNKNOWN, 0  },
    /* a0 */ { Cache_UNKNOWN, 0  },
    /* a1 */ { Cache_UNKNOWN, 0  },
    /* a2 */ { Cache_UNKNOWN, 0  },
    /* a3 */ { Cache_UNKNOWN, 0  },
    /* a4 */ { Cache_UNKNOWN, 0  },
    /* a5 */ { Cache_UNKNOWN, 0  },
    /* a6 */ { Cache_UNKNOWN, 0  },
    /* a7 */ { Cache_UNKNOWN, 0  },
    /* a8 */ { Cache_UNKNOWN, 0  },
    /* a9 */ { Cache_UNKNOWN, 0  },
    /* aa */ { Cache_UNKNOWN, 0  },
    /* ab */ { Cache_UNKNOWN, 0  },
    /* ac */ { Cache_UNKNOWN, 0  },
    /* ad */ { Cache_UNKNOWN, 0  },
    /* ae */ { Cache_UNKNOWN, 0  },
    /* af */ { Cache_UNKNOWN, 0  },
    /* b0 */ { Cache_TLBi,    0  },
    /* b1 */ { Cache_UNKNOWN, 0  },
    /* b2 */ { Cache_UNKNOWN, 0  },
    /* b3 */ { Cache_TLBd,    0  },
    /* b4 */ { Cache_UNKNOWN, 0  },
    /* b5 */ { Cache_UNKNOWN, 0  },
    /* b6 */ { Cache_UNKNOWN, 0  },
    /* b7 */ { Cache_UNKNOWN, 0  },
    /* b8 */ { Cache_UNKNOWN, 0  },
    /* b9 */ { Cache_UNKNOWN, 0  },
    /* ba */ { Cache_UNKNOWN, 0  },
    /* bb */ { Cache_UNKNOWN, 0  },
    /* bc */ { Cache_UNKNOWN, 0  },
    /* bd */ { Cache_UNKNOWN, 0  },
    /* be */ { Cache_UNKNOWN, 0  },
    /* bf */ { Cache_UNKNOWN, 0  },
    /* c0 */ { Cache_UNKNOWN, 0  },
    /* c1 */ { Cache_UNKNOWN, 0  },
    /* c2 */ { Cache_UNKNOWN, 0  },
    /* c3 */ { Cache_UNKNOWN, 0  },
    /* c4 */ { Cache_UNKNOWN, 0  },
    /* c5 */ { Cache_UNKNOWN, 0  },
    /* c6 */ { Cache_UNKNOWN, 0  },
    /* c7 */ { Cache_UNKNOWN, 0  },
    /* c8 */ { Cache_UNKNOWN, 0  },
    /* c9 */ { Cache_UNKNOWN, 0  },
    /* ca */ { Cache_UNKNOWN, 0  },
    /* cb */ { Cache_UNKNOWN, 0  },
    /* cc */ { Cache_UNKNOWN, 0  },
    /* cd */ { Cache_UNKNOWN, 0  },
    /* ce */ { Cache_UNKNOWN, 0  },
    /* cf */ { Cache_UNKNOWN, 0  },
    /* d0 */ { Cache_UNKNOWN, 0  },
    /* d1 */ { Cache_UNKNOWN, 0  },
    /* d2 */ { Cache_UNKNOWN, 0  },
    /* d3 */ { Cache_UNKNOWN, 0  },
    /* d4 */ { Cache_UNKNOWN, 0  },
    /* d5 */ { Cache_UNKNOWN, 0  },
    /* d6 */ { Cache_UNKNOWN, 0  },
    /* d7 */ { Cache_UNKNOWN, 0  },
    /* d8 */ { Cache_UNKNOWN, 0  },
    /* d9 */ { Cache_UNKNOWN, 0  },
    /* da */ { Cache_UNKNOWN, 0  },
    /* db */ { Cache_UNKNOWN, 0  },
    /* dc */ { Cache_UNKNOWN, 0  },
    /* dd */ { Cache_UNKNOWN, 0  },
    /* de */ { Cache_UNKNOWN, 0  },
    /* df */ { Cache_UNKNOWN, 0  },
    /* e0 */ { Cache_UNKNOWN, 0  },
    /* e1 */ { Cache_UNKNOWN, 0  },
    /* e2 */ { Cache_UNKNOWN, 0  },
    /* e3 */ { Cache_UNKNOWN, 0  },
    /* e4 */ { Cache_UNKNOWN, 0  },
    /* e5 */ { Cache_UNKNOWN, 0  },
    /* e6 */ { Cache_UNKNOWN, 0  },
    /* e7 */ { Cache_UNKNOWN, 0  },
    /* e8 */ { Cache_UNKNOWN, 0  },
    /* e9 */ { Cache_UNKNOWN, 0  },
    /* ea */ { Cache_UNKNOWN, 0  },
    /* eb */ { Cache_UNKNOWN, 0  },
    /* ec */ { Cache_UNKNOWN, 0  },
    /* ed */ { Cache_UNKNOWN, 0  },
    /* ee */ { Cache_UNKNOWN, 0  },
    /* ef */ { Cache_UNKNOWN, 0  },
    /* f0 */ { Cache_UNKNOWN, 0  },
    /* f1 */ { Cache_UNKNOWN, 0  },
    /* f2 */ { Cache_UNKNOWN, 0  },
    /* f3 */ { Cache_UNKNOWN, 0  },
    /* f4 */ { Cache_UNKNOWN, 0  },
    /* f5 */ { Cache_UNKNOWN, 0  },
    /* f6 */ { Cache_UNKNOWN, 0  },
    /* f7 */ { Cache_UNKNOWN, 0  },
    /* f8 */ { Cache_UNKNOWN, 0  },
    /* f9 */ { Cache_UNKNOWN, 0  },
    /* fa */ { Cache_UNKNOWN, 0  },
    /* fb */ { Cache_UNKNOWN, 0  },
    /* fc */ { Cache_UNKNOWN, 0  },
    /* fd */ { Cache_UNKNOWN, 0  },
    /* fe */ { Cache_UNKNOWN, 0  },
    /* ff */ { Cache_UNKNOWN, 0  }
};
470
471
472 /*
473 * use the above table to determine the CacheEntryLineSize.
474 */
475 static void
476 getIntelCacheEntryLineSize(unsigned long val, int *level,
477 unsigned long *lineSize)
478 {
479 CacheType type;
480
481 type = CacheMap[val].type;
482 /* only interested in data caches */
483 /* NOTE val = 0x40 is a special value that means no L2 or L3 cache.
484 * this data check has the side effect of rejecting that entry. If
485 * that wasn't the case, we could have to reject it explicitly */
486 if (CacheMap[val].lineSize == 0) {
487 return;
488 }
489 /* look at the caches, skip types we aren't interested in.
490 * if we already have a value for a lower level cache, skip the
491 * current entry */
492 if ((type == Cache_L1)|| (type == Cache_L1d)) {
493 *level = 1;
494 *lineSize = CacheMap[val].lineSize;
495 } else if ((*level >= 2) && ((type == Cache_L2) || (type == Cache_L2d))) {
496 *level = 2;
497 *lineSize = CacheMap[val].lineSize;
498 } else if ((*level >= 3) && ((type == Cache_L3) || (type == Cache_L3d))) {
499 *level = 3;
500 *lineSize = CacheMap[val].lineSize;
501 }
502 return;
503 }
504
505
/*
 * Split a 32-bit register value into its four descriptor bytes and feed
 * each one, most significant byte first, to getIntelCacheEntryLineSize().
 *
 * val      - register contents holding up to four descriptor bytes
 * level    - in/out: passed through to getIntelCacheEntryLineSize()
 * lineSize - in/out: passed through to getIntelCacheEntryLineSize()
 */
static void
getIntelRegisterCacheLineSize(unsigned long val,
                              int *level, unsigned long *lineSize)
{
    int shift;

    for (shift = 24; shift >= 0; shift -= 8) {
        getIntelCacheEntryLineSize((val >> shift) & 0xff, level, lineSize);
    }
}
515
/*
 * Determine the data cache line size from the cache descriptors returned
 * by cpuid command '2'.
 *
 * cpuidLevel - highest basic cpuid command supported by the processor
 *              (eax as returned by cpuid command '0').
 *
 * returns '0' if no recognized cache is found, or if the cache
 * information is not supported by this processor; otherwise the line
 * size in bytes of the lowest level data cache found.
 */
static unsigned long
getIntelCacheLineSize(int cpuidLevel)
{
    int level = 4;              /* above L3: any recognized level wins */
    unsigned long lineSize = 0;
    unsigned long eax, ebx, ecx, edx;
    int repeat, count;

    if (cpuidLevel < 2) {
        return 0;
    }

    /* command '2' of the cpuid is intel's cache info call. Each byte of the
     * 4 registers contain a potential descriptor for the cache. The CacheMap
     * table maps the cache entry with the processor cache. Register 'al'
     * contains a count value that cpuid '2' needs to be called in order to
     * find all the cache descriptors. Only registers with the high bit set
     * to 'zero' have valid descriptors. This code loops through all the
     * required calls to cpuid '2' and passes any valid descriptors it finds
     * to the getIntelRegisterCacheLineSize code, which breaks the registers
     * down into their component descriptors. In the end the lineSize of the
     * lowest level cache data cache is returned. */
    freebl_cpuid(2, &eax, &ebx, &ecx, &edx);
    /* 'al' is the whole low byte of eax, so take all eight bits of it */
    repeat = eax & 0xff;
    for (count = 0; count < repeat; count++) {
        if ((eax & 0x80000000) == 0) {
            /* the low byte of eax is the count, not a descriptor */
            getIntelRegisterCacheLineSize(eax & 0xffffff00, &level, &lineSize);
        }
        if ((ebx & 0x80000000) == 0) {
            getIntelRegisterCacheLineSize(ebx, &level, &lineSize);
        }
        if ((ecx & 0x80000000) == 0) {
            getIntelRegisterCacheLineSize(ecx, &level, &lineSize);
        }
        if ((edx & 0x80000000) == 0) {
            getIntelRegisterCacheLineSize(edx, &level, &lineSize);
        }
        /* fetch the next set of descriptors unless this was the last pass */
        if (count+1 != repeat) {
            freebl_cpuid(2, &eax, &ebx, &ecx, &edx);
        }
    }
    return lineSize;
}
563
/*
 * Query the L1 data cache line size through the extended cpuid commands
 * defined by AMD (see "AMD Processor Recognition Application Note"
 * Publication 20734). Some other processors use the identical scheme
 * (see "Processor Recognition, Transmeta Corporation").
 *
 * cpuidLevel - value supplied by the caller is not used; the variable is
 *              overwritten with the extended cpuid level read here.
 *
 * returns '0' if the cache info is not supported by this processor.
 */
static unsigned long
getOtherCacheLineSize(unsigned long cpuidLevel)
{
    unsigned long eax, ebx, ecx, edx;

    /* ask for the highest extended cpuid command available */
    freebl_cpuid(0x80000000, &eax, &ebx, &ecx, &edx);
    cpuidLevel = eax;

    if (cpuidLevel < 0x80000005) {
        return 0;
    }

    freebl_cpuid(0x80000005, &eax, &ebx, &ecx, &edx);
    /* low byte of ecx: line size, L1 Data Cache */
    return ecx & 0xff;
}
587
/*
 * Known CPU vendor identification strings. Each macro is the index of the
 * string that follows it, so manMap[<MACRO>] is that vendor's string.
 */
static const char * const manMap[] = {
#define INTEL     0
    "GenuineIntel",
#define AMD       1
    "AuthenticAMD",
#define CYRIX     2
    "CyrixInstead",
#define CENTAUR   3
    "CentaurHauls",
#define NEXGEN    4
    "NexGenDriven",
#define TRANSMETA 5
    "GenuineTMx86",
#define RISE      6
    "RiseRiseRise",
#define UMC       7
    "UMC UMC UMC ",
#define SIS       8
    "Sis Sis Sis ",
#define NATIONAL  9
    "Geode by NSC",
};

static const int n_manufacturers = sizeof(manMap)/sizeof(manMap[0]);


/* one past the last valid manMap index, so it can never match a real vendor */
#define MAN_UNKNOWN 10
615
616 #if !defined(AMD_64)
/* bit 26 of edx from cpuid command '1' */
#define SSE2_FLAG (1<<26)

/*
 * Report whether the processor advertises SSE2.
 *
 * Returns 1 when cpuid command '1' reports SSE2_FLAG in edx, 0 otherwise
 * (including 386/486 class processors and processors whose cpuid does not
 * go beyond command '0').
 */
unsigned long
s_mpi_is_sse2(void)
{
    unsigned long eax, ebx, ecx, edx;

    /* these processors have no cpuid instruction to ask */
    if (is386() || is486()) {
        return 0;
    }

    /* eax from command '0' is the highest cpuid command supported */
    freebl_cpuid(0, &eax, &ebx, &ecx, &edx);

    /* has no SSE2 extensions */
    if (eax == 0) {
        return 0;
    }

    freebl_cpuid(1, &eax, &ebx, &ecx, &edx);
    return (edx & SSE2_FLAG) == SSE2_FLAG;
}
654 #endif
655
656 unsigned long
657 s_mpi_getProcessorLineSize()
658 {
659 unsigned long eax, ebx, ecx, edx;
660 unsigned long cpuidLevel;
661 unsigned long cacheLineSize = 0;
662 int manufacturer = MAN_UNKNOWN;
663 int i;
664 char string[65];
665
666 #if !defined(AMD_64)
667 if (is386()) {
668 return 0; /* 386 had no cache */
669 } if (is486()) {
670 return 32; /* really? need more info */
671 }
672 #endif
673
674 /* Pentium, cpuid command is available */
675 freebl_cpuid(0, &eax, &ebx, &ecx, &edx);
676 cpuidLevel = eax;
677 /* string holds the CPU's manufacturer ID string - a twelve
678 * character ASCII string stored in ebx, edx, ecx, and
679 * the 32-bit extended feature flags are in edx, ecx.
680 */
681 *(int *)string = ebx;
682 *(int *)&string[4] = (int)edx;
683 *(int *)&string[8] = (int)ecx;
684 string[12] = 0;
685
686 manufacturer = MAN_UNKNOWN;
687 for (i=0; i < n_manufacturers; i++) {
688 if ( strcmp(manMap[i],string) == 0) {
689 manufacturer = i;
690 }
691 }
692
693 if (manufacturer == INTEL) {
694 cacheLineSize = getIntelCacheLineSize(cpuidLevel);
695 } else {
696 cacheLineSize = getOtherCacheLineSize(cpuidLevel);
697 }
698 /* doesn't support cache info based on cpuid. This means
699 * an old pentium class processor, which have cache lines of
700 * 32. If we learn differently, we can use a switch based on
701 * the Manufacturer id */
702 if (cacheLineSize == 0) {
703 cacheLineSize = 32;
704 }
705 return cacheLineSize;
706 }
707 #define MPI_GET_PROCESSOR_LINE_SIZE_DEFINED 1
708 #endif
709
710 #if defined(__ppc64__)
711 /*
712 * Sigh, The PPC has some really nice features to help us determine cache
713 * size, since it had lots of direct control functions to do so. The POWER
714 * processor even has an instruction to do this, but it was dropped in
715 * PowerPC. Unfortunately most of them are not available in user mode.
716 *
717 * The dcbz function would be a great way to determine cache line size except
718 * 1) it only works on write-back memory (it throws an exception otherwise),
719 * and 2) because so many mac programs 'knew' the processor cache size was
720 * 32 bytes, they used this instruction as a fast 'zero 32 bytes'. Now the new
721 * G5 processor has 128 byte cache, but dcbz only clears 32 bytes to keep
722 * these programs happy. dcbzl work if 64 bit instructions are supported.
723 * If you know 64 bit instructions are supported, and that stack is
724 * write-back, you can use this code.
725 */
726 #include "memory.h"
727
/*
 * clear the cache line that contains 'array'
 *
 * Issues the dcbzl instruction on the given address. The caller in this
 * file inspects the buffer afterwards to see how many bytes were zeroed.
 */
static inline void dcbzl(char *array)
{
    /* explicit register variable: the address is placed in r2 */
    register char *a asm("r2") = array;
    /* operand 0 is both output and (via the "0" constraint) input;
     * __volatile__ keeps the instruction from being optimised away */
    __asm__ __volatile__( "dcbzl %0,r0" : "=r" (a): "0"(a) );
}
734
735
/* round address x up to the next multiple of y (y must be a power of two) */
#define PPC_DO_ALIGN(x,y) ((char *)\
                        ((((long long) (x))+((y)-1))&~((y)-1)))

/* largest cache line size the probe below can detect */
#define PPC_MAX_LINE_SIZE 256

/*
 * Measure the cache line size on ppc64 by clearing one cache block with
 * dcbzl and checking how much of a 0xff-filled buffer was zeroed.
 *
 * Returns the detected size in bytes (a power of two no larger than
 * PPC_MAX_LINE_SIZE), or 0 if no byte of the buffer was cleared.
 */
unsigned long
s_mpi_getProcessorLineSize()
{
    char scratch[2*PPC_MAX_LINE_SIZE+1];
    /* start on a maximum line size boundary so the cleared block is known
     * to begin at the first probed address */
    char *probe = PPC_DO_ALIGN(scratch, PPC_MAX_LINE_SIZE);
    int candidate = PPC_MAX_LINE_SIZE;

    /* fill with ones, then let dcbzl zero one cache block */
    memset(probe, 0xff, PPC_MAX_LINE_SIZE);
    dcbzl(probe);

    /* walk down through the powers of two: the first candidate whose
     * last byte was cleared is the block size */
    while (candidate != 0) {
        if (probe[candidate-1] == 0) {
            return candidate;
        }
        candidate = candidate/2;
    }
    return 0;
}
763
764 #define MPI_GET_PROCESSOR_LINE_SIZE_DEFINED 1
765 #endif
766
767
768 /*
769 * put other processor and platform specific cache code here
770 * return the smallest cache line size in bytes on the processor
771 * (usually the L1 cache). If the OS has a call, this would be
772 * a great place to put it.
773 *
774 * If there is no cache, return 0;
775 *
776 * define MPI_GET_PROCESSOR_LINE_SIZE_DEFINED so the generic functions
777 * below aren't compiled.
778 *
779 */
780
781
782 /* target.mk can define MPI_CACHE_LINE_SIZE if it's common for the family or
783 * OS */
784 #if defined(MPI_CACHE_LINE_SIZE) && !defined(MPI_GET_PROCESSOR_LINE_SIZE_DEFINED)
785
786 unsigned long
787 s_mpi_getProcessorLineSize()
788 {
789 return MPI_CACHE_LINE_SIZE;
790 }
791 #define MPI_GET_PROCESSOR_LINE_SIZE_DEFINED 1
792 #endif
793
794
795 /* If no way to get the processor cache line size has been defined, assume
796 * it's 32 bytes (most common value, does not significantly impact performance)
797 */
798 #ifndef MPI_GET_PROCESSOR_LINE_SIZE_DEFINED
/*
 * Generic fallback used when no other variant was compiled in: assume a
 * 32 byte cache line.
 */
unsigned long
s_mpi_getProcessorLineSize()
{
    const unsigned long assumedLineSize = 32;

    return assumedLineSize;
}
804 #endif
805
806 #ifdef TEST_IT
807 #include <stdio.h>
808
/*
 * Stand-alone test driver (built only with TEST_IT): print the cache line
 * size reported by s_mpi_getProcessorLineSize().
 */
int
main(void)
{
    /* the function returns unsigned long, so print it with %lu */
    printf("line size = %lu\n", s_mpi_getProcessorLineSize());
    return 0;
}
813 #endif
This site is hosted by Intevation GmbH (Datenschutzerklärung und Impressum | Privacy Policy and Imprint)