Mercurial > trustbridge > nss-cmake-static
comparison nss/lib/freebl/mpi/mpi_x86_asm.c @ 0:1e5118fa0cb1
This is NSS with a CMake build system
To compile NSS as a static library for Windows, we took the
Chromium-NSS fork and added a CMake build system that builds
it statically. See README.chromium for the Chromium
changes and README.trustbridge for our modifications.
author | Andre Heinecke <andre.heinecke@intevation.de> |
---|---|
date | Mon, 28 Jul 2014 10:47:06 +0200 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:1e5118fa0cb1 |
---|---|
1 /* | |
2 * mpi_x86_asm.c - MSVC inline assembly implementation of s_mpv_ functions. | |
3 * | |
4 * This Source Code Form is subject to the terms of the Mozilla Public | |
5 * License, v. 2.0. If a copy of the MPL was not distributed with this | |
6 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ | |
7 | |
8 #include "mpi-priv.h" | |
9 | |
10 static int is_sse = -1; /* cached CPU probe: negative = not probed yet, 0 = use the plain x86 path, > 0 = use the SSE2 path */ | |
11 extern unsigned long s_mpi_is_sse2(); /* defined outside this file; each routine below stores its return value in is_sse when is_sse is negative */ | |
12 | |
13 /* | |
14 * s_mpv_mul_d: multiply each of the a_len digits at a by b, add the | |
15 * running carry, and store the low 32 bits of each result to c. The | |
16 * final carry is stored to the digit of c after the last result digit. | |
17 * Dispatch: is_sse == 0 runs the x86 path, is_sse > 0 the SSE2 path; a | |
18 * negative is_sse is first replaced by the result of s_mpi_is_sse2(). | |
19 * x86 path frame (the SSE2 path reserves no locals and saves only edi | |
20 * at ebp - 4 and esi at ebp - 8): | |
21 * ebp - 40: caller's ebx | |
22 * ebp - 36: caller's esi | |
23 * ebp - 32: caller's edi | |
24 * ebp - 28 .. ebp - 4: reserved by sub esp,28; never accessed | |
25 * ebp + 0: caller's ebp | |
26 * ebp + 4: return address | |
27 * ebp + 8: a argument | |
28 * ebp + 12: a_len argument | |
29 * ebp + 16: b argument | |
30 * ebp + 20: c argument | |
31 * registers (x86 path; SSE2 path: mm0 = product, mm1 = b, mm2 = carry): | |
32 * edx:eax: 64-bit product a_i * b plus carry (inline ax means eax) | |
33 * ebx: carry | |
34 * ecx: a_len (counts down to 0) | |
35 * esi: a ptr; edi: c ptr | |
36 */ | |
37 __declspec(naked) void | |
38 s_mpv_mul_d(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c) | |
39 { | |
40 __asm { | |
41 mov eax, is_sse | |
42 cmp eax, 0 | |
43 je s_mpv_mul_d_x86 | |
44 jg s_mpv_mul_d_sse2 | |
45 call s_mpi_is_sse2 | |
46 mov is_sse, eax | |
47 cmp eax, 0 | |
48 jg s_mpv_mul_d_sse2 | |
49 s_mpv_mul_d_x86: | |
50 push ebp | |
51 mov ebp,esp | |
52 sub esp,28 | |
53 push edi | |
54 push esi | |
55 push ebx | |
56 mov ebx,0 ; carry = 0 | |
57 mov ecx,[ebp+12] ; ecx = a_len | |
58 mov edi,[ebp+20] | |
59 cmp ecx,0 | |
60 je L_2 ; jmp if a_len == 0 | |
61 mov esi,[ebp+8] ; esi = a | |
62 cld | |
63 L_1: | |
64 lodsd ; eax = [ds:esi]; esi += 4 | |
65 mov edx,[ebp+16] ; edx = b | |
66 mul edx ; edx:eax = Phi:Plo = a_i * b | |
67 | |
68 add eax,ebx ; add carry (ebx) to edx:eax | |
69 adc edx,0 | |
70 mov ebx,edx ; high half of product becomes next carry | |
71 | |
72 stosd ; [es:edi] = ax; edi += 4; | |
73 dec ecx ; --a_len | |
74 jnz L_1 ; jmp if a_len != 0 | |
75 L_2: | |
76 mov [edi],ebx ; *c = carry | |
77 pop ebx | |
78 pop esi | |
79 pop edi | |
80 leave | |
81 ret | |
82 nop | |
83 s_mpv_mul_d_sse2: | |
84 push ebp | |
85 mov ebp, esp | |
86 push edi | |
87 push esi | |
88 psubq mm2, mm2 ; carry = 0 | |
89 mov ecx, [ebp+12] ; ecx = a_len | |
90 movd mm1, [ebp+16] ; mm1 = b | |
91 mov edi, [ebp+20] | |
92 cmp ecx, 0 | |
93 je L_6 ; jmp if a_len == 0 | |
94 mov esi, [ebp+8] ; esi = a | |
95 cld | |
96 L_5: | |
97 movd mm0, [esi] ; mm0 = *a++ | |
98 add esi, 4 | |
99 pmuludq mm0, mm1 ; mm0 = b * *a++ | |
100 paddq mm2, mm0 ; add the carry | |
101 movd [edi], mm2 ; store the 32bit result | |
102 add edi, 4 | |
103 psrlq mm2, 32 ; save the carry | |
104 dec ecx ; --a_len | |
105 jnz L_5 ; jmp if a_len != 0 | |
106 L_6: | |
107 movd [edi], mm2 ; *c = carry | |
108 emms | |
109 pop esi | |
110 pop edi | |
111 leave | |
112 ret | |
113 nop | |
114 } | |
115 } | |
116 | |
117 /* | |
118 * s_mpv_mul_d_add: for each of the a_len digits at a, compute | |
119 * a_i * b + carry + the digit currently in c, and store the low 32 bits | |
120 * back to c. The final carry is stored (not added) to the next c digit. | |
121 * Dispatch: is_sse == 0 runs the x86 path, is_sse > 0 the SSE2 path; a | |
122 * negative is_sse is first replaced by the result of s_mpi_is_sse2(). | |
123 * x86 path frame (the SSE2 path reserves no locals and saves only edi | |
124 * at ebp - 4 and esi at ebp - 8): | |
125 * ebp - 40: caller's ebx | |
126 * ebp - 36: caller's esi | |
127 * ebp - 32: caller's edi | |
128 * ebp - 28 .. ebp - 4: reserved by sub esp,28; never accessed | |
129 * ebp + 0: caller's ebp | |
130 * ebp + 4: return address | |
131 * ebp + 8: a argument | |
132 * ebp + 12: a_len argument | |
133 * ebp + 16: b argument | |
134 * ebp + 20: c argument | |
135 * registers (x86 path; SSE2 path: mm1 = b, mm2 = carry, mm0 = product | |
136 * and then the c digit, so the second paddq there adds the c digit): | |
137 * edx:eax: 64-bit sum a_i * b + carry + c digit (inline ax means eax) | |
138 * ebx: carry; also briefly holds the c digit being added | |
139 * ecx: a_len (counts down to 0); esi: a ptr; edi: c ptr | |
140 */ | |
141 __declspec(naked) void | |
142 s_mpv_mul_d_add(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c) | |
143 { | |
144 __asm { | |
145 mov eax, is_sse | |
146 cmp eax, 0 | |
147 je s_mpv_mul_d_add_x86 | |
148 jg s_mpv_mul_d_add_sse2 | |
149 call s_mpi_is_sse2 | |
150 mov is_sse, eax | |
151 cmp eax, 0 | |
152 jg s_mpv_mul_d_add_sse2 | |
153 s_mpv_mul_d_add_x86: | |
154 push ebp | |
155 mov ebp,esp | |
156 sub esp,28 | |
157 push edi | |
158 push esi | |
159 push ebx | |
160 mov ebx,0 ; carry = 0 | |
161 mov ecx,[ebp+12] ; ecx = a_len | |
162 mov edi,[ebp+20] | |
163 cmp ecx,0 | |
164 je L_11 ; jmp if a_len == 0 | |
165 mov esi,[ebp+8] ; esi = a | |
166 cld | |
167 L_10: | |
168 lodsd ; eax = [ds:esi]; esi += 4 | |
169 mov edx,[ebp+16] ; edx = b | |
170 mul edx ; edx:eax = Phi:Plo = a_i * b | |
171 | |
172 add eax,ebx ; add carry (ebx) to edx:eax | |
173 adc edx,0 | |
174 mov ebx,[edi] ; add in current word from *c | |
175 add eax,ebx | |
176 adc edx,0 | |
177 mov ebx,edx ; high half of product becomes next carry | |
178 | |
179 stosd ; [es:edi] = ax; edi += 4; | |
180 dec ecx ; --a_len | |
181 jnz L_10 ; jmp if a_len != 0 | |
182 L_11: | |
183 mov [edi],ebx ; *c = carry | |
184 pop ebx | |
185 pop esi | |
186 pop edi | |
187 leave | |
188 ret | |
189 nop | |
190 s_mpv_mul_d_add_sse2: | |
191 push ebp | |
192 mov ebp, esp | |
193 push edi | |
194 push esi | |
195 psubq mm2, mm2 ; carry = 0 | |
196 mov ecx, [ebp+12] ; ecx = a_len | |
197 movd mm1, [ebp+16] ; mm1 = b | |
198 mov edi, [ebp+20] | |
199 cmp ecx, 0 | |
200 je L_16 ; jmp if a_len == 0 | |
201 mov esi, [ebp+8] ; esi = a | |
202 cld | |
203 L_15: | |
204 movd mm0, [esi] ; mm0 = *a++ | |
205 add esi, 4 | |
206 pmuludq mm0, mm1 ; mm0 = b * *a++ | |
207 paddq mm2, mm0 ; add the carry | |
208 movd mm0, [edi] | |
209 paddq mm2, mm0 ; add the carry | |
210 movd [edi], mm2 ; store the 32bit result | |
211 add edi, 4 | |
212 psrlq mm2, 32 ; save the carry | |
213 dec ecx ; --a_len | |
214 jnz L_15 ; jmp if a_len != 0 | |
215 L_16: | |
216 movd [edi], mm2 ; *c = carry | |
217 emms | |
218 pop esi | |
219 pop edi | |
220 leave | |
221 ret | |
222 nop | |
223 } | |
224 } | |
225 | |
226 /* | |
227 * s_mpv_mul_d_add_prop: for each of the a_len digits at a, compute | |
228 * a_i * b + carry + the digit currently in c and store the low 32 bits | |
229 * back to c. A nonzero final carry is then added into the following c | |
230 * digits, continuing for as long as each addition carries out; no | |
231 * length limit on c is checked here. | |
232 * Dispatch: is_sse == 0 runs the x86 path, is_sse > 0 the SSE2 path; a | |
233 * negative is_sse is first replaced by the result of s_mpi_is_sse2(). | |
234 * x86 path frame (the SSE2 path reserves no locals; it saves edi, esi | |
235 * and ebx at ebp - 4, ebp - 8 and ebp - 12): | |
236 * ebp - 40: caller's ebx | |
237 * ebp - 36: caller's esi | |
238 * ebp - 32: caller's edi | |
239 * ebp - 28 .. ebp - 4: reserved by sub esp,28; never accessed | |
240 * ebp + 0: caller's ebp; ebp + 4: return address | |
241 * ebp + 8: a argument; ebp + 12: a_len argument | |
242 * ebp + 16: b argument; ebp + 20: c argument | |
243 * registers (x86 path; SSE2 loop: mm0 = product, mm1 = b, mm2 = carry, | |
244 * mm3 = c digit; its carry propagation uses eax, ebx and edi as below): | |
245 * edx:eax: 64-bit sum a_i * b + carry + c digit (inline ax means eax) | |
246 * ebx: carry; also briefly holds the c digit being added | |
247 * ecx: a_len (counts down to 0) | |
248 * esi: a ptr; edi: c ptr | |
249 */ | |
250 __declspec(naked) void | |
251 s_mpv_mul_d_add_prop(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c) | |
252 { | |
253 __asm { | |
254 mov eax, is_sse | |
255 cmp eax, 0 | |
256 je s_mpv_mul_d_add_prop_x86 | |
257 jg s_mpv_mul_d_add_prop_sse2 | |
258 call s_mpi_is_sse2 | |
259 mov is_sse, eax | |
260 cmp eax, 0 | |
261 jg s_mpv_mul_d_add_prop_sse2 | |
262 s_mpv_mul_d_add_prop_x86: | |
263 push ebp | |
264 mov ebp,esp | |
265 sub esp,28 | |
266 push edi | |
267 push esi | |
268 push ebx | |
269 mov ebx,0 ; carry = 0 | |
270 mov ecx,[ebp+12] ; ecx = a_len | |
271 mov edi,[ebp+20] | |
272 cmp ecx,0 | |
273 je L_21 ; jmp if a_len == 0 | |
274 cld | |
275 mov esi,[ebp+8] ; esi = a | |
276 L_20: | |
277 lodsd ; eax = [ds:esi]; esi += 4 | |
278 mov edx,[ebp+16] ; edx = b | |
279 mul edx ; edx:eax = Phi:Plo = a_i * b | |
280 | |
281 add eax,ebx ; add carry (ebx) to edx:eax | |
282 adc edx,0 | |
283 mov ebx,[edi] ; add in current word from *c | |
284 add eax,ebx | |
285 adc edx,0 | |
286 mov ebx,edx ; high half of product becomes next carry | |
287 | |
288 stosd ; [es:edi] = ax; edi += 4; | |
289 dec ecx ; --a_len | |
290 jnz L_20 ; jmp if a_len != 0 | |
291 L_21: | |
292 cmp ebx,0 ; is carry zero? | |
293 jz L_23 | |
294 mov eax,[edi] ; add in current word from *c | |
295 add eax,ebx | |
296 stosd ; [es:edi] = ax; edi += 4; | |
297 jnc L_23 | |
298 L_22: | |
299 mov eax,[edi] ; add in current word from *c | |
300 adc eax,0 | |
301 stosd ; [es:edi] = ax; edi += 4; | |
302 jc L_22 | |
303 L_23: | |
304 pop ebx | |
305 pop esi | |
306 pop edi | |
307 leave | |
308 ret | |
309 nop | |
310 s_mpv_mul_d_add_prop_sse2: | |
311 push ebp | |
312 mov ebp, esp | |
313 push edi | |
314 push esi | |
315 push ebx | |
316 psubq mm2, mm2 ; carry = 0 | |
317 mov ecx, [ebp+12] ; ecx = a_len | |
318 movd mm1, [ebp+16] ; mm1 = b | |
319 mov edi, [ebp+20] | |
320 cmp ecx, 0 | |
321 je L_26 ; jmp if a_len == 0 | |
322 mov esi, [ebp+8] ; esi = a | |
323 cld | |
324 L_25: | |
325 movd mm0, [esi] ; mm0 = *a++ | |
326 movd mm3, [edi] ; fetch the sum | |
327 add esi, 4 | |
328 pmuludq mm0, mm1 ; mm0 = b * *a++ | |
329 paddq mm2, mm0 ; add the carry | |
330 paddq mm2, mm3 ; add *c++ | |
331 movd [edi], mm2 ; store the 32bit result | |
332 add edi, 4 | |
333 psrlq mm2, 32 ; save the carry | |
334 dec ecx ; --a_len | |
335 jnz L_25 ; jmp if a_len != 0 | |
336 L_26: | |
337 movd ebx, mm2 | |
338 cmp ebx, 0 ; is carry zero? | |
339 jz L_28 | |
340 mov eax, [edi] | |
341 add eax, ebx | |
342 stosd | |
343 jnc L_28 | |
344 L_27: | |
345 mov eax, [edi] ; add in current word from *c | |
346 adc eax, 0 | |
347 stosd ; [es:edi] = ax; edi += 4; | |
348 jc L_27 | |
349 L_28: | |
350 emms | |
351 pop ebx | |
352 pop esi | |
353 pop edi | |
354 leave | |
355 ret | |
356 nop | |
357 } | |
358 } | |
359 | |
360 /* | |
361 * s_mpv_sqr_add_prop: for each of the a_len digits at a, add the 64-bit | |
362 * square a_i * a_i plus the carry into the next two digits of sqrs. | |
363 * A nonzero final carry is then added into the following digits for as | |
364 * long as each addition carries out; no length limit is checked here. | |
365 * Dispatch: is_sse == 0 runs the x86 path, is_sse > 0 the SSE2 path; a | |
366 * negative is_sse is first replaced by the result of s_mpi_is_sse2(). | |
367 * x86 path frame (the SSE2 path reserves no locals; it saves edi, esi | |
368 * and ebx at ebp - 4, ebp - 8 and ebp - 12): | |
369 * ebp - 24: caller's ebx | |
370 * ebp - 20: caller's esi | |
371 * ebp - 16: caller's edi | |
372 * ebp - 12 .. ebp - 4: reserved by sub esp,12; never accessed | |
373 * ebp + 0: caller's ebp; ebp + 4: return address | |
374 * ebp + 8: a argument; ebp + 12: a_len argument | |
375 * ebp + 16: sqrs argument | |
376 * registers (x86 path; SSE2 loop: mm0 = square, mm2 = carry, mm3 = digit): | |
377 * edx:eax: square; ebx: carry, also scratch for the sqrs digits | |
378 * ecx: a_len (counts down to 0); esi: a ptr; edi: sqrs ptr | |
379 */ | |
380 __declspec(naked) void | |
381 s_mpv_sqr_add_prop(const mp_digit *a, mp_size a_len, mp_digit *sqrs) | |
382 { | |
383 __asm { | |
384 mov eax, is_sse | |
385 cmp eax, 0 | |
386 je s_mpv_sqr_add_prop_x86 | |
387 jg s_mpv_sqr_add_prop_sse2 | |
388 call s_mpi_is_sse2 | |
389 mov is_sse, eax | |
390 cmp eax, 0 | |
391 jg s_mpv_sqr_add_prop_sse2 | |
392 s_mpv_sqr_add_prop_x86: | |
393 push ebp | |
394 mov ebp,esp | |
395 sub esp,12 | |
396 push edi | |
397 push esi | |
398 push ebx | |
399 mov ebx,0 ; carry = 0 | |
400 mov ecx,[ebp+12] ; a_len | |
401 mov edi,[ebp+16] ; edi = ps | |
402 cmp ecx,0 | |
403 je L_31 ; jump if a_len == 0 | |
404 cld | |
405 mov esi,[ebp+8] ; esi = pa | |
406 L_30: | |
407 lodsd ; eax = [ds:si]; si += 4; | |
408 mul eax | |
409 | |
410 add eax,ebx ; add "carry" | |
411 adc edx,0 | |
412 mov ebx,[edi] | |
413 add eax,ebx ; add low word from result | |
414 mov ebx,[edi+4] | |
415 stosd ; [es:di] = eax; di += 4; | |
416 adc edx,ebx ; add high word from result | |
417 mov ebx,0 | |
418 mov eax,edx | |
419 adc ebx,0 | |
420 stosd ; [es:di] = eax; di += 4; | |
421 dec ecx ; --a_len | |
422 jnz L_30 ; jmp if a_len != 0 | |
423 L_31: | |
424 cmp ebx,0 ; is carry zero? | |
425 jz L_34 | |
426 mov eax,[edi] ; add in current word from *c | |
427 add eax,ebx | |
428 stosd ; [es:edi] = ax; edi += 4; | |
429 jnc L_34 | |
430 L_32: | |
431 mov eax,[edi] ; add in current word from *c | |
432 adc eax,0 | |
433 stosd ; [es:edi] = ax; edi += 4; | |
434 jc L_32 | |
435 L_34: | |
436 pop ebx | |
437 pop esi | |
438 pop edi | |
439 leave | |
440 ret | |
441 nop | |
442 s_mpv_sqr_add_prop_sse2: | |
443 push ebp | |
444 mov ebp, esp | |
445 push edi | |
446 push esi | |
447 push ebx | |
448 psubq mm2, mm2 ; carry = 0 | |
449 mov ecx, [ebp+12] ; ecx = a_len | |
450 mov edi, [ebp+16] | |
451 cmp ecx, 0 | |
452 je L_36 ; jmp if a_len == 0 | |
453 mov esi, [ebp+8] ; esi = a | |
454 cld | |
455 L_35: | |
456 movd mm0, [esi] ; mm0 = *a | |
457 movd mm3, [edi] ; fetch the sum | |
458 add esi, 4 | |
459 pmuludq mm0, mm0 ; mm0 = sqr(a) | |
460 paddq mm2, mm0 ; add the carry | |
461 paddq mm2, mm3 ; add the low word | |
462 movd mm3, [edi+4] | |
463 movd [edi], mm2 ; store the 32bit result | |
464 psrlq mm2, 32 | |
465 paddq mm2, mm3 ; add the high word | |
466 movd [edi+4], mm2 ; store the 32bit result | |
467 psrlq mm2, 32 ; save the carry. | |
468 add edi, 8 | |
469 dec ecx ; --a_len | |
470 jnz L_35 ; jmp if a_len != 0 | |
471 L_36: | |
472 movd ebx, mm2 | |
473 cmp ebx, 0 ; is carry zero? | |
474 jz L_38 | |
475 mov eax, [edi] | |
476 add eax, ebx | |
477 stosd | |
478 jnc L_38 | |
479 L_37: | |
480 mov eax, [edi] ; add in current word from *c | |
481 adc eax, 0 | |
482 stosd ; [es:edi] = ax; edi += 4; | |
483 jc L_37 | |
484 L_38: | |
485 emms | |
486 pop ebx | |
487 pop esi | |
488 pop edi | |
489 leave | |
490 ret | |
491 nop | |
492 } | |
493 } | |
494 | |
495 /* | |
496 * Divide 64-bit (Nhi,Nlo) by 32-bit divisor, which must be normalized | |
497 * so its high bit is 1. This code is from NSPR. | |
498 * Stores the quotient to *qp and the remainder to *rp, and returns 0. | |
499 * NOTE(review): the x86 div instruction faults when the quotient does not | |
500 * fit in 32 bits; presumably callers guarantee Nhi < divisor - confirm. | |
501 * Stack after the initial push ebx: | |
502 * esp + 0: caller's ebx | |
503 * esp + 4: return address | |
504 * esp + 8: Nhi argument | |
505 * esp + 12: Nlo argument | |
506 * esp + 16: divisor argument | |
507 * esp + 20: qp argument | |
508 * esp + 24: rp argument | |
509 * registers: | |
510 * edx:eax: dividend (Nhi:Nlo) before div | |
511 * eax: quotient after div, then the zero return value | |
512 * edx: remainder after div | |
513 * ebx: divisor, then qp, then rp (caller's value saved and restored) | |
514 * ecx, esi, edi: not used | |
515 */ | |
516 __declspec(naked) mp_err | |
517 s_mpv_div_2dx1d(mp_digit Nhi, mp_digit Nlo, mp_digit divisor, | |
518 mp_digit *qp, mp_digit *rp) | |
519 { | |
520 __asm { | |
521 push ebx | |
522 mov edx,[esp+8] | |
523 mov eax,[esp+12] | |
524 mov ebx,[esp+16] | |
525 div ebx | |
526 mov ebx,[esp+20] | |
527 mov [ebx],eax | |
528 mov ebx,[esp+24] | |
529 mov [ebx],edx | |
530 xor eax,eax ; return zero | |
531 pop ebx | |
532 ret | |
533 nop | |
534 } | |
535 } | |