comparison nss/lib/freebl/mpi/mpi_x86_asm.c @ 0:1e5118fa0cb1

This is NSS with a CMake build system. To compile a static NSS library for Windows we've used the Chromium-NSS fork and added a CMake build system to compile it statically for Windows. See README.chromium for the Chromium changes and README.trustbridge for our modifications.
author Andre Heinecke <andre.heinecke@intevation.de>
date Mon, 28 Jul 2014 10:47:06 +0200
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:1e5118fa0cb1
1 /*
2 * mpi_x86_asm.c - MSVC inline assembly implementation of s_mpv_ functions.
3 *
4 * This Source Code Form is subject to the terms of the Mozilla Public
5 * License, v. 2.0. If a copy of the MPL was not distributed with this
6 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
7
8 #include "mpi-priv.h"
9
/*
 * Cached answer of the SSE2 capability probe, read by the dispatch sequence
 * at the top of each s_mpv_* multiply/square routine in this file:
 *   < 0  not probed yet (s_mpi_is_sse2 is called and its result stored here)
 *     0  take the plain x86 code path
 *   > 0  take the SSE2 code path
 */
static int is_sse = -1;

/* Capability probe; declared here, not defined in the visible part of this file. */
extern unsigned long s_mpi_is_sse2(void);
12
13 /*
14 * ebp - 36: caller's esi
15 * ebp - 32: caller's edi
16 * ebp - 28:
17 * ebp - 24:
18 * ebp - 20:
19 * ebp - 16:
20 * ebp - 12:
21 * ebp - 8:
22 * ebp - 4:
23 * ebp + 0: caller's ebp
24 * ebp + 4: return address
25 * ebp + 8: a argument
26 * ebp + 12: a_len argument
27 * ebp + 16: b argument
28 * ebp + 20: c argument
29 * registers:
30 * eax:
31 * ebx: carry
32 * ecx: a_len
33 * edx:
34 * esi: a ptr
35 * edi: c ptr
36 */
37 __declspec(naked) void
38 s_mpv_mul_d(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
39 {
40 __asm {
41 mov eax, is_sse
42 cmp eax, 0
43 je s_mpv_mul_d_x86
44 jg s_mpv_mul_d_sse2
45 call s_mpi_is_sse2
46 mov is_sse, eax
47 cmp eax, 0
48 jg s_mpv_mul_d_sse2
49 s_mpv_mul_d_x86:
50 push ebp
51 mov ebp,esp
52 sub esp,28
53 push edi
54 push esi
55 push ebx
56 mov ebx,0 ; carry = 0
57 mov ecx,[ebp+12] ; ecx = a_len
58 mov edi,[ebp+20]
59 cmp ecx,0
60 je L_2 ; jmp if a_len == 0
61 mov esi,[ebp+8] ; esi = a
62 cld
63 L_1:
64 lodsd ; eax = [ds:esi]; esi += 4
65 mov edx,[ebp+16] ; edx = b
66 mul edx ; edx:eax = Phi:Plo = a_i * b
67
68 add eax,ebx ; add carry (ebx) to edx:eax
69 adc edx,0
70 mov ebx,edx ; high half of product becomes next carry
71
72 stosd ; [es:edi] = ax; edi += 4;
73 dec ecx ; --a_len
74 jnz L_1 ; jmp if a_len != 0
75 L_2:
76 mov [edi],ebx ; *c = carry
77 pop ebx
78 pop esi
79 pop edi
80 leave
81 ret
82 nop
83 s_mpv_mul_d_sse2:
84 push ebp
85 mov ebp, esp
86 push edi
87 push esi
88 psubq mm2, mm2 ; carry = 0
89 mov ecx, [ebp+12] ; ecx = a_len
90 movd mm1, [ebp+16] ; mm1 = b
91 mov edi, [ebp+20]
92 cmp ecx, 0
93 je L_6 ; jmp if a_len == 0
94 mov esi, [ebp+8] ; esi = a
95 cld
96 L_5:
97 movd mm0, [esi] ; mm0 = *a++
98 add esi, 4
99 pmuludq mm0, mm1 ; mm0 = b * *a++
100 paddq mm2, mm0 ; add the carry
101 movd [edi], mm2 ; store the 32bit result
102 add edi, 4
103 psrlq mm2, 32 ; save the carry
104 dec ecx ; --a_len
105 jnz L_5 ; jmp if a_len != 0
106 L_6:
107 movd [edi], mm2 ; *c = carry
108 emms
109 pop esi
110 pop edi
111 leave
112 ret
113 nop
114 }
115 }
116
117 /*
118 * ebp - 36: caller's esi
119 * ebp - 32: caller's edi
120 * ebp - 28:
121 * ebp - 24:
122 * ebp - 20:
123 * ebp - 16:
124 * ebp - 12:
125 * ebp - 8:
126 * ebp - 4:
127 * ebp + 0: caller's ebp
128 * ebp + 4: return address
129 * ebp + 8: a argument
130 * ebp + 12: a_len argument
131 * ebp + 16: b argument
132 * ebp + 20: c argument
133 * registers:
134 * eax:
135 * ebx: carry
136 * ecx: a_len
137 * edx:
138 * esi: a ptr
139 * edi: c ptr
140 */
141 __declspec(naked) void
142 s_mpv_mul_d_add(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
143 {
144 __asm {
145 mov eax, is_sse
146 cmp eax, 0
147 je s_mpv_mul_d_add_x86
148 jg s_mpv_mul_d_add_sse2
149 call s_mpi_is_sse2
150 mov is_sse, eax
151 cmp eax, 0
152 jg s_mpv_mul_d_add_sse2
153 s_mpv_mul_d_add_x86:
154 push ebp
155 mov ebp,esp
156 sub esp,28
157 push edi
158 push esi
159 push ebx
160 mov ebx,0 ; carry = 0
161 mov ecx,[ebp+12] ; ecx = a_len
162 mov edi,[ebp+20]
163 cmp ecx,0
164 je L_11 ; jmp if a_len == 0
165 mov esi,[ebp+8] ; esi = a
166 cld
167 L_10:
168 lodsd ; eax = [ds:esi]; esi += 4
169 mov edx,[ebp+16] ; edx = b
170 mul edx ; edx:eax = Phi:Plo = a_i * b
171
172 add eax,ebx ; add carry (ebx) to edx:eax
173 adc edx,0
174 mov ebx,[edi] ; add in current word from *c
175 add eax,ebx
176 adc edx,0
177 mov ebx,edx ; high half of product becomes next carry
178
179 stosd ; [es:edi] = ax; edi += 4;
180 dec ecx ; --a_len
181 jnz L_10 ; jmp if a_len != 0
182 L_11:
183 mov [edi],ebx ; *c = carry
184 pop ebx
185 pop esi
186 pop edi
187 leave
188 ret
189 nop
190 s_mpv_mul_d_add_sse2:
191 push ebp
192 mov ebp, esp
193 push edi
194 push esi
195 psubq mm2, mm2 ; carry = 0
196 mov ecx, [ebp+12] ; ecx = a_len
197 movd mm1, [ebp+16] ; mm1 = b
198 mov edi, [ebp+20]
199 cmp ecx, 0
200 je L_16 ; jmp if a_len == 0
201 mov esi, [ebp+8] ; esi = a
202 cld
203 L_15:
204 movd mm0, [esi] ; mm0 = *a++
205 add esi, 4
206 pmuludq mm0, mm1 ; mm0 = b * *a++
207 paddq mm2, mm0 ; add the carry
208 movd mm0, [edi]
209 paddq mm2, mm0 ; add the carry
210 movd [edi], mm2 ; store the 32bit result
211 add edi, 4
212 psrlq mm2, 32 ; save the carry
213 dec ecx ; --a_len
214 jnz L_15 ; jmp if a_len != 0
215 L_16:
216 movd [edi], mm2 ; *c = carry
217 emms
218 pop esi
219 pop edi
220 leave
221 ret
222 nop
223 }
224 }
225
226 /*
227 * ebp - 36: caller's esi
228 * ebp - 32: caller's edi
229 * ebp - 28:
230 * ebp - 24:
231 * ebp - 20:
232 * ebp - 16:
233 * ebp - 12:
234 * ebp - 8:
235 * ebp - 4:
236 * ebp + 0: caller's ebp
237 * ebp + 4: return address
238 * ebp + 8: a argument
239 * ebp + 12: a_len argument
240 * ebp + 16: b argument
241 * ebp + 20: c argument
242 * registers:
243 * eax:
244 * ebx: carry
245 * ecx: a_len
246 * edx:
247 * esi: a ptr
248 * edi: c ptr
249 */
250 __declspec(naked) void
251 s_mpv_mul_d_add_prop(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
252 {
253 __asm {
254 mov eax, is_sse
255 cmp eax, 0
256 je s_mpv_mul_d_add_prop_x86
257 jg s_mpv_mul_d_add_prop_sse2
258 call s_mpi_is_sse2
259 mov is_sse, eax
260 cmp eax, 0
261 jg s_mpv_mul_d_add_prop_sse2
262 s_mpv_mul_d_add_prop_x86:
263 push ebp
264 mov ebp,esp
265 sub esp,28
266 push edi
267 push esi
268 push ebx
269 mov ebx,0 ; carry = 0
270 mov ecx,[ebp+12] ; ecx = a_len
271 mov edi,[ebp+20]
272 cmp ecx,0
273 je L_21 ; jmp if a_len == 0
274 cld
275 mov esi,[ebp+8] ; esi = a
276 L_20:
277 lodsd ; eax = [ds:esi]; esi += 4
278 mov edx,[ebp+16] ; edx = b
279 mul edx ; edx:eax = Phi:Plo = a_i * b
280
281 add eax,ebx ; add carry (ebx) to edx:eax
282 adc edx,0
283 mov ebx,[edi] ; add in current word from *c
284 add eax,ebx
285 adc edx,0
286 mov ebx,edx ; high half of product becomes next carry
287
288 stosd ; [es:edi] = ax; edi += 4;
289 dec ecx ; --a_len
290 jnz L_20 ; jmp if a_len != 0
291 L_21:
292 cmp ebx,0 ; is carry zero?
293 jz L_23
294 mov eax,[edi] ; add in current word from *c
295 add eax,ebx
296 stosd ; [es:edi] = ax; edi += 4;
297 jnc L_23
298 L_22:
299 mov eax,[edi] ; add in current word from *c
300 adc eax,0
301 stosd ; [es:edi] = ax; edi += 4;
302 jc L_22
303 L_23:
304 pop ebx
305 pop esi
306 pop edi
307 leave
308 ret
309 nop
310 s_mpv_mul_d_add_prop_sse2:
311 push ebp
312 mov ebp, esp
313 push edi
314 push esi
315 push ebx
316 psubq mm2, mm2 ; carry = 0
317 mov ecx, [ebp+12] ; ecx = a_len
318 movd mm1, [ebp+16] ; mm1 = b
319 mov edi, [ebp+20]
320 cmp ecx, 0
321 je L_26 ; jmp if a_len == 0
322 mov esi, [ebp+8] ; esi = a
323 cld
324 L_25:
325 movd mm0, [esi] ; mm0 = *a++
326 movd mm3, [edi] ; fetch the sum
327 add esi, 4
328 pmuludq mm0, mm1 ; mm0 = b * *a++
329 paddq mm2, mm0 ; add the carry
330 paddq mm2, mm3 ; add *c++
331 movd [edi], mm2 ; store the 32bit result
332 add edi, 4
333 psrlq mm2, 32 ; save the carry
334 dec ecx ; --a_len
335 jnz L_25 ; jmp if a_len != 0
336 L_26:
337 movd ebx, mm2
338 cmp ebx, 0 ; is carry zero?
339 jz L_28
340 mov eax, [edi]
341 add eax, ebx
342 stosd
343 jnc L_28
344 L_27:
345 mov eax, [edi] ; add in current word from *c
346 adc eax, 0
347 stosd ; [es:edi] = ax; edi += 4;
348 jc L_27
349 L_28:
350 emms
351 pop ebx
352 pop esi
353 pop edi
354 leave
355 ret
356 nop
357 }
358 }
359
360 /*
361 * ebp - 20: caller's esi
362 * ebp - 16: caller's edi
363 * ebp - 12:
364 * ebp - 8: carry
365 * ebp - 4: a_len local
366 * ebp + 0: caller's ebp
367 * ebp + 4: return address
368 * ebp + 8: pa argument
369 * ebp + 12: a_len argument
370 * ebp + 16: ps argument
371 * ebp + 20:
372 * registers:
373 * eax:
374 * ebx: carry
375 * ecx: a_len
376 * edx:
377 * esi: a ptr
378 * edi: c ptr
379 */
380 __declspec(naked) void
381 s_mpv_sqr_add_prop(const mp_digit *a, mp_size a_len, mp_digit *sqrs)
382 {
383 __asm {
384 mov eax, is_sse
385 cmp eax, 0
386 je s_mpv_sqr_add_prop_x86
387 jg s_mpv_sqr_add_prop_sse2
388 call s_mpi_is_sse2
389 mov is_sse, eax
390 cmp eax, 0
391 jg s_mpv_sqr_add_prop_sse2
392 s_mpv_sqr_add_prop_x86:
393 push ebp
394 mov ebp,esp
395 sub esp,12
396 push edi
397 push esi
398 push ebx
399 mov ebx,0 ; carry = 0
400 mov ecx,[ebp+12] ; a_len
401 mov edi,[ebp+16] ; edi = ps
402 cmp ecx,0
403 je L_31 ; jump if a_len == 0
404 cld
405 mov esi,[ebp+8] ; esi = pa
406 L_30:
407 lodsd ; eax = [ds:si]; si += 4;
408 mul eax
409
410 add eax,ebx ; add "carry"
411 adc edx,0
412 mov ebx,[edi]
413 add eax,ebx ; add low word from result
414 mov ebx,[edi+4]
415 stosd ; [es:di] = eax; di += 4;
416 adc edx,ebx ; add high word from result
417 mov ebx,0
418 mov eax,edx
419 adc ebx,0
420 stosd ; [es:di] = eax; di += 4;
421 dec ecx ; --a_len
422 jnz L_30 ; jmp if a_len != 0
423 L_31:
424 cmp ebx,0 ; is carry zero?
425 jz L_34
426 mov eax,[edi] ; add in current word from *c
427 add eax,ebx
428 stosd ; [es:edi] = ax; edi += 4;
429 jnc L_34
430 L_32:
431 mov eax,[edi] ; add in current word from *c
432 adc eax,0
433 stosd ; [es:edi] = ax; edi += 4;
434 jc L_32
435 L_34:
436 pop ebx
437 pop esi
438 pop edi
439 leave
440 ret
441 nop
442 s_mpv_sqr_add_prop_sse2:
443 push ebp
444 mov ebp, esp
445 push edi
446 push esi
447 push ebx
448 psubq mm2, mm2 ; carry = 0
449 mov ecx, [ebp+12] ; ecx = a_len
450 mov edi, [ebp+16]
451 cmp ecx, 0
452 je L_36 ; jmp if a_len == 0
453 mov esi, [ebp+8] ; esi = a
454 cld
455 L_35:
456 movd mm0, [esi] ; mm0 = *a
457 movd mm3, [edi] ; fetch the sum
458 add esi, 4
459 pmuludq mm0, mm0 ; mm0 = sqr(a)
460 paddq mm2, mm0 ; add the carry
461 paddq mm2, mm3 ; add the low word
462 movd mm3, [edi+4]
463 movd [edi], mm2 ; store the 32bit result
464 psrlq mm2, 32
465 paddq mm2, mm3 ; add the high word
466 movd [edi+4], mm2 ; store the 32bit result
467 psrlq mm2, 32 ; save the carry.
468 add edi, 8
469 dec ecx ; --a_len
470 jnz L_35 ; jmp if a_len != 0
471 L_36:
472 movd ebx, mm2
473 cmp ebx, 0 ; is carry zero?
474 jz L_38
475 mov eax, [edi]
476 add eax, ebx
477 stosd
478 jnc L_38
479 L_37:
480 mov eax, [edi] ; add in current word from *c
481 adc eax, 0
482 stosd ; [es:edi] = ax; edi += 4;
483 jc L_37
484 L_38:
485 emms
486 pop ebx
487 pop esi
488 pop edi
489 leave
490 ret
491 nop
492 }
493 }
494
495 /*
496 * Divide 64-bit (Nhi,Nlo) by 32-bit divisor, which must be normalized
497 * so its high bit is 1. This code is from NSPR.
498 *
499 * Dump of assembler code for function s_mpv_div_2dx1d:
500 *
501 * esp + 0: Caller's ebx
502 * esp + 4: return address
503 * esp + 8: Nhi argument
504 * esp + 12: Nlo argument
505 * esp + 16: divisor argument
506 * esp + 20: qp argument
507 * esp + 24: rp argument
508 * registers:
509 * eax:
510 * ebx: carry
511 * ecx: a_len
512 * edx:
513 * esi: a ptr
514 * edi: c ptr
515 */
516 __declspec(naked) mp_err
517 s_mpv_div_2dx1d(mp_digit Nhi, mp_digit Nlo, mp_digit divisor,
518 mp_digit *qp, mp_digit *rp)
519 {
520 __asm {
521 push ebx
522 mov edx,[esp+8]
523 mov eax,[esp+12]
524 mov ebx,[esp+16]
525 div ebx
526 mov ebx,[esp+20]
527 mov [ebx],eax
528 mov ebx,[esp+24]
529 mov [ebx],edx
530 xor eax,eax ; return zero
531 pop ebx
532 ret
533 nop
534 }
535 }
This site is hosted by Intevation GmbH (Datenschutzerklärung und Impressum | Privacy Policy and Imprint)