trustbridge / nss-cmake-static: nss/lib/freebl/intel-aes-x86-masm.asm @ 0:1e5118fa0cb1
This is NSS with a CMake buildsystem.

To compile a static NSS library for Windows we used the
Chromium NSS fork and added a CMake buildsystem to build it
statically. See README.chromium for the Chromium changes and
README.trustbridge for our modifications.
author  Andre Heinecke <andre.heinecke@intevation.de>
date    Mon, 28 Jul 2014 10:47:06 +0200
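As a rough sketch of what the buildsystem change above amounts to (an illustration only, assuming CMake's ASM_MASM support; the repository's actual CMakeLists.txt, target names and source lists may differ, see README.trustbridge), a MASM source such as this file can be folded into a static freebl library along these lines:

    # Hypothetical sketch, not the repository's actual CMakeLists.txt.
    cmake_minimum_required(VERSION 3.10)
    project(nss-static C ASM_MASM)   # ASM_MASM enables ml.exe for .asm sources

    add_library(freebl_static STATIC
        nss/lib/freebl/intel-aes-x86-masm.asm   # this file (32-bit Windows builds)
        # ... the freebl C sources ...
    )

    # freebl's C code guards the intel_aes_* entry points implemented in this
    # file behind the USE_HW_AES define.
    target_compile_definitions(freebl_static PRIVATE USE_HW_AES)
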
; LICENSE:
; This submission to NSS is to be made available under the terms of the
; Mozilla Public License, v. 2.0. You can obtain one at http:
; //mozilla.org/MPL/2.0/.
;###############################################################################
; Copyright(c) 2014, Intel Corp.
; Developers and authors:
; Shay Gueron and Vlad Krasnov
; Intel Corporation, Israel Development Centre, Haifa, Israel
; Please send feedback directly to crypto.feedback.alias@intel.com


.MODEL FLAT, C
.XMM

.DATA
ALIGN 16
Lmask dd 0c0f0e0dh,0c0f0e0dh,0c0f0e0dh,0c0f0e0dh
Lmask192 dd 004070605h, 004070605h, 004070605h, 004070605h
Lmask256 dd 00c0f0e0dh, 00c0f0e0dh, 00c0f0e0dh, 00c0f0e0dh
Lcon1 dd 1,1,1,1
Lcon2 dd 1bh,1bh,1bh,1bh

.CODE

ctx textequ <ecx>
output textequ <edx>
input textequ <eax>
inputLen textequ <edi>


aes_rnd MACRO i
        movdqu xmm7, [i*16 + ctx]
        aesenc xmm0, xmm7
        aesenc xmm1, xmm7
        aesenc xmm2, xmm7
        aesenc xmm3, xmm7
        aesenc xmm4, xmm7
        aesenc xmm5, xmm7
        aesenc xmm6, xmm7
ENDM

aes_last_rnd MACRO i
        movdqu xmm7, [i*16 + ctx]
        aesenclast xmm0, xmm7
        aesenclast xmm1, xmm7
        aesenclast xmm2, xmm7
        aesenclast xmm3, xmm7
        aesenclast xmm4, xmm7
        aesenclast xmm5, xmm7
        aesenclast xmm6, xmm7
ENDM

aes_dec_rnd MACRO i
        movdqu xmm7, [i*16 + ctx]
        aesdec xmm0, xmm7
        aesdec xmm1, xmm7
        aesdec xmm2, xmm7
        aesdec xmm3, xmm7
        aesdec xmm4, xmm7
        aesdec xmm5, xmm7
        aesdec xmm6, xmm7
ENDM

aes_dec_last_rnd MACRO i
        movdqu xmm7, [i*16 + ctx]
        aesdeclast xmm0, xmm7
        aesdeclast xmm1, xmm7
        aesdeclast xmm2, xmm7
        aesdeclast xmm3, xmm7
        aesdeclast xmm4, xmm7
        aesdeclast xmm5, xmm7
        aesdeclast xmm6, xmm7
ENDM

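; The ECB workers generated from this macro are cdecl: the offsets used
; below ([esp + 2*4 + 0*4] for the cipher context, + 1*4 for the output
; buffer, + 4*4 for the input buffer, + 5*4 for the input length) skip two
; intermediate arguments, presumably the outputLen/maxOutputLen parameters
; of freebl's block cipher worker signature (an assumption; the C
; prototypes live in the accompanying headers, not in this file).
; lea ctx, [44+ctx] advances ctx past the leading context fields to the
; expanded key schedule. loop7 processes seven blocks at a time in
; xmm0-xmm6 (xmm7 holds the current round key) so independent AES-NI
; instructions can overlap; loop1 handles the remaining blocks one by one,
; and eax is cleared to return 0.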
gen_aes_ecb_func MACRO enc, rnds

        LOCAL loop7
        LOCAL loop1
        LOCAL bail

        push inputLen

        mov ctx, [esp + 2*4 + 0*4]
        mov output, [esp + 2*4 + 1*4]
        mov input, [esp + 2*4 + 4*4]
        mov inputLen, [esp + 2*4 + 5*4]

        lea ctx, [44+ctx]

loop7:
        cmp inputLen, 7*16
        jb loop1

        movdqu xmm0, [0*16 + input]
        movdqu xmm1, [1*16 + input]
        movdqu xmm2, [2*16 + input]
        movdqu xmm3, [3*16 + input]
        movdqu xmm4, [4*16 + input]
        movdqu xmm5, [5*16 + input]
        movdqu xmm6, [6*16 + input]

        movdqu xmm7, [0*16 + ctx]
        pxor xmm0, xmm7
        pxor xmm1, xmm7
        pxor xmm2, xmm7
        pxor xmm3, xmm7
        pxor xmm4, xmm7
        pxor xmm5, xmm7
        pxor xmm6, xmm7

        IF enc eq 1
        rnd textequ <aes_rnd>
        lastrnd textequ <aes_last_rnd>
        aesinst textequ <aesenc>
        aeslastinst textequ <aesenclast>
        ELSE
        rnd textequ <aes_dec_rnd>
        lastrnd textequ <aes_dec_last_rnd>
        aesinst textequ <aesdec>
        aeslastinst textequ <aesdeclast>
        ENDIF

        i = 1
        WHILE i LT rnds
        rnd i
        i = i+1
        ENDM
        lastrnd rnds

        movdqu [0*16 + output], xmm0
        movdqu [1*16 + output], xmm1
        movdqu [2*16 + output], xmm2
        movdqu [3*16 + output], xmm3
        movdqu [4*16 + output], xmm4
        movdqu [5*16 + output], xmm5
        movdqu [6*16 + output], xmm6

        lea input, [7*16 + input]
        lea output, [7*16 + output]
        sub inputLen, 7*16
        jmp loop7

loop1:
        cmp inputLen, 1*16
        jb bail

        movdqu xmm0, [input]
        movdqu xmm7, [0*16 + ctx]
        pxor xmm0, xmm7

        i = 1
        WHILE i LT rnds
        movdqu xmm7, [i*16 + ctx]
        aesinst xmm0, xmm7
        i = i+1
        ENDM
        movdqu xmm7, [rnds*16 + ctx]
        aeslastinst xmm0, xmm7

        movdqu [output], xmm0

        lea input, [1*16 + input]
        lea output, [1*16 + output]
        sub inputLen, 1*16
        jmp loop1

bail:
        xor eax, eax
        pop inputLen
        ret

ENDM

ALIGN 16
intel_aes_encrypt_ecb_128 PROC
        gen_aes_ecb_func 1, 10
intel_aes_encrypt_ecb_128 ENDP

ALIGN 16
intel_aes_encrypt_ecb_192 PROC
        gen_aes_ecb_func 1, 12
intel_aes_encrypt_ecb_192 ENDP

ALIGN 16
intel_aes_encrypt_ecb_256 PROC
        gen_aes_ecb_func 1, 14
intel_aes_encrypt_ecb_256 ENDP

ALIGN 16
intel_aes_decrypt_ecb_128 PROC
        gen_aes_ecb_func 0, 10
intel_aes_decrypt_ecb_128 ENDP

ALIGN 16
intel_aes_decrypt_ecb_192 PROC
        gen_aes_ecb_func 0, 12
intel_aes_decrypt_ecb_192 ENDP

ALIGN 16
intel_aes_decrypt_ecb_256 PROC
        gen_aes_ecb_func 0, 14
intel_aes_decrypt_ecb_256 ENDP


KEY textequ <ecx>
KS textequ <edx>
ITR textequ <eax>

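; Key expansion. The *_init routines below expand the user key with the
; pshufb/aesenclast trick rather than aeskeygenassist:
;   - pshufb with Lmask broadcasts RotWord(w3) into every dword lane; with
;     all four columns equal, the ShiftRows step of aesenclast is a no-op,
;     so aesenclast yields SubWord(RotWord(w3)) XOR rcon in each lane
;     (xmm0 carries the round constant, doubled with pslld for the next
;     round; Lcon2 = 1bh supplies the constant once doubling 80h would
;     overflow).
;   - the pslldq/pxor ladder turns [w0 w1 w2 w3] into the prefix XORs
;     [w0, w0^w1, w0^w1^w2, w0^w1^w2^w3], so the final pxor produces the
;     FIPS-197 recurrence for the next four words:
;       w4 = w0 ^ SubWord(RotWord(w3)) ^ rcon
;       w5 = w1 ^ w4,   w6 = w2 ^ w5,   w7 = w3 ^ w6
; The 192- and 256-bit variants follow the same pattern with their own
; shuffle masks (Lmask192, Lmask256).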
ALIGN 16
intel_aes_encrypt_init_128 PROC

        mov KEY, [esp + 1*4 + 0*4]
        mov KS, [esp + 1*4 + 1*4]


        movdqu xmm1, [KEY]
        movdqu [KS], xmm1
        movdqa xmm2, xmm1

        lea ITR, Lcon1
        movdqa xmm0, [ITR]
        lea ITR, Lmask
        movdqa xmm4, [ITR]

        mov ITR, 8

Lenc_128_ks_loop:
        lea KS, [16 + KS]
        dec ITR

        pshufb xmm2, xmm4
        aesenclast xmm2, xmm0
        pslld xmm0, 1
        movdqa xmm3, xmm1
        pslldq xmm3, 4
        pxor xmm1, xmm3
        pslldq xmm3, 4
        pxor xmm1, xmm3
        pslldq xmm3, 4
        pxor xmm1, xmm3
        pxor xmm1, xmm2
        movdqu [KS], xmm1
        movdqa xmm2, xmm1

        jne Lenc_128_ks_loop

        lea ITR, Lcon2
        movdqa xmm0, [ITR]

        pshufb xmm2, xmm4
        aesenclast xmm2, xmm0
        pslld xmm0, 1
        movdqa xmm3, xmm1
        pslldq xmm3, 4
        pxor xmm1, xmm3
        pslldq xmm3, 4
        pxor xmm1, xmm3
        pslldq xmm3, 4
        pxor xmm1, xmm3
        pxor xmm1, xmm2
        movdqu [16 + KS], xmm1
        movdqa xmm2, xmm1

        pshufb xmm2, xmm4
        aesenclast xmm2, xmm0
        movdqa xmm3, xmm1
        pslldq xmm3, 4
        pxor xmm1, xmm3
        pslldq xmm3, 4
        pxor xmm1, xmm3
        pslldq xmm3, 4
        pxor xmm1, xmm3
        pxor xmm1, xmm2
        movdqu [32 + KS], xmm1
        movdqa xmm2, xmm1

        ret
intel_aes_encrypt_init_128 ENDP

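; Decryption key schedules use the equivalent inverse cipher: each
; *_decrypt_init_* routine first builds the encryption schedule, then
; reverses the order of the round keys and runs aesimc (InvMixColumns)
; over every round key except the first and the last, which is the layout
; aesdec/aesdeclast expect.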
ALIGN 16
intel_aes_decrypt_init_128 PROC

        mov KEY, [esp + 1*4 + 0*4]
        mov KS, [esp + 1*4 + 1*4]

        push KS
        push KEY

        call intel_aes_encrypt_init_128

        pop KEY
        pop KS

        movdqu xmm0, [0*16 + KS]
        movdqu xmm1, [10*16 + KS]
        movdqu [10*16 + KS], xmm0
        movdqu [0*16 + KS], xmm1

        i = 1
        WHILE i LT 5
        movdqu xmm0, [i*16 + KS]
        movdqu xmm1, [(10-i)*16 + KS]

        aesimc xmm0, xmm0
        aesimc xmm1, xmm1

        movdqu [(10-i)*16 + KS], xmm0
        movdqu [i*16 + KS], xmm1

        i = i+1
        ENDM

        movdqu xmm0, [5*16 + KS]
        aesimc xmm0, xmm0
        movdqu [5*16 + KS], xmm0
        ret
intel_aes_decrypt_init_128 ENDP


ALIGN 16
intel_aes_encrypt_init_192 PROC

        mov KEY, [esp + 1*4 + 0*4]
        mov KS, [esp + 1*4 + 1*4]

        pxor xmm3, xmm3
        movdqu xmm1, [KEY]
        pinsrd xmm3, DWORD PTR [16 + KEY], 0
        pinsrd xmm3, DWORD PTR [20 + KEY], 1

        movdqu [KS], xmm1
        movdqa xmm5, xmm3

        lea ITR, Lcon1
        movdqu xmm0, [ITR]
        lea ITR, Lmask192
        movdqu xmm4, [ITR]

        mov ITR, 4

Lenc_192_ks_loop:
        movdqa xmm2, xmm3
        pshufb xmm2, xmm4
        aesenclast xmm2, xmm0
        pslld xmm0, 1

        movdqa xmm6, xmm1
        movdqa xmm7, xmm3
        pslldq xmm6, 4
        pslldq xmm7, 4
        pxor xmm1, xmm6
        pxor xmm3, xmm7
        pslldq xmm6, 4
        pxor xmm1, xmm6
        pslldq xmm6, 4
        pxor xmm1, xmm6
        pxor xmm1, xmm2
        pshufd xmm2, xmm1, 0ffh
        pxor xmm3, xmm2

        movdqa xmm6, xmm1
        shufpd xmm5, xmm1, 00h
        shufpd xmm6, xmm3, 01h

        movdqu [16 + KS], xmm5
        movdqu [32 + KS], xmm6

        movdqa xmm2, xmm3
        pshufb xmm2, xmm4
        aesenclast xmm2, xmm0
        pslld xmm0, 1

        movdqa xmm6, xmm1
        movdqa xmm7, xmm3
        pslldq xmm6, 4
        pslldq xmm7, 4
        pxor xmm1, xmm6
        pxor xmm3, xmm7
        pslldq xmm6, 4
        pxor xmm1, xmm6
        pslldq xmm6, 4
        pxor xmm1, xmm6
        pxor xmm1, xmm2
        pshufd xmm2, xmm1, 0ffh
        pxor xmm3, xmm2

        movdqu [48 + KS], xmm1
        movdqa xmm5, xmm3

        lea KS, [48 + KS]

        dec ITR
        jnz Lenc_192_ks_loop

        movdqu [16 + KS], xmm5
        ret
intel_aes_encrypt_init_192 ENDP

ALIGN 16
intel_aes_decrypt_init_192 PROC
        mov KEY, [esp + 1*4 + 0*4]
        mov KS, [esp + 1*4 + 1*4]

        push KS
        push KEY

        call intel_aes_encrypt_init_192

        pop KEY
        pop KS

        movdqu xmm0, [0*16 + KS]
        movdqu xmm1, [12*16 + KS]
        movdqu [12*16 + KS], xmm0
        movdqu [0*16 + KS], xmm1

        i = 1
        WHILE i LT 6
        movdqu xmm0, [i*16 + KS]
        movdqu xmm1, [(12-i)*16 + KS]

        aesimc xmm0, xmm0
        aesimc xmm1, xmm1

        movdqu [(12-i)*16 + KS], xmm0
        movdqu [i*16 + KS], xmm1

        i = i+1
        ENDM

        movdqu xmm0, [6*16 + KS]
        aesimc xmm0, xmm0
        movdqu [6*16 + KS], xmm0
        ret
intel_aes_decrypt_init_192 ENDP

ALIGN 16
intel_aes_encrypt_init_256 PROC

        mov KEY, [esp + 1*4 + 0*4]
        mov KS, [esp + 1*4 + 1*4]
        movdqu xmm1, [16*0 + KEY]
        movdqu xmm3, [16*1 + KEY]

        movdqu [16*0 + KS], xmm1
        movdqu [16*1 + KS], xmm3

        lea ITR, Lcon1
        movdqu xmm0, [ITR]
        lea ITR, Lmask256
        movdqu xmm5, [ITR]

        pxor xmm6, xmm6

        mov ITR, 6

Lenc_256_ks_loop:

        movdqa xmm2, xmm3
        pshufb xmm2, xmm5
        aesenclast xmm2, xmm0
        pslld xmm0, 1
        movdqa xmm4, xmm1
        pslldq xmm4, 4
        pxor xmm1, xmm4
        pslldq xmm4, 4
        pxor xmm1, xmm4
        pslldq xmm4, 4
        pxor xmm1, xmm4
        pxor xmm1, xmm2
        movdqu [16*2 + KS], xmm1

        pshufd xmm2, xmm1, 0ffh
        aesenclast xmm2, xmm6
        movdqa xmm4, xmm3
        pslldq xmm4, 4
        pxor xmm3, xmm4
        pslldq xmm4, 4
        pxor xmm3, xmm4
        pslldq xmm4, 4
        pxor xmm3, xmm4
        pxor xmm3, xmm2
        movdqu [16*3 + KS], xmm3

        lea KS, [32 + KS]
        dec ITR
        jnz Lenc_256_ks_loop

        movdqa xmm2, xmm3
        pshufb xmm2, xmm5
        aesenclast xmm2, xmm0
        movdqa xmm4, xmm1
        pslldq xmm4, 4
        pxor xmm1, xmm4
        pslldq xmm4, 4
        pxor xmm1, xmm4
        pslldq xmm4, 4
        pxor xmm1, xmm4
        pxor xmm1, xmm2
        movdqu [16*2 + KS], xmm1

        ret
intel_aes_encrypt_init_256 ENDP

ALIGN 16
intel_aes_decrypt_init_256 PROC
        mov KEY, [esp + 1*4 + 0*4]
        mov KS, [esp + 1*4 + 1*4]

        push KS
        push KEY

        call intel_aes_encrypt_init_256

        pop KEY
        pop KS

        movdqu xmm0, [0*16 + KS]
        movdqu xmm1, [14*16 + KS]
        movdqu [14*16 + KS], xmm0
        movdqu [0*16 + KS], xmm1

        i = 1
        WHILE i LT 7
        movdqu xmm0, [i*16 + KS]
        movdqu xmm1, [(14-i)*16 + KS]

        aesimc xmm0, xmm0
        aesimc xmm1, xmm1

        movdqu [(14-i)*16 + KS], xmm0
        movdqu [i*16 + KS], xmm1

        i = i+1
        ENDM

        movdqu xmm0, [7*16 + KS]
        aesimc xmm0, xmm0
        movdqu [7*16 + KS], xmm0
        ret
intel_aes_decrypt_init_256 ENDP


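; CBC mode. Encryption is serial by construction (each block depends on
; the previous ciphertext), so the encryption worker handles one block per
; iteration and keeps the first round keys cached in xmm2-xmm6; decryption
; has no such dependency and pipelines seven blocks at a time like the ECB
; code. The running IV/chaining value lives 32 bytes below the key
; schedule ([-32 + ctx]) and is written back before returning.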
gen_aes_cbc_enc_func MACRO rnds

        LOCAL loop1
        LOCAL bail

        push inputLen

        mov ctx, [esp + 2*4 + 0*4]
        mov output, [esp + 2*4 + 1*4]
        mov input, [esp + 2*4 + 4*4]
        mov inputLen, [esp + 2*4 + 5*4]

        lea ctx, [44+ctx]

        movdqu xmm0, [-32+ctx]

        movdqu xmm2, [0*16 + ctx]
        movdqu xmm3, [1*16 + ctx]
        movdqu xmm4, [2*16 + ctx]
        movdqu xmm5, [3*16 + ctx]
        movdqu xmm6, [4*16 + ctx]

loop1:
        cmp inputLen, 1*16
        jb bail

        movdqu xmm1, [input]
        pxor xmm1, xmm2
        pxor xmm0, xmm1

        aesenc xmm0, xmm3
        aesenc xmm0, xmm4
        aesenc xmm0, xmm5
        aesenc xmm0, xmm6

        i = 5
        WHILE i LT rnds
        movdqu xmm7, [i*16 + ctx]
        aesenc xmm0, xmm7
        i = i+1
        ENDM
        movdqu xmm7, [rnds*16 + ctx]
        aesenclast xmm0, xmm7

        movdqu [output], xmm0

        lea input, [1*16 + input]
        lea output, [1*16 + output]
        sub inputLen, 1*16
        jmp loop1

bail:
        movdqu [-32+ctx], xmm0

        xor eax, eax
        pop inputLen
        ret

ENDM

gen_aes_cbc_dec_func MACRO rnds

        LOCAL loop7
        LOCAL loop1
        LOCAL dec1
        LOCAL bail

        push inputLen

        mov ctx, [esp + 2*4 + 0*4]
        mov output, [esp + 2*4 + 1*4]
        mov input, [esp + 2*4 + 4*4]
        mov inputLen, [esp + 2*4 + 5*4]

        lea ctx, [44+ctx]

loop7:
        cmp inputLen, 7*16
        jb dec1

        movdqu xmm0, [0*16 + input]
        movdqu xmm1, [1*16 + input]
        movdqu xmm2, [2*16 + input]
        movdqu xmm3, [3*16 + input]
        movdqu xmm4, [4*16 + input]
        movdqu xmm5, [5*16 + input]
        movdqu xmm6, [6*16 + input]

        movdqu xmm7, [0*16 + ctx]
        pxor xmm0, xmm7
        pxor xmm1, xmm7
        pxor xmm2, xmm7
        pxor xmm3, xmm7
        pxor xmm4, xmm7
        pxor xmm5, xmm7
        pxor xmm6, xmm7

        i = 1
        WHILE i LT rnds
        aes_dec_rnd i
        i = i+1
        ENDM
        aes_dec_last_rnd rnds

        movdqu xmm7, [-32 + ctx]
        pxor xmm0, xmm7
        movdqu xmm7, [0*16 + input]
        pxor xmm1, xmm7
        movdqu xmm7, [1*16 + input]
        pxor xmm2, xmm7
        movdqu xmm7, [2*16 + input]
        pxor xmm3, xmm7
        movdqu xmm7, [3*16 + input]
        pxor xmm4, xmm7
        movdqu xmm7, [4*16 + input]
        pxor xmm5, xmm7
        movdqu xmm7, [5*16 + input]
        pxor xmm6, xmm7
        movdqu xmm7, [6*16 + input]

        movdqu [0*16 + output], xmm0
        movdqu [1*16 + output], xmm1
        movdqu [2*16 + output], xmm2
        movdqu [3*16 + output], xmm3
        movdqu [4*16 + output], xmm4
        movdqu [5*16 + output], xmm5
        movdqu [6*16 + output], xmm6
        movdqu [-32 + ctx], xmm7

        lea input, [7*16 + input]
        lea output, [7*16 + output]
        sub inputLen, 7*16
        jmp loop7
dec1:

        movdqu xmm3, [-32 + ctx]

loop1:
        cmp inputLen, 1*16
        jb bail

        movdqu xmm0, [input]
        movdqa xmm4, xmm0
        movdqu xmm7, [0*16 + ctx]
        pxor xmm0, xmm7

        i = 1
        WHILE i LT rnds
        movdqu xmm7, [i*16 + ctx]
        aesdec xmm0, xmm7
        i = i+1
        ENDM
        movdqu xmm7, [rnds*16 + ctx]
        aesdeclast xmm0, xmm7
        pxor xmm3, xmm0

        movdqu [output], xmm3
        movdqa xmm3, xmm4

        lea input, [1*16 + input]
        lea output, [1*16 + output]
        sub inputLen, 1*16
        jmp loop1

bail:
        movdqu [-32 + ctx], xmm3
        xor eax, eax
        pop inputLen
        ret
ENDM

ALIGN 16
intel_aes_encrypt_cbc_128 PROC
        gen_aes_cbc_enc_func 10
intel_aes_encrypt_cbc_128 ENDP

ALIGN 16
intel_aes_encrypt_cbc_192 PROC
        gen_aes_cbc_enc_func 12
intel_aes_encrypt_cbc_192 ENDP

ALIGN 16
intel_aes_encrypt_cbc_256 PROC
        gen_aes_cbc_enc_func 14
intel_aes_encrypt_cbc_256 ENDP

ALIGN 16
intel_aes_decrypt_cbc_128 PROC
        gen_aes_cbc_dec_func 10
intel_aes_decrypt_cbc_128 ENDP

ALIGN 16
intel_aes_decrypt_cbc_192 PROC
        gen_aes_cbc_dec_func 12
intel_aes_decrypt_cbc_192 ENDP

ALIGN 16
intel_aes_decrypt_cbc_256 PROC
        gen_aes_cbc_dec_func 14
intel_aes_decrypt_cbc_256 ENDP



ctrCtx textequ <esi>
CTR textequ <ebx>

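; CTR mode. The worker keeps seven counter blocks in a 16-byte aligned
; stack area, each already XORed with round key 0, so the initial
; AddRoundKey is folded into the counter setup. Only the last (big-endian)
; 32-bit word of the counter changes from block to block: it is
; incremented in a general register, byte-swapped, XORed with the matching
; word of round key 0 ([ctx + 3*4]) and patched into the corresponding
; stack slot; the updates for the next batch of counters are interleaved
; with the first seven AES rounds of the current batch. On exit the next
; counter value is un-whitened with round key 0 and stored back into the
; CTR context.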
gen_aes_ctr_func MACRO rnds

        LOCAL loop7
        LOCAL loop1
        LOCAL enc1
        LOCAL bail

        push inputLen
        push ctrCtx
        push CTR
        push ebp

        mov ctrCtx, [esp + 4*5 + 0*4]
        mov output, [esp + 4*5 + 1*4]
        mov input, [esp + 4*5 + 4*4]
        mov inputLen, [esp + 4*5 + 5*4]

        mov ctx, [4+ctrCtx]
        lea ctx, [44+ctx]

        mov ebp, esp
        sub esp, 7*16
        and esp, -16

        movdqu xmm0, [8+ctrCtx]
        mov ctrCtx, [ctrCtx + 8 + 3*4]
        bswap ctrCtx
        movdqu xmm1, [ctx + 0*16]

        pxor xmm0, xmm1

        movdqa [esp + 0*16], xmm0
        movdqa [esp + 1*16], xmm0
        movdqa [esp + 2*16], xmm0
        movdqa [esp + 3*16], xmm0
        movdqa [esp + 4*16], xmm0
        movdqa [esp + 5*16], xmm0
        movdqa [esp + 6*16], xmm0

        inc ctrCtx
        mov CTR, ctrCtx
        bswap CTR
        xor CTR, [ctx + 3*4]
        mov [esp + 1*16 + 3*4], CTR

        inc ctrCtx
        mov CTR, ctrCtx
        bswap CTR
        xor CTR, [ctx + 3*4]
        mov [esp + 2*16 + 3*4], CTR

        inc ctrCtx
        mov CTR, ctrCtx
        bswap CTR
        xor CTR, [ctx + 3*4]
        mov [esp + 3*16 + 3*4], CTR

        inc ctrCtx
        mov CTR, ctrCtx
        bswap CTR
        xor CTR, [ctx + 3*4]
        mov [esp + 4*16 + 3*4], CTR

        inc ctrCtx
        mov CTR, ctrCtx
        bswap CTR
        xor CTR, [ctx + 3*4]
        mov [esp + 5*16 + 3*4], CTR

        inc ctrCtx
        mov CTR, ctrCtx
        bswap CTR
        xor CTR, [ctx + 3*4]
        mov [esp + 6*16 + 3*4], CTR


loop7:
        cmp inputLen, 7*16
        jb loop1

        movdqu xmm0, [0*16 + esp]
        movdqu xmm1, [1*16 + esp]
        movdqu xmm2, [2*16 + esp]
        movdqu xmm3, [3*16 + esp]
        movdqu xmm4, [4*16 + esp]
        movdqu xmm5, [5*16 + esp]
        movdqu xmm6, [6*16 + esp]

        i = 1
        WHILE i LE 7
        aes_rnd i

        inc ctrCtx
        mov CTR, ctrCtx
        bswap CTR
        xor CTR, [ctx + 3*4]
        mov [esp + (i-1)*16 + 3*4], CTR

        i = i+1
        ENDM
        WHILE i LT rnds
        aes_rnd i
        i = i+1
        ENDM
        aes_last_rnd rnds

        movdqu xmm7, [0*16 + input]
        pxor xmm0, xmm7
        movdqu xmm7, [1*16 + input]
        pxor xmm1, xmm7
        movdqu xmm7, [2*16 + input]
        pxor xmm2, xmm7
        movdqu xmm7, [3*16 + input]
        pxor xmm3, xmm7
        movdqu xmm7, [4*16 + input]
        pxor xmm4, xmm7
        movdqu xmm7, [5*16 + input]
        pxor xmm5, xmm7
        movdqu xmm7, [6*16 + input]
        pxor xmm6, xmm7

        movdqu [0*16 + output], xmm0
        movdqu [1*16 + output], xmm1
        movdqu [2*16 + output], xmm2
        movdqu [3*16 + output], xmm3
        movdqu [4*16 + output], xmm4
        movdqu [5*16 + output], xmm5
        movdqu [6*16 + output], xmm6

        lea input, [7*16 + input]
        lea output, [7*16 + output]
        sub inputLen, 7*16
        jmp loop7


loop1:
        cmp inputLen, 1*16
        jb bail

        movdqu xmm0, [esp]
        add esp, 16

        i = 1
        WHILE i LT rnds
        movdqu xmm7, [i*16 + ctx]
        aesenc xmm0, xmm7
        i = i+1
        ENDM
        movdqu xmm7, [rnds*16 + ctx]
        aesenclast xmm0, xmm7

        movdqu xmm7, [input]
        pxor xmm0, xmm7
        movdqu [output], xmm0

        lea input, [1*16 + input]
        lea output, [1*16 + output]
        sub inputLen, 1*16
        jmp loop1

bail:

        mov ctrCtx, [ebp + 4*5 + 0*4]
        movdqu xmm0, [esp]
        movdqu xmm1, [ctx + 0*16]
        pxor xmm0, xmm1
        movdqu [8+ctrCtx], xmm0


        xor eax, eax
        mov esp, ebp
        pop ebp
        pop CTR
        pop ctrCtx
        pop inputLen
        ret
ENDM


ALIGN 16
intel_aes_encrypt_ctr_128 PROC
        gen_aes_ctr_func 10
intel_aes_encrypt_ctr_128 ENDP

ALIGN 16
intel_aes_encrypt_ctr_192 PROC
        gen_aes_ctr_func 12
intel_aes_encrypt_ctr_192 ENDP

ALIGN 16
intel_aes_encrypt_ctr_256 PROC
        gen_aes_ctr_func 14
intel_aes_encrypt_ctr_256 ENDP


END