Mercurial > trustbridge > nss-cmake-static
comparison nss/lib/freebl/intel-aes-x64-masm.asm @ 0:1e5118fa0cb1
This is NSS with a CMake buildsystem.
To compile a static NSS library for Windows, we used the
Chromium NSS fork and added a CMake buildsystem so that it
can be built statically for Windows. See README.chromium for the
Chromium changes and README.trustbridge for our modifications.
author | Andre Heinecke <andre.heinecke@intevation.de> |
---|---|
date | Mon, 28 Jul 2014 10:47:06 +0200 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:1e5118fa0cb1 |
---|---|
1 ; LICENSE: | |
2 ; This submission to NSS is to be made available under the terms of the | |
3 ; Mozilla Public License, v. 2.0. You can obtain one at http: | |
4 ; //mozilla.org/MPL/2.0/. | |
5 ;############################################################################### | |
6 ; Copyright(c) 2014, Intel Corp. | |
7 ; Developers and authors: | |
8 ; Shay Gueron and Vlad Krasnov | |
9 ; Intel Corporation, Israel Development Centre, Haifa, Israel | |
10 ; Please send feedback directly to crypto.feedback.alias@intel.com | |
11 | |
12 | |
.DATA
ALIGN 16
; Byte-shuffle masks (consumed via pshufb) and round-constant vectors
; (consumed via aesenclast/pslld) used by the key-expansion PROCs below.
Lmask    dd 0c0f0e0dh,0c0f0e0dh,0c0f0e0dh,0c0f0e0dh
Lmask192 dd 004070605h, 004070605h, 004070605h, 004070605h
Lmask256 dd 00c0f0e0dh, 00c0f0e0dh, 00c0f0e0dh, 00c0f0e0dh
Lcon1    dd 1,1,1,1                     ; initial AES rcon = 1 (doubled each round via pslld)
Lcon2    dd 1bh,1bh,1bh,1bh             ; rcon after the 80h -> 1Bh reduction step

.CODE

; Microsoft x64 calling convention: first four integer arguments arrive
; in rcx, rdx, r8, r9.  These aliases name them for the bulk-cipher
; routines.  NOTE: input/inputLen are reloaded from the 5th/6th stack
; arguments inside each routine; r8/r9 initially hold other arguments.
ctx      textequ <rcx>
output   textequ <rdx>
input    textequ <r8>
inputLen textequ <r9d>
28 | |
; aes_rnd i — apply encryption round i (round key at [ctx + i*16]) to
; the eight pipelined block registers xmm0-xmm7.  Clobbers xmm8.
aes_rnd MACRO i
        movdqu  xmm8, [i*16 + ctx]
        aesenc  xmm0, xmm8
        aesenc  xmm1, xmm8
        aesenc  xmm2, xmm8
        aesenc  xmm3, xmm8
        aesenc  xmm4, xmm8
        aesenc  xmm5, xmm8
        aesenc  xmm6, xmm8
        aesenc  xmm7, xmm8
ENDM
40 | |
; aes_last_rnd i — apply the final encryption round (no MixColumns) with
; round key i to xmm0-xmm7.  Clobbers xmm8.
aes_last_rnd MACRO i
        movdqu  xmm8, [i*16 + ctx]
        aesenclast xmm0, xmm8
        aesenclast xmm1, xmm8
        aesenclast xmm2, xmm8
        aesenclast xmm3, xmm8
        aesenclast xmm4, xmm8
        aesenclast xmm5, xmm8
        aesenclast xmm6, xmm8
        aesenclast xmm7, xmm8
ENDM
52 | |
; aes_dec_rnd i — apply decryption round i (equivalent-inverse round key
; at [ctx + i*16]) to xmm0-xmm7.  Clobbers xmm8.
aes_dec_rnd MACRO i
        movdqu  xmm8, [i*16 + ctx]
        aesdec  xmm0, xmm8
        aesdec  xmm1, xmm8
        aesdec  xmm2, xmm8
        aesdec  xmm3, xmm8
        aesdec  xmm4, xmm8
        aesdec  xmm5, xmm8
        aesdec  xmm6, xmm8
        aesdec  xmm7, xmm8
ENDM
64 | |
; aes_dec_last_rnd i — apply the final decryption round (no
; InvMixColumns) with round key i to xmm0-xmm7.  Clobbers xmm8.
aes_dec_last_rnd MACRO i
        movdqu  xmm8, [i*16 + ctx]
        aesdeclast xmm0, xmm8
        aesdeclast xmm1, xmm8
        aesdeclast xmm2, xmm8
        aesdeclast xmm3, xmm8
        aesdeclast xmm4, xmm8
        aesdeclast xmm5, xmm8
        aesdeclast xmm6, xmm8
        aesdeclast xmm7, xmm8
ENDM
76 | |
77 | |
;-----------------------------------------------------------------------
; gen_aes_ecb_func enc, rnds
;
; Expands into the complete body of an ECB routine: enc=1 selects
; aesenc/aesenclast (encrypt), enc=0 selects aesdec/aesdeclast
; (decrypt); rnds is the round count (10/12/14 for AES-128/192/256).
;
; Microsoft x64 ABI.  ctx (rcx) points at the cipher context; the round
; keys start 48 bytes in (same offset is used by every routine in this
; file).  output (rdx) is the destination.  The real input pointer and
; byte length are the 5th and 6th C arguments and are fetched from the
; stack below, overwriting the unused r8/r9 register arguments.
; Returns 0 in rax.  Any trailing partial block (inputLen mod 16) is
; ignored.  xmm6-xmm8 are callee-saved on Win64 and are preserved.
; NOTE(review): no SEH unwind data (PROC FRAME) is emitted for these
; non-leaf-looking frames — confirm that is acceptable for this build.
;-----------------------------------------------------------------------
gen_aes_ecb_func MACRO enc, rnds

LOCAL   loop8
LOCAL   loop1
LOCAL   bail

        ; 5th/6th args sit above the return address + 32-byte shadow
        ; space.  (The xor is redundant — a 32-bit mov to r9d already
        ; zero-extends — but is kept as upstream wrote it.)
        xor     inputLen, inputLen
        mov     input, [rsp + 1*8 + 8*4]
        mov     inputLen, [rsp + 1*8 + 8*5]

        sub     rsp, 3*16

        ; Preserve callee-saved XMM registers (Win64 ABI).
        movdqu  [rsp + 0*16], xmm6
        movdqu  [rsp + 1*16], xmm7
        movdqu  [rsp + 2*16], xmm8

        lea     ctx, [48+ctx]           ; skip context header to round keys

loop8:
        ; Fast path: 8 blocks per iteration to keep the AES unit's
        ; pipeline full (ECB blocks are independent).
        cmp     inputLen, 8*16
        jb      loop1

        movdqu  xmm0, [0*16 + input]
        movdqu  xmm1, [1*16 + input]
        movdqu  xmm2, [2*16 + input]
        movdqu  xmm3, [3*16 + input]
        movdqu  xmm4, [4*16 + input]
        movdqu  xmm5, [5*16 + input]
        movdqu  xmm6, [6*16 + input]
        movdqu  xmm7, [7*16 + input]

        ; Initial whitening with round key 0.
        movdqu  xmm8, [0*16 + ctx]
        pxor    xmm0, xmm8
        pxor    xmm1, xmm8
        pxor    xmm2, xmm8
        pxor    xmm3, xmm8
        pxor    xmm4, xmm8
        pxor    xmm5, xmm8
        pxor    xmm6, xmm8
        pxor    xmm7, xmm8

        ; Bind direction-specific helper names at macro-expansion time.
IF enc eq 1
rnd         textequ <aes_rnd>
lastrnd     textequ <aes_last_rnd>
aesinst     textequ <aesenc>
aeslastinst textequ <aesenclast>
ELSE
rnd         textequ <aes_dec_rnd>
lastrnd     textequ <aes_dec_last_rnd>
aesinst     textequ <aesdec>
aeslastinst textequ <aesdeclast>
ENDIF

        ; Rounds 1 .. rnds-1, then the final round, fully unrolled.
i = 1
WHILE i LT rnds
        rnd     i
i = i+1
ENDM
        lastrnd rnds

        movdqu  [0*16 + output], xmm0
        movdqu  [1*16 + output], xmm1
        movdqu  [2*16 + output], xmm2
        movdqu  [3*16 + output], xmm3
        movdqu  [4*16 + output], xmm4
        movdqu  [5*16 + output], xmm5
        movdqu  [6*16 + output], xmm6
        movdqu  [7*16 + output], xmm7

        lea     input, [8*16 + input]
        lea     output, [8*16 + output]
        sub     inputLen, 8*16
        jmp     loop8

loop1:
        ; Tail: one block at a time (fewer than 8 blocks left).
        cmp     inputLen, 1*16
        jb      bail

        movdqu  xmm0, [input]
        movdqu  xmm7, [0*16 + ctx]
        pxor    xmm0, xmm7              ; whitening

i = 1
WHILE i LT rnds
        movdqu  xmm7, [i*16 + ctx]
        aesinst xmm0, xmm7
i = i+1
ENDM
        movdqu  xmm7, [rnds*16 + ctx]
        aeslastinst xmm0, xmm7

        movdqu  [output], xmm0

        lea     input, [1*16 + input]
        lea     output, [1*16 + output]
        sub     inputLen, 1*16
        jmp     loop1

bail:
        xor     rax, rax                ; return 0 (success)

        movdqu  xmm6, [rsp + 0*16]
        movdqu  xmm7, [rsp + 1*16]
        movdqu  xmm8, [rsp + 2*16]
        add     rsp, 3*16
        ret
ENDM
185 | |
; ECB entry points: each is one expansion of gen_aes_ecb_func with the
; direction flag (1 = encrypt, 0 = decrypt) and the round count for its
; key size (10/12/14 <=> AES-128/192/256).  The macro body ends in ret.
intel_aes_encrypt_ecb_128 PROC
        gen_aes_ecb_func 1, 10
intel_aes_encrypt_ecb_128 ENDP

intel_aes_encrypt_ecb_192 PROC
        gen_aes_ecb_func 1, 12
intel_aes_encrypt_ecb_192 ENDP

intel_aes_encrypt_ecb_256 PROC
        gen_aes_ecb_func 1, 14
intel_aes_encrypt_ecb_256 ENDP

intel_aes_decrypt_ecb_128 PROC
        gen_aes_ecb_func 0, 10
intel_aes_decrypt_ecb_128 ENDP

intel_aes_decrypt_ecb_192 PROC
        gen_aes_ecb_func 0, 12
intel_aes_decrypt_ecb_192 ENDP

intel_aes_decrypt_ecb_256 PROC
        gen_aes_ecb_func 0, 14
intel_aes_decrypt_ecb_256 ENDP
209 | |
210 | |
; Register aliases for the key-expansion routines below:
;   KEY = raw user key (1st arg), KS = key-schedule output buffer
;   (2nd arg), ITR = scratch / loop counter (3rd arg register reused).
KEY textequ <rcx>
KS  textequ <rdx>
ITR textequ <r8>
214 | |
;-----------------------------------------------------------------------
; intel_aes_encrypt_init_128(KEY = rcx, KS = rdx)
; AES-128 key expansion: writes 11 round keys (176 bytes) to [KS].
; Per step: pshufb+aesenclast computes SubWord(RotWord(w3)) ^ rcon in
; every dword lane; the pslldq/pxor ladder folds the previous round key
; into itself so a single pxor yields the new round key.
; Clobbers xmm0-xmm4, ITR (r8).  Uses only volatile registers.
;-----------------------------------------------------------------------
intel_aes_encrypt_init_128 PROC

        movdqu  xmm1, [KEY]
        movdqu  [KS], xmm1              ; round key 0 = raw key
        movdqa  xmm2, xmm1

        lea     ITR, Lcon1
        movdqa  xmm0, [ITR]             ; rcon = 1, doubled each round
        lea     ITR, Lmask
        movdqa  xmm4, [ITR]             ; RotWord/broadcast shuffle mask

        mov     ITR, 8                  ; rounds 1..8 (rcon 1..80h)

Lenc_128_ks_loop:
        lea     KS, [16 + KS]
        dec     ITR                     ; sets ZF; SSE ops below leave EFLAGS intact

        pshufb  xmm2, xmm4              ; broadcast RotWord(w3) to all lanes
        aesenclast xmm2, xmm0           ; SubBytes == SubWord; xor rcon
        pslld   xmm0, 1                 ; next rcon
        movdqa  xmm3, xmm1
        pslldq  xmm3, 4
        pxor    xmm1, xmm3              ; w ^= w << 32
        pslldq  xmm3, 4
        pxor    xmm1, xmm3              ; w ^= w << 64
        pslldq  xmm3, 4
        pxor    xmm1, xmm3              ; w ^= w << 96
        pxor    xmm1, xmm2              ; fold in SubWord/rcon term
        movdqu  [KS], xmm1
        movdqa  xmm2, xmm1

        jne     Lenc_128_ks_loop        ; consumes ZF from dec ITR above

        ; Rounds 9 and 10: rcon wraps to 1Bh, then 36h (1Bh << 1).
        lea     ITR, Lcon2
        movdqa  xmm0, [ITR]

        pshufb  xmm2, xmm4
        aesenclast xmm2, xmm0
        pslld   xmm0, 1
        movdqa  xmm3, xmm1
        pslldq  xmm3, 4
        pxor    xmm1, xmm3
        pslldq  xmm3, 4
        pxor    xmm1, xmm3
        pslldq  xmm3, 4
        pxor    xmm1, xmm3
        pxor    xmm1, xmm2
        movdqu  [16 + KS], xmm1         ; round key 9
        movdqa  xmm2, xmm1

        pshufb  xmm2, xmm4
        aesenclast xmm2, xmm0
        movdqa  xmm3, xmm1
        pslldq  xmm3, 4
        pxor    xmm1, xmm3
        pslldq  xmm3, 4
        pxor    xmm1, xmm3
        pslldq  xmm3, 4
        pxor    xmm1, xmm3
        pxor    xmm1, xmm2
        movdqu  [32 + KS], xmm1         ; round key 10
        movdqa  xmm2, xmm1

        ret
intel_aes_encrypt_init_128 ENDP
280 | |
281 | |
;-----------------------------------------------------------------------
; intel_aes_decrypt_init_128(KEY = rcx, KS = rdx)
; Builds the equivalent-inverse-cipher schedule for aesdec: runs the
; encryption expansion, then reverses the round-key order and applies
; InvMixColumns (aesimc) to the 9 inner keys (not to keys 0 and 10).
; NOTE(review): no 32-byte shadow space is allocated before the call;
; works because the callee never spills its register args — confirm.
;-----------------------------------------------------------------------
intel_aes_decrypt_init_128 PROC

        push    KS                      ; callee clobbers both arg regs
        push    KEY                     ; (two pushes also keep rsp 16-aligned at the call)

        call    intel_aes_encrypt_init_128

        pop     KEY
        pop     KS

        ; Swap the outermost pair (no aesimc on first/last round keys).
        movdqu  xmm0, [0*16 + KS]
        movdqu  xmm1, [10*16 + KS]
        movdqu  [10*16 + KS], xmm0
        movdqu  [0*16 + KS], xmm1

        ; Swap + InvMixColumns pairs (1,9) (2,8) (3,7) (4,6).
i = 1
WHILE i LT 5
        movdqu  xmm0, [i*16 + KS]
        movdqu  xmm1, [(10-i)*16 + KS]

        aesimc  xmm0, xmm0
        aesimc  xmm1, xmm1

        movdqu  [(10-i)*16 + KS], xmm0
        movdqu  [i*16 + KS], xmm1

i = i+1
ENDM

        ; Middle key 5 stays in place but still needs InvMixColumns.
        movdqu  xmm0, [5*16 + KS]
        aesimc  xmm0, xmm0
        movdqu  [5*16 + KS], xmm0
        ret
intel_aes_decrypt_init_128 ENDP
316 | |
317 | |
;-----------------------------------------------------------------------
; intel_aes_encrypt_init_192(KEY = rcx, KS = rdx)
; AES-192 key expansion: writes 13 round keys (208 bytes) to [KS].
; The 24-byte key is held as xmm1 (low 16 bytes) + xmm3 (high 8 bytes);
; each loop iteration performs two 6-word expansion steps and emits
; three 16-byte round keys, repacking the 24-byte state with shufpd.
; Preserves callee-saved xmm6/xmm7 on the stack (Win64 ABI).
;-----------------------------------------------------------------------
intel_aes_encrypt_init_192 PROC

        sub     rsp, 16*2
        movdqu  [16*0 + rsp], xmm6      ; callee-saved on Win64
        movdqu  [16*1 + rsp], xmm7

        movdqu  xmm1, [KEY]             ; key bytes 0..15
        mov     ITR, [16 + KEY]         ; key bytes 16..23
        movd    xmm3, ITR

        movdqu  [KS], xmm1              ; round key 0
        movdqa  xmm5, xmm3

        lea     ITR, Lcon1
        movdqu  xmm0, [ITR]             ; rcon, doubled each step
        lea     ITR, Lmask192
        movdqu  xmm4, [ITR]             ; RotWord mask for the 192-bit layout

        mov     ITR, 4                  ; 4 iterations x 2 steps = 8 expansion steps

Lenc_192_ks_loop:
        ; --- expansion step A: extend (xmm1, xmm3) by 6 words ---
        movdqa  xmm2, xmm3
        pshufb  xmm2, xmm4              ; RotWord of the last key word
        aesenclast xmm2, xmm0           ; SubWord ^ rcon
        pslld   xmm0, 1                 ; next rcon

        movdqa  xmm6, xmm1
        movdqa  xmm7, xmm3
        pslldq  xmm6, 4
        pslldq  xmm7, 4
        pxor    xmm1, xmm6
        pxor    xmm3, xmm7
        pslldq  xmm6, 4
        pxor    xmm1, xmm6
        pslldq  xmm6, 4
        pxor    xmm1, xmm6
        pxor    xmm1, xmm2
        pshufd  xmm2, xmm1, 0ffh        ; broadcast new w3
        pxor    xmm3, xmm2

        ; Repack the 24-byte state into two 16-byte round keys.
        movdqa  xmm6, xmm1
        shufpd  xmm5, xmm1, 00h
        shufpd  xmm6, xmm3, 01h

        movdqu  [16 + KS], xmm5
        movdqu  [32 + KS], xmm6

        ; --- expansion step B: same recurrence again ---
        movdqa  xmm2, xmm3
        pshufb  xmm2, xmm4
        aesenclast xmm2, xmm0
        pslld   xmm0, 1

        movdqa  xmm6, xmm1
        movdqa  xmm7, xmm3
        pslldq  xmm6, 4
        pslldq  xmm7, 4
        pxor    xmm1, xmm6
        pxor    xmm3, xmm7
        pslldq  xmm6, 4
        pxor    xmm1, xmm6
        pslldq  xmm6, 4
        pxor    xmm1, xmm6
        pxor    xmm1, xmm2
        pshufd  xmm2, xmm1, 0ffh
        pxor    xmm3, xmm2

        movdqu  [48 + KS], xmm1
        movdqa  xmm5, xmm3

        lea     KS, [48 + KS]           ; three round keys emitted per iteration

        dec     ITR
        jnz     Lenc_192_ks_loop

        movdqu  [16 + KS], xmm5         ; final (13th) round key

        movdqu  xmm7, [16*1 + rsp]
        movdqu  xmm6, [16*0 + rsp]
        add     rsp, 16*2
        ret
intel_aes_encrypt_init_192 ENDP
399 | |
;-----------------------------------------------------------------------
; intel_aes_decrypt_init_192(KEY = rcx, KS = rdx)
; Equivalent-inverse schedule for AES-192: expand for encryption, then
; reverse the 13 round keys and aesimc the 11 inner ones.
; NOTE(review): as in the 128-bit variant, no shadow space before call.
;-----------------------------------------------------------------------
intel_aes_decrypt_init_192 PROC
        push    KS
        push    KEY

        call    intel_aes_encrypt_init_192

        pop     KEY
        pop     KS

        ; Swap first/last round keys (no aesimc on these).
        movdqu  xmm0, [0*16 + KS]
        movdqu  xmm1, [12*16 + KS]
        movdqu  [12*16 + KS], xmm0
        movdqu  [0*16 + KS], xmm1

        ; Swap + InvMixColumns pairs (1,11) .. (5,7).
i = 1
WHILE i LT 6
        movdqu  xmm0, [i*16 + KS]
        movdqu  xmm1, [(12-i)*16 + KS]

        aesimc  xmm0, xmm0
        aesimc  xmm1, xmm1

        movdqu  [(12-i)*16 + KS], xmm0
        movdqu  [i*16 + KS], xmm1

i = i+1
ENDM

        ; Middle key 6: in place, InvMixColumns only.
        movdqu  xmm0, [6*16 + KS]
        aesimc  xmm0, xmm0
        movdqu  [6*16 + KS], xmm0
        ret
intel_aes_decrypt_init_192 ENDP
433 | |
434 | |
;-----------------------------------------------------------------------
; intel_aes_encrypt_init_256(KEY = rcx, KS = rdx)
; AES-256 key expansion: writes 15 round keys (240 bytes) to [KS].
; State is two 16-byte halves (xmm1 even keys, xmm3 odd keys).  Each
; loop iteration emits one even key (RotWord+SubWord+rcon step) and one
; odd key (SubWord-only step: aesenclast with an all-zero rcon in xmm6).
; Preserves callee-saved xmm6/xmm7 (Win64 ABI).
;-----------------------------------------------------------------------
intel_aes_encrypt_init_256 PROC
        sub     rsp, 16*2
        movdqu  [16*0 + rsp], xmm6      ; callee-saved on Win64
        movdqu  [16*1 + rsp], xmm7

        movdqu  xmm1, [16*0 + KEY]
        movdqu  xmm3, [16*1 + KEY]

        movdqu  [16*0 + KS], xmm1       ; round keys 0 and 1 = raw key
        movdqu  [16*1 + KS], xmm3

        lea     ITR, Lcon1
        movdqu  xmm0, [ITR]             ; rcon, doubled each iteration
        lea     ITR, Lmask256
        movdqu  xmm5, [ITR]             ; RotWord shuffle mask

        pxor    xmm6, xmm6              ; zero "rcon" for the SubWord-only steps

        mov     ITR, 6                  ; 6 iterations x 2 keys, + 1 final key

Lenc_256_ks_loop:

        ; Even round key: SubWord(RotWord(w)) ^ rcon recurrence.
        movdqa  xmm2, xmm3
        pshufb  xmm2, xmm5
        aesenclast xmm2, xmm0
        pslld   xmm0, 1
        movdqa  xmm4, xmm1
        pslldq  xmm4, 4
        pxor    xmm1, xmm4
        pslldq  xmm4, 4
        pxor    xmm1, xmm4
        pslldq  xmm4, 4
        pxor    xmm1, xmm4
        pxor    xmm1, xmm2
        movdqu  [16*2 + KS], xmm1

        ; Odd round key: SubWord (no rotate, no rcon) recurrence.
        pshufd  xmm2, xmm1, 0ffh        ; broadcast last word of the even key
        aesenclast xmm2, xmm6           ; SubWord only (xmm6 = 0)
        movdqa  xmm4, xmm3
        pslldq  xmm4, 4
        pxor    xmm3, xmm4
        pslldq  xmm4, 4
        pxor    xmm3, xmm4
        pslldq  xmm4, 4
        pxor    xmm3, xmm4
        pxor    xmm3, xmm2
        movdqu  [16*3 + KS], xmm3

        lea     KS, [32 + KS]
        dec     ITR
        jnz     Lenc_256_ks_loop

        ; Final (15th) round key: one more even step.
        movdqa  xmm2, xmm3
        pshufb  xmm2, xmm5
        aesenclast xmm2, xmm0
        movdqa  xmm4, xmm1
        pslldq  xmm4, 4
        pxor    xmm1, xmm4
        pslldq  xmm4, 4
        pxor    xmm1, xmm4
        pslldq  xmm4, 4
        pxor    xmm1, xmm4
        pxor    xmm1, xmm2
        movdqu  [16*2 + KS], xmm1

        movdqu  xmm7, [16*1 + rsp]
        movdqu  xmm6, [16*0 + rsp]
        add     rsp, 16*2
        ret

intel_aes_encrypt_init_256 ENDP
506 | |
507 | |
;-----------------------------------------------------------------------
; intel_aes_decrypt_init_256(KEY = rcx, KS = rdx)
; Equivalent-inverse schedule for AES-256: expand for encryption, then
; reverse the 15 round keys and aesimc the 13 inner ones.
; NOTE(review): as in the 128-bit variant, no shadow space before call.
;-----------------------------------------------------------------------
intel_aes_decrypt_init_256 PROC
        push    KS
        push    KEY

        call    intel_aes_encrypt_init_256

        pop     KEY
        pop     KS

        ; Swap first/last round keys (no aesimc on these).
        movdqu  xmm0, [0*16 + KS]
        movdqu  xmm1, [14*16 + KS]
        movdqu  [14*16 + KS], xmm0
        movdqu  [0*16 + KS], xmm1

        ; Swap + InvMixColumns pairs (1,13) .. (6,8).
i = 1
WHILE i LT 7
        movdqu  xmm0, [i*16 + KS]
        movdqu  xmm1, [(14-i)*16 + KS]

        aesimc  xmm0, xmm0
        aesimc  xmm1, xmm1

        movdqu  [(14-i)*16 + KS], xmm0
        movdqu  [i*16 + KS], xmm1

i = i+1
ENDM

        ; Middle key 7: in place, InvMixColumns only.
        movdqu  xmm0, [7*16 + KS]
        aesimc  xmm0, xmm0
        movdqu  [7*16 + KS], xmm0
        ret
intel_aes_decrypt_init_256 ENDP
541 | |
542 | |
543 | |
;-----------------------------------------------------------------------
; gen_aes_cbc_enc_func rnds
;
; Body of a CBC-encrypt routine for `rnds` rounds.  CBC encryption is
; inherently serial (each block chains on the previous ciphertext), so
; this processes one block at a time and instead caches round keys 0-5
; in xmm2-xmm7 to cut memory traffic in the hot loop.
; The running IV/ciphertext lives at [ctx-32] (i.e. 16 bytes into the
; 48-byte context header skipped below) and is updated on exit.
; ABI and argument handling match gen_aes_ecb_func.  Returns 0 in rax.
;-----------------------------------------------------------------------
gen_aes_cbc_enc_func MACRO rnds

LOCAL   loop1
LOCAL   bail

        ; 5th/6th C arguments (input pointer, byte length).
        mov     input, [rsp + 1*8 + 8*4]
        mov     inputLen, [rsp + 1*8 + 8*5]

        sub     rsp, 3*16

        ; Preserve callee-saved XMM registers (Win64 ABI).
        movdqu  [rsp + 0*16], xmm6
        movdqu  [rsp + 1*16], xmm7
        movdqu  [rsp + 2*16], xmm8

        lea     ctx, [48+ctx]           ; round keys follow the 48-byte header

        movdqu  xmm0, [-32+ctx]         ; xmm0 = IV / previous ciphertext

        ; Cache round keys 0-5 in registers for the serial loop.
        movdqu  xmm2, [0*16 + ctx]
        movdqu  xmm3, [1*16 + ctx]
        movdqu  xmm4, [2*16 + ctx]
        movdqu  xmm5, [3*16 + ctx]
        movdqu  xmm6, [4*16 + ctx]
        movdqu  xmm7, [5*16 + ctx]

loop1:
        cmp     inputLen, 1*16
        jb      bail

        movdqu  xmm1, [input]
        pxor    xmm1, xmm2              ; plaintext ^ round key 0
        pxor    xmm0, xmm1              ; ... ^ previous ciphertext (CBC chain)

        aesenc  xmm0, xmm3
        aesenc  xmm0, xmm4
        aesenc  xmm0, xmm5
        aesenc  xmm0, xmm6
        aesenc  xmm0, xmm7

        ; Remaining rounds come from memory.
i = 6
WHILE i LT rnds
        movdqu  xmm8, [i*16 + ctx]
        aesenc  xmm0, xmm8
i = i+1
ENDM
        movdqu  xmm8, [rnds*16 + ctx]
        aesenclast xmm0, xmm8

        movdqu  [output], xmm0          ; xmm0 stays live as the new chain value

        lea     input, [1*16 + input]
        lea     output, [1*16 + output]
        sub     inputLen, 1*16
        jmp     loop1

bail:
        movdqu  [-32+ctx], xmm0         ; persist IV for the next call

        xor     rax, rax                ; return 0 (success)

        movdqu  xmm6, [rsp + 0*16]
        movdqu  xmm7, [rsp + 1*16]
        movdqu  xmm8, [rsp + 2*16]
        add     rsp, 3*16
        ret

ENDM
611 | |
;-----------------------------------------------------------------------
; gen_aes_cbc_dec_func rnds
;
; Body of a CBC-decrypt routine for `rnds` rounds.  Unlike encryption,
; CBC decryption parallelizes: 8 blocks are decrypted per iteration,
; then each is xored with the preceding ciphertext block (the IV at
; [ctx-32] for block 0).  The last ciphertext block is written back to
; [ctx-32] as the chain value for the next call.
; ABI and argument handling match gen_aes_ecb_func.  Returns 0 in rax.
;-----------------------------------------------------------------------
gen_aes_cbc_dec_func MACRO rnds

LOCAL   loop8
LOCAL   loop1
LOCAL   dec1
LOCAL   bail

        ; 5th/6th C arguments (input pointer, byte length).
        mov     input, [rsp + 1*8 + 8*4]
        mov     inputLen, [rsp + 1*8 + 8*5]

        sub     rsp, 3*16

        ; Preserve callee-saved XMM registers (Win64 ABI).
        movdqu  [rsp + 0*16], xmm6
        movdqu  [rsp + 1*16], xmm7
        movdqu  [rsp + 2*16], xmm8

        lea     ctx, [48+ctx]           ; round keys follow the 48-byte header

loop8:
        cmp     inputLen, 8*16
        jb      dec1

        movdqu  xmm0, [0*16 + input]
        movdqu  xmm1, [1*16 + input]
        movdqu  xmm2, [2*16 + input]
        movdqu  xmm3, [3*16 + input]
        movdqu  xmm4, [4*16 + input]
        movdqu  xmm5, [5*16 + input]
        movdqu  xmm6, [6*16 + input]
        movdqu  xmm7, [7*16 + input]

        ; Initial whitening with round key 0.
        movdqu  xmm8, [0*16 + ctx]
        pxor    xmm0, xmm8
        pxor    xmm1, xmm8
        pxor    xmm2, xmm8
        pxor    xmm3, xmm8
        pxor    xmm4, xmm8
        pxor    xmm5, xmm8
        pxor    xmm6, xmm8
        pxor    xmm7, xmm8

        ; Rounds 1 .. rnds-1, then the final round.
i = 1
WHILE i LT rnds
        aes_dec_rnd i
i = i+1
ENDM
        aes_dec_last_rnd rnds

        ; CBC unchain: block k ^= ciphertext block k-1 (IV for k = 0).
        ; Ciphertext is re-read from input, so in-place operation works.
        movdqu  xmm8, [-32 + ctx]       ; IV / last ciphertext of prior call
        pxor    xmm0, xmm8
        movdqu  xmm8, [0*16 + input]
        pxor    xmm1, xmm8
        movdqu  xmm8, [1*16 + input]
        pxor    xmm2, xmm8
        movdqu  xmm8, [2*16 + input]
        pxor    xmm3, xmm8
        movdqu  xmm8, [3*16 + input]
        pxor    xmm4, xmm8
        movdqu  xmm8, [4*16 + input]
        pxor    xmm5, xmm8
        movdqu  xmm8, [5*16 + input]
        pxor    xmm6, xmm8
        movdqu  xmm8, [6*16 + input]
        pxor    xmm7, xmm8
        movdqu  xmm8, [7*16 + input]    ; new chain value

        movdqu  [0*16 + output], xmm0
        movdqu  [1*16 + output], xmm1
        movdqu  [2*16 + output], xmm2
        movdqu  [3*16 + output], xmm3
        movdqu  [4*16 + output], xmm4
        movdqu  [5*16 + output], xmm5
        movdqu  [6*16 + output], xmm6
        movdqu  [7*16 + output], xmm7
        movdqu  [-32 + ctx], xmm8       ; persist chain value

        lea     input, [8*16 + input]
        lea     output, [8*16 + output]
        sub     inputLen, 8*16
        jmp     loop8
dec1:

        movdqu  xmm3, [-32 + ctx]       ; xmm3 = chain value for the tail loop

loop1:
        cmp     inputLen, 1*16
        jb      bail

        movdqu  xmm0, [input]
        movdqa  xmm4, xmm0              ; keep ciphertext: next chain value
        movdqu  xmm7, [0*16 + ctx]
        pxor    xmm0, xmm7              ; whitening

i = 1
WHILE i LT rnds
        movdqu  xmm7, [i*16 + ctx]
        aesdec  xmm0, xmm7
i = i+1
ENDM
        movdqu  xmm7, [rnds*16 + ctx]
        aesdeclast xmm0, xmm7
        pxor    xmm3, xmm0              ; unchain with previous ciphertext

        movdqu  [output], xmm3
        movdqa  xmm3, xmm4              ; advance chain value

        lea     input, [1*16 + input]
        lea     output, [1*16 + output]
        sub     inputLen, 1*16
        jmp     loop1

bail:
        movdqu  [-32 + ctx], xmm3       ; persist chain value
        xor     rax, rax                ; return 0 (success)

        movdqu  xmm6, [rsp + 0*16]
        movdqu  xmm7, [rsp + 1*16]
        movdqu  xmm8, [rsp + 2*16]
        add     rsp, 3*16
        ret
ENDM
733 | |
; CBC entry points: one expansion of the matching CBC macro per key
; size (10/12/14 rounds <=> AES-128/192/256).  The macro body ends in ret.
intel_aes_encrypt_cbc_128 PROC
        gen_aes_cbc_enc_func 10
intel_aes_encrypt_cbc_128 ENDP

intel_aes_encrypt_cbc_192 PROC
        gen_aes_cbc_enc_func 12
intel_aes_encrypt_cbc_192 ENDP

intel_aes_encrypt_cbc_256 PROC
        gen_aes_cbc_enc_func 14
intel_aes_encrypt_cbc_256 ENDP

intel_aes_decrypt_cbc_128 PROC
        gen_aes_cbc_dec_func 10
intel_aes_decrypt_cbc_128 ENDP

intel_aes_decrypt_cbc_192 PROC
        gen_aes_cbc_dec_func 12
intel_aes_decrypt_cbc_192 ENDP

intel_aes_decrypt_cbc_256 PROC
        gen_aes_cbc_dec_func 14
intel_aes_decrypt_cbc_256 ENDP
757 | |
758 | |
759 | |
; Extra register aliases for the CTR-mode routine: ctrCtx holds the
; CTR context pointer (moved out of rcx so ctx can be repointed at the
; cipher context), CTR/CTRSave hold the 32-bit big-endian counter word
; (CTRSave keeps the host-order running value across iterations).
ctrCtx  textequ <r10>
CTR     textequ <r11d>
CTRSave textequ <eax>
763 | |
;-----------------------------------------------------------------------
; gen_aes_ctr_func rnds
;
; Body of a CTR-mode routine for `rnds` rounds.  rcx = CTR context:
; the cipher-context pointer is read from offset 8 and the 16-byte
; counter block from offset 16 (the low dword of its last word is the
; 32-bit big-endian block counter).  Strategy: keep 8 upcoming counter
; blocks in a 16-aligned stack buffer, PRE-WHITENED with round key 0,
; so the per-block work starts at round 1; the counter increments for
; the next batch are interleaved with the first 8 AES rounds.  Because
; buffered blocks are pre-whitened, each refreshed counter dword is
; xored with the matching dword of round key 0 ([ctx + 3*4]).
; On exit the next un-whitened counter block is stored back to the
; context.  Returns 0 in rax.  NOTE(review): the counter is treated as
; 32-bit; wraparound past 2^32 blocks is not handled here — confirm
; callers bound the stream length.
;-----------------------------------------------------------------------
gen_aes_ctr_func MACRO rnds

LOCAL   loop8
LOCAL   loop1
LOCAL   enc1
LOCAL   bail

        ; 5th/6th C arguments (input pointer, byte length).
        mov     input, [rsp + 8*1 + 4*8]
        mov     inputLen, [rsp + 8*1 + 5*8]

        mov     ctrCtx, ctx
        mov     ctx, [8+ctrCtx]         ; inner cipher context
        lea     ctx, [48+ctx]           ; ... its round keys

        sub     rsp, 3*16
        movdqu  [rsp + 0*16], xmm6      ; callee-saved on Win64
        movdqu  [rsp + 1*16], xmm7
        movdqu  [rsp + 2*16], xmm8

        ; 16-aligned frame for the 8 buffered counter blocks.
        push    rbp
        mov     rbp, rsp
        sub     rsp, 8*16
        and     rsp, -16

        movdqu  xmm0, [16+ctrCtx]       ; current counter block
        mov     CTRSave, DWORD PTR [ctrCtx + 16 + 3*4]
        bswap   CTRSave                 ; big-endian counter -> host order
        movdqu  xmm1, [ctx + 0*16]

        pxor    xmm0, xmm1              ; pre-whiten with round key 0

        ; Seed all 8 slots, then patch in counters +1 .. +7 below.
        movdqa  [rsp + 0*16], xmm0
        movdqa  [rsp + 1*16], xmm0
        movdqa  [rsp + 2*16], xmm0
        movdqa  [rsp + 3*16], xmm0
        movdqa  [rsp + 4*16], xmm0
        movdqa  [rsp + 5*16], xmm0
        movdqa  [rsp + 6*16], xmm0
        movdqa  [rsp + 7*16], xmm0

        inc     CTRSave
        mov     CTR, CTRSave
        bswap   CTR                     ; back to big-endian
        xor     CTR, DWORD PTR [ctx + 3*4]  ; re-whiten the patched dword
        mov     DWORD PTR [rsp + 1*16 + 3*4], CTR

        inc     CTRSave
        mov     CTR, CTRSave
        bswap   CTR
        xor     CTR, DWORD PTR [ctx + 3*4]
        mov     DWORD PTR [rsp + 2*16 + 3*4], CTR

        inc     CTRSave
        mov     CTR, CTRSave
        bswap   CTR
        xor     CTR, DWORD PTR [ctx + 3*4]
        mov     DWORD PTR [rsp + 3*16 + 3*4], CTR

        inc     CTRSave
        mov     CTR, CTRSave
        bswap   CTR
        xor     CTR, DWORD PTR [ctx + 3*4]
        mov     DWORD PTR [rsp + 4*16 + 3*4], CTR

        inc     CTRSave
        mov     CTR, CTRSave
        bswap   CTR
        xor     CTR, DWORD PTR [ctx + 3*4]
        mov     DWORD PTR [rsp + 5*16 + 3*4], CTR

        inc     CTRSave
        mov     CTR, CTRSave
        bswap   CTR
        xor     CTR, DWORD PTR [ctx + 3*4]
        mov     DWORD PTR [rsp + 6*16 + 3*4], CTR

        inc     CTRSave
        mov     CTR, CTRSave
        bswap   CTR
        xor     CTR, DWORD PTR [ctx + 3*4]
        mov     DWORD PTR [rsp + 7*16 + 3*4], CTR

loop8:
        cmp     inputLen, 8*16
        jb      loop1

        ; Load 8 pre-whitened counter blocks (round 0 already applied).
        movdqu  xmm0, [0*16 + rsp]
        movdqu  xmm1, [1*16 + rsp]
        movdqu  xmm2, [2*16 + rsp]
        movdqu  xmm3, [3*16 + rsp]
        movdqu  xmm4, [4*16 + rsp]
        movdqu  xmm5, [5*16 + rsp]
        movdqu  xmm6, [6*16 + rsp]
        movdqu  xmm7, [7*16 + rsp]

        ; Rounds 1-8, each interleaved with refreshing one buffered
        ; counter block for the NEXT batch (hides integer latency).
i = 1
WHILE i LE 8
        aes_rnd i

        inc     CTRSave
        mov     CTR, CTRSave
        bswap   CTR
        xor     CTR, DWORD PTR [ctx + 3*4]
        mov     DWORD PTR [rsp + (i-1)*16 + 3*4], CTR

i = i+1
ENDM
        ; Remaining rounds, then the final round.
WHILE i LT rnds
        aes_rnd i
i = i+1
ENDM
        aes_last_rnd rnds

        ; Keystream ^ input -> output.
        movdqu  xmm8, [0*16 + input]
        pxor    xmm0, xmm8
        movdqu  xmm8, [1*16 + input]
        pxor    xmm1, xmm8
        movdqu  xmm8, [2*16 + input]
        pxor    xmm2, xmm8
        movdqu  xmm8, [3*16 + input]
        pxor    xmm3, xmm8
        movdqu  xmm8, [4*16 + input]
        pxor    xmm4, xmm8
        movdqu  xmm8, [5*16 + input]
        pxor    xmm5, xmm8
        movdqu  xmm8, [6*16 + input]
        pxor    xmm6, xmm8
        movdqu  xmm8, [7*16 + input]
        pxor    xmm7, xmm8

        movdqu  [0*16 + output], xmm0
        movdqu  [1*16 + output], xmm1
        movdqu  [2*16 + output], xmm2
        movdqu  [3*16 + output], xmm3
        movdqu  [4*16 + output], xmm4
        movdqu  [5*16 + output], xmm5
        movdqu  [6*16 + output], xmm6
        movdqu  [7*16 + output], xmm7

        lea     input, [8*16 + input]
        lea     output, [8*16 + output]
        sub     inputLen, 8*16
        jmp     loop8

loop1:
        cmp     inputLen, 1*16
        jb      bail

        ; Consume one buffered (pre-whitened) counter block from the
        ; stack buffer; at most 8 exist, matching the < 8*16 tail.
        movdqu  xmm0, [rsp]
        add     rsp, 16

i = 1
WHILE i LT rnds
        movdqu  xmm7, [i*16 + ctx]
        aesenc  xmm0, xmm7
i = i+1
ENDM
        movdqu  xmm7, [rnds*16 + ctx]
        aesenclast xmm0, xmm7

        movdqu  xmm7, [input]
        pxor    xmm0, xmm7
        movdqu  [output], xmm0

        lea     input, [1*16 + input]
        lea     output, [1*16 + output]
        sub     inputLen, 1*16
        jmp     loop1

bail:

        ; Un-whiten the next unused counter block and persist it.
        movdqu  xmm0, [rsp]
        movdqu  xmm1, [ctx + 0*16]
        pxor    xmm0, xmm1
        movdqu  [16+ctrCtx], xmm0

        xor     rax, rax                ; return 0 (success)
        mov     rsp, rbp                ; discard counter buffer
        pop     rbp

        movdqu  xmm6, [rsp + 0*16]
        movdqu  xmm7, [rsp + 1*16]
        movdqu  xmm8, [rsp + 2*16]
        add     rsp, 3*16

        ret
ENDM
956 | |
957 | |
958 intel_aes_encrypt_ctr_128 PROC | |
959 gen_aes_ctr_func 10 | |
960 intel_aes_encrypt_ctr_128 ENDP | |
961 | |
962 intel_aes_encrypt_ctr_192 PROC | |
963 gen_aes_ctr_func 12 | |
964 intel_aes_encrypt_ctr_192 ENDP | |
965 | |
966 intel_aes_encrypt_ctr_256 PROC | |
967 gen_aes_ctr_func 14 | |
968 intel_aes_encrypt_ctr_256 ENDP | |
969 | |
970 | |
971 END |