nss-cmake-static: nss/lib/freebl/intel-aes-x64-masm.asm @ 4:b513267f632f (tip)
Build DBM module

author:   Andre Heinecke <andre.heinecke@intevation.de>
date:     Tue, 05 Aug 2014 18:58:03 +0200
parents:  1e5118fa0cb1
children: (none)
; LICENSE:
; This submission to NSS is to be made available under the terms of the
; Mozilla Public License, v. 2.0. You can obtain one at http:
; //mozilla.org/MPL/2.0/.
;###############################################################################
; Copyright(c) 2014, Intel Corp.
; Developers and authors:
; Shay Gueron and Vlad Krasnov
; Intel Corporation, Israel Development Centre, Haifa, Israel
; Please send feedback directly to crypto.feedback.alias@intel.com

.DATA
ALIGN 16
Lmask       dd 0c0f0e0dh,0c0f0e0dh,0c0f0e0dh,0c0f0e0dh
Lmask192    dd 004070605h, 004070605h, 004070605h, 004070605h
Lmask256    dd 00c0f0e0dh, 00c0f0e0dh, 00c0f0e0dh, 00c0f0e0dh
Lcon1       dd 1,1,1,1
Lcon2       dd 1bh,1bh,1bh,1bh

.CODE
ctx         textequ <rcx>
output      textequ <rdx>
input       textequ <r8>
inputLen    textequ <r9d>

aes_rnd MACRO i
        movdqu  xmm8, [i*16 + ctx]
        aesenc  xmm0, xmm8
        aesenc  xmm1, xmm8
        aesenc  xmm2, xmm8
        aesenc  xmm3, xmm8
        aesenc  xmm4, xmm8
        aesenc  xmm5, xmm8
        aesenc  xmm6, xmm8
        aesenc  xmm7, xmm8
        ENDM

aes_last_rnd MACRO i
        movdqu  xmm8, [i*16 + ctx]
        aesenclast  xmm0, xmm8
        aesenclast  xmm1, xmm8
        aesenclast  xmm2, xmm8
        aesenclast  xmm3, xmm8
        aesenclast  xmm4, xmm8
        aesenclast  xmm5, xmm8
        aesenclast  xmm6, xmm8
        aesenclast  xmm7, xmm8
        ENDM

aes_dec_rnd MACRO i
        movdqu  xmm8, [i*16 + ctx]
        aesdec  xmm0, xmm8
        aesdec  xmm1, xmm8
        aesdec  xmm2, xmm8
        aesdec  xmm3, xmm8
        aesdec  xmm4, xmm8
        aesdec  xmm5, xmm8
        aesdec  xmm6, xmm8
        aesdec  xmm7, xmm8
        ENDM

aes_dec_last_rnd MACRO i
        movdqu  xmm8, [i*16 + ctx]
        aesdeclast  xmm0, xmm8
        aesdeclast  xmm1, xmm8
        aesdeclast  xmm2, xmm8
        aesdeclast  xmm3, xmm8
        aesdeclast  xmm4, xmm8
        aesdeclast  xmm5, xmm8
        aesdeclast  xmm6, xmm8
        aesdeclast  xmm7, xmm8
        ENDM

gen_aes_ecb_func MACRO enc, rnds

LOCAL   loop8
LOCAL   loop1
LOCAL   bail

        xor     inputLen, inputLen
        mov     input,    [rsp + 1*8 + 8*4]
        mov     inputLen, [rsp + 1*8 + 8*5]

        sub     rsp, 3*16
        movdqu  [rsp + 0*16], xmm6
        movdqu  [rsp + 1*16], xmm7
        movdqu  [rsp + 2*16], xmm8

        lea     ctx, [48+ctx]

loop8:
        cmp     inputLen, 8*16
        jb      loop1

        movdqu  xmm0, [0*16 + input]
        movdqu  xmm1, [1*16 + input]
        movdqu  xmm2, [2*16 + input]
        movdqu  xmm3, [3*16 + input]
        movdqu  xmm4, [4*16 + input]
        movdqu  xmm5, [5*16 + input]
        movdqu  xmm6, [6*16 + input]
        movdqu  xmm7, [7*16 + input]

        movdqu  xmm8, [0*16 + ctx]
        pxor    xmm0, xmm8
        pxor    xmm1, xmm8
        pxor    xmm2, xmm8
        pxor    xmm3, xmm8
        pxor    xmm4, xmm8
        pxor    xmm5, xmm8
        pxor    xmm6, xmm8
        pxor    xmm7, xmm8

IF enc eq 1
        rnd         textequ <aes_rnd>
        lastrnd     textequ <aes_last_rnd>
        aesinst     textequ <aesenc>
        aeslastinst textequ <aesenclast>
ELSE
        rnd         textequ <aes_dec_rnd>
        lastrnd     textequ <aes_dec_last_rnd>
        aesinst     textequ <aesdec>
        aeslastinst textequ <aesdeclast>
ENDIF

        i = 1
        WHILE i LT rnds
            rnd i
            i = i+1
        ENDM
        lastrnd rnds

        movdqu  [0*16 + output], xmm0
        movdqu  [1*16 + output], xmm1
        movdqu  [2*16 + output], xmm2
        movdqu  [3*16 + output], xmm3
        movdqu  [4*16 + output], xmm4
        movdqu  [5*16 + output], xmm5
        movdqu  [6*16 + output], xmm6
        movdqu  [7*16 + output], xmm7

        lea     input,  [8*16 + input]
        lea     output, [8*16 + output]
        sub     inputLen, 8*16
        jmp     loop8

loop1:
        cmp     inputLen, 1*16
        jb      bail

        movdqu  xmm0, [input]
        movdqu  xmm7, [0*16 + ctx]
        pxor    xmm0, xmm7

        i = 1
        WHILE i LT rnds
            movdqu  xmm7, [i*16 + ctx]
            aesinst xmm0, xmm7
            i = i+1
        ENDM
        movdqu  xmm7, [rnds*16 + ctx]
        aeslastinst xmm0, xmm7

        movdqu  [output], xmm0

        lea     input,  [1*16 + input]
        lea     output, [1*16 + output]
        sub     inputLen, 1*16
        jmp     loop1

bail:
        xor     rax, rax

        movdqu  xmm6, [rsp + 0*16]
        movdqu  xmm7, [rsp + 1*16]
        movdqu  xmm8, [rsp + 2*16]
        add     rsp, 3*16
        ret
ENDM

intel_aes_encrypt_ecb_128 PROC
gen_aes_ecb_func 1, 10
intel_aes_encrypt_ecb_128 ENDP

intel_aes_encrypt_ecb_192 PROC
gen_aes_ecb_func 1, 12
intel_aes_encrypt_ecb_192 ENDP
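;-------------------------------------------------------------------------------
; Calling-convention notes for the ECB/CBC/CTR entry points (Microsoft x64 ABI):
; the first four arguments arrive in rcx, rdx, r8 and r9 and the rest on the
; stack, so the fifth and sixth arguments (input pointer and input length in
; bytes) are reloaded from [rsp + 28h] and [rsp + 30h]. xmm6-xmm8 are
; non-volatile in this ABI, which is why each routine spills them to a stack
; scratch area and restores them before ret. The "lea ctx, [48+ctx]" presumably
; skips the leading fields of freebl's AESContext so that ctx points at the
; expanded key schedule; the CBC code keeps its IV/feedback block 32 bytes below
; that adjusted pointer, and the CTR code keeps its counter block at offset 16
; of the counter context passed in rcx. "xor rax, rax" makes every routine
; return 0 (SECSuccess in NSS terms).
;-------------------------------------------------------------------------------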
intel_aes_encrypt_ecb_256 PROC
gen_aes_ecb_func 1, 14
intel_aes_encrypt_ecb_256 ENDP

intel_aes_decrypt_ecb_128 PROC
gen_aes_ecb_func 0, 10
intel_aes_decrypt_ecb_128 ENDP

intel_aes_decrypt_ecb_192 PROC
gen_aes_ecb_func 0, 12
intel_aes_decrypt_ecb_192 ENDP

intel_aes_decrypt_ecb_256 PROC
gen_aes_ecb_func 0, 14
intel_aes_decrypt_ecb_256 ENDP

KEY textequ <rcx>
KS  textequ <rdx>
ITR textequ <r8>

intel_aes_encrypt_init_128 PROC
        movdqu  xmm1, [KEY]
        movdqu  [KS], xmm1
        movdqa  xmm2, xmm1

        lea     ITR, Lcon1
        movdqa  xmm0, [ITR]
        lea     ITR, Lmask
        movdqa  xmm4, [ITR]

        mov     ITR, 8

Lenc_128_ks_loop:
        lea     KS, [16 + KS]
        dec     ITR

        pshufb  xmm2, xmm4
        aesenclast  xmm2, xmm0
        pslld   xmm0, 1
        movdqa  xmm3, xmm1
        pslldq  xmm3, 4
        pxor    xmm1, xmm3
        pslldq  xmm3, 4
        pxor    xmm1, xmm3
        pslldq  xmm3, 4
        pxor    xmm1, xmm3
        pxor    xmm1, xmm2
        movdqu  [KS], xmm1
        movdqa  xmm2, xmm1

        jne     Lenc_128_ks_loop

        lea     ITR, Lcon2
        movdqa  xmm0, [ITR]

        pshufb  xmm2, xmm4
        aesenclast  xmm2, xmm0
        pslld   xmm0, 1
        movdqa  xmm3, xmm1
        pslldq  xmm3, 4
        pxor    xmm1, xmm3
        pslldq  xmm3, 4
        pxor    xmm1, xmm3
        pslldq  xmm3, 4
        pxor    xmm1, xmm3
        pxor    xmm1, xmm2
        movdqu  [16 + KS], xmm1
        movdqa  xmm2, xmm1

        pshufb  xmm2, xmm4
        aesenclast  xmm2, xmm0
        movdqa  xmm3, xmm1
        pslldq  xmm3, 4
        pxor    xmm1, xmm3
        pslldq  xmm3, 4
        pxor    xmm1, xmm3
        pslldq  xmm3, 4
        pxor    xmm1, xmm3
        pxor    xmm1, xmm2
        movdqu  [32 + KS], xmm1
        movdqa  xmm2, xmm1

        ret
intel_aes_encrypt_init_128 ENDP

intel_aes_decrypt_init_128 PROC
        push    KS
        push    KEY
        call    intel_aes_encrypt_init_128
        pop     KEY
        pop     KS

        movdqu  xmm0, [0*16 + KS]
        movdqu  xmm1, [10*16 + KS]
        movdqu  [10*16 + KS], xmm0
        movdqu  [0*16 + KS], xmm1

        i = 1
        WHILE i LT 5
            movdqu  xmm0, [i*16 + KS]
            movdqu  xmm1, [(10-i)*16 + KS]
            aesimc  xmm0, xmm0
            aesimc  xmm1, xmm1
            movdqu  [(10-i)*16 + KS], xmm0
            movdqu  [i*16 + KS], xmm1
            i = i+1
        ENDM

        movdqu  xmm0, [5*16 + KS]
        aesimc  xmm0, xmm0
        movdqu  [5*16 + KS], xmm0
        ret
intel_aes_decrypt_init_128 ENDP

intel_aes_encrypt_init_192 PROC
        sub     rsp, 16*2
        movdqu  [16*0 + rsp], xmm6
        movdqu  [16*1 + rsp], xmm7

        movdqu  xmm1, [KEY]
        mov     ITR, [16 + KEY]
        movd    xmm3, ITR

        movdqu  [KS], xmm1
        movdqa  xmm5, xmm3

        lea     ITR, Lcon1
        movdqu  xmm0, [ITR]
        lea     ITR, Lmask192
        movdqu  xmm4, [ITR]

        mov     ITR, 4

Lenc_192_ks_loop:
        movdqa  xmm2, xmm3
        pshufb  xmm2, xmm4
        aesenclast  xmm2, xmm0
        pslld   xmm0, 1

        movdqa  xmm6, xmm1
        movdqa  xmm7, xmm3
        pslldq  xmm6, 4
        pslldq  xmm7, 4
        pxor    xmm1, xmm6
        pxor    xmm3, xmm7
        pslldq  xmm6, 4
        pxor    xmm1, xmm6
        pslldq  xmm6, 4
        pxor    xmm1, xmm6
        pxor    xmm1, xmm2
        pshufd  xmm2, xmm1, 0ffh
        pxor    xmm3, xmm2

        movdqa  xmm6, xmm1
        shufpd  xmm5, xmm1, 00h
        shufpd  xmm6, xmm3, 01h

        movdqu  [16 + KS], xmm5
        movdqu  [32 + KS], xmm6

        movdqa  xmm2, xmm3
        pshufb  xmm2, xmm4
        aesenclast  xmm2, xmm0
        pslld   xmm0, 1

        movdqa  xmm6, xmm1
        movdqa  xmm7, xmm3
        pslldq  xmm6, 4
        pslldq  xmm7, 4
        pxor    xmm1, xmm6
        pxor    xmm3, xmm7
        pslldq  xmm6, 4
        pxor    xmm1, xmm6
        pslldq  xmm6, 4
        pxor    xmm1, xmm6
        pxor    xmm1, xmm2
        pshufd  xmm2, xmm1, 0ffh
        pxor    xmm3, xmm2

        movdqu  [48 + KS], xmm1
        movdqa  xmm5, xmm3

        lea     KS, [48 + KS]
        dec     ITR
        jnz     Lenc_192_ks_loop

        movdqu  [16 + KS], xmm5

        movdqu  xmm7, [16*1 + rsp]
        movdqu  xmm6, [16*0 + rsp]
        add     rsp, 16*2
        ret
intel_aes_encrypt_init_192 ENDP

intel_aes_decrypt_init_192 PROC
        push    KS
        push    KEY
        call    intel_aes_encrypt_init_192
        pop     KEY
        pop     KS

        movdqu  xmm0, [0*16 + KS]
        movdqu  xmm1, [12*16 + KS]
        movdqu  [12*16 + KS], xmm0
        movdqu  [0*16 + KS], xmm1

        i = 1
        WHILE i LT 6
            movdqu  xmm0, [i*16 + KS]
            movdqu  xmm1, [(12-i)*16 + KS]
            aesimc  xmm0, xmm0
            aesimc  xmm1, xmm1
            movdqu  [(12-i)*16 + KS], xmm0
            movdqu  [i*16 + KS], xmm1
            i = i+1
        ENDM

        movdqu  xmm0, [6*16 + KS]
        aesimc  xmm0, xmm0
        movdqu  [6*16 + KS], xmm0
        ret
intel_aes_decrypt_init_192 ENDP

intel_aes_encrypt_init_256 PROC
        sub     rsp, 16*2
        movdqu  [16*0 + rsp], xmm6
        movdqu  [16*1 + rsp], xmm7

        movdqu  xmm1, [16*0 + KEY]
        movdqu  xmm3, [16*1 + KEY]

        movdqu  [16*0 + KS], xmm1
        movdqu  [16*1 + KS], xmm3

        lea     ITR, Lcon1
        movdqu  xmm0, [ITR]
        lea     ITR, Lmask256
        movdqu  xmm5, [ITR]

        pxor    xmm6, xmm6

        mov     ITR, 6

Lenc_256_ks_loop:
        movdqa  xmm2, xmm3
        pshufb  xmm2, xmm5
        aesenclast  xmm2, xmm0
        pslld   xmm0, 1

        movdqa  xmm4, xmm1
        pslldq  xmm4, 4
        pxor    xmm1, xmm4
        pslldq  xmm4, 4
        pxor    xmm1, xmm4
        pslldq  xmm4, 4
        pxor    xmm1, xmm4
        pxor    xmm1, xmm2
        movdqu  [16*2 + KS], xmm1

        pshufd  xmm2, xmm1, 0ffh
        aesenclast  xmm2, xmm6

        movdqa  xmm4, xmm3
        pslldq  xmm4, 4
        pxor    xmm3, xmm4
        pslldq  xmm4, 4
        pxor    xmm3, xmm4
        pslldq  xmm4, 4
        pxor    xmm3, xmm4
        pxor    xmm3, xmm2
        movdqu  [16*3 + KS], xmm3

        lea     KS, [32 + KS]
        dec     ITR
        jnz     Lenc_256_ks_loop

        movdqa  xmm2, xmm3
        pshufb  xmm2, xmm5
        aesenclast  xmm2, xmm0

        movdqa  xmm4, xmm1
        pslldq  xmm4, 4
        pxor    xmm1, xmm4
        pslldq  xmm4, 4
        pxor    xmm1, xmm4
        pslldq  xmm4, 4
        pxor    xmm1, xmm4
        pxor    xmm1, xmm2
        movdqu  [16*2 + KS], xmm1

        movdqu  xmm7, [16*1 + rsp]
        movdqu  xmm6, [16*0 + rsp]
        add     rsp, 16*2
        ret
intel_aes_encrypt_init_256 ENDP

intel_aes_decrypt_init_256 PROC
        push    KS
        push    KEY
        call    intel_aes_encrypt_init_256
        pop     KEY
        pop     KS

        movdqu  xmm0, [0*16 + KS]
        movdqu  xmm1, [14*16 + KS]
        movdqu  [14*16 + KS], xmm0
        movdqu  [0*16 + KS], xmm1

        i = 1
        WHILE i LT 7
            movdqu  xmm0, [i*16 + KS]
            movdqu  xmm1, [(14-i)*16 + KS]
            aesimc  xmm0, xmm0
            aesimc  xmm1, xmm1
            movdqu  [(14-i)*16 + KS], xmm0
            movdqu  [i*16 + KS], xmm1
            i = i+1
        ENDM

        movdqu  xmm0, [7*16 + KS]
        aesimc  xmm0, xmm0
        movdqu  [7*16 + KS], xmm0
        ret
intel_aes_decrypt_init_256 ENDP

gen_aes_cbc_enc_func MACRO rnds

LOCAL   loop1
LOCAL   bail

        mov     input,    [rsp + 1*8 + 8*4]
        mov     inputLen, [rsp + 1*8 + 8*5]

        sub     rsp, 3*16
        movdqu  [rsp + 0*16], xmm6
        movdqu  [rsp + 1*16], xmm7
        movdqu  [rsp + 2*16], xmm8

        lea     ctx, [48+ctx]

        movdqu  xmm0, [-32+ctx]

        movdqu  xmm2, [0*16 + ctx]
        movdqu  xmm3, [1*16 + ctx]
        movdqu  xmm4, [2*16 + ctx]
        movdqu  xmm5, [3*16 + ctx]
        movdqu  xmm6, [4*16 + ctx]
        movdqu  xmm7, [5*16 + ctx]

loop1:
        cmp     inputLen, 1*16
        jb      bail

        movdqu  xmm1, [input]
        pxor    xmm1, xmm2
        pxor    xmm0, xmm1

        aesenc  xmm0, xmm3
        aesenc  xmm0, xmm4
        aesenc  xmm0, xmm5
        aesenc  xmm0, xmm6
        aesenc  xmm0, xmm7

        i = 6
        WHILE i LT rnds
            movdqu  xmm8, [i*16 + ctx]
            aesenc  xmm0, xmm8
            i = i+1
        ENDM
        movdqu  xmm8, [rnds*16 + ctx]
        aesenclast  xmm0, xmm8

        movdqu  [output], xmm0

        lea     input,  [1*16 + input]
        lea     output, [1*16 + output]
        sub     inputLen, 1*16
        jmp     loop1

bail:
        movdqu  [-32+ctx], xmm0

        xor     rax, rax

        movdqu  xmm6, [rsp + 0*16]
        movdqu  xmm7, [rsp + 1*16]
        movdqu  xmm8, [rsp + 2*16]
        add     rsp, 3*16
        ret
ENDM

gen_aes_cbc_dec_func MACRO rnds

LOCAL   loop8
LOCAL   loop1
LOCAL   dec1
LOCAL   bail

        mov     input,    [rsp + 1*8 + 8*4]
        mov     inputLen, [rsp + 1*8 + 8*5]

        sub     rsp, 3*16
        movdqu  [rsp + 0*16], xmm6
        movdqu  [rsp + 1*16], xmm7
        movdqu  [rsp + 2*16], xmm8

        lea     ctx, [48+ctx]

loop8:
        cmp     inputLen, 8*16
        jb      dec1

        movdqu  xmm0, [0*16 + input]
        movdqu  xmm1, [1*16 + input]
        movdqu  xmm2, [2*16 + input]
        movdqu  xmm3, [3*16 + input]
        movdqu  xmm4, [4*16 + input]
        movdqu  xmm5, [5*16 + input]
        movdqu  xmm6, [6*16 + input]
        movdqu  xmm7, [7*16 + input]

        movdqu  xmm8, [0*16 + ctx]
        pxor    xmm0, xmm8
        pxor    xmm1, xmm8
        pxor    xmm2, xmm8
        pxor    xmm3, xmm8
        pxor    xmm4, xmm8
        pxor    xmm5, xmm8
        pxor    xmm6, xmm8
        pxor    xmm7, xmm8

        i = 1
        WHILE i LT rnds
            aes_dec_rnd i
            i = i+1
        ENDM
        aes_dec_last_rnd rnds

        movdqu  xmm8, [-32 + ctx]
        pxor    xmm0, xmm8
        movdqu  xmm8, [0*16 + input]
        pxor    xmm1, xmm8
        movdqu  xmm8, [1*16 + input]
        pxor    xmm2, xmm8
        movdqu  xmm8, [2*16 + input]
        pxor    xmm3, xmm8
        movdqu  xmm8, [3*16 + input]
        pxor    xmm4, xmm8
        movdqu  xmm8, [4*16 + input]
        pxor    xmm5, xmm8
        movdqu  xmm8, [5*16 + input]
        pxor    xmm6, xmm8
        movdqu  xmm8, [6*16 + input]
        pxor    xmm7, xmm8
        movdqu  xmm8, [7*16 + input]

        movdqu  [0*16 + output], xmm0
        movdqu  [1*16 + output], xmm1
        movdqu  [2*16 + output], xmm2
        movdqu  [3*16 + output], xmm3
        movdqu  [4*16 + output], xmm4
        movdqu  [5*16 + output], xmm5
        movdqu  [6*16 + output], xmm6
        movdqu  [7*16 + output], xmm7

        movdqu  [-32 + ctx], xmm8

        lea     input,  [8*16 + input]
        lea     output, [8*16 + output]
        sub     inputLen, 8*16
        jmp     loop8

dec1:
        movdqu  xmm3, [-32 + ctx]

loop1:
        cmp     inputLen, 1*16
        jb      bail

        movdqu  xmm0, [input]
        movdqa  xmm4, xmm0
        movdqu  xmm7, [0*16 + ctx]
        pxor    xmm0, xmm7

        i = 1
        WHILE i LT rnds
            movdqu  xmm7, [i*16 + ctx]
            aesdec  xmm0, xmm7
            i = i+1
        ENDM
        movdqu  xmm7, [rnds*16 + ctx]
        aesdeclast  xmm0, xmm7

        pxor    xmm3, xmm0
        movdqu  [output], xmm3
        movdqa  xmm3, xmm4

        lea     input,  [1*16 + input]
        lea     output, [1*16 + output]
        sub     inputLen, 1*16
        jmp     loop1

bail:
        movdqu  [-32 + ctx], xmm3

        xor     rax, rax

        movdqu  xmm6, [rsp + 0*16]
        movdqu  xmm7, [rsp + 1*16]
        movdqu  xmm8, [rsp + 2*16]
        add     rsp, 3*16
        ret
ENDM

intel_aes_encrypt_cbc_128 PROC
gen_aes_cbc_enc_func 10
intel_aes_encrypt_cbc_128 ENDP

intel_aes_encrypt_cbc_192 PROC
gen_aes_cbc_enc_func 12
intel_aes_encrypt_cbc_192 ENDP

intel_aes_encrypt_cbc_256 PROC
gen_aes_cbc_enc_func 14
intel_aes_encrypt_cbc_256 ENDP

intel_aes_decrypt_cbc_128 PROC
gen_aes_cbc_dec_func 10
intel_aes_decrypt_cbc_128 ENDP

intel_aes_decrypt_cbc_192 PROC
gen_aes_cbc_dec_func 12
intel_aes_decrypt_cbc_192 ENDP

intel_aes_decrypt_cbc_256 PROC
gen_aes_cbc_dec_func 14
intel_aes_decrypt_cbc_256 ENDP

ctrCtx  textequ <r10>
CTR     textequ <r11d>
CTRSave textequ <eax>

gen_aes_ctr_func MACRO rnds

LOCAL   loop8
LOCAL   loop1
LOCAL   enc1
LOCAL   bail

        mov     input,    [rsp + 8*1 + 4*8]
        mov     inputLen, [rsp + 8*1 + 5*8]

        mov     ctrCtx, ctx
        mov     ctx, [8+ctrCtx]
        lea     ctx, [48+ctx]

        sub     rsp, 3*16
        movdqu  [rsp + 0*16], xmm6
        movdqu  [rsp + 1*16], xmm7
        movdqu  [rsp + 2*16], xmm8

        push    rbp
        mov     rbp, rsp
        sub     rsp, 8*16
        and     rsp, -16

        movdqu  xmm0, [16+ctrCtx]
        mov     CTRSave, DWORD PTR [ctrCtx + 16 + 3*4]
        bswap   CTRSave
        movdqu  xmm1, [ctx + 0*16]
        pxor    xmm0, xmm1
        movdqa  [rsp + 0*16], xmm0
        movdqa  [rsp + 1*16], xmm0
        movdqa  [rsp + 2*16], xmm0
        movdqa  [rsp + 3*16], xmm0
        movdqa  [rsp + 4*16], xmm0
        movdqa  [rsp + 5*16], xmm0
        movdqa  [rsp + 6*16], xmm0
        movdqa  [rsp + 7*16], xmm0

        inc     CTRSave
        mov     CTR, CTRSave
        bswap   CTR
        xor     CTR, DWORD PTR [ctx + 3*4]
        mov     DWORD PTR [rsp + 1*16 + 3*4], CTR

        inc     CTRSave
        mov     CTR, CTRSave
        bswap   CTR
        xor     CTR, DWORD PTR [ctx + 3*4]
        mov     DWORD PTR [rsp + 2*16 + 3*4], CTR

        inc     CTRSave
        mov     CTR, CTRSave
        bswap   CTR
        xor     CTR, DWORD PTR [ctx + 3*4]
        mov     DWORD PTR [rsp + 3*16 + 3*4], CTR

        inc     CTRSave
        mov     CTR, CTRSave
        bswap   CTR
        xor     CTR, DWORD PTR [ctx + 3*4]
        mov     DWORD PTR [rsp + 4*16 + 3*4], CTR

        inc     CTRSave
        mov     CTR, CTRSave
        bswap   CTR
        xor     CTR, DWORD PTR [ctx + 3*4]
        mov     DWORD PTR [rsp + 5*16 + 3*4], CTR

        inc     CTRSave
        mov     CTR, CTRSave
        bswap   CTR
        xor     CTR, DWORD PTR [ctx + 3*4]
        mov     DWORD PTR [rsp + 6*16 + 3*4], CTR

        inc     CTRSave
        mov     CTR, CTRSave
        bswap   CTR
        xor     CTR, DWORD PTR [ctx + 3*4]
        mov     DWORD PTR [rsp + 7*16 + 3*4], CTR

loop8:
        cmp     inputLen, 8*16
        jb      loop1

        movdqu  xmm0, [0*16 + rsp]
        movdqu  xmm1, [1*16 + rsp]
        movdqu  xmm2, [2*16 + rsp]
        movdqu  xmm3, [3*16 + rsp]
        movdqu  xmm4, [4*16 + rsp]
        movdqu  xmm5, [5*16 + rsp]
        movdqu  xmm6, [6*16 + rsp]
        movdqu  xmm7, [7*16 + rsp]

        i = 1
        WHILE i LE 8
            aes_rnd i

            inc     CTRSave
            mov     CTR, CTRSave
            bswap   CTR
            xor     CTR, DWORD PTR [ctx + 3*4]
            mov     DWORD PTR [rsp + (i-1)*16 + 3*4], CTR

            i = i+1
        ENDM
        WHILE i LT rnds
            aes_rnd i
            i = i+1
        ENDM
        aes_last_rnd rnds

        movdqu  xmm8, [0*16 + input]
        pxor    xmm0, xmm8
        movdqu  xmm8, [1*16 + input]
        pxor    xmm1, xmm8
        movdqu  xmm8, [2*16 + input]
        pxor    xmm2, xmm8
        movdqu  xmm8, [3*16 + input]
        pxor    xmm3, xmm8
        movdqu  xmm8, [4*16 + input]
        pxor    xmm4, xmm8
        movdqu  xmm8, [5*16 + input]
        pxor    xmm5, xmm8
        movdqu  xmm8, [6*16 + input]
        pxor    xmm6, xmm8
        movdqu  xmm8, [7*16 + input]
        pxor    xmm7, xmm8

        movdqu  [0*16 + output], xmm0
        movdqu  [1*16 + output], xmm1
        movdqu  [2*16 + output], xmm2
        movdqu  [3*16 + output], xmm3
        movdqu  [4*16 + output], xmm4
        movdqu  [5*16 + output], xmm5
        movdqu  [6*16 + output], xmm6
        movdqu  [7*16 + output], xmm7

        lea     input,  [8*16 + input]
        lea     output, [8*16 + output]
        sub     inputLen, 8*16
        jmp     loop8

loop1:
        cmp     inputLen, 1*16
        jb      bail

        movdqu  xmm0, [rsp]
        add     rsp, 16

        i = 1
        WHILE i LT rnds
            movdqu  xmm7, [i*16 + ctx]
            aesenc  xmm0, xmm7
            i = i+1
        ENDM
        movdqu  xmm7, [rnds*16 + ctx]
        aesenclast  xmm0, xmm7

        movdqu  xmm7, [input]
        pxor    xmm0, xmm7
        movdqu  [output], xmm0

        lea     input,  [1*16 + input]
        lea     output, [1*16 + output]
        sub     inputLen, 1*16
        jmp     loop1

bail:
        movdqu  xmm0, [rsp]
        movdqu  xmm1, [ctx + 0*16]
        pxor    xmm0, xmm1
        movdqu  [16+ctrCtx], xmm0

        xor     rax, rax

        mov     rsp, rbp
        pop     rbp

        movdqu  xmm6, [rsp + 0*16]
        movdqu  xmm7, [rsp + 1*16]
        movdqu  xmm8, [rsp + 2*16]
        add     rsp, 3*16
        ret
ENDM

intel_aes_encrypt_ctr_128 PROC
gen_aes_ctr_func 10
intel_aes_encrypt_ctr_128 ENDP

intel_aes_encrypt_ctr_192 PROC
gen_aes_ctr_func 12
intel_aes_encrypt_ctr_192 ENDP

intel_aes_encrypt_ctr_256 PROC
gen_aes_ctr_func 14
intel_aes_encrypt_ctr_256 ENDP

END