andre@0: /* This Source Code Form is subject to the terms of the Mozilla Public andre@0: * License, v. 2.0. If a copy of the MPL was not distributed with this andre@0: * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ andre@0: andre@0: #ifdef FREEBL_NO_DEPEND andre@0: #include "stubs.h" andre@0: #endif andre@0: andre@0: #include "prinit.h" andre@0: #include "prerr.h" andre@0: #include "secerr.h" andre@0: andre@0: #include "prtypes.h" andre@0: #include "blapi.h" andre@0: #include "rijndael.h" andre@0: andre@0: #include "cts.h" andre@0: #include "ctr.h" andre@0: #include "gcm.h" andre@0: andre@0: #ifdef USE_HW_AES andre@0: #include "intel-aes.h" andre@0: #include "mpi.h" andre@0: andre@0: static int has_intel_aes = 0; andre@0: static PRBool use_hw_aes = PR_FALSE; andre@0: andre@0: #ifdef INTEL_GCM andre@0: #include "intel-gcm.h" andre@0: static int has_intel_avx = 0; andre@0: static int has_intel_clmul = 0; andre@0: static PRBool use_hw_gcm = PR_FALSE; andre@0: #endif andre@0: #endif /* USE_HW_AES */ andre@0: andre@0: /* andre@0: * There are currently five ways to build this code, varying in performance andre@0: * and code size. andre@0: * andre@0: * RIJNDAEL_INCLUDE_TABLES Include all tables from rijndael32.tab andre@0: * RIJNDAEL_GENERATE_TABLES Generate tables on first andre@0: * encryption/decryption, then store them; andre@0: * use the function gfm andre@0: * RIJNDAEL_GENERATE_TABLES_MACRO Same as above, but use macros to do andre@0: * the generation andre@0: * RIJNDAEL_GENERATE_VALUES Do not store tables, generate the table andre@0: * values "on-the-fly", using gfm andre@0: * RIJNDAEL_GENERATE_VALUES_MACRO Same as above, but use macros andre@0: * andre@0: * The default is RIJNDAEL_INCLUDE_TABLES. andre@0: */ andre@0: andre@0: /* andre@0: * When building RIJNDAEL_INCLUDE_TABLES, includes S**-1, Rcon, T[0..4], andre@0: * T**-1[0..4], IMXC[0..4] andre@0: * When building anything else, includes S, S**-1, Rcon andre@0: */ andre@0: #include "rijndael32.tab" andre@0: andre@0: #if defined(RIJNDAEL_INCLUDE_TABLES) andre@0: /* andre@0: * RIJNDAEL_INCLUDE_TABLES andre@0: */ andre@0: #define T0(i) _T0[i] andre@0: #define T1(i) _T1[i] andre@0: #define T2(i) _T2[i] andre@0: #define T3(i) _T3[i] andre@0: #define TInv0(i) _TInv0[i] andre@0: #define TInv1(i) _TInv1[i] andre@0: #define TInv2(i) _TInv2[i] andre@0: #define TInv3(i) _TInv3[i] andre@0: #define IMXC0(b) _IMXC0[b] andre@0: #define IMXC1(b) _IMXC1[b] andre@0: #define IMXC2(b) _IMXC2[b] andre@0: #define IMXC3(b) _IMXC3[b] andre@0: /* The S-box can be recovered from the T-tables */ andre@0: #ifdef IS_LITTLE_ENDIAN andre@0: #define SBOX(b) ((PRUint8)_T3[b]) andre@0: #else andre@0: #define SBOX(b) ((PRUint8)_T1[b]) andre@0: #endif andre@0: #define SINV(b) (_SInv[b]) andre@0: andre@0: #else /* not RIJNDAEL_INCLUDE_TABLES */ andre@0: andre@0: /* andre@0: * Code for generating T-table values. andre@0: */ andre@0: andre@0: #ifdef IS_LITTLE_ENDIAN andre@0: #define WORD4(b0, b1, b2, b3) \ andre@0: (((b3) << 24) | ((b2) << 16) | ((b1) << 8) | (b0)) andre@0: #else andre@0: #define WORD4(b0, b1, b2, b3) \ andre@0: (((b0) << 24) | ((b1) << 16) | ((b2) << 8) | (b3)) andre@0: #endif andre@0: andre@0: /* andre@0: * Define the S and S**-1 tables (both have been stored) andre@0: */ andre@0: #define SBOX(b) (_S[b]) andre@0: #define SINV(b) (_SInv[b]) andre@0: andre@0: /* andre@0: * The function xtime, used for Galois field multiplication andre@0: */ andre@0: #define XTIME(a) \ andre@0: ((a & 0x80) ? ((a << 1) ^ 0x1b) : (a << 1)) andre@0: andre@0: /* Choose GFM method (macros or function) */ andre@0: #if defined(RIJNDAEL_GENERATE_TABLES_MACRO) || \ andre@0: defined(RIJNDAEL_GENERATE_VALUES_MACRO) andre@0: andre@0: /* andre@0: * Galois field GF(2**8) multipliers, in macro form andre@0: */ andre@0: #define GFM01(a) \ andre@0: (a) /* a * 01 = a, the identity */ andre@0: #define GFM02(a) \ andre@0: (XTIME(a) & 0xff) /* a * 02 = xtime(a) */ andre@0: #define GFM04(a) \ andre@0: (GFM02(GFM02(a))) /* a * 04 = xtime**2(a) */ andre@0: #define GFM08(a) \ andre@0: (GFM02(GFM04(a))) /* a * 08 = xtime**3(a) */ andre@0: #define GFM03(a) \ andre@0: (GFM01(a) ^ GFM02(a)) /* a * 03 = a * (01 + 02) */ andre@0: #define GFM09(a) \ andre@0: (GFM01(a) ^ GFM08(a)) /* a * 09 = a * (01 + 08) */ andre@0: #define GFM0B(a) \ andre@0: (GFM01(a) ^ GFM02(a) ^ GFM08(a)) /* a * 0B = a * (01 + 02 + 08) */ andre@0: #define GFM0D(a) \ andre@0: (GFM01(a) ^ GFM04(a) ^ GFM08(a)) /* a * 0D = a * (01 + 04 + 08) */ andre@0: #define GFM0E(a) \ andre@0: (GFM02(a) ^ GFM04(a) ^ GFM08(a)) /* a * 0E = a * (02 + 04 + 08) */ andre@0: andre@0: #else /* RIJNDAEL_GENERATE_TABLES or RIJNDAEL_GENERATE_VALUES */ andre@0: andre@0: /* GF_MULTIPLY andre@0: * andre@0: * multiply two bytes represented in GF(2**8), mod (x**4 + 1) andre@0: */ andre@0: PRUint8 gfm(PRUint8 a, PRUint8 b) andre@0: { andre@0: PRUint8 res = 0; andre@0: while (b > 0) { andre@0: res = (b & 0x01) ? res ^ a : res; andre@0: a = XTIME(a); andre@0: b >>= 1; andre@0: } andre@0: return res; andre@0: } andre@0: andre@0: #define GFM01(a) \ andre@0: (a) /* a * 01 = a, the identity */ andre@0: #define GFM02(a) \ andre@0: (XTIME(a) & 0xff) /* a * 02 = xtime(a) */ andre@0: #define GFM03(a) \ andre@0: (gfm(a, 0x03)) /* a * 03 */ andre@0: #define GFM09(a) \ andre@0: (gfm(a, 0x09)) /* a * 09 */ andre@0: #define GFM0B(a) \ andre@0: (gfm(a, 0x0B)) /* a * 0B */ andre@0: #define GFM0D(a) \ andre@0: (gfm(a, 0x0D)) /* a * 0D */ andre@0: #define GFM0E(a) \ andre@0: (gfm(a, 0x0E)) /* a * 0E */ andre@0: andre@0: #endif /* choosing GFM function */ andre@0: andre@0: /* andre@0: * The T-tables andre@0: */ andre@0: #define G_T0(i) \ andre@0: ( WORD4( GFM02(SBOX(i)), GFM01(SBOX(i)), GFM01(SBOX(i)), GFM03(SBOX(i)) ) ) andre@0: #define G_T1(i) \ andre@0: ( WORD4( GFM03(SBOX(i)), GFM02(SBOX(i)), GFM01(SBOX(i)), GFM01(SBOX(i)) ) ) andre@0: #define G_T2(i) \ andre@0: ( WORD4( GFM01(SBOX(i)), GFM03(SBOX(i)), GFM02(SBOX(i)), GFM01(SBOX(i)) ) ) andre@0: #define G_T3(i) \ andre@0: ( WORD4( GFM01(SBOX(i)), GFM01(SBOX(i)), GFM03(SBOX(i)), GFM02(SBOX(i)) ) ) andre@0: andre@0: /* andre@0: * The inverse T-tables andre@0: */ andre@0: #define G_TInv0(i) \ andre@0: ( WORD4( GFM0E(SINV(i)), GFM09(SINV(i)), GFM0D(SINV(i)), GFM0B(SINV(i)) ) ) andre@0: #define G_TInv1(i) \ andre@0: ( WORD4( GFM0B(SINV(i)), GFM0E(SINV(i)), GFM09(SINV(i)), GFM0D(SINV(i)) ) ) andre@0: #define G_TInv2(i) \ andre@0: ( WORD4( GFM0D(SINV(i)), GFM0B(SINV(i)), GFM0E(SINV(i)), GFM09(SINV(i)) ) ) andre@0: #define G_TInv3(i) \ andre@0: ( WORD4( GFM09(SINV(i)), GFM0D(SINV(i)), GFM0B(SINV(i)), GFM0E(SINV(i)) ) ) andre@0: andre@0: /* andre@0: * The inverse mix column tables andre@0: */ andre@0: #define G_IMXC0(i) \ andre@0: ( WORD4( GFM0E(i), GFM09(i), GFM0D(i), GFM0B(i) ) ) andre@0: #define G_IMXC1(i) \ andre@0: ( WORD4( GFM0B(i), GFM0E(i), GFM09(i), GFM0D(i) ) ) andre@0: #define G_IMXC2(i) \ andre@0: ( WORD4( GFM0D(i), GFM0B(i), GFM0E(i), GFM09(i) ) ) andre@0: #define G_IMXC3(i) \ andre@0: ( WORD4( GFM09(i), GFM0D(i), GFM0B(i), GFM0E(i) ) ) andre@0: andre@0: /* Now choose the T-table indexing method */ andre@0: #if defined(RIJNDAEL_GENERATE_VALUES) andre@0: /* generate values for the tables with a function*/ andre@0: static PRUint32 gen_TInvXi(PRUint8 tx, PRUint8 i) andre@0: { andre@0: PRUint8 si01, si02, si03, si04, si08, si09, si0B, si0D, si0E; andre@0: si01 = SINV(i); andre@0: si02 = XTIME(si01); andre@0: si04 = XTIME(si02); andre@0: si08 = XTIME(si04); andre@0: si03 = si02 ^ si01; andre@0: si09 = si08 ^ si01; andre@0: si0B = si08 ^ si03; andre@0: si0D = si09 ^ si04; andre@0: si0E = si08 ^ si04 ^ si02; andre@0: switch (tx) { andre@0: case 0: andre@0: return WORD4(si0E, si09, si0D, si0B); andre@0: case 1: andre@0: return WORD4(si0B, si0E, si09, si0D); andre@0: case 2: andre@0: return WORD4(si0D, si0B, si0E, si09); andre@0: case 3: andre@0: return WORD4(si09, si0D, si0B, si0E); andre@0: } andre@0: return -1; andre@0: } andre@0: #define T0(i) G_T0(i) andre@0: #define T1(i) G_T1(i) andre@0: #define T2(i) G_T2(i) andre@0: #define T3(i) G_T3(i) andre@0: #define TInv0(i) gen_TInvXi(0, i) andre@0: #define TInv1(i) gen_TInvXi(1, i) andre@0: #define TInv2(i) gen_TInvXi(2, i) andre@0: #define TInv3(i) gen_TInvXi(3, i) andre@0: #define IMXC0(b) G_IMXC0(b) andre@0: #define IMXC1(b) G_IMXC1(b) andre@0: #define IMXC2(b) G_IMXC2(b) andre@0: #define IMXC3(b) G_IMXC3(b) andre@0: #elif defined(RIJNDAEL_GENERATE_VALUES_MACRO) andre@0: /* generate values for the tables with macros */ andre@0: #define T0(i) G_T0(i) andre@0: #define T1(i) G_T1(i) andre@0: #define T2(i) G_T2(i) andre@0: #define T3(i) G_T3(i) andre@0: #define TInv0(i) G_TInv0(i) andre@0: #define TInv1(i) G_TInv1(i) andre@0: #define TInv2(i) G_TInv2(i) andre@0: #define TInv3(i) G_TInv3(i) andre@0: #define IMXC0(b) G_IMXC0(b) andre@0: #define IMXC1(b) G_IMXC1(b) andre@0: #define IMXC2(b) G_IMXC2(b) andre@0: #define IMXC3(b) G_IMXC3(b) andre@0: #else /* RIJNDAEL_GENERATE_TABLES or RIJNDAEL_GENERATE_TABLES_MACRO */ andre@0: /* Generate T and T**-1 table values and store, then index */ andre@0: /* The inverse mix column tables are still generated */ andre@0: #define T0(i) rijndaelTables->T0[i] andre@0: #define T1(i) rijndaelTables->T1[i] andre@0: #define T2(i) rijndaelTables->T2[i] andre@0: #define T3(i) rijndaelTables->T3[i] andre@0: #define TInv0(i) rijndaelTables->TInv0[i] andre@0: #define TInv1(i) rijndaelTables->TInv1[i] andre@0: #define TInv2(i) rijndaelTables->TInv2[i] andre@0: #define TInv3(i) rijndaelTables->TInv3[i] andre@0: #define IMXC0(b) G_IMXC0(b) andre@0: #define IMXC1(b) G_IMXC1(b) andre@0: #define IMXC2(b) G_IMXC2(b) andre@0: #define IMXC3(b) G_IMXC3(b) andre@0: #endif /* choose T-table indexing method */ andre@0: andre@0: #endif /* not RIJNDAEL_INCLUDE_TABLES */ andre@0: andre@0: #if defined(RIJNDAEL_GENERATE_TABLES) || \ andre@0: defined(RIJNDAEL_GENERATE_TABLES_MACRO) andre@0: andre@0: /* Code to generate and store the tables */ andre@0: andre@0: struct rijndael_tables_str { andre@0: PRUint32 T0[256]; andre@0: PRUint32 T1[256]; andre@0: PRUint32 T2[256]; andre@0: PRUint32 T3[256]; andre@0: PRUint32 TInv0[256]; andre@0: PRUint32 TInv1[256]; andre@0: PRUint32 TInv2[256]; andre@0: PRUint32 TInv3[256]; andre@0: }; andre@0: andre@0: static struct rijndael_tables_str *rijndaelTables = NULL; andre@0: static PRCallOnceType coRTInit = { 0, 0, 0 }; andre@0: static PRStatus andre@0: init_rijndael_tables(void) andre@0: { andre@0: PRUint32 i; andre@0: PRUint8 si01, si02, si03, si04, si08, si09, si0B, si0D, si0E; andre@0: struct rijndael_tables_str *rts; andre@0: rts = (struct rijndael_tables_str *) andre@0: PORT_Alloc(sizeof(struct rijndael_tables_str)); andre@0: if (!rts) return PR_FAILURE; andre@0: for (i=0; i<256; i++) { andre@0: /* The forward values */ andre@0: si01 = SBOX(i); andre@0: si02 = XTIME(si01); andre@0: si03 = si02 ^ si01; andre@0: rts->T0[i] = WORD4(si02, si01, si01, si03); andre@0: rts->T1[i] = WORD4(si03, si02, si01, si01); andre@0: rts->T2[i] = WORD4(si01, si03, si02, si01); andre@0: rts->T3[i] = WORD4(si01, si01, si03, si02); andre@0: /* The inverse values */ andre@0: si01 = SINV(i); andre@0: si02 = XTIME(si01); andre@0: si04 = XTIME(si02); andre@0: si08 = XTIME(si04); andre@0: si03 = si02 ^ si01; andre@0: si09 = si08 ^ si01; andre@0: si0B = si08 ^ si03; andre@0: si0D = si09 ^ si04; andre@0: si0E = si08 ^ si04 ^ si02; andre@0: rts->TInv0[i] = WORD4(si0E, si09, si0D, si0B); andre@0: rts->TInv1[i] = WORD4(si0B, si0E, si09, si0D); andre@0: rts->TInv2[i] = WORD4(si0D, si0B, si0E, si09); andre@0: rts->TInv3[i] = WORD4(si09, si0D, si0B, si0E); andre@0: } andre@0: /* wait until all the values are in to set */ andre@0: rijndaelTables = rts; andre@0: return PR_SUCCESS; andre@0: } andre@0: andre@0: #endif /* code to generate tables */ andre@0: andre@0: /************************************************************************** andre@0: * andre@0: * Stuff related to the Rijndael key schedule andre@0: * andre@0: *************************************************************************/ andre@0: andre@0: #define SUBBYTE(w) \ andre@0: ((SBOX((w >> 24) & 0xff) << 24) | \ andre@0: (SBOX((w >> 16) & 0xff) << 16) | \ andre@0: (SBOX((w >> 8) & 0xff) << 8) | \ andre@0: (SBOX((w ) & 0xff) )) andre@0: andre@0: #ifdef IS_LITTLE_ENDIAN andre@0: #define ROTBYTE(b) \ andre@0: ((b >> 8) | (b << 24)) andre@0: #else andre@0: #define ROTBYTE(b) \ andre@0: ((b << 8) | (b >> 24)) andre@0: #endif andre@0: andre@0: /* rijndael_key_expansion7 andre@0: * andre@0: * Generate the expanded key from the key input by the user. andre@0: * XXX andre@0: * Nk == 7 (224 key bits) is a weird case. Since Nk > 6, an added SubByte andre@0: * transformation is done periodically. The period is every 4 bytes, and andre@0: * since 7%4 != 0 this happens at different times for each key word (unlike andre@0: * Nk == 8 where it happens twice in every key word, in the same positions). andre@0: * For now, I'm implementing this case "dumbly", w/o any unrolling. andre@0: */ andre@0: static SECStatus andre@0: rijndael_key_expansion7(AESContext *cx, const unsigned char *key, unsigned int Nk) andre@0: { andre@0: unsigned int i; andre@0: PRUint32 *W; andre@0: PRUint32 *pW; andre@0: PRUint32 tmp; andre@0: W = cx->expandedKey; andre@0: /* 1. the first Nk words contain the cipher key */ andre@0: memcpy(W, key, Nk * 4); andre@0: i = Nk; andre@0: /* 2. loop until full expanded key is obtained */ andre@0: pW = W + i - 1; andre@0: for (; i < cx->Nb * (cx->Nr + 1); ++i) { andre@0: tmp = *pW++; andre@0: if (i % Nk == 0) andre@0: tmp = SUBBYTE(ROTBYTE(tmp)) ^ Rcon[i / Nk - 1]; andre@0: else if (i % Nk == 4) andre@0: tmp = SUBBYTE(tmp); andre@0: *pW = W[i - Nk] ^ tmp; andre@0: } andre@0: return SECSuccess; andre@0: } andre@0: andre@0: /* rijndael_key_expansion andre@0: * andre@0: * Generate the expanded key from the key input by the user. andre@0: */ andre@0: static SECStatus andre@0: rijndael_key_expansion(AESContext *cx, const unsigned char *key, unsigned int Nk) andre@0: { andre@0: unsigned int i; andre@0: PRUint32 *W; andre@0: PRUint32 *pW; andre@0: PRUint32 tmp; andre@0: unsigned int round_key_words = cx->Nb * (cx->Nr + 1); andre@0: if (Nk == 7) andre@0: return rijndael_key_expansion7(cx, key, Nk); andre@0: W = cx->expandedKey; andre@0: /* The first Nk words contain the input cipher key */ andre@0: memcpy(W, key, Nk * 4); andre@0: i = Nk; andre@0: pW = W + i - 1; andre@0: /* Loop over all sets of Nk words, except the last */ andre@0: while (i < round_key_words - Nk) { andre@0: tmp = *pW++; andre@0: tmp = SUBBYTE(ROTBYTE(tmp)) ^ Rcon[i / Nk - 1]; andre@0: *pW = W[i++ - Nk] ^ tmp; andre@0: tmp = *pW++; *pW = W[i++ - Nk] ^ tmp; andre@0: tmp = *pW++; *pW = W[i++ - Nk] ^ tmp; andre@0: tmp = *pW++; *pW = W[i++ - Nk] ^ tmp; andre@0: if (Nk == 4) andre@0: continue; andre@0: switch (Nk) { andre@0: case 8: tmp = *pW++; tmp = SUBBYTE(tmp); *pW = W[i++ - Nk] ^ tmp; andre@0: case 7: tmp = *pW++; *pW = W[i++ - Nk] ^ tmp; andre@0: case 6: tmp = *pW++; *pW = W[i++ - Nk] ^ tmp; andre@0: case 5: tmp = *pW++; *pW = W[i++ - Nk] ^ tmp; andre@0: } andre@0: } andre@0: /* Generate the last word */ andre@0: tmp = *pW++; andre@0: tmp = SUBBYTE(ROTBYTE(tmp)) ^ Rcon[i / Nk - 1]; andre@0: *pW = W[i++ - Nk] ^ tmp; andre@0: /* There may be overflow here, if Nk % (Nb * (Nr + 1)) > 0. However, andre@0: * since the above loop generated all but the last Nk key words, there andre@0: * is no more need for the SubByte transformation. andre@0: */ andre@0: if (Nk < 8) { andre@0: for (; i < round_key_words; ++i) { andre@0: tmp = *pW++; andre@0: *pW = W[i - Nk] ^ tmp; andre@0: } andre@0: } else { andre@0: /* except in the case when Nk == 8. Then one more SubByte may have andre@0: * to be performed, at i % Nk == 4. andre@0: */ andre@0: for (; i < round_key_words; ++i) { andre@0: tmp = *pW++; andre@0: if (i % Nk == 4) andre@0: tmp = SUBBYTE(tmp); andre@0: *pW = W[i - Nk] ^ tmp; andre@0: } andre@0: } andre@0: return SECSuccess; andre@0: } andre@0: andre@0: /* rijndael_invkey_expansion andre@0: * andre@0: * Generate the expanded key for the inverse cipher from the key input by andre@0: * the user. andre@0: */ andre@0: static SECStatus andre@0: rijndael_invkey_expansion(AESContext *cx, const unsigned char *key, unsigned int Nk) andre@0: { andre@0: unsigned int r; andre@0: PRUint32 *roundkeyw; andre@0: PRUint8 *b; andre@0: int Nb = cx->Nb; andre@0: /* begins like usual key expansion ... */ andre@0: if (rijndael_key_expansion(cx, key, Nk) != SECSuccess) andre@0: return SECFailure; andre@0: /* ... but has the additional step of InvMixColumn, andre@0: * excepting the first and last round keys. andre@0: */ andre@0: roundkeyw = cx->expandedKey + cx->Nb; andre@0: for (r=1; rNr; ++r) { andre@0: /* each key word, roundkeyw, represents a column in the key andre@0: * matrix. Each column is multiplied by the InvMixColumn matrix. andre@0: * [ 0E 0B 0D 09 ] [ b0 ] andre@0: * [ 09 0E 0B 0D ] * [ b1 ] andre@0: * [ 0D 09 0E 0B ] [ b2 ] andre@0: * [ 0B 0D 09 0E ] [ b3 ] andre@0: */ andre@0: b = (PRUint8 *)roundkeyw; andre@0: *roundkeyw++ = IMXC0(b[0]) ^ IMXC1(b[1]) ^ IMXC2(b[2]) ^ IMXC3(b[3]); andre@0: b = (PRUint8 *)roundkeyw; andre@0: *roundkeyw++ = IMXC0(b[0]) ^ IMXC1(b[1]) ^ IMXC2(b[2]) ^ IMXC3(b[3]); andre@0: b = (PRUint8 *)roundkeyw; andre@0: *roundkeyw++ = IMXC0(b[0]) ^ IMXC1(b[1]) ^ IMXC2(b[2]) ^ IMXC3(b[3]); andre@0: b = (PRUint8 *)roundkeyw; andre@0: *roundkeyw++ = IMXC0(b[0]) ^ IMXC1(b[1]) ^ IMXC2(b[2]) ^ IMXC3(b[3]); andre@0: if (Nb <= 4) andre@0: continue; andre@0: switch (Nb) { andre@0: case 8: b = (PRUint8 *)roundkeyw; andre@0: *roundkeyw++ = IMXC0(b[0]) ^ IMXC1(b[1]) ^ andre@0: IMXC2(b[2]) ^ IMXC3(b[3]); andre@0: case 7: b = (PRUint8 *)roundkeyw; andre@0: *roundkeyw++ = IMXC0(b[0]) ^ IMXC1(b[1]) ^ andre@0: IMXC2(b[2]) ^ IMXC3(b[3]); andre@0: case 6: b = (PRUint8 *)roundkeyw; andre@0: *roundkeyw++ = IMXC0(b[0]) ^ IMXC1(b[1]) ^ andre@0: IMXC2(b[2]) ^ IMXC3(b[3]); andre@0: case 5: b = (PRUint8 *)roundkeyw; andre@0: *roundkeyw++ = IMXC0(b[0]) ^ IMXC1(b[1]) ^ andre@0: IMXC2(b[2]) ^ IMXC3(b[3]); andre@0: } andre@0: } andre@0: return SECSuccess; andre@0: } andre@0: /************************************************************************** andre@0: * andre@0: * Stuff related to Rijndael encryption/decryption, optimized for andre@0: * a 128-bit blocksize. andre@0: * andre@0: *************************************************************************/ andre@0: andre@0: #ifdef IS_LITTLE_ENDIAN andre@0: #define BYTE0WORD(w) ((w) & 0x000000ff) andre@0: #define BYTE1WORD(w) ((w) & 0x0000ff00) andre@0: #define BYTE2WORD(w) ((w) & 0x00ff0000) andre@0: #define BYTE3WORD(w) ((w) & 0xff000000) andre@0: #else andre@0: #define BYTE0WORD(w) ((w) & 0xff000000) andre@0: #define BYTE1WORD(w) ((w) & 0x00ff0000) andre@0: #define BYTE2WORD(w) ((w) & 0x0000ff00) andre@0: #define BYTE3WORD(w) ((w) & 0x000000ff) andre@0: #endif andre@0: andre@0: typedef union { andre@0: PRUint32 w[4]; andre@0: PRUint8 b[16]; andre@0: } rijndael_state; andre@0: andre@0: #define COLUMN_0(state) state.w[0] andre@0: #define COLUMN_1(state) state.w[1] andre@0: #define COLUMN_2(state) state.w[2] andre@0: #define COLUMN_3(state) state.w[3] andre@0: andre@0: #define STATE_BYTE(i) state.b[i] andre@0: andre@0: static SECStatus andre@0: rijndael_encryptBlock128(AESContext *cx, andre@0: unsigned char *output, andre@0: const unsigned char *input) andre@0: { andre@0: unsigned int r; andre@0: PRUint32 *roundkeyw; andre@0: rijndael_state state; andre@0: PRUint32 C0, C1, C2, C3; andre@0: #if defined(NSS_X86_OR_X64) andre@0: #define pIn input andre@0: #define pOut output andre@0: #else andre@0: unsigned char *pIn, *pOut; andre@0: PRUint32 inBuf[4], outBuf[4]; andre@0: andre@0: if ((ptrdiff_t)input & 0x3) { andre@0: memcpy(inBuf, input, sizeof inBuf); andre@0: pIn = (unsigned char *)inBuf; andre@0: } else { andre@0: pIn = (unsigned char *)input; andre@0: } andre@0: if ((ptrdiff_t)output & 0x3) { andre@0: pOut = (unsigned char *)outBuf; andre@0: } else { andre@0: pOut = (unsigned char *)output; andre@0: } andre@0: #endif andre@0: roundkeyw = cx->expandedKey; andre@0: /* Step 1: Add Round Key 0 to initial state */ andre@0: COLUMN_0(state) = *((PRUint32 *)(pIn )) ^ *roundkeyw++; andre@0: COLUMN_1(state) = *((PRUint32 *)(pIn + 4 )) ^ *roundkeyw++; andre@0: COLUMN_2(state) = *((PRUint32 *)(pIn + 8 )) ^ *roundkeyw++; andre@0: COLUMN_3(state) = *((PRUint32 *)(pIn + 12)) ^ *roundkeyw++; andre@0: /* Step 2: Loop over rounds [1..NR-1] */ andre@0: for (r=1; rNr; ++r) { andre@0: /* Do ShiftRow, ByteSub, and MixColumn all at once */ andre@0: C0 = T0(STATE_BYTE(0)) ^ andre@0: T1(STATE_BYTE(5)) ^ andre@0: T2(STATE_BYTE(10)) ^ andre@0: T3(STATE_BYTE(15)); andre@0: C1 = T0(STATE_BYTE(4)) ^ andre@0: T1(STATE_BYTE(9)) ^ andre@0: T2(STATE_BYTE(14)) ^ andre@0: T3(STATE_BYTE(3)); andre@0: C2 = T0(STATE_BYTE(8)) ^ andre@0: T1(STATE_BYTE(13)) ^ andre@0: T2(STATE_BYTE(2)) ^ andre@0: T3(STATE_BYTE(7)); andre@0: C3 = T0(STATE_BYTE(12)) ^ andre@0: T1(STATE_BYTE(1)) ^ andre@0: T2(STATE_BYTE(6)) ^ andre@0: T3(STATE_BYTE(11)); andre@0: /* Round key addition */ andre@0: COLUMN_0(state) = C0 ^ *roundkeyw++; andre@0: COLUMN_1(state) = C1 ^ *roundkeyw++; andre@0: COLUMN_2(state) = C2 ^ *roundkeyw++; andre@0: COLUMN_3(state) = C3 ^ *roundkeyw++; andre@0: } andre@0: /* Step 3: Do the last round */ andre@0: /* Final round does not employ MixColumn */ andre@0: C0 = ((BYTE0WORD(T2(STATE_BYTE(0)))) | andre@0: (BYTE1WORD(T3(STATE_BYTE(5)))) | andre@0: (BYTE2WORD(T0(STATE_BYTE(10)))) | andre@0: (BYTE3WORD(T1(STATE_BYTE(15))))) ^ andre@0: *roundkeyw++; andre@0: C1 = ((BYTE0WORD(T2(STATE_BYTE(4)))) | andre@0: (BYTE1WORD(T3(STATE_BYTE(9)))) | andre@0: (BYTE2WORD(T0(STATE_BYTE(14)))) | andre@0: (BYTE3WORD(T1(STATE_BYTE(3))))) ^ andre@0: *roundkeyw++; andre@0: C2 = ((BYTE0WORD(T2(STATE_BYTE(8)))) | andre@0: (BYTE1WORD(T3(STATE_BYTE(13)))) | andre@0: (BYTE2WORD(T0(STATE_BYTE(2)))) | andre@0: (BYTE3WORD(T1(STATE_BYTE(7))))) ^ andre@0: *roundkeyw++; andre@0: C3 = ((BYTE0WORD(T2(STATE_BYTE(12)))) | andre@0: (BYTE1WORD(T3(STATE_BYTE(1)))) | andre@0: (BYTE2WORD(T0(STATE_BYTE(6)))) | andre@0: (BYTE3WORD(T1(STATE_BYTE(11))))) ^ andre@0: *roundkeyw++; andre@0: *((PRUint32 *) pOut ) = C0; andre@0: *((PRUint32 *)(pOut + 4)) = C1; andre@0: *((PRUint32 *)(pOut + 8)) = C2; andre@0: *((PRUint32 *)(pOut + 12)) = C3; andre@0: #if defined(NSS_X86_OR_X64) andre@0: #undef pIn andre@0: #undef pOut andre@0: #else andre@0: if ((ptrdiff_t)output & 0x3) { andre@0: memcpy(output, outBuf, sizeof outBuf); andre@0: } andre@0: #endif andre@0: return SECSuccess; andre@0: } andre@0: andre@0: static SECStatus andre@0: rijndael_decryptBlock128(AESContext *cx, andre@0: unsigned char *output, andre@0: const unsigned char *input) andre@0: { andre@0: int r; andre@0: PRUint32 *roundkeyw; andre@0: rijndael_state state; andre@0: PRUint32 C0, C1, C2, C3; andre@0: #if defined(NSS_X86_OR_X64) andre@0: #define pIn input andre@0: #define pOut output andre@0: #else andre@0: unsigned char *pIn, *pOut; andre@0: PRUint32 inBuf[4], outBuf[4]; andre@0: andre@0: if ((ptrdiff_t)input & 0x3) { andre@0: memcpy(inBuf, input, sizeof inBuf); andre@0: pIn = (unsigned char *)inBuf; andre@0: } else { andre@0: pIn = (unsigned char *)input; andre@0: } andre@0: if ((ptrdiff_t)output & 0x3) { andre@0: pOut = (unsigned char *)outBuf; andre@0: } else { andre@0: pOut = (unsigned char *)output; andre@0: } andre@0: #endif andre@0: roundkeyw = cx->expandedKey + cx->Nb * cx->Nr + 3; andre@0: /* reverse the final key addition */ andre@0: COLUMN_3(state) = *((PRUint32 *)(pIn + 12)) ^ *roundkeyw--; andre@0: COLUMN_2(state) = *((PRUint32 *)(pIn + 8)) ^ *roundkeyw--; andre@0: COLUMN_1(state) = *((PRUint32 *)(pIn + 4)) ^ *roundkeyw--; andre@0: COLUMN_0(state) = *((PRUint32 *)(pIn )) ^ *roundkeyw--; andre@0: /* Loop over rounds in reverse [NR..1] */ andre@0: for (r=cx->Nr; r>1; --r) { andre@0: /* Invert the (InvByteSub*InvMixColumn)(InvShiftRow(state)) */ andre@0: C0 = TInv0(STATE_BYTE(0)) ^ andre@0: TInv1(STATE_BYTE(13)) ^ andre@0: TInv2(STATE_BYTE(10)) ^ andre@0: TInv3(STATE_BYTE(7)); andre@0: C1 = TInv0(STATE_BYTE(4)) ^ andre@0: TInv1(STATE_BYTE(1)) ^ andre@0: TInv2(STATE_BYTE(14)) ^ andre@0: TInv3(STATE_BYTE(11)); andre@0: C2 = TInv0(STATE_BYTE(8)) ^ andre@0: TInv1(STATE_BYTE(5)) ^ andre@0: TInv2(STATE_BYTE(2)) ^ andre@0: TInv3(STATE_BYTE(15)); andre@0: C3 = TInv0(STATE_BYTE(12)) ^ andre@0: TInv1(STATE_BYTE(9)) ^ andre@0: TInv2(STATE_BYTE(6)) ^ andre@0: TInv3(STATE_BYTE(3)); andre@0: /* Invert the key addition step */ andre@0: COLUMN_3(state) = C3 ^ *roundkeyw--; andre@0: COLUMN_2(state) = C2 ^ *roundkeyw--; andre@0: COLUMN_1(state) = C1 ^ *roundkeyw--; andre@0: COLUMN_0(state) = C0 ^ *roundkeyw--; andre@0: } andre@0: /* inverse sub */ andre@0: pOut[ 0] = SINV(STATE_BYTE( 0)); andre@0: pOut[ 1] = SINV(STATE_BYTE(13)); andre@0: pOut[ 2] = SINV(STATE_BYTE(10)); andre@0: pOut[ 3] = SINV(STATE_BYTE( 7)); andre@0: pOut[ 4] = SINV(STATE_BYTE( 4)); andre@0: pOut[ 5] = SINV(STATE_BYTE( 1)); andre@0: pOut[ 6] = SINV(STATE_BYTE(14)); andre@0: pOut[ 7] = SINV(STATE_BYTE(11)); andre@0: pOut[ 8] = SINV(STATE_BYTE( 8)); andre@0: pOut[ 9] = SINV(STATE_BYTE( 5)); andre@0: pOut[10] = SINV(STATE_BYTE( 2)); andre@0: pOut[11] = SINV(STATE_BYTE(15)); andre@0: pOut[12] = SINV(STATE_BYTE(12)); andre@0: pOut[13] = SINV(STATE_BYTE( 9)); andre@0: pOut[14] = SINV(STATE_BYTE( 6)); andre@0: pOut[15] = SINV(STATE_BYTE( 3)); andre@0: /* final key addition */ andre@0: *((PRUint32 *)(pOut + 12)) ^= *roundkeyw--; andre@0: *((PRUint32 *)(pOut + 8)) ^= *roundkeyw--; andre@0: *((PRUint32 *)(pOut + 4)) ^= *roundkeyw--; andre@0: *((PRUint32 *) pOut ) ^= *roundkeyw--; andre@0: #if defined(NSS_X86_OR_X64) andre@0: #undef pIn andre@0: #undef pOut andre@0: #else andre@0: if ((ptrdiff_t)output & 0x3) { andre@0: memcpy(output, outBuf, sizeof outBuf); andre@0: } andre@0: #endif andre@0: return SECSuccess; andre@0: } andre@0: andre@0: /************************************************************************** andre@0: * andre@0: * Stuff related to general Rijndael encryption/decryption, for blocksizes andre@0: * greater than 128 bits. andre@0: * andre@0: * XXX This code is currently untested! So far, AES specs have only been andre@0: * released for 128 bit blocksizes. This will be tested, but for now andre@0: * only the code above has been tested using known values. andre@0: * andre@0: *************************************************************************/ andre@0: andre@0: #define COLUMN(array, j) *((PRUint32 *)(array + j)) andre@0: andre@0: SECStatus andre@0: rijndael_encryptBlock(AESContext *cx, andre@0: unsigned char *output, andre@0: const unsigned char *input) andre@0: { andre@0: return SECFailure; andre@0: #ifdef rijndael_large_blocks_fixed andre@0: unsigned int j, r, Nb; andre@0: unsigned int c2=0, c3=0; andre@0: PRUint32 *roundkeyw; andre@0: PRUint8 clone[RIJNDAEL_MAX_STATE_SIZE]; andre@0: Nb = cx->Nb; andre@0: roundkeyw = cx->expandedKey; andre@0: /* Step 1: Add Round Key 0 to initial state */ andre@0: for (j=0; j<4*Nb; j+=4) { andre@0: COLUMN(clone, j) = COLUMN(input, j) ^ *roundkeyw++; andre@0: } andre@0: /* Step 2: Loop over rounds [1..NR-1] */ andre@0: for (r=1; rNr; ++r) { andre@0: for (j=0; jNb; andre@0: roundkeyw = cx->expandedKey + cx->Nb * cx->Nr + 3; andre@0: /* reverse key addition */ andre@0: for (j=4*Nb; j>=0; j-=4) { andre@0: COLUMN(clone, j) = COLUMN(input, j) ^ *roundkeyw--; andre@0: } andre@0: /* Loop over rounds in reverse [NR..1] */ andre@0: for (r=cx->Nr; r>1; --r) { andre@0: /* Invert the (InvByteSub*InvMixColumn)(InvShiftRow(state)) */ andre@0: for (j=0; j=0; j-=4) { andre@0: COLUMN(clone, j) = COLUMN(output, j) ^ *roundkeyw--; andre@0: } andre@0: } andre@0: /* inverse sub */ andre@0: for (j=0; j<4*Nb; ++j) { andre@0: output[j] = SINV(clone[j]); andre@0: } andre@0: /* final key addition */ andre@0: for (j=4*Nb; j>=0; j-=4) { andre@0: COLUMN(output, j) ^= *roundkeyw--; andre@0: } andre@0: return SECSuccess; andre@0: #endif andre@0: } andre@0: andre@0: /************************************************************************** andre@0: * andre@0: * Rijndael modes of operation (ECB and CBC) andre@0: * andre@0: *************************************************************************/ andre@0: andre@0: static SECStatus andre@0: rijndael_encryptECB(AESContext *cx, unsigned char *output, andre@0: unsigned int *outputLen, unsigned int maxOutputLen, andre@0: const unsigned char *input, unsigned int inputLen, andre@0: unsigned int blocksize) andre@0: { andre@0: SECStatus rv; andre@0: AESBlockFunc *encryptor; andre@0: andre@0: encryptor = (blocksize == RIJNDAEL_MIN_BLOCKSIZE) andre@0: ? &rijndael_encryptBlock128 andre@0: : &rijndael_encryptBlock; andre@0: while (inputLen > 0) { andre@0: rv = (*encryptor)(cx, output, input); andre@0: if (rv != SECSuccess) andre@0: return rv; andre@0: output += blocksize; andre@0: input += blocksize; andre@0: inputLen -= blocksize; andre@0: } andre@0: return SECSuccess; andre@0: } andre@0: andre@0: static SECStatus andre@0: rijndael_encryptCBC(AESContext *cx, unsigned char *output, andre@0: unsigned int *outputLen, unsigned int maxOutputLen, andre@0: const unsigned char *input, unsigned int inputLen, andre@0: unsigned int blocksize) andre@0: { andre@0: unsigned int j; andre@0: SECStatus rv; andre@0: AESBlockFunc *encryptor; andre@0: unsigned char *lastblock; andre@0: unsigned char inblock[RIJNDAEL_MAX_STATE_SIZE * 8]; andre@0: andre@0: if (!inputLen) andre@0: return SECSuccess; andre@0: lastblock = cx->iv; andre@0: encryptor = (blocksize == RIJNDAEL_MIN_BLOCKSIZE) andre@0: ? &rijndael_encryptBlock128 andre@0: : &rijndael_encryptBlock; andre@0: while (inputLen > 0) { andre@0: /* XOR with the last block (IV if first block) */ andre@0: for (j=0; jiv, lastblock, blocksize); andre@0: return SECSuccess; andre@0: } andre@0: andre@0: static SECStatus andre@0: rijndael_decryptECB(AESContext *cx, unsigned char *output, andre@0: unsigned int *outputLen, unsigned int maxOutputLen, andre@0: const unsigned char *input, unsigned int inputLen, andre@0: unsigned int blocksize) andre@0: { andre@0: SECStatus rv; andre@0: AESBlockFunc *decryptor; andre@0: andre@0: decryptor = (blocksize == RIJNDAEL_MIN_BLOCKSIZE) andre@0: ? &rijndael_decryptBlock128 andre@0: : &rijndael_decryptBlock; andre@0: while (inputLen > 0) { andre@0: rv = (*decryptor)(cx, output, input); andre@0: if (rv != SECSuccess) andre@0: return rv; andre@0: output += blocksize; andre@0: input += blocksize; andre@0: inputLen -= blocksize; andre@0: } andre@0: return SECSuccess; andre@0: } andre@0: andre@0: static SECStatus andre@0: rijndael_decryptCBC(AESContext *cx, unsigned char *output, andre@0: unsigned int *outputLen, unsigned int maxOutputLen, andre@0: const unsigned char *input, unsigned int inputLen, andre@0: unsigned int blocksize) andre@0: { andre@0: SECStatus rv; andre@0: AESBlockFunc *decryptor; andre@0: const unsigned char *in; andre@0: unsigned char *out; andre@0: unsigned int j; andre@0: unsigned char newIV[RIJNDAEL_MAX_BLOCKSIZE]; andre@0: andre@0: andre@0: if (!inputLen) andre@0: return SECSuccess; andre@0: PORT_Assert(output - input >= 0 || input - output >= (int)inputLen ); andre@0: decryptor = (blocksize == RIJNDAEL_MIN_BLOCKSIZE) andre@0: ? &rijndael_decryptBlock128 andre@0: : &rijndael_decryptBlock; andre@0: in = input + (inputLen - blocksize); andre@0: memcpy(newIV, in, blocksize); andre@0: out = output + (inputLen - blocksize); andre@0: while (inputLen > blocksize) { andre@0: rv = (*decryptor)(cx, out, in); andre@0: if (rv != SECSuccess) andre@0: return rv; andre@0: for (j=0; jiv[j]; andre@0: } andre@0: memcpy(cx->iv, newIV, blocksize); andre@0: return SECSuccess; andre@0: } andre@0: andre@0: /************************************************************************ andre@0: * andre@0: * BLAPI Interface functions andre@0: * andre@0: * The following functions implement the encryption routines defined in andre@0: * BLAPI for the AES cipher, Rijndael. andre@0: * andre@0: ***********************************************************************/ andre@0: andre@0: AESContext * AES_AllocateContext(void) andre@0: { andre@0: return PORT_ZNew(AESContext); andre@0: } andre@0: andre@0: andre@0: #ifdef INTEL_GCM andre@0: /* andre@0: * Adapted from the example code in "How to detect New Instruction support in andre@0: * the 4th generation Intel Core processor family" by Max Locktyukhin. andre@0: * andre@0: * XGETBV: andre@0: * Reads an extended control register (XCR) specified by ECX into EDX:EAX. andre@0: */ andre@0: static PRBool andre@0: check_xcr0_ymm() andre@0: { andre@0: PRUint32 xcr0; andre@0: #if defined(_MSC_VER) andre@0: #if defined(_M_IX86) andre@0: __asm { andre@0: mov ecx, 0 andre@0: xgetbv andre@0: mov xcr0, eax andre@0: } andre@0: #else andre@0: xcr0 = (PRUint32)_xgetbv(0); /* Requires VS2010 SP1 or later. */ andre@0: #endif andre@0: #else andre@0: __asm__ ("xgetbv" : "=a" (xcr0) : "c" (0) : "%edx"); andre@0: #endif andre@0: /* Check if xmm and ymm state are enabled in XCR0. */ andre@0: return (xcr0 & 6) == 6; andre@0: } andre@0: #endif andre@0: andre@0: /* andre@0: ** Initialize a new AES context suitable for AES encryption/decryption in andre@0: ** the ECB or CBC mode. andre@0: ** "mode" the mode of operation, which must be NSS_AES or NSS_AES_CBC andre@0: */ andre@0: static SECStatus andre@0: aes_InitContext(AESContext *cx, const unsigned char *key, unsigned int keysize, andre@0: const unsigned char *iv, int mode, unsigned int encrypt, andre@0: unsigned int blocksize) andre@0: { andre@0: unsigned int Nk; andre@0: /* According to Rijndael AES Proposal, section 12.1, block and key andre@0: * lengths between 128 and 256 bits are supported, as long as the andre@0: * length in bytes is divisible by 4. andre@0: */ andre@0: if (key == NULL || andre@0: keysize < RIJNDAEL_MIN_BLOCKSIZE || andre@0: keysize > RIJNDAEL_MAX_BLOCKSIZE || andre@0: keysize % 4 != 0 || andre@0: blocksize < RIJNDAEL_MIN_BLOCKSIZE || andre@0: blocksize > RIJNDAEL_MAX_BLOCKSIZE || andre@0: blocksize % 4 != 0) { andre@0: PORT_SetError(SEC_ERROR_INVALID_ARGS); andre@0: return SECFailure; andre@0: } andre@0: if (mode != NSS_AES && mode != NSS_AES_CBC) { andre@0: PORT_SetError(SEC_ERROR_INVALID_ARGS); andre@0: return SECFailure; andre@0: } andre@0: if (mode == NSS_AES_CBC && iv == NULL) { andre@0: PORT_SetError(SEC_ERROR_INVALID_ARGS); andre@0: return SECFailure; andre@0: } andre@0: if (!cx) { andre@0: PORT_SetError(SEC_ERROR_INVALID_ARGS); andre@0: return SECFailure; andre@0: } andre@0: #ifdef USE_HW_AES andre@0: if (has_intel_aes == 0) { andre@0: unsigned long eax, ebx, ecx, edx; andre@0: char *disable_hw_aes = getenv("NSS_DISABLE_HW_AES"); andre@0: andre@0: if (disable_hw_aes == NULL) { andre@0: freebl_cpuid(1, &eax, &ebx, &ecx, &edx); andre@0: has_intel_aes = (ecx & (1 << 25)) != 0 ? 1 : -1; andre@0: #ifdef INTEL_GCM andre@0: has_intel_clmul = (ecx & (1 << 1)) != 0 ? 1 : -1; andre@0: if ((ecx & (1 << 27)) != 0 && (ecx & (1 << 28)) != 0 && andre@0: check_xcr0_ymm()) { andre@0: has_intel_avx = 1; andre@0: } else { andre@0: has_intel_avx = -1; andre@0: } andre@0: #endif andre@0: } else { andre@0: has_intel_aes = -1; andre@0: #ifdef INTEL_GCM andre@0: has_intel_avx = -1; andre@0: has_intel_clmul = -1; andre@0: #endif andre@0: } andre@0: } andre@0: use_hw_aes = (PRBool) andre@0: (has_intel_aes > 0 && (keysize % 8) == 0 && blocksize == 16); andre@0: #ifdef INTEL_GCM andre@0: use_hw_gcm = (PRBool) andre@0: (use_hw_aes && has_intel_avx>0 && has_intel_clmul>0); andre@0: #endif andre@0: #endif /* USE_HW_AES */ andre@0: /* Nb = (block size in bits) / 32 */ andre@0: cx->Nb = blocksize / 4; andre@0: /* Nk = (key size in bits) / 32 */ andre@0: Nk = keysize / 4; andre@0: /* Obtain number of rounds from "table" */ andre@0: cx->Nr = RIJNDAEL_NUM_ROUNDS(Nk, cx->Nb); andre@0: /* copy in the iv, if neccessary */ andre@0: if (mode == NSS_AES_CBC) { andre@0: memcpy(cx->iv, iv, blocksize); andre@0: #ifdef USE_HW_AES andre@0: if (use_hw_aes) { andre@0: cx->worker = (freeblCipherFunc) andre@0: intel_aes_cbc_worker(encrypt, keysize); andre@0: } else andre@0: #endif andre@0: { andre@0: cx->worker = (freeblCipherFunc) (encrypt andre@0: ? &rijndael_encryptCBC : &rijndael_decryptCBC); andre@0: } andre@0: } else { andre@0: #ifdef USE_HW_AES andre@0: if (use_hw_aes) { andre@0: cx->worker = (freeblCipherFunc) andre@0: intel_aes_ecb_worker(encrypt, keysize); andre@0: } else andre@0: #endif andre@0: { andre@0: cx->worker = (freeblCipherFunc) (encrypt andre@0: ? &rijndael_encryptECB : &rijndael_decryptECB); andre@0: } andre@0: } andre@0: PORT_Assert((cx->Nb * (cx->Nr + 1)) <= RIJNDAEL_MAX_EXP_KEY_SIZE); andre@0: if ((cx->Nb * (cx->Nr + 1)) > RIJNDAEL_MAX_EXP_KEY_SIZE) { andre@0: PORT_SetError(SEC_ERROR_LIBRARY_FAILURE); andre@0: goto cleanup; andre@0: } andre@0: #ifdef USE_HW_AES andre@0: if (use_hw_aes) { andre@0: intel_aes_init(encrypt, keysize); andre@0: } else andre@0: #endif andre@0: { andre@0: andre@0: #if defined(RIJNDAEL_GENERATE_TABLES) || \ andre@0: defined(RIJNDAEL_GENERATE_TABLES_MACRO) andre@0: if (rijndaelTables == NULL) { andre@0: if (PR_CallOnce(&coRTInit, init_rijndael_tables) andre@0: != PR_SUCCESS) { andre@0: return SecFailure; andre@0: } andre@0: } andre@0: #endif andre@0: /* Generate expanded key */ andre@0: if (encrypt) { andre@0: if (rijndael_key_expansion(cx, key, Nk) != SECSuccess) andre@0: goto cleanup; andre@0: } else { andre@0: if (rijndael_invkey_expansion(cx, key, Nk) != SECSuccess) andre@0: goto cleanup; andre@0: } andre@0: } andre@0: cx->worker_cx = cx; andre@0: cx->destroy = NULL; andre@0: cx->isBlock = PR_TRUE; andre@0: return SECSuccess; andre@0: cleanup: andre@0: return SECFailure; andre@0: } andre@0: andre@0: SECStatus andre@0: AES_InitContext(AESContext *cx, const unsigned char *key, unsigned int keysize, andre@0: const unsigned char *iv, int mode, unsigned int encrypt, andre@0: unsigned int blocksize) andre@0: { andre@0: int basemode = mode; andre@0: PRBool baseencrypt = encrypt; andre@0: SECStatus rv; andre@0: andre@0: switch (mode) { andre@0: case NSS_AES_CTS: andre@0: basemode = NSS_AES_CBC; andre@0: break; andre@0: case NSS_AES_GCM: andre@0: case NSS_AES_CTR: andre@0: basemode = NSS_AES; andre@0: baseencrypt = PR_TRUE; andre@0: break; andre@0: } andre@0: /* make sure enough is initializes so we can safely call Destroy */ andre@0: cx->worker_cx = NULL; andre@0: cx->destroy = NULL; andre@0: rv = aes_InitContext(cx, key, keysize, iv, basemode, andre@0: baseencrypt, blocksize); andre@0: if (rv != SECSuccess) { andre@0: AES_DestroyContext(cx, PR_FALSE); andre@0: return rv; andre@0: } andre@0: andre@0: /* finally, set up any mode specific contexts */ andre@0: switch (mode) { andre@0: case NSS_AES_CTS: andre@0: cx->worker_cx = CTS_CreateContext(cx, cx->worker, iv, blocksize); andre@0: cx->worker = (freeblCipherFunc) andre@0: (encrypt ? CTS_EncryptUpdate : CTS_DecryptUpdate); andre@0: cx->destroy = (freeblDestroyFunc) CTS_DestroyContext; andre@0: cx->isBlock = PR_FALSE; andre@0: break; andre@0: case NSS_AES_GCM: andre@0: #ifdef INTEL_GCM andre@0: if(use_hw_gcm) { andre@0: cx->worker_cx = intel_AES_GCM_CreateContext(cx, cx->worker, iv, blocksize); andre@0: cx->worker = (freeblCipherFunc) andre@0: (encrypt ? intel_AES_GCM_EncryptUpdate : intel_AES_GCM_DecryptUpdate); andre@0: cx->destroy = (freeblDestroyFunc) intel_AES_GCM_DestroyContext; andre@0: cx->isBlock = PR_FALSE; andre@0: } else andre@0: #endif andre@0: { andre@0: cx->worker_cx = GCM_CreateContext(cx, cx->worker, iv, blocksize); andre@0: cx->worker = (freeblCipherFunc) andre@0: (encrypt ? GCM_EncryptUpdate : GCM_DecryptUpdate); andre@0: cx->destroy = (freeblDestroyFunc) GCM_DestroyContext; andre@0: cx->isBlock = PR_FALSE; andre@0: } andre@0: break; andre@0: case NSS_AES_CTR: andre@0: cx->worker_cx = CTR_CreateContext(cx, cx->worker, iv, blocksize); andre@0: #if defined(USE_HW_AES) && defined(_MSC_VER) andre@0: if (use_hw_aes) { andre@0: cx->worker = (freeblCipherFunc) CTR_Update_HW_AES; andre@0: } else andre@0: #endif andre@0: { andre@0: cx->worker = (freeblCipherFunc) CTR_Update; andre@0: } andre@0: cx->destroy = (freeblDestroyFunc) CTR_DestroyContext; andre@0: cx->isBlock = PR_FALSE; andre@0: break; andre@0: default: andre@0: /* everything has already been set up by aes_InitContext, just andre@0: * return */ andre@0: return SECSuccess; andre@0: } andre@0: /* check to see if we succeeded in getting the worker context */ andre@0: if (cx->worker_cx == NULL) { andre@0: /* no, just destroy the existing context */ andre@0: cx->destroy = NULL; /* paranoia, though you can see a dozen lines */ andre@0: /* below that this isn't necessary */ andre@0: AES_DestroyContext(cx, PR_FALSE); andre@0: return SECFailure; andre@0: } andre@0: return SECSuccess; andre@0: } andre@0: andre@0: /* AES_CreateContext andre@0: * andre@0: * create a new context for Rijndael operations andre@0: */ andre@0: AESContext * andre@0: AES_CreateContext(const unsigned char *key, const unsigned char *iv, andre@0: int mode, int encrypt, andre@0: unsigned int keysize, unsigned int blocksize) andre@0: { andre@0: AESContext *cx = AES_AllocateContext(); andre@0: if (cx) { andre@0: SECStatus rv = AES_InitContext(cx, key, keysize, iv, mode, encrypt, andre@0: blocksize); andre@0: if (rv != SECSuccess) { andre@0: AES_DestroyContext(cx, PR_TRUE); andre@0: cx = NULL; andre@0: } andre@0: } andre@0: return cx; andre@0: } andre@0: andre@0: /* andre@0: * AES_DestroyContext andre@0: * andre@0: * Zero an AES cipher context. If freeit is true, also free the pointer andre@0: * to the context. andre@0: */ andre@0: void andre@0: AES_DestroyContext(AESContext *cx, PRBool freeit) andre@0: { andre@0: if (cx->worker_cx && cx->destroy) { andre@0: (*cx->destroy)(cx->worker_cx, PR_TRUE); andre@0: cx->worker_cx = NULL; andre@0: cx->destroy = NULL; andre@0: } andre@0: if (freeit) andre@0: PORT_Free(cx); andre@0: } andre@0: andre@0: /* andre@0: * AES_Encrypt andre@0: * andre@0: * Encrypt an arbitrary-length buffer. The output buffer must already be andre@0: * allocated to at least inputLen. andre@0: */ andre@0: SECStatus andre@0: AES_Encrypt(AESContext *cx, unsigned char *output, andre@0: unsigned int *outputLen, unsigned int maxOutputLen, andre@0: const unsigned char *input, unsigned int inputLen) andre@0: { andre@0: int blocksize; andre@0: /* Check args */ andre@0: if (cx == NULL || output == NULL || (input == NULL && inputLen != 0)) { andre@0: PORT_SetError(SEC_ERROR_INVALID_ARGS); andre@0: return SECFailure; andre@0: } andre@0: blocksize = 4 * cx->Nb; andre@0: if (cx->isBlock && (inputLen % blocksize != 0)) { andre@0: PORT_SetError(SEC_ERROR_INPUT_LEN); andre@0: return SECFailure; andre@0: } andre@0: if (maxOutputLen < inputLen) { andre@0: PORT_SetError(SEC_ERROR_OUTPUT_LEN); andre@0: return SECFailure; andre@0: } andre@0: *outputLen = inputLen; andre@0: return (*cx->worker)(cx->worker_cx, output, outputLen, maxOutputLen, andre@0: input, inputLen, blocksize); andre@0: } andre@0: andre@0: /* andre@0: * AES_Decrypt andre@0: * andre@0: * Decrypt and arbitrary-length buffer. The output buffer must already be andre@0: * allocated to at least inputLen. andre@0: */ andre@0: SECStatus andre@0: AES_Decrypt(AESContext *cx, unsigned char *output, andre@0: unsigned int *outputLen, unsigned int maxOutputLen, andre@0: const unsigned char *input, unsigned int inputLen) andre@0: { andre@0: int blocksize; andre@0: /* Check args */ andre@0: if (cx == NULL || output == NULL || (input == NULL && inputLen != 0)) { andre@0: PORT_SetError(SEC_ERROR_INVALID_ARGS); andre@0: return SECFailure; andre@0: } andre@0: blocksize = 4 * cx->Nb; andre@0: if (cx->isBlock && (inputLen % blocksize != 0)) { andre@0: PORT_SetError(SEC_ERROR_INPUT_LEN); andre@0: return SECFailure; andre@0: } andre@0: if (maxOutputLen < inputLen) { andre@0: PORT_SetError(SEC_ERROR_OUTPUT_LEN); andre@0: return SECFailure; andre@0: } andre@0: *outputLen = inputLen; andre@0: return (*cx->worker)(cx->worker_cx, output, outputLen, maxOutputLen, andre@0: input, inputLen, blocksize); andre@0: }