Mercurial > trustbridge > nss-cmake-static
comparison nss/lib/freebl/rijndael.c @ 0:1e5118fa0cb1
This is NSS with a CMake build system.
To compile a static NSS library for Windows we've used the
Chromium-NSS fork and added a Cmake buildsystem to compile
it statically for Windows. See README.chromium for chromium
changes and README.trustbridge for our modifications.
author | Andre Heinecke <andre.heinecke@intevation.de> |
---|---|
date | Mon, 28 Jul 2014 10:47:06 +0200 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:1e5118fa0cb1 |
---|---|
1 /* This Source Code Form is subject to the terms of the Mozilla Public | |
2 * License, v. 2.0. If a copy of the MPL was not distributed with this | |
3 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ | |
4 | |
5 #ifdef FREEBL_NO_DEPEND | |
6 #include "stubs.h" | |
7 #endif | |
8 | |
9 #include "prinit.h" | |
10 #include "prerr.h" | |
11 #include "secerr.h" | |
12 | |
13 #include "prtypes.h" | |
14 #include "blapi.h" | |
15 #include "rijndael.h" | |
16 | |
17 #include "cts.h" | |
18 #include "ctr.h" | |
19 #include "gcm.h" | |
20 | |
21 #ifdef USE_HW_AES | |
22 #include "intel-aes.h" | |
23 #include "mpi.h" | |
24 | |
25 static int has_intel_aes = 0; | |
26 static PRBool use_hw_aes = PR_FALSE; | |
27 | |
28 #ifdef INTEL_GCM | |
29 #include "intel-gcm.h" | |
30 static int has_intel_avx = 0; | |
31 static int has_intel_clmul = 0; | |
32 static PRBool use_hw_gcm = PR_FALSE; | |
33 #endif | |
34 #endif /* USE_HW_AES */ | |
35 | |
36 /* | |
37 * There are currently five ways to build this code, varying in performance | |
38 * and code size. | |
39 * | |
40 * RIJNDAEL_INCLUDE_TABLES Include all tables from rijndael32.tab | |
41 * RIJNDAEL_GENERATE_TABLES Generate tables on first | |
42 * encryption/decryption, then store them; | |
43 * use the function gfm | |
44 * RIJNDAEL_GENERATE_TABLES_MACRO Same as above, but use macros to do | |
45 * the generation | |
46 * RIJNDAEL_GENERATE_VALUES Do not store tables, generate the table | |
47 * values "on-the-fly", using gfm | |
48 * RIJNDAEL_GENERATE_VALUES_MACRO Same as above, but use macros | |
49 * | |
50 * The default is RIJNDAEL_INCLUDE_TABLES. | |
51 */ | |
52 | |
53 /* | |
54 * When building RIJNDAEL_INCLUDE_TABLES, includes S**-1, Rcon, T[0..3], | |
55 * T**-1[0..3], IMXC[0..3] | |
56 * When building anything else, includes S, S**-1, Rcon | |
57 */ | |
58 #include "rijndael32.tab" | |
59 | |
60 #if defined(RIJNDAEL_INCLUDE_TABLES) | |
61 /* | |
62 * RIJNDAEL_INCLUDE_TABLES | |
63 */ | |
64 #define T0(i) _T0[i] | |
65 #define T1(i) _T1[i] | |
66 #define T2(i) _T2[i] | |
67 #define T3(i) _T3[i] | |
68 #define TInv0(i) _TInv0[i] | |
69 #define TInv1(i) _TInv1[i] | |
70 #define TInv2(i) _TInv2[i] | |
71 #define TInv3(i) _TInv3[i] | |
72 #define IMXC0(b) _IMXC0[b] | |
73 #define IMXC1(b) _IMXC1[b] | |
74 #define IMXC2(b) _IMXC2[b] | |
75 #define IMXC3(b) _IMXC3[b] | |
76 /* The S-box can be recovered from the T-tables */ | |
77 #ifdef IS_LITTLE_ENDIAN | |
78 #define SBOX(b) ((PRUint8)_T3[b]) | |
79 #else | |
80 #define SBOX(b) ((PRUint8)_T1[b]) | |
81 #endif | |
82 #define SINV(b) (_SInv[b]) | |
83 | |
84 #else /* not RIJNDAEL_INCLUDE_TABLES */ | |
85 | |
86 /* | |
87 * Code for generating T-table values. | |
88 */ | |
89 | |
/*
 * WORD4(b0, b1, b2, b3) packs four byte values into one 32-bit word such
 * that b0 ends up at the lowest memory address of the word and b3 at the
 * highest, whatever the host byte order.
 *
 * Each byte is widened to PRUint32 before shifting: a PRUint8 operand would
 * otherwise be promoted to (signed) int, and shifting a value >= 0x80 left
 * by 24 bits overflows int, which is undefined behaviour.
 */
#ifdef IS_LITTLE_ENDIAN
#define WORD4(b0, b1, b2, b3) \
    (((PRUint32)(b3) << 24) | ((PRUint32)(b2) << 16) | \
     ((PRUint32)(b1) <<  8) |  (PRUint32)(b0))
#else
#define WORD4(b0, b1, b2, b3) \
    (((PRUint32)(b0) << 24) | ((PRUint32)(b1) << 16) | \
     ((PRUint32)(b2) <<  8) |  (PRUint32)(b3))
#endif
97 | |
98 /* | |
99 * Define the S and S**-1 tables (both have been stored) | |
100 */ | |
101 #define SBOX(b) (_S[b]) | |
102 #define SINV(b) (_SInv[b]) | |
103 | |
/*
 * The function xtime, used for Galois field multiplication.
 *
 * XTIME(a) multiplies the byte a by 02 in GF(2**8): shift left by one and,
 * when the top bit of a was set, fold the overflow back in by XORing 0x1b.
 *
 * The result is NOT truncated to 8 bits; callers either store it in a
 * PRUint8 or mask it with 0xff.  The argument is evaluated more than once,
 * so it must not have side effects.
 */
#define XTIME(a) \
    (((a) & 0x80) ? (((a) << 1) ^ 0x1b) : ((a) << 1))
109 | |
110 /* Choose GFM method (macros or function) */ | |
111 #if defined(RIJNDAEL_GENERATE_TABLES_MACRO) || \ | |
112 defined(RIJNDAEL_GENERATE_VALUES_MACRO) | |
113 | |
114 /* | |
115 * Galois field GF(2**8) multipliers, in macro form | |
116 */ | |
117 #define GFM01(a) \ | |
118 (a) /* a * 01 = a, the identity */ | |
119 #define GFM02(a) \ | |
120 (XTIME(a) & 0xff) /* a * 02 = xtime(a) */ | |
121 #define GFM04(a) \ | |
122 (GFM02(GFM02(a))) /* a * 04 = xtime**2(a) */ | |
123 #define GFM08(a) \ | |
124 (GFM02(GFM04(a))) /* a * 08 = xtime**3(a) */ | |
125 #define GFM03(a) \ | |
126 (GFM01(a) ^ GFM02(a)) /* a * 03 = a * (01 + 02) */ | |
127 #define GFM09(a) \ | |
128 (GFM01(a) ^ GFM08(a)) /* a * 09 = a * (01 + 08) */ | |
129 #define GFM0B(a) \ | |
130 (GFM01(a) ^ GFM02(a) ^ GFM08(a)) /* a * 0B = a * (01 + 02 + 08) */ | |
131 #define GFM0D(a) \ | |
132 (GFM01(a) ^ GFM04(a) ^ GFM08(a)) /* a * 0D = a * (01 + 04 + 08) */ | |
133 #define GFM0E(a) \ | |
134 (GFM02(a) ^ GFM04(a) ^ GFM08(a)) /* a * 0E = a * (02 + 04 + 08) */ | |
135 | |
136 #else /* RIJNDAEL_GENERATE_TABLES or RIJNDAEL_GENERATE_VALUES */ | |
137 | |
138 /* GF_MULTIPLY | |
139 * | |
140 * multiply two bytes represented in GF(2**8), mod (x**4 + 1) | |
141 */ | |
142 PRUint8 gfm(PRUint8 a, PRUint8 b) | |
143 { | |
144 PRUint8 res = 0; | |
145 while (b > 0) { | |
146 res = (b & 0x01) ? res ^ a : res; | |
147 a = XTIME(a); | |
148 b >>= 1; | |
149 } | |
150 return res; | |
151 } | |
152 | |
153 #define GFM01(a) \ | |
154 (a) /* a * 01 = a, the identity */ | |
155 #define GFM02(a) \ | |
156 (XTIME(a) & 0xff) /* a * 02 = xtime(a) */ | |
157 #define GFM03(a) \ | |
158 (gfm(a, 0x03)) /* a * 03 */ | |
159 #define GFM09(a) \ | |
160 (gfm(a, 0x09)) /* a * 09 */ | |
161 #define GFM0B(a) \ | |
162 (gfm(a, 0x0B)) /* a * 0B */ | |
163 #define GFM0D(a) \ | |
164 (gfm(a, 0x0D)) /* a * 0D */ | |
165 #define GFM0E(a) \ | |
166 (gfm(a, 0x0E)) /* a * 0E */ | |
167 | |
168 #endif /* choosing GFM function */ | |
169 | |
170 /* | |
171 * The T-tables | |
172 */ | |
173 #define G_T0(i) \ | |
174 ( WORD4( GFM02(SBOX(i)), GFM01(SBOX(i)), GFM01(SBOX(i)), GFM03(SBOX(i)) ) ) | |
175 #define G_T1(i) \ | |
176 ( WORD4( GFM03(SBOX(i)), GFM02(SBOX(i)), GFM01(SBOX(i)), GFM01(SBOX(i)) ) ) | |
177 #define G_T2(i) \ | |
178 ( WORD4( GFM01(SBOX(i)), GFM03(SBOX(i)), GFM02(SBOX(i)), GFM01(SBOX(i)) ) ) | |
179 #define G_T3(i) \ | |
180 ( WORD4( GFM01(SBOX(i)), GFM01(SBOX(i)), GFM03(SBOX(i)), GFM02(SBOX(i)) ) ) | |
181 | |
182 /* | |
183 * The inverse T-tables | |
184 */ | |
185 #define G_TInv0(i) \ | |
186 ( WORD4( GFM0E(SINV(i)), GFM09(SINV(i)), GFM0D(SINV(i)), GFM0B(SINV(i)) ) ) | |
187 #define G_TInv1(i) \ | |
188 ( WORD4( GFM0B(SINV(i)), GFM0E(SINV(i)), GFM09(SINV(i)), GFM0D(SINV(i)) ) ) | |
189 #define G_TInv2(i) \ | |
190 ( WORD4( GFM0D(SINV(i)), GFM0B(SINV(i)), GFM0E(SINV(i)), GFM09(SINV(i)) ) ) | |
191 #define G_TInv3(i) \ | |
192 ( WORD4( GFM09(SINV(i)), GFM0D(SINV(i)), GFM0B(SINV(i)), GFM0E(SINV(i)) ) ) | |
193 | |
194 /* | |
195 * The inverse mix column tables | |
196 */ | |
197 #define G_IMXC0(i) \ | |
198 ( WORD4( GFM0E(i), GFM09(i), GFM0D(i), GFM0B(i) ) ) | |
199 #define G_IMXC1(i) \ | |
200 ( WORD4( GFM0B(i), GFM0E(i), GFM09(i), GFM0D(i) ) ) | |
201 #define G_IMXC2(i) \ | |
202 ( WORD4( GFM0D(i), GFM0B(i), GFM0E(i), GFM09(i) ) ) | |
203 #define G_IMXC3(i) \ | |
204 ( WORD4( GFM09(i), GFM0D(i), GFM0B(i), GFM0E(i) ) ) | |
205 | |
206 /* Now choose the T-table indexing method */ | |
207 #if defined(RIJNDAEL_GENERATE_VALUES) | |
208 /* generate values for the tables with a function*/ | |
209 static PRUint32 gen_TInvXi(PRUint8 tx, PRUint8 i) | |
210 { | |
211 PRUint8 si01, si02, si03, si04, si08, si09, si0B, si0D, si0E; | |
212 si01 = SINV(i); | |
213 si02 = XTIME(si01); | |
214 si04 = XTIME(si02); | |
215 si08 = XTIME(si04); | |
216 si03 = si02 ^ si01; | |
217 si09 = si08 ^ si01; | |
218 si0B = si08 ^ si03; | |
219 si0D = si09 ^ si04; | |
220 si0E = si08 ^ si04 ^ si02; | |
221 switch (tx) { | |
222 case 0: | |
223 return WORD4(si0E, si09, si0D, si0B); | |
224 case 1: | |
225 return WORD4(si0B, si0E, si09, si0D); | |
226 case 2: | |
227 return WORD4(si0D, si0B, si0E, si09); | |
228 case 3: | |
229 return WORD4(si09, si0D, si0B, si0E); | |
230 } | |
231 return -1; | |
232 } | |
233 #define T0(i) G_T0(i) | |
234 #define T1(i) G_T1(i) | |
235 #define T2(i) G_T2(i) | |
236 #define T3(i) G_T3(i) | |
237 #define TInv0(i) gen_TInvXi(0, i) | |
238 #define TInv1(i) gen_TInvXi(1, i) | |
239 #define TInv2(i) gen_TInvXi(2, i) | |
240 #define TInv3(i) gen_TInvXi(3, i) | |
241 #define IMXC0(b) G_IMXC0(b) | |
242 #define IMXC1(b) G_IMXC1(b) | |
243 #define IMXC2(b) G_IMXC2(b) | |
244 #define IMXC3(b) G_IMXC3(b) | |
245 #elif defined(RIJNDAEL_GENERATE_VALUES_MACRO) | |
246 /* generate values for the tables with macros */ | |
247 #define T0(i) G_T0(i) | |
248 #define T1(i) G_T1(i) | |
249 #define T2(i) G_T2(i) | |
250 #define T3(i) G_T3(i) | |
251 #define TInv0(i) G_TInv0(i) | |
252 #define TInv1(i) G_TInv1(i) | |
253 #define TInv2(i) G_TInv2(i) | |
254 #define TInv3(i) G_TInv3(i) | |
255 #define IMXC0(b) G_IMXC0(b) | |
256 #define IMXC1(b) G_IMXC1(b) | |
257 #define IMXC2(b) G_IMXC2(b) | |
258 #define IMXC3(b) G_IMXC3(b) | |
259 #else /* RIJNDAEL_GENERATE_TABLES or RIJNDAEL_GENERATE_TABLES_MACRO */ | |
260 /* Generate T and T**-1 table values and store, then index */ | |
261 /* The inverse mix column tables are still generated */ | |
262 #define T0(i) rijndaelTables->T0[i] | |
263 #define T1(i) rijndaelTables->T1[i] | |
264 #define T2(i) rijndaelTables->T2[i] | |
265 #define T3(i) rijndaelTables->T3[i] | |
266 #define TInv0(i) rijndaelTables->TInv0[i] | |
267 #define TInv1(i) rijndaelTables->TInv1[i] | |
268 #define TInv2(i) rijndaelTables->TInv2[i] | |
269 #define TInv3(i) rijndaelTables->TInv3[i] | |
270 #define IMXC0(b) G_IMXC0(b) | |
271 #define IMXC1(b) G_IMXC1(b) | |
272 #define IMXC2(b) G_IMXC2(b) | |
273 #define IMXC3(b) G_IMXC3(b) | |
274 #endif /* choose T-table indexing method */ | |
275 | |
276 #endif /* not RIJNDAEL_INCLUDE_TABLES */ | |
277 | |
278 #if defined(RIJNDAEL_GENERATE_TABLES) || \ | |
279 defined(RIJNDAEL_GENERATE_TABLES_MACRO) | |
280 | |
281 /* Code to generate and store the tables */ | |
282 | |
283 struct rijndael_tables_str { | |
284 PRUint32 T0[256]; | |
285 PRUint32 T1[256]; | |
286 PRUint32 T2[256]; | |
287 PRUint32 T3[256]; | |
288 PRUint32 TInv0[256]; | |
289 PRUint32 TInv1[256]; | |
290 PRUint32 TInv2[256]; | |
291 PRUint32 TInv3[256]; | |
292 }; | |
293 | |
294 static struct rijndael_tables_str *rijndaelTables = NULL; | |
295 static PRCallOnceType coRTInit = { 0, 0, 0 }; | |
296 static PRStatus | |
297 init_rijndael_tables(void) | |
298 { | |
299 PRUint32 i; | |
300 PRUint8 si01, si02, si03, si04, si08, si09, si0B, si0D, si0E; | |
301 struct rijndael_tables_str *rts; | |
302 rts = (struct rijndael_tables_str *) | |
303 PORT_Alloc(sizeof(struct rijndael_tables_str)); | |
304 if (!rts) return PR_FAILURE; | |
305 for (i=0; i<256; i++) { | |
306 /* The forward values */ | |
307 si01 = SBOX(i); | |
308 si02 = XTIME(si01); | |
309 si03 = si02 ^ si01; | |
310 rts->T0[i] = WORD4(si02, si01, si01, si03); | |
311 rts->T1[i] = WORD4(si03, si02, si01, si01); | |
312 rts->T2[i] = WORD4(si01, si03, si02, si01); | |
313 rts->T3[i] = WORD4(si01, si01, si03, si02); | |
314 /* The inverse values */ | |
315 si01 = SINV(i); | |
316 si02 = XTIME(si01); | |
317 si04 = XTIME(si02); | |
318 si08 = XTIME(si04); | |
319 si03 = si02 ^ si01; | |
320 si09 = si08 ^ si01; | |
321 si0B = si08 ^ si03; | |
322 si0D = si09 ^ si04; | |
323 si0E = si08 ^ si04 ^ si02; | |
324 rts->TInv0[i] = WORD4(si0E, si09, si0D, si0B); | |
325 rts->TInv1[i] = WORD4(si0B, si0E, si09, si0D); | |
326 rts->TInv2[i] = WORD4(si0D, si0B, si0E, si09); | |
327 rts->TInv3[i] = WORD4(si09, si0D, si0B, si0E); | |
328 } | |
329 /* wait until all the values are in to set */ | |
330 rijndaelTables = rts; | |
331 return PR_SUCCESS; | |
332 } | |
333 | |
334 #endif /* code to generate tables */ | |
335 | |
336 /************************************************************************** | |
337 * | |
338 * Stuff related to the Rijndael key schedule | |
339 * | |
340 *************************************************************************/ | |
341 | |
/*
 * SUBBYTE(w) applies the S-box to each of the four bytes of the 32-bit
 * word w, leaving every byte in its original position.
 *
 * The S-box output is widened to PRUint32 before shifting; as a promoted
 * int, a value >= 0x80 shifted left by 24 would overflow (undefined
 * behaviour).  w is evaluated four times, so it must be free of side
 * effects.
 */
#define SUBBYTE(w) \
    (((PRUint32)SBOX(((w) >> 24) & 0xff) << 24) | \
     ((PRUint32)SBOX(((w) >> 16) & 0xff) << 16) | \
     ((PRUint32)SBOX(((w) >>  8) & 0xff) <<  8) | \
     ((PRUint32)SBOX(((w)      ) & 0xff)      ))
347 | |
/*
 * ROTBYTE(b) rotates the 32-bit word b by one byte so that the byte stored
 * at memory offset 1 moves to offset 0 (and the byte at offset 0 wraps to
 * offset 3); the shift direction therefore depends on the host byte order.
 *
 * b must be an unsigned 32-bit value for the two shifts to combine into a
 * rotation.  It is evaluated twice.
 */
#ifdef IS_LITTLE_ENDIAN
#define ROTBYTE(b) \
    (((b) >> 8) | ((b) << 24))
#else
#define ROTBYTE(b) \
    (((b) << 8) | ((b) >> 24))
#endif
355 | |
356 /* rijndael_key_expansion7 | |
357 * | |
358 * Generate the expanded key from the key input by the user. | |
359 * XXX | |
360 * Nk == 7 (224 key bits) is a weird case. Since Nk > 6, an added SubByte | |
361 * transformation is done periodically. The period is every 4 bytes, and | |
362 * since 7%4 != 0 this happens at different times for each key word (unlike | |
363 * Nk == 8 where it happens twice in every key word, in the same positions). | |
364 * For now, I'm implementing this case "dumbly", w/o any unrolling. | |
365 */ | |
366 static SECStatus | |
367 rijndael_key_expansion7(AESContext *cx, const unsigned char *key, unsigned int Nk) | |
368 { | |
369 unsigned int i; | |
370 PRUint32 *W; | |
371 PRUint32 *pW; | |
372 PRUint32 tmp; | |
373 W = cx->expandedKey; | |
374 /* 1. the first Nk words contain the cipher key */ | |
375 memcpy(W, key, Nk * 4); | |
376 i = Nk; | |
377 /* 2. loop until full expanded key is obtained */ | |
378 pW = W + i - 1; | |
379 for (; i < cx->Nb * (cx->Nr + 1); ++i) { | |
380 tmp = *pW++; | |
381 if (i % Nk == 0) | |
382 tmp = SUBBYTE(ROTBYTE(tmp)) ^ Rcon[i / Nk - 1]; | |
383 else if (i % Nk == 4) | |
384 tmp = SUBBYTE(tmp); | |
385 *pW = W[i - Nk] ^ tmp; | |
386 } | |
387 return SECSuccess; | |
388 } | |
389 | |
390 /* rijndael_key_expansion | |
391 * | |
392 * Generate the expanded key from the key input by the user. | |
393 */ | |
394 static SECStatus | |
395 rijndael_key_expansion(AESContext *cx, const unsigned char *key, unsigned int Nk) | |
396 { | |
397 unsigned int i; | |
398 PRUint32 *W; | |
399 PRUint32 *pW; | |
400 PRUint32 tmp; | |
401 unsigned int round_key_words = cx->Nb * (cx->Nr + 1); | |
402 if (Nk == 7) | |
403 return rijndael_key_expansion7(cx, key, Nk); | |
404 W = cx->expandedKey; | |
405 /* The first Nk words contain the input cipher key */ | |
406 memcpy(W, key, Nk * 4); | |
407 i = Nk; | |
408 pW = W + i - 1; | |
409 /* Loop over all sets of Nk words, except the last */ | |
410 while (i < round_key_words - Nk) { | |
411 tmp = *pW++; | |
412 tmp = SUBBYTE(ROTBYTE(tmp)) ^ Rcon[i / Nk - 1]; | |
413 *pW = W[i++ - Nk] ^ tmp; | |
414 tmp = *pW++; *pW = W[i++ - Nk] ^ tmp; | |
415 tmp = *pW++; *pW = W[i++ - Nk] ^ tmp; | |
416 tmp = *pW++; *pW = W[i++ - Nk] ^ tmp; | |
417 if (Nk == 4) | |
418 continue; | |
419 switch (Nk) { | |
420 case 8: tmp = *pW++; tmp = SUBBYTE(tmp); *pW = W[i++ - Nk] ^ tmp; | |
421 case 7: tmp = *pW++; *pW = W[i++ - Nk] ^ tmp; | |
422 case 6: tmp = *pW++; *pW = W[i++ - Nk] ^ tmp; | |
423 case 5: tmp = *pW++; *pW = W[i++ - Nk] ^ tmp; | |
424 } | |
425 } | |
426 /* Generate the last word */ | |
427 tmp = *pW++; | |
428 tmp = SUBBYTE(ROTBYTE(tmp)) ^ Rcon[i / Nk - 1]; | |
429 *pW = W[i++ - Nk] ^ tmp; | |
430 /* There may be overflow here, if Nk % (Nb * (Nr + 1)) > 0. However, | |
431 * since the above loop generated all but the last Nk key words, there | |
432 * is no more need for the SubByte transformation. | |
433 */ | |
434 if (Nk < 8) { | |
435 for (; i < round_key_words; ++i) { | |
436 tmp = *pW++; | |
437 *pW = W[i - Nk] ^ tmp; | |
438 } | |
439 } else { | |
440 /* except in the case when Nk == 8. Then one more SubByte may have | |
441 * to be performed, at i % Nk == 4. | |
442 */ | |
443 for (; i < round_key_words; ++i) { | |
444 tmp = *pW++; | |
445 if (i % Nk == 4) | |
446 tmp = SUBBYTE(tmp); | |
447 *pW = W[i - Nk] ^ tmp; | |
448 } | |
449 } | |
450 return SECSuccess; | |
451 } | |
452 | |
453 /* rijndael_invkey_expansion | |
454 * | |
455 * Generate the expanded key for the inverse cipher from the key input by | |
456 * the user. | |
457 */ | |
458 static SECStatus | |
459 rijndael_invkey_expansion(AESContext *cx, const unsigned char *key, unsigned int Nk) | |
460 { | |
461 unsigned int r; | |
462 PRUint32 *roundkeyw; | |
463 PRUint8 *b; | |
464 int Nb = cx->Nb; | |
465 /* begins like usual key expansion ... */ | |
466 if (rijndael_key_expansion(cx, key, Nk) != SECSuccess) | |
467 return SECFailure; | |
468 /* ... but has the additional step of InvMixColumn, | |
469 * excepting the first and last round keys. | |
470 */ | |
471 roundkeyw = cx->expandedKey + cx->Nb; | |
472 for (r=1; r<cx->Nr; ++r) { | |
473 /* each key word, roundkeyw, represents a column in the key | |
474 * matrix. Each column is multiplied by the InvMixColumn matrix. | |
475 * [ 0E 0B 0D 09 ] [ b0 ] | |
476 * [ 09 0E 0B 0D ] * [ b1 ] | |
477 * [ 0D 09 0E 0B ] [ b2 ] | |
478 * [ 0B 0D 09 0E ] [ b3 ] | |
479 */ | |
480 b = (PRUint8 *)roundkeyw; | |
481 *roundkeyw++ = IMXC0(b[0]) ^ IMXC1(b[1]) ^ IMXC2(b[2]) ^ IMXC3(b[3]); | |
482 b = (PRUint8 *)roundkeyw; | |
483 *roundkeyw++ = IMXC0(b[0]) ^ IMXC1(b[1]) ^ IMXC2(b[2]) ^ IMXC3(b[3]); | |
484 b = (PRUint8 *)roundkeyw; | |
485 *roundkeyw++ = IMXC0(b[0]) ^ IMXC1(b[1]) ^ IMXC2(b[2]) ^ IMXC3(b[3]); | |
486 b = (PRUint8 *)roundkeyw; | |
487 *roundkeyw++ = IMXC0(b[0]) ^ IMXC1(b[1]) ^ IMXC2(b[2]) ^ IMXC3(b[3]); | |
488 if (Nb <= 4) | |
489 continue; | |
490 switch (Nb) { | |
491 case 8: b = (PRUint8 *)roundkeyw; | |
492 *roundkeyw++ = IMXC0(b[0]) ^ IMXC1(b[1]) ^ | |
493 IMXC2(b[2]) ^ IMXC3(b[3]); | |
494 case 7: b = (PRUint8 *)roundkeyw; | |
495 *roundkeyw++ = IMXC0(b[0]) ^ IMXC1(b[1]) ^ | |
496 IMXC2(b[2]) ^ IMXC3(b[3]); | |
497 case 6: b = (PRUint8 *)roundkeyw; | |
498 *roundkeyw++ = IMXC0(b[0]) ^ IMXC1(b[1]) ^ | |
499 IMXC2(b[2]) ^ IMXC3(b[3]); | |
500 case 5: b = (PRUint8 *)roundkeyw; | |
501 *roundkeyw++ = IMXC0(b[0]) ^ IMXC1(b[1]) ^ | |
502 IMXC2(b[2]) ^ IMXC3(b[3]); | |
503 } | |
504 } | |
505 return SECSuccess; | |
506 } | |
507 /************************************************************************** | |
508 * | |
509 * Stuff related to Rijndael encryption/decryption, optimized for | |
510 * a 128-bit blocksize. | |
511 * | |
512 *************************************************************************/ | |
513 | |
514 #ifdef IS_LITTLE_ENDIAN | |
515 #define BYTE0WORD(w) ((w) & 0x000000ff) | |
516 #define BYTE1WORD(w) ((w) & 0x0000ff00) | |
517 #define BYTE2WORD(w) ((w) & 0x00ff0000) | |
518 #define BYTE3WORD(w) ((w) & 0xff000000) | |
519 #else | |
520 #define BYTE0WORD(w) ((w) & 0xff000000) | |
521 #define BYTE1WORD(w) ((w) & 0x00ff0000) | |
522 #define BYTE2WORD(w) ((w) & 0x0000ff00) | |
523 #define BYTE3WORD(w) ((w) & 0x000000ff) | |
524 #endif | |
525 | |
526 typedef union { | |
527 PRUint32 w[4]; | |
528 PRUint8 b[16]; | |
529 } rijndael_state; | |
530 | |
531 #define COLUMN_0(state) state.w[0] | |
532 #define COLUMN_1(state) state.w[1] | |
533 #define COLUMN_2(state) state.w[2] | |
534 #define COLUMN_3(state) state.w[3] | |
535 | |
536 #define STATE_BYTE(i) state.b[i] | |
537 | |
538 static SECStatus | |
539 rijndael_encryptBlock128(AESContext *cx, | |
540 unsigned char *output, | |
541 const unsigned char *input) | |
542 { | |
543 unsigned int r; | |
544 PRUint32 *roundkeyw; | |
545 rijndael_state state; | |
546 PRUint32 C0, C1, C2, C3; | |
547 #if defined(NSS_X86_OR_X64) | |
548 #define pIn input | |
549 #define pOut output | |
550 #else | |
551 unsigned char *pIn, *pOut; | |
552 PRUint32 inBuf[4], outBuf[4]; | |
553 | |
554 if ((ptrdiff_t)input & 0x3) { | |
555 memcpy(inBuf, input, sizeof inBuf); | |
556 pIn = (unsigned char *)inBuf; | |
557 } else { | |
558 pIn = (unsigned char *)input; | |
559 } | |
560 if ((ptrdiff_t)output & 0x3) { | |
561 pOut = (unsigned char *)outBuf; | |
562 } else { | |
563 pOut = (unsigned char *)output; | |
564 } | |
565 #endif | |
566 roundkeyw = cx->expandedKey; | |
567 /* Step 1: Add Round Key 0 to initial state */ | |
568 COLUMN_0(state) = *((PRUint32 *)(pIn )) ^ *roundkeyw++; | |
569 COLUMN_1(state) = *((PRUint32 *)(pIn + 4 )) ^ *roundkeyw++; | |
570 COLUMN_2(state) = *((PRUint32 *)(pIn + 8 )) ^ *roundkeyw++; | |
571 COLUMN_3(state) = *((PRUint32 *)(pIn + 12)) ^ *roundkeyw++; | |
572 /* Step 2: Loop over rounds [1..NR-1] */ | |
573 for (r=1; r<cx->Nr; ++r) { | |
574 /* Do ShiftRow, ByteSub, and MixColumn all at once */ | |
575 C0 = T0(STATE_BYTE(0)) ^ | |
576 T1(STATE_BYTE(5)) ^ | |
577 T2(STATE_BYTE(10)) ^ | |
578 T3(STATE_BYTE(15)); | |
579 C1 = T0(STATE_BYTE(4)) ^ | |
580 T1(STATE_BYTE(9)) ^ | |
581 T2(STATE_BYTE(14)) ^ | |
582 T3(STATE_BYTE(3)); | |
583 C2 = T0(STATE_BYTE(8)) ^ | |
584 T1(STATE_BYTE(13)) ^ | |
585 T2(STATE_BYTE(2)) ^ | |
586 T3(STATE_BYTE(7)); | |
587 C3 = T0(STATE_BYTE(12)) ^ | |
588 T1(STATE_BYTE(1)) ^ | |
589 T2(STATE_BYTE(6)) ^ | |
590 T3(STATE_BYTE(11)); | |
591 /* Round key addition */ | |
592 COLUMN_0(state) = C0 ^ *roundkeyw++; | |
593 COLUMN_1(state) = C1 ^ *roundkeyw++; | |
594 COLUMN_2(state) = C2 ^ *roundkeyw++; | |
595 COLUMN_3(state) = C3 ^ *roundkeyw++; | |
596 } | |
597 /* Step 3: Do the last round */ | |
598 /* Final round does not employ MixColumn */ | |
599 C0 = ((BYTE0WORD(T2(STATE_BYTE(0)))) | | |
600 (BYTE1WORD(T3(STATE_BYTE(5)))) | | |
601 (BYTE2WORD(T0(STATE_BYTE(10)))) | | |
602 (BYTE3WORD(T1(STATE_BYTE(15))))) ^ | |
603 *roundkeyw++; | |
604 C1 = ((BYTE0WORD(T2(STATE_BYTE(4)))) | | |
605 (BYTE1WORD(T3(STATE_BYTE(9)))) | | |
606 (BYTE2WORD(T0(STATE_BYTE(14)))) | | |
607 (BYTE3WORD(T1(STATE_BYTE(3))))) ^ | |
608 *roundkeyw++; | |
609 C2 = ((BYTE0WORD(T2(STATE_BYTE(8)))) | | |
610 (BYTE1WORD(T3(STATE_BYTE(13)))) | | |
611 (BYTE2WORD(T0(STATE_BYTE(2)))) | | |
612 (BYTE3WORD(T1(STATE_BYTE(7))))) ^ | |
613 *roundkeyw++; | |
614 C3 = ((BYTE0WORD(T2(STATE_BYTE(12)))) | | |
615 (BYTE1WORD(T3(STATE_BYTE(1)))) | | |
616 (BYTE2WORD(T0(STATE_BYTE(6)))) | | |
617 (BYTE3WORD(T1(STATE_BYTE(11))))) ^ | |
618 *roundkeyw++; | |
619 *((PRUint32 *) pOut ) = C0; | |
620 *((PRUint32 *)(pOut + 4)) = C1; | |
621 *((PRUint32 *)(pOut + 8)) = C2; | |
622 *((PRUint32 *)(pOut + 12)) = C3; | |
623 #if defined(NSS_X86_OR_X64) | |
624 #undef pIn | |
625 #undef pOut | |
626 #else | |
627 if ((ptrdiff_t)output & 0x3) { | |
628 memcpy(output, outBuf, sizeof outBuf); | |
629 } | |
630 #endif | |
631 return SECSuccess; | |
632 } | |
633 | |
634 static SECStatus | |
635 rijndael_decryptBlock128(AESContext *cx, | |
636 unsigned char *output, | |
637 const unsigned char *input) | |
638 { | |
639 int r; | |
640 PRUint32 *roundkeyw; | |
641 rijndael_state state; | |
642 PRUint32 C0, C1, C2, C3; | |
643 #if defined(NSS_X86_OR_X64) | |
644 #define pIn input | |
645 #define pOut output | |
646 #else | |
647 unsigned char *pIn, *pOut; | |
648 PRUint32 inBuf[4], outBuf[4]; | |
649 | |
650 if ((ptrdiff_t)input & 0x3) { | |
651 memcpy(inBuf, input, sizeof inBuf); | |
652 pIn = (unsigned char *)inBuf; | |
653 } else { | |
654 pIn = (unsigned char *)input; | |
655 } | |
656 if ((ptrdiff_t)output & 0x3) { | |
657 pOut = (unsigned char *)outBuf; | |
658 } else { | |
659 pOut = (unsigned char *)output; | |
660 } | |
661 #endif | |
662 roundkeyw = cx->expandedKey + cx->Nb * cx->Nr + 3; | |
663 /* reverse the final key addition */ | |
664 COLUMN_3(state) = *((PRUint32 *)(pIn + 12)) ^ *roundkeyw--; | |
665 COLUMN_2(state) = *((PRUint32 *)(pIn + 8)) ^ *roundkeyw--; | |
666 COLUMN_1(state) = *((PRUint32 *)(pIn + 4)) ^ *roundkeyw--; | |
667 COLUMN_0(state) = *((PRUint32 *)(pIn )) ^ *roundkeyw--; | |
668 /* Loop over rounds in reverse [NR..1] */ | |
669 for (r=cx->Nr; r>1; --r) { | |
670 /* Invert the (InvByteSub*InvMixColumn)(InvShiftRow(state)) */ | |
671 C0 = TInv0(STATE_BYTE(0)) ^ | |
672 TInv1(STATE_BYTE(13)) ^ | |
673 TInv2(STATE_BYTE(10)) ^ | |
674 TInv3(STATE_BYTE(7)); | |
675 C1 = TInv0(STATE_BYTE(4)) ^ | |
676 TInv1(STATE_BYTE(1)) ^ | |
677 TInv2(STATE_BYTE(14)) ^ | |
678 TInv3(STATE_BYTE(11)); | |
679 C2 = TInv0(STATE_BYTE(8)) ^ | |
680 TInv1(STATE_BYTE(5)) ^ | |
681 TInv2(STATE_BYTE(2)) ^ | |
682 TInv3(STATE_BYTE(15)); | |
683 C3 = TInv0(STATE_BYTE(12)) ^ | |
684 TInv1(STATE_BYTE(9)) ^ | |
685 TInv2(STATE_BYTE(6)) ^ | |
686 TInv3(STATE_BYTE(3)); | |
687 /* Invert the key addition step */ | |
688 COLUMN_3(state) = C3 ^ *roundkeyw--; | |
689 COLUMN_2(state) = C2 ^ *roundkeyw--; | |
690 COLUMN_1(state) = C1 ^ *roundkeyw--; | |
691 COLUMN_0(state) = C0 ^ *roundkeyw--; | |
692 } | |
693 /* inverse sub */ | |
694 pOut[ 0] = SINV(STATE_BYTE( 0)); | |
695 pOut[ 1] = SINV(STATE_BYTE(13)); | |
696 pOut[ 2] = SINV(STATE_BYTE(10)); | |
697 pOut[ 3] = SINV(STATE_BYTE( 7)); | |
698 pOut[ 4] = SINV(STATE_BYTE( 4)); | |
699 pOut[ 5] = SINV(STATE_BYTE( 1)); | |
700 pOut[ 6] = SINV(STATE_BYTE(14)); | |
701 pOut[ 7] = SINV(STATE_BYTE(11)); | |
702 pOut[ 8] = SINV(STATE_BYTE( 8)); | |
703 pOut[ 9] = SINV(STATE_BYTE( 5)); | |
704 pOut[10] = SINV(STATE_BYTE( 2)); | |
705 pOut[11] = SINV(STATE_BYTE(15)); | |
706 pOut[12] = SINV(STATE_BYTE(12)); | |
707 pOut[13] = SINV(STATE_BYTE( 9)); | |
708 pOut[14] = SINV(STATE_BYTE( 6)); | |
709 pOut[15] = SINV(STATE_BYTE( 3)); | |
710 /* final key addition */ | |
711 *((PRUint32 *)(pOut + 12)) ^= *roundkeyw--; | |
712 *((PRUint32 *)(pOut + 8)) ^= *roundkeyw--; | |
713 *((PRUint32 *)(pOut + 4)) ^= *roundkeyw--; | |
714 *((PRUint32 *) pOut ) ^= *roundkeyw--; | |
715 #if defined(NSS_X86_OR_X64) | |
716 #undef pIn | |
717 #undef pOut | |
718 #else | |
719 if ((ptrdiff_t)output & 0x3) { | |
720 memcpy(output, outBuf, sizeof outBuf); | |
721 } | |
722 #endif | |
723 return SECSuccess; | |
724 } | |
725 | |
726 /************************************************************************** | |
727 * | |
728 * Stuff related to general Rijndael encryption/decryption, for blocksizes | |
729 * greater than 128 bits. | |
730 * | |
731 * XXX This code is currently untested! So far, AES specs have only been | |
732 * released for 128 bit blocksizes. This will be tested, but for now | |
733 * only the code above has been tested using known values. | |
734 * | |
735 *************************************************************************/ | |
736 | |
737 #define COLUMN(array, j) *((PRUint32 *)(array + j)) | |
738 | |
739 SECStatus | |
740 rijndael_encryptBlock(AESContext *cx, | |
741 unsigned char *output, | |
742 const unsigned char *input) | |
743 { | |
744 return SECFailure; | |
745 #ifdef rijndael_large_blocks_fixed | |
746 unsigned int j, r, Nb; | |
747 unsigned int c2=0, c3=0; | |
748 PRUint32 *roundkeyw; | |
749 PRUint8 clone[RIJNDAEL_MAX_STATE_SIZE]; | |
750 Nb = cx->Nb; | |
751 roundkeyw = cx->expandedKey; | |
752 /* Step 1: Add Round Key 0 to initial state */ | |
753 for (j=0; j<4*Nb; j+=4) { | |
754 COLUMN(clone, j) = COLUMN(input, j) ^ *roundkeyw++; | |
755 } | |
756 /* Step 2: Loop over rounds [1..NR-1] */ | |
757 for (r=1; r<cx->Nr; ++r) { | |
758 for (j=0; j<Nb; ++j) { | |
759 COLUMN(output, j) = T0(STATE_BYTE(4* j )) ^ | |
760 T1(STATE_BYTE(4*((j+ 1)%Nb)+1)) ^ | |
761 T2(STATE_BYTE(4*((j+c2)%Nb)+2)) ^ | |
762 T3(STATE_BYTE(4*((j+c3)%Nb)+3)); | |
763 } | |
764 for (j=0; j<4*Nb; j+=4) { | |
765 COLUMN(clone, j) = COLUMN(output, j) ^ *roundkeyw++; | |
766 } | |
767 } | |
768 /* Step 3: Do the last round */ | |
769 /* Final round does not employ MixColumn */ | |
770 for (j=0; j<Nb; ++j) { | |
771 COLUMN(output, j) = ((BYTE0WORD(T2(STATE_BYTE(4* j )))) | | |
772 (BYTE1WORD(T3(STATE_BYTE(4*(j+ 1)%Nb)+1))) | | |
773 (BYTE2WORD(T0(STATE_BYTE(4*(j+c2)%Nb)+2))) | | |
774 (BYTE3WORD(T1(STATE_BYTE(4*(j+c3)%Nb)+3)))) ^ | |
775 *roundkeyw++; | |
776 } | |
777 return SECSuccess; | |
778 #endif | |
779 } | |
780 | |
781 SECStatus | |
782 rijndael_decryptBlock(AESContext *cx, | |
783 unsigned char *output, | |
784 const unsigned char *input) | |
785 { | |
786 return SECFailure; | |
787 #ifdef rijndael_large_blocks_fixed | |
788 int j, r, Nb; | |
789 int c2=0, c3=0; | |
790 PRUint32 *roundkeyw; | |
791 PRUint8 clone[RIJNDAEL_MAX_STATE_SIZE]; | |
792 Nb = cx->Nb; | |
793 roundkeyw = cx->expandedKey + cx->Nb * cx->Nr + 3; | |
794 /* reverse key addition */ | |
795 for (j=4*Nb; j>=0; j-=4) { | |
796 COLUMN(clone, j) = COLUMN(input, j) ^ *roundkeyw--; | |
797 } | |
798 /* Loop over rounds in reverse [NR..1] */ | |
799 for (r=cx->Nr; r>1; --r) { | |
800 /* Invert the (InvByteSub*InvMixColumn)(InvShiftRow(state)) */ | |
801 for (j=0; j<Nb; ++j) { | |
802 COLUMN(output, 4*j) = TInv0(STATE_BYTE(4* j )) ^ | |
803 TInv1(STATE_BYTE(4*(j+Nb- 1)%Nb)+1) ^ | |
804 TInv2(STATE_BYTE(4*(j+Nb-c2)%Nb)+2) ^ | |
805 TInv3(STATE_BYTE(4*(j+Nb-c3)%Nb)+3); | |
806 } | |
807 /* Invert the key addition step */ | |
808 for (j=4*Nb; j>=0; j-=4) { | |
809 COLUMN(clone, j) = COLUMN(output, j) ^ *roundkeyw--; | |
810 } | |
811 } | |
812 /* inverse sub */ | |
813 for (j=0; j<4*Nb; ++j) { | |
814 output[j] = SINV(clone[j]); | |
815 } | |
816 /* final key addition */ | |
817 for (j=4*Nb; j>=0; j-=4) { | |
818 COLUMN(output, j) ^= *roundkeyw--; | |
819 } | |
820 return SECSuccess; | |
821 #endif | |
822 } | |
823 | |
824 /************************************************************************** | |
825 * | |
826 * Rijndael modes of operation (ECB and CBC) | |
827 * | |
828 *************************************************************************/ | |
829 | |
830 static SECStatus | |
831 rijndael_encryptECB(AESContext *cx, unsigned char *output, | |
832 unsigned int *outputLen, unsigned int maxOutputLen, | |
833 const unsigned char *input, unsigned int inputLen, | |
834 unsigned int blocksize) | |
835 { | |
836 SECStatus rv; | |
837 AESBlockFunc *encryptor; | |
838 | |
839 encryptor = (blocksize == RIJNDAEL_MIN_BLOCKSIZE) | |
840 ? &rijndael_encryptBlock128 | |
841 : &rijndael_encryptBlock; | |
842 while (inputLen > 0) { | |
843 rv = (*encryptor)(cx, output, input); | |
844 if (rv != SECSuccess) | |
845 return rv; | |
846 output += blocksize; | |
847 input += blocksize; | |
848 inputLen -= blocksize; | |
849 } | |
850 return SECSuccess; | |
851 } | |
852 | |
853 static SECStatus | |
854 rijndael_encryptCBC(AESContext *cx, unsigned char *output, | |
855 unsigned int *outputLen, unsigned int maxOutputLen, | |
856 const unsigned char *input, unsigned int inputLen, | |
857 unsigned int blocksize) | |
858 { | |
859 unsigned int j; | |
860 SECStatus rv; | |
861 AESBlockFunc *encryptor; | |
862 unsigned char *lastblock; | |
863 unsigned char inblock[RIJNDAEL_MAX_STATE_SIZE * 8]; | |
864 | |
865 if (!inputLen) | |
866 return SECSuccess; | |
867 lastblock = cx->iv; | |
868 encryptor = (blocksize == RIJNDAEL_MIN_BLOCKSIZE) | |
869 ? &rijndael_encryptBlock128 | |
870 : &rijndael_encryptBlock; | |
871 while (inputLen > 0) { | |
872 /* XOR with the last block (IV if first block) */ | |
873 for (j=0; j<blocksize; ++j) | |
874 inblock[j] = input[j] ^ lastblock[j]; | |
875 /* encrypt */ | |
876 rv = (*encryptor)(cx, output, inblock); | |
877 if (rv != SECSuccess) | |
878 return rv; | |
879 /* move to the next block */ | |
880 lastblock = output; | |
881 output += blocksize; | |
882 input += blocksize; | |
883 inputLen -= blocksize; | |
884 } | |
885 memcpy(cx->iv, lastblock, blocksize); | |
886 return SECSuccess; | |
887 } | |
888 | |
889 static SECStatus | |
890 rijndael_decryptECB(AESContext *cx, unsigned char *output, | |
891 unsigned int *outputLen, unsigned int maxOutputLen, | |
892 const unsigned char *input, unsigned int inputLen, | |
893 unsigned int blocksize) | |
894 { | |
895 SECStatus rv; | |
896 AESBlockFunc *decryptor; | |
897 | |
898 decryptor = (blocksize == RIJNDAEL_MIN_BLOCKSIZE) | |
899 ? &rijndael_decryptBlock128 | |
900 : &rijndael_decryptBlock; | |
901 while (inputLen > 0) { | |
902 rv = (*decryptor)(cx, output, input); | |
903 if (rv != SECSuccess) | |
904 return rv; | |
905 output += blocksize; | |
906 input += blocksize; | |
907 inputLen -= blocksize; | |
908 } | |
909 return SECSuccess; | |
910 } | |
911 | |
912 static SECStatus | |
913 rijndael_decryptCBC(AESContext *cx, unsigned char *output, | |
914 unsigned int *outputLen, unsigned int maxOutputLen, | |
915 const unsigned char *input, unsigned int inputLen, | |
916 unsigned int blocksize) | |
917 { | |
918 SECStatus rv; | |
919 AESBlockFunc *decryptor; | |
920 const unsigned char *in; | |
921 unsigned char *out; | |
922 unsigned int j; | |
923 unsigned char newIV[RIJNDAEL_MAX_BLOCKSIZE]; | |
924 | |
925 | |
926 if (!inputLen) | |
927 return SECSuccess; | |
928 PORT_Assert(output - input >= 0 || input - output >= (int)inputLen ); | |
929 decryptor = (blocksize == RIJNDAEL_MIN_BLOCKSIZE) | |
930 ? &rijndael_decryptBlock128 | |
931 : &rijndael_decryptBlock; | |
932 in = input + (inputLen - blocksize); | |
933 memcpy(newIV, in, blocksize); | |
934 out = output + (inputLen - blocksize); | |
935 while (inputLen > blocksize) { | |
936 rv = (*decryptor)(cx, out, in); | |
937 if (rv != SECSuccess) | |
938 return rv; | |
939 for (j=0; j<blocksize; ++j) | |
940 out[j] ^= in[(int)(j - blocksize)]; | |
941 out -= blocksize; | |
942 in -= blocksize; | |
943 inputLen -= blocksize; | |
944 } | |
945 if (in == input) { | |
946 rv = (*decryptor)(cx, out, in); | |
947 if (rv != SECSuccess) | |
948 return rv; | |
949 for (j=0; j<blocksize; ++j) | |
950 out[j] ^= cx->iv[j]; | |
951 } | |
952 memcpy(cx->iv, newIV, blocksize); | |
953 return SECSuccess; | |
954 } | |
955 | |
956 /************************************************************************ | |
957 * | |
958 * BLAPI Interface functions | |
959 * | |
960 * The following functions implement the encryption routines defined in | |
961 * BLAPI for the AES cipher, Rijndael. | |
962 * | |
963 ***********************************************************************/ | |
964 | |
965 AESContext * AES_AllocateContext(void) | |
966 { | |
967 return PORT_ZNew(AESContext); | |
968 } | |
969 | |
970 | |
#ifdef INTEL_GCM
/*
 * Adapted from the example code in "How to detect New Instruction support in
 * the 4th generation Intel Core processor family" by Max Locktyukhin.
 *
 * XGETBV:
 *   Reads an extended control register (XCR) specified by ECX into EDX:EAX.
 *
 * check_xcr0_ymm reads XCR0 (ECX = 0), keeps only the low 32 bits (EAX) and
 * returns true when bits 1 and 2 (mask 6) are both set.
 *
 * The only caller visible in this file, aes_InitContext, invokes this
 * function after CPUID leaf 1 reported ECX bits 27 and 28 set.
 * NOTE(review): presumably that ordering is what makes executing XGETBV
 * safe -- confirm against the Intel documentation.
 */
static PRBool
check_xcr0_ymm()
{
    PRUint32 xcr0;
#if defined(_MSC_VER)
#if defined(_M_IX86)
    /* 32-bit MSVC: inline assembly; the result is taken from EAX. */
    __asm {
        mov ecx, 0
        xgetbv
        mov xcr0, eax
    }
#else
    /* Other MSVC targets: use the compiler intrinsic and truncate to 32 bits. */
    xcr0 = (PRUint32)_xgetbv(0);  /* Requires VS2010 SP1 or later. */
#endif
#else
    /* GCC-style inline asm: ECX = 0 in, EAX out, EDX declared as clobbered. */
    __asm__ ("xgetbv" : "=a" (xcr0) : "c" (0) : "%edx");
#endif
    /* Check if xmm and ymm state are enabled in XCR0. */
    return (xcr0 & 6) == 6;
}
#endif
1000 | |
1001 /* | |
1002 ** Initialize a new AES context suitable for AES encryption/decryption in | |
1003 ** the ECB or CBC mode. | |
1004 ** "mode" the mode of operation, which must be NSS_AES or NSS_AES_CBC | |
1005 */ | |
1006 static SECStatus | |
1007 aes_InitContext(AESContext *cx, const unsigned char *key, unsigned int keysize, | |
1008 const unsigned char *iv, int mode, unsigned int encrypt, | |
1009 unsigned int blocksize) | |
1010 { | |
1011 unsigned int Nk; | |
1012 /* According to Rijndael AES Proposal, section 12.1, block and key | |
1013 * lengths between 128 and 256 bits are supported, as long as the | |
1014 * length in bytes is divisible by 4. | |
1015 */ | |
1016 if (key == NULL || | |
1017 keysize < RIJNDAEL_MIN_BLOCKSIZE || | |
1018 keysize > RIJNDAEL_MAX_BLOCKSIZE || | |
1019 keysize % 4 != 0 || | |
1020 blocksize < RIJNDAEL_MIN_BLOCKSIZE || | |
1021 blocksize > RIJNDAEL_MAX_BLOCKSIZE || | |
1022 blocksize % 4 != 0) { | |
1023 PORT_SetError(SEC_ERROR_INVALID_ARGS); | |
1024 return SECFailure; | |
1025 } | |
1026 if (mode != NSS_AES && mode != NSS_AES_CBC) { | |
1027 PORT_SetError(SEC_ERROR_INVALID_ARGS); | |
1028 return SECFailure; | |
1029 } | |
1030 if (mode == NSS_AES_CBC && iv == NULL) { | |
1031 PORT_SetError(SEC_ERROR_INVALID_ARGS); | |
1032 return SECFailure; | |
1033 } | |
1034 if (!cx) { | |
1035 PORT_SetError(SEC_ERROR_INVALID_ARGS); | |
1036 return SECFailure; | |
1037 } | |
1038 #ifdef USE_HW_AES | |
1039 if (has_intel_aes == 0) { | |
1040 unsigned long eax, ebx, ecx, edx; | |
1041 char *disable_hw_aes = getenv("NSS_DISABLE_HW_AES"); | |
1042 | |
1043 if (disable_hw_aes == NULL) { | |
1044 freebl_cpuid(1, &eax, &ebx, &ecx, &edx); | |
1045 has_intel_aes = (ecx & (1 << 25)) != 0 ? 1 : -1; | |
1046 #ifdef INTEL_GCM | |
1047 has_intel_clmul = (ecx & (1 << 1)) != 0 ? 1 : -1; | |
1048 if ((ecx & (1 << 27)) != 0 && (ecx & (1 << 28)) != 0 && | |
1049 check_xcr0_ymm()) { | |
1050 has_intel_avx = 1; | |
1051 } else { | |
1052 has_intel_avx = -1; | |
1053 } | |
1054 #endif | |
1055 } else { | |
1056 has_intel_aes = -1; | |
1057 #ifdef INTEL_GCM | |
1058 has_intel_avx = -1; | |
1059 has_intel_clmul = -1; | |
1060 #endif | |
1061 } | |
1062 } | |
1063 use_hw_aes = (PRBool) | |
1064 (has_intel_aes > 0 && (keysize % 8) == 0 && blocksize == 16); | |
1065 #ifdef INTEL_GCM | |
1066 use_hw_gcm = (PRBool) | |
1067 (use_hw_aes && has_intel_avx>0 && has_intel_clmul>0); | |
1068 #endif | |
1069 #endif /* USE_HW_AES */ | |
1070 /* Nb = (block size in bits) / 32 */ | |
1071 cx->Nb = blocksize / 4; | |
1072 /* Nk = (key size in bits) / 32 */ | |
1073 Nk = keysize / 4; | |
1074 /* Obtain number of rounds from "table" */ | |
1075 cx->Nr = RIJNDAEL_NUM_ROUNDS(Nk, cx->Nb); | |
1076 /* copy in the iv, if neccessary */ | |
1077 if (mode == NSS_AES_CBC) { | |
1078 memcpy(cx->iv, iv, blocksize); | |
1079 #ifdef USE_HW_AES | |
1080 if (use_hw_aes) { | |
1081 cx->worker = (freeblCipherFunc) | |
1082 intel_aes_cbc_worker(encrypt, keysize); | |
1083 } else | |
1084 #endif | |
1085 { | |
1086 cx->worker = (freeblCipherFunc) (encrypt | |
1087 ? &rijndael_encryptCBC : &rijndael_decryptCBC); | |
1088 } | |
1089 } else { | |
1090 #ifdef USE_HW_AES | |
1091 if (use_hw_aes) { | |
1092 cx->worker = (freeblCipherFunc) | |
1093 intel_aes_ecb_worker(encrypt, keysize); | |
1094 } else | |
1095 #endif | |
1096 { | |
1097 cx->worker = (freeblCipherFunc) (encrypt | |
1098 ? &rijndael_encryptECB : &rijndael_decryptECB); | |
1099 } | |
1100 } | |
1101 PORT_Assert((cx->Nb * (cx->Nr + 1)) <= RIJNDAEL_MAX_EXP_KEY_SIZE); | |
1102 if ((cx->Nb * (cx->Nr + 1)) > RIJNDAEL_MAX_EXP_KEY_SIZE) { | |
1103 PORT_SetError(SEC_ERROR_LIBRARY_FAILURE); | |
1104 goto cleanup; | |
1105 } | |
1106 #ifdef USE_HW_AES | |
1107 if (use_hw_aes) { | |
1108 intel_aes_init(encrypt, keysize); | |
1109 } else | |
1110 #endif | |
1111 { | |
1112 | |
1113 #if defined(RIJNDAEL_GENERATE_TABLES) || \ | |
1114 defined(RIJNDAEL_GENERATE_TABLES_MACRO) | |
1115 if (rijndaelTables == NULL) { | |
1116 if (PR_CallOnce(&coRTInit, init_rijndael_tables) | |
1117 != PR_SUCCESS) { | |
1118 return SecFailure; | |
1119 } | |
1120 } | |
1121 #endif | |
1122 /* Generate expanded key */ | |
1123 if (encrypt) { | |
1124 if (rijndael_key_expansion(cx, key, Nk) != SECSuccess) | |
1125 goto cleanup; | |
1126 } else { | |
1127 if (rijndael_invkey_expansion(cx, key, Nk) != SECSuccess) | |
1128 goto cleanup; | |
1129 } | |
1130 } | |
1131 cx->worker_cx = cx; | |
1132 cx->destroy = NULL; | |
1133 cx->isBlock = PR_TRUE; | |
1134 return SECSuccess; | |
1135 cleanup: | |
1136 return SECFailure; | |
1137 } | |
1138 | |
1139 SECStatus | |
1140 AES_InitContext(AESContext *cx, const unsigned char *key, unsigned int keysize, | |
1141 const unsigned char *iv, int mode, unsigned int encrypt, | |
1142 unsigned int blocksize) | |
1143 { | |
1144 int basemode = mode; | |
1145 PRBool baseencrypt = encrypt; | |
1146 SECStatus rv; | |
1147 | |
1148 switch (mode) { | |
1149 case NSS_AES_CTS: | |
1150 basemode = NSS_AES_CBC; | |
1151 break; | |
1152 case NSS_AES_GCM: | |
1153 case NSS_AES_CTR: | |
1154 basemode = NSS_AES; | |
1155 baseencrypt = PR_TRUE; | |
1156 break; | |
1157 } | |
1158 /* make sure enough is initializes so we can safely call Destroy */ | |
1159 cx->worker_cx = NULL; | |
1160 cx->destroy = NULL; | |
1161 rv = aes_InitContext(cx, key, keysize, iv, basemode, | |
1162 baseencrypt, blocksize); | |
1163 if (rv != SECSuccess) { | |
1164 AES_DestroyContext(cx, PR_FALSE); | |
1165 return rv; | |
1166 } | |
1167 | |
1168 /* finally, set up any mode specific contexts */ | |
1169 switch (mode) { | |
1170 case NSS_AES_CTS: | |
1171 cx->worker_cx = CTS_CreateContext(cx, cx->worker, iv, blocksize); | |
1172 cx->worker = (freeblCipherFunc) | |
1173 (encrypt ? CTS_EncryptUpdate : CTS_DecryptUpdate); | |
1174 cx->destroy = (freeblDestroyFunc) CTS_DestroyContext; | |
1175 cx->isBlock = PR_FALSE; | |
1176 break; | |
1177 case NSS_AES_GCM: | |
1178 #ifdef INTEL_GCM | |
1179 if(use_hw_gcm) { | |
1180 cx->worker_cx = intel_AES_GCM_CreateContext(cx, cx->worker, iv, blocksize); | |
1181 cx->worker = (freeblCipherFunc) | |
1182 (encrypt ? intel_AES_GCM_EncryptUpdate : intel_AES_GCM_DecryptUpdate); | |
1183 cx->destroy = (freeblDestroyFunc) intel_AES_GCM_DestroyContext; | |
1184 cx->isBlock = PR_FALSE; | |
1185 } else | |
1186 #endif | |
1187 { | |
1188 cx->worker_cx = GCM_CreateContext(cx, cx->worker, iv, blocksize); | |
1189 cx->worker = (freeblCipherFunc) | |
1190 (encrypt ? GCM_EncryptUpdate : GCM_DecryptUpdate); | |
1191 cx->destroy = (freeblDestroyFunc) GCM_DestroyContext; | |
1192 cx->isBlock = PR_FALSE; | |
1193 } | |
1194 break; | |
1195 case NSS_AES_CTR: | |
1196 cx->worker_cx = CTR_CreateContext(cx, cx->worker, iv, blocksize); | |
1197 #if defined(USE_HW_AES) && defined(_MSC_VER) | |
1198 if (use_hw_aes) { | |
1199 cx->worker = (freeblCipherFunc) CTR_Update_HW_AES; | |
1200 } else | |
1201 #endif | |
1202 { | |
1203 cx->worker = (freeblCipherFunc) CTR_Update; | |
1204 } | |
1205 cx->destroy = (freeblDestroyFunc) CTR_DestroyContext; | |
1206 cx->isBlock = PR_FALSE; | |
1207 break; | |
1208 default: | |
1209 /* everything has already been set up by aes_InitContext, just | |
1210 * return */ | |
1211 return SECSuccess; | |
1212 } | |
1213 /* check to see if we succeeded in getting the worker context */ | |
1214 if (cx->worker_cx == NULL) { | |
1215 /* no, just destroy the existing context */ | |
1216 cx->destroy = NULL; /* paranoia, though you can see a dozen lines */ | |
1217 /* below that this isn't necessary */ | |
1218 AES_DestroyContext(cx, PR_FALSE); | |
1219 return SECFailure; | |
1220 } | |
1221 return SECSuccess; | |
1222 } | |
1223 | |
1224 /* AES_CreateContext | |
1225 * | |
1226 * create a new context for Rijndael operations | |
1227 */ | |
1228 AESContext * | |
1229 AES_CreateContext(const unsigned char *key, const unsigned char *iv, | |
1230 int mode, int encrypt, | |
1231 unsigned int keysize, unsigned int blocksize) | |
1232 { | |
1233 AESContext *cx = AES_AllocateContext(); | |
1234 if (cx) { | |
1235 SECStatus rv = AES_InitContext(cx, key, keysize, iv, mode, encrypt, | |
1236 blocksize); | |
1237 if (rv != SECSuccess) { | |
1238 AES_DestroyContext(cx, PR_TRUE); | |
1239 cx = NULL; | |
1240 } | |
1241 } | |
1242 return cx; | |
1243 } | |
1244 | |
1245 /* | |
1246 * AES_DestroyContext | |
1247 * | |
1248 * Zero an AES cipher context. If freeit is true, also free the pointer | |
1249 * to the context. | |
1250 */ | |
1251 void | |
1252 AES_DestroyContext(AESContext *cx, PRBool freeit) | |
1253 { | |
1254 if (cx->worker_cx && cx->destroy) { | |
1255 (*cx->destroy)(cx->worker_cx, PR_TRUE); | |
1256 cx->worker_cx = NULL; | |
1257 cx->destroy = NULL; | |
1258 } | |
1259 if (freeit) | |
1260 PORT_Free(cx); | |
1261 } | |
1262 | |
1263 /* | |
1264 * AES_Encrypt | |
1265 * | |
1266 * Encrypt an arbitrary-length buffer. The output buffer must already be | |
1267 * allocated to at least inputLen. | |
1268 */ | |
1269 SECStatus | |
1270 AES_Encrypt(AESContext *cx, unsigned char *output, | |
1271 unsigned int *outputLen, unsigned int maxOutputLen, | |
1272 const unsigned char *input, unsigned int inputLen) | |
1273 { | |
1274 int blocksize; | |
1275 /* Check args */ | |
1276 if (cx == NULL || output == NULL || (input == NULL && inputLen != 0)) { | |
1277 PORT_SetError(SEC_ERROR_INVALID_ARGS); | |
1278 return SECFailure; | |
1279 } | |
1280 blocksize = 4 * cx->Nb; | |
1281 if (cx->isBlock && (inputLen % blocksize != 0)) { | |
1282 PORT_SetError(SEC_ERROR_INPUT_LEN); | |
1283 return SECFailure; | |
1284 } | |
1285 if (maxOutputLen < inputLen) { | |
1286 PORT_SetError(SEC_ERROR_OUTPUT_LEN); | |
1287 return SECFailure; | |
1288 } | |
1289 *outputLen = inputLen; | |
1290 return (*cx->worker)(cx->worker_cx, output, outputLen, maxOutputLen, | |
1291 input, inputLen, blocksize); | |
1292 } | |
1293 | |
1294 /* | |
1295 * AES_Decrypt | |
1296 * | |
1297 * Decrypt and arbitrary-length buffer. The output buffer must already be | |
1298 * allocated to at least inputLen. | |
1299 */ | |
1300 SECStatus | |
1301 AES_Decrypt(AESContext *cx, unsigned char *output, | |
1302 unsigned int *outputLen, unsigned int maxOutputLen, | |
1303 const unsigned char *input, unsigned int inputLen) | |
1304 { | |
1305 int blocksize; | |
1306 /* Check args */ | |
1307 if (cx == NULL || output == NULL || (input == NULL && inputLen != 0)) { | |
1308 PORT_SetError(SEC_ERROR_INVALID_ARGS); | |
1309 return SECFailure; | |
1310 } | |
1311 blocksize = 4 * cx->Nb; | |
1312 if (cx->isBlock && (inputLen % blocksize != 0)) { | |
1313 PORT_SetError(SEC_ERROR_INPUT_LEN); | |
1314 return SECFailure; | |
1315 } | |
1316 if (maxOutputLen < inputLen) { | |
1317 PORT_SetError(SEC_ERROR_OUTPUT_LEN); | |
1318 return SECFailure; | |
1319 } | |
1320 *outputLen = inputLen; | |
1321 return (*cx->worker)(cx->worker_cx, output, outputLen, maxOutputLen, | |
1322 input, inputLen, blocksize); | |
1323 } |