andre@0: /* This Source Code Form is subject to the terms of the Mozilla Public andre@0: * License, v. 2.0. If a copy of the MPL was not distributed with this andre@0: * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ andre@0: andre@0: #include "ecp.h" andre@0: #include "mpi.h" andre@0: #include "mplogic.h" andre@0: #include "mpi-priv.h" andre@0: andre@0: /* Fast modular reduction for p256 = 2^256 - 2^224 + 2^192+ 2^96 - 1. a can be r. andre@0: * Uses algorithm 2.29 from Hankerson, Menezes, Vanstone. Guide to andre@0: * Elliptic Curve Cryptography. */ andre@0: static mp_err andre@0: ec_GFp_nistp256_mod(const mp_int *a, mp_int *r, const GFMethod *meth) andre@0: { andre@0: mp_err res = MP_OKAY; andre@0: mp_size a_used = MP_USED(a); andre@0: int a_bits = mpl_significant_bits(a); andre@0: mp_digit carry; andre@0: andre@0: #ifdef ECL_THIRTY_TWO_BIT andre@0: mp_digit a8=0, a9=0, a10=0, a11=0, a12=0, a13=0, a14=0, a15=0; andre@0: mp_digit r0, r1, r2, r3, r4, r5, r6, r7; andre@0: int r8; /* must be a signed value ! */ andre@0: #else andre@0: mp_digit a4=0, a5=0, a6=0, a7=0; andre@0: mp_digit a4h, a4l, a5h, a5l, a6h, a6l, a7h, a7l; andre@0: mp_digit r0, r1, r2, r3; andre@0: int r4; /* must be a signed value ! */ andre@0: #endif andre@0: /* for polynomials larger than twice the field size andre@0: * use regular reduction */ andre@0: if (a_bits < 256) { andre@0: if (a == r) return MP_OKAY; andre@0: return mp_copy(a,r); andre@0: } andre@0: if (a_bits > 512) { andre@0: MP_CHECKOK(mp_mod(a, &meth->irr, r)); andre@0: } else { andre@0: andre@0: #ifdef ECL_THIRTY_TWO_BIT andre@0: switch (a_used) { andre@0: case 16: andre@0: a15 = MP_DIGIT(a,15); andre@0: case 15: andre@0: a14 = MP_DIGIT(a,14); andre@0: case 14: andre@0: a13 = MP_DIGIT(a,13); andre@0: case 13: andre@0: a12 = MP_DIGIT(a,12); andre@0: case 12: andre@0: a11 = MP_DIGIT(a,11); andre@0: case 11: andre@0: a10 = MP_DIGIT(a,10); andre@0: case 10: andre@0: a9 = MP_DIGIT(a,9); andre@0: case 9: andre@0: a8 = MP_DIGIT(a,8); andre@0: } andre@0: andre@0: r0 = MP_DIGIT(a,0); andre@0: r1 = MP_DIGIT(a,1); andre@0: r2 = MP_DIGIT(a,2); andre@0: r3 = MP_DIGIT(a,3); andre@0: r4 = MP_DIGIT(a,4); andre@0: r5 = MP_DIGIT(a,5); andre@0: r6 = MP_DIGIT(a,6); andre@0: r7 = MP_DIGIT(a,7); andre@0: andre@0: /* sum 1 */ andre@0: MP_ADD_CARRY(r3, a11, r3, 0, carry); andre@0: MP_ADD_CARRY(r4, a12, r4, carry, carry); andre@0: MP_ADD_CARRY(r5, a13, r5, carry, carry); andre@0: MP_ADD_CARRY(r6, a14, r6, carry, carry); andre@0: MP_ADD_CARRY(r7, a15, r7, carry, carry); andre@0: r8 = carry; andre@0: MP_ADD_CARRY(r3, a11, r3, 0, carry); andre@0: MP_ADD_CARRY(r4, a12, r4, carry, carry); andre@0: MP_ADD_CARRY(r5, a13, r5, carry, carry); andre@0: MP_ADD_CARRY(r6, a14, r6, carry, carry); andre@0: MP_ADD_CARRY(r7, a15, r7, carry, carry); andre@0: r8 += carry; andre@0: /* sum 2 */ andre@0: MP_ADD_CARRY(r3, a12, r3, 0, carry); andre@0: MP_ADD_CARRY(r4, a13, r4, carry, carry); andre@0: MP_ADD_CARRY(r5, a14, r5, carry, carry); andre@0: MP_ADD_CARRY(r6, a15, r6, carry, carry); andre@0: MP_ADD_CARRY(r7, 0, r7, carry, carry); andre@0: r8 += carry; andre@0: /* combine last bottom of sum 3 with second sum 2 */ andre@0: MP_ADD_CARRY(r0, a8, r0, 0, carry); andre@0: MP_ADD_CARRY(r1, a9, r1, carry, carry); andre@0: MP_ADD_CARRY(r2, a10, r2, carry, carry); andre@0: MP_ADD_CARRY(r3, a12, r3, carry, carry); andre@0: MP_ADD_CARRY(r4, a13, r4, carry, carry); andre@0: MP_ADD_CARRY(r5, a14, r5, carry, carry); andre@0: MP_ADD_CARRY(r6, a15, r6, carry, carry); andre@0: MP_ADD_CARRY(r7, a15, r7, carry, carry); /* from sum 3 */ andre@0: r8 += carry; andre@0: /* sum 3 (rest of it)*/ andre@0: MP_ADD_CARRY(r6, a14, r6, 0, carry); andre@0: MP_ADD_CARRY(r7, 0, r7, carry, carry); andre@0: r8 += carry; andre@0: /* sum 4 (rest of it)*/ andre@0: MP_ADD_CARRY(r0, a9, r0, 0, carry); andre@0: MP_ADD_CARRY(r1, a10, r1, carry, carry); andre@0: MP_ADD_CARRY(r2, a11, r2, carry, carry); andre@0: MP_ADD_CARRY(r3, a13, r3, carry, carry); andre@0: MP_ADD_CARRY(r4, a14, r4, carry, carry); andre@0: MP_ADD_CARRY(r5, a15, r5, carry, carry); andre@0: MP_ADD_CARRY(r6, a13, r6, carry, carry); andre@0: MP_ADD_CARRY(r7, a8, r7, carry, carry); andre@0: r8 += carry; andre@0: /* diff 5 */ andre@0: MP_SUB_BORROW(r0, a11, r0, 0, carry); andre@0: MP_SUB_BORROW(r1, a12, r1, carry, carry); andre@0: MP_SUB_BORROW(r2, a13, r2, carry, carry); andre@0: MP_SUB_BORROW(r3, 0, r3, carry, carry); andre@0: MP_SUB_BORROW(r4, 0, r4, carry, carry); andre@0: MP_SUB_BORROW(r5, 0, r5, carry, carry); andre@0: MP_SUB_BORROW(r6, a8, r6, carry, carry); andre@0: MP_SUB_BORROW(r7, a10, r7, carry, carry); andre@0: r8 -= carry; andre@0: /* diff 6 */ andre@0: MP_SUB_BORROW(r0, a12, r0, 0, carry); andre@0: MP_SUB_BORROW(r1, a13, r1, carry, carry); andre@0: MP_SUB_BORROW(r2, a14, r2, carry, carry); andre@0: MP_SUB_BORROW(r3, a15, r3, carry, carry); andre@0: MP_SUB_BORROW(r4, 0, r4, carry, carry); andre@0: MP_SUB_BORROW(r5, 0, r5, carry, carry); andre@0: MP_SUB_BORROW(r6, a9, r6, carry, carry); andre@0: MP_SUB_BORROW(r7, a11, r7, carry, carry); andre@0: r8 -= carry; andre@0: /* diff 7 */ andre@0: MP_SUB_BORROW(r0, a13, r0, 0, carry); andre@0: MP_SUB_BORROW(r1, a14, r1, carry, carry); andre@0: MP_SUB_BORROW(r2, a15, r2, carry, carry); andre@0: MP_SUB_BORROW(r3, a8, r3, carry, carry); andre@0: MP_SUB_BORROW(r4, a9, r4, carry, carry); andre@0: MP_SUB_BORROW(r5, a10, r5, carry, carry); andre@0: MP_SUB_BORROW(r6, 0, r6, carry, carry); andre@0: MP_SUB_BORROW(r7, a12, r7, carry, carry); andre@0: r8 -= carry; andre@0: /* diff 8 */ andre@0: MP_SUB_BORROW(r0, a14, r0, 0, carry); andre@0: MP_SUB_BORROW(r1, a15, r1, carry, carry); andre@0: MP_SUB_BORROW(r2, 0, r2, carry, carry); andre@0: MP_SUB_BORROW(r3, a9, r3, carry, carry); andre@0: MP_SUB_BORROW(r4, a10, r4, carry, carry); andre@0: MP_SUB_BORROW(r5, a11, r5, carry, carry); andre@0: MP_SUB_BORROW(r6, 0, r6, carry, carry); andre@0: MP_SUB_BORROW(r7, a13, r7, carry, carry); andre@0: r8 -= carry; andre@0: andre@0: /* reduce the overflows */ andre@0: while (r8 > 0) { andre@0: mp_digit r8_d = r8; andre@0: MP_ADD_CARRY(r0, r8_d, r0, 0, carry); andre@0: MP_ADD_CARRY(r1, 0, r1, carry, carry); andre@0: MP_ADD_CARRY(r2, 0, r2, carry, carry); andre@0: MP_ADD_CARRY(r3, 0-r8_d, r3, carry, carry); andre@0: MP_ADD_CARRY(r4, MP_DIGIT_MAX, r4, carry, carry); andre@0: MP_ADD_CARRY(r5, MP_DIGIT_MAX, r5, carry, carry); andre@0: MP_ADD_CARRY(r6, 0-(r8_d+1), r6, carry, carry); andre@0: MP_ADD_CARRY(r7, (r8_d-1), r7, carry, carry); andre@0: r8 = carry; andre@0: } andre@0: andre@0: /* reduce the underflows */ andre@0: while (r8 < 0) { andre@0: mp_digit r8_d = -r8; andre@0: MP_SUB_BORROW(r0, r8_d, r0, 0, carry); andre@0: MP_SUB_BORROW(r1, 0, r1, carry, carry); andre@0: MP_SUB_BORROW(r2, 0, r2, carry, carry); andre@0: MP_SUB_BORROW(r3, 0-r8_d, r3, carry, carry); andre@0: MP_SUB_BORROW(r4, MP_DIGIT_MAX, r4, carry, carry); andre@0: MP_SUB_BORROW(r5, MP_DIGIT_MAX, r5, carry, carry); andre@0: MP_SUB_BORROW(r6, 0-(r8_d+1), r6, carry, carry); andre@0: MP_SUB_BORROW(r7, (r8_d-1), r7, carry, carry); andre@0: r8 = 0-carry; andre@0: } andre@0: if (a != r) { andre@0: MP_CHECKOK(s_mp_pad(r,8)); andre@0: } andre@0: MP_SIGN(r) = MP_ZPOS; andre@0: MP_USED(r) = 8; andre@0: andre@0: MP_DIGIT(r,7) = r7; andre@0: MP_DIGIT(r,6) = r6; andre@0: MP_DIGIT(r,5) = r5; andre@0: MP_DIGIT(r,4) = r4; andre@0: MP_DIGIT(r,3) = r3; andre@0: MP_DIGIT(r,2) = r2; andre@0: MP_DIGIT(r,1) = r1; andre@0: MP_DIGIT(r,0) = r0; andre@0: andre@0: /* final reduction if necessary */ andre@0: if ((r7 == MP_DIGIT_MAX) && andre@0: ((r6 > 1) || ((r6 == 1) && andre@0: (r5 || r4 || r3 || andre@0: ((r2 == MP_DIGIT_MAX) && (r1 == MP_DIGIT_MAX) andre@0: && (r0 == MP_DIGIT_MAX)))))) { andre@0: MP_CHECKOK(mp_sub(r, &meth->irr, r)); andre@0: } andre@0: andre@0: s_mp_clamp(r); andre@0: #else andre@0: switch (a_used) { andre@0: case 8: andre@0: a7 = MP_DIGIT(a,7); andre@0: case 7: andre@0: a6 = MP_DIGIT(a,6); andre@0: case 6: andre@0: a5 = MP_DIGIT(a,5); andre@0: case 5: andre@0: a4 = MP_DIGIT(a,4); andre@0: } andre@0: a7l = a7 << 32; andre@0: a7h = a7 >> 32; andre@0: a6l = a6 << 32; andre@0: a6h = a6 >> 32; andre@0: a5l = a5 << 32; andre@0: a5h = a5 >> 32; andre@0: a4l = a4 << 32; andre@0: a4h = a4 >> 32; andre@0: r3 = MP_DIGIT(a,3); andre@0: r2 = MP_DIGIT(a,2); andre@0: r1 = MP_DIGIT(a,1); andre@0: r0 = MP_DIGIT(a,0); andre@0: andre@0: /* sum 1 */ andre@0: MP_ADD_CARRY(r1, a5h << 32, r1, 0, carry); andre@0: MP_ADD_CARRY(r2, a6, r2, carry, carry); andre@0: MP_ADD_CARRY(r3, a7, r3, carry, carry); andre@0: r4 = carry; andre@0: MP_ADD_CARRY(r1, a5h << 32, r1, 0, carry); andre@0: MP_ADD_CARRY(r2, a6, r2, carry, carry); andre@0: MP_ADD_CARRY(r3, a7, r3, carry, carry); andre@0: r4 += carry; andre@0: /* sum 2 */ andre@0: MP_ADD_CARRY(r1, a6l, r1, 0, carry); andre@0: MP_ADD_CARRY(r2, a6h | a7l, r2, carry, carry); andre@0: MP_ADD_CARRY(r3, a7h, r3, carry, carry); andre@0: r4 += carry; andre@0: MP_ADD_CARRY(r1, a6l, r1, 0, carry); andre@0: MP_ADD_CARRY(r2, a6h | a7l, r2, carry, carry); andre@0: MP_ADD_CARRY(r3, a7h, r3, carry, carry); andre@0: r4 += carry; andre@0: andre@0: /* sum 3 */ andre@0: MP_ADD_CARRY(r0, a4, r0, 0, carry); andre@0: MP_ADD_CARRY(r1, a5l >> 32, r1, carry, carry); andre@0: MP_ADD_CARRY(r2, 0, r2, carry, carry); andre@0: MP_ADD_CARRY(r3, a7, r3, carry, carry); andre@0: r4 += carry; andre@0: /* sum 4 */ andre@0: MP_ADD_CARRY(r0, a4h | a5l, r0, 0, carry); andre@0: MP_ADD_CARRY(r1, a5h|(a6h<<32), r1, carry, carry); andre@0: MP_ADD_CARRY(r2, a7, r2, carry, carry); andre@0: MP_ADD_CARRY(r3, a6h | a4l, r3, carry, carry); andre@0: r4 += carry; andre@0: /* diff 5 */ andre@0: MP_SUB_BORROW(r0, a5h | a6l, r0, 0, carry); andre@0: MP_SUB_BORROW(r1, a6h, r1, carry, carry); andre@0: MP_SUB_BORROW(r2, 0, r2, carry, carry); andre@0: MP_SUB_BORROW(r3, (a4l>>32)|a5l,r3, carry, carry); andre@0: r4 -= carry; andre@0: /* diff 6 */ andre@0: MP_SUB_BORROW(r0, a6, r0, 0, carry); andre@0: MP_SUB_BORROW(r1, a7, r1, carry, carry); andre@0: MP_SUB_BORROW(r2, 0, r2, carry, carry); andre@0: MP_SUB_BORROW(r3, a4h|(a5h<<32),r3, carry, carry); andre@0: r4 -= carry; andre@0: /* diff 7 */ andre@0: MP_SUB_BORROW(r0, a6h|a7l, r0, 0, carry); andre@0: MP_SUB_BORROW(r1, a7h|a4l, r1, carry, carry); andre@0: MP_SUB_BORROW(r2, a4h|a5l, r2, carry, carry); andre@0: MP_SUB_BORROW(r3, a6l, r3, carry, carry); andre@0: r4 -= carry; andre@0: /* diff 8 */ andre@0: MP_SUB_BORROW(r0, a7, r0, 0, carry); andre@0: MP_SUB_BORROW(r1, a4h<<32, r1, carry, carry); andre@0: MP_SUB_BORROW(r2, a5, r2, carry, carry); andre@0: MP_SUB_BORROW(r3, a6h<<32, r3, carry, carry); andre@0: r4 -= carry; andre@0: andre@0: /* reduce the overflows */ andre@0: while (r4 > 0) { andre@0: mp_digit r4_long = r4; andre@0: mp_digit r4l = (r4_long << 32); andre@0: MP_ADD_CARRY(r0, r4_long, r0, 0, carry); andre@0: MP_ADD_CARRY(r1, 0-r4l, r1, carry, carry); andre@0: MP_ADD_CARRY(r2, MP_DIGIT_MAX, r2, carry, carry); andre@0: MP_ADD_CARRY(r3, r4l-r4_long-1,r3, carry, carry); andre@0: r4 = carry; andre@0: } andre@0: andre@0: /* reduce the underflows */ andre@0: while (r4 < 0) { andre@0: mp_digit r4_long = -r4; andre@0: mp_digit r4l = (r4_long << 32); andre@0: MP_SUB_BORROW(r0, r4_long, r0, 0, carry); andre@0: MP_SUB_BORROW(r1, 0-r4l, r1, carry, carry); andre@0: MP_SUB_BORROW(r2, MP_DIGIT_MAX, r2, carry, carry); andre@0: MP_SUB_BORROW(r3, r4l-r4_long-1,r3, carry, carry); andre@0: r4 = 0-carry; andre@0: } andre@0: andre@0: if (a != r) { andre@0: MP_CHECKOK(s_mp_pad(r,4)); andre@0: } andre@0: MP_SIGN(r) = MP_ZPOS; andre@0: MP_USED(r) = 4; andre@0: andre@0: MP_DIGIT(r,3) = r3; andre@0: MP_DIGIT(r,2) = r2; andre@0: MP_DIGIT(r,1) = r1; andre@0: MP_DIGIT(r,0) = r0; andre@0: andre@0: /* final reduction if necessary */ andre@0: if ((r3 > 0xFFFFFFFF00000001ULL) || andre@0: ((r3 == 0xFFFFFFFF00000001ULL) && andre@0: (r2 || (r1 >> 32)|| andre@0: (r1 == 0xFFFFFFFFULL && r0 == MP_DIGIT_MAX)))) { andre@0: /* very rare, just use mp_sub */ andre@0: MP_CHECKOK(mp_sub(r, &meth->irr, r)); andre@0: } andre@0: andre@0: s_mp_clamp(r); andre@0: #endif andre@0: } andre@0: andre@0: CLEANUP: andre@0: return res; andre@0: } andre@0: andre@0: /* Compute the square of polynomial a, reduce modulo p256. Store the andre@0: * result in r. r could be a. Uses optimized modular reduction for p256. andre@0: */ andre@0: static mp_err andre@0: ec_GFp_nistp256_sqr(const mp_int *a, mp_int *r, const GFMethod *meth) andre@0: { andre@0: mp_err res = MP_OKAY; andre@0: andre@0: MP_CHECKOK(mp_sqr(a, r)); andre@0: MP_CHECKOK(ec_GFp_nistp256_mod(r, r, meth)); andre@0: CLEANUP: andre@0: return res; andre@0: } andre@0: andre@0: /* Compute the product of two polynomials a and b, reduce modulo p256. andre@0: * Store the result in r. r could be a or b; a could be b. Uses andre@0: * optimized modular reduction for p256. */ andre@0: static mp_err andre@0: ec_GFp_nistp256_mul(const mp_int *a, const mp_int *b, mp_int *r, andre@0: const GFMethod *meth) andre@0: { andre@0: mp_err res = MP_OKAY; andre@0: andre@0: MP_CHECKOK(mp_mul(a, b, r)); andre@0: MP_CHECKOK(ec_GFp_nistp256_mod(r, r, meth)); andre@0: CLEANUP: andre@0: return res; andre@0: } andre@0: andre@0: /* Wire in fast field arithmetic and precomputation of base point for andre@0: * named curves. */ andre@0: mp_err andre@0: ec_group_set_gfp256(ECGroup *group, ECCurveName name) andre@0: { andre@0: if (name == ECCurve_NIST_P256) { andre@0: group->meth->field_mod = &ec_GFp_nistp256_mod; andre@0: group->meth->field_mul = &ec_GFp_nistp256_mul; andre@0: group->meth->field_sqr = &ec_GFp_nistp256_sqr; andre@0: } andre@0: return MP_OKAY; andre@0: }