3 * Multi Precision Integer functions
5 * Copyright 2004 Michael Jung
6 * Based on public domain code by Tom St Denis (tomstdenis@iahu.ca)
8 * This library is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2.1 of the License, or (at your option) any later version.
13 * This library is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Lesser General Public License for more details.
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with this library; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
24 * This file contains code from the LibTomCrypt cryptographic
25 * library written by Tom St Denis (tomstdenis@iahu.ca). LibTomCrypt
26 * is in the public domain. The code in this file is tailored to
27 * special requirements. Take a look at http://libtomcrypt.org for the
34 /* Known optimal configurations
35 CPU /Compiler /MUL CUTOFF/SQR CUTOFF
36 -------------------------------------------------------------
37 Intel P4 Northwood /GCC v3.4.1 / 88/ 128/LTM 0.32 ;-)
39 static const int KARATSUBA_MUL_CUTOFF = 88, /* Min. number of digits before Karatsuba multiplication is used. */
40 KARATSUBA_SQR_CUTOFF = 128; /* Min. number of digits before Karatsuba squaring is used. */
42 static void bn_reverse(unsigned char *s, int len);
43 static int s_mp_add(mp_int *a, mp_int *b, mp_int *c);
44 static int s_mp_exptmod (const mp_int * G, const mp_int * X, mp_int * P, mp_int * Y);
45 #define s_mp_mul(a, b, c) s_mp_mul_digs(a, b, c, (a)->used + (b)->used + 1)
46 static int s_mp_mul_digs(const mp_int *a, const mp_int *b, mp_int *c, int digs);
47 static int s_mp_mul_high_digs(const mp_int *a, const mp_int *b, mp_int *c, int digs);
48 static int s_mp_sqr(const mp_int *a, mp_int *b);
49 static int s_mp_sub(const mp_int *a, const mp_int *b, mp_int *c);
50 static int mp_exptmod_fast(const mp_int *G, const mp_int *X, mp_int *P, mp_int *Y, int mode);
51 static int mp_invmod_slow (const mp_int * a, mp_int * b, mp_int * c);
52 static int mp_karatsuba_mul(const mp_int *a, const mp_int *b, mp_int *c);
53 static int mp_karatsuba_sqr(const mp_int *a, mp_int *b);
55 /* grow as required */
56 static int mp_grow (mp_int * a, int size)
61 /* if the alloc size is smaller alloc more ram */
62 if (a->alloc < size) {
63 /* ensure there are always at least MP_PREC digits extra on top */
64 size += (MP_PREC * 2) - (size % MP_PREC);
66 /* reallocate the array a->dp
68 * We store the return in a temporary variable
69 * in case the operation failed we don't want
70 * to overwrite the dp member of a.
72 tmp = realloc (a->dp, sizeof (mp_digit) * size);
74 /* reallocation failed but "a" is still valid [can be freed] */
78 /* reallocation succeeded so set a->dp */
81 /* zero excess digits */
84 for (; i < a->alloc; i++) {
92 static int mp_div_2(const mp_int * a, mp_int * b)
97 if (b->alloc < a->used) {
98 if ((res = mp_grow (b, a->used)) != MP_OKAY) {
106 register mp_digit r, rr, *tmpa, *tmpb;
109 tmpa = a->dp + b->used - 1;
112 tmpb = b->dp + b->used - 1;
116 for (x = b->used - 1; x >= 0; x--) {
117 /* get the carry for the next iteration */
120 /* shift the current digit, add in carry and store */
121 *tmpb-- = (*tmpa-- >> 1) | (r << (DIGIT_BIT - 1));
123 /* forward carry to next iteration */
127 /* zero excess digits */
128 tmpb = b->dp + b->used;
129 for (x = b->used; x < oldused; x++) {
138 /* swap the elements of two integers, for cases where you can't simply swap the
139 * mp_int pointers around
142 mp_exch (mp_int * a, mp_int * b)
151 /* init a new mp_int */
152 static int mp_init (mp_int * a)
156 /* allocate memory required and clear it */
157 a->dp = malloc (sizeof (mp_digit) * MP_PREC);
162 /* set the digits to zero */
163 for (i = 0; i < MP_PREC; i++) {
167 /* set the used to zero, allocated digits to the default precision
168 * and sign to positive */
176 /* init an mp_init for a given size */
177 static int mp_init_size (mp_int * a, int size)
181 /* pad size so there are always extra digits */
182 size += (MP_PREC * 2) - (size % MP_PREC);
185 a->dp = malloc (sizeof (mp_digit) * size);
190 /* set the members */
195 /* zero the digits */
196 for (x = 0; x < size; x++) {
203 /* clear one (frees) */
205 mp_clear (mp_int * a)
209 /* only do anything if a hasn't been freed previously */
211 /* first zero the digits */
212 for (i = 0; i < a->used; i++) {
219 /* reset members to make debugging easier */
221 a->alloc = a->used = 0;
232 memset (a->dp, 0, sizeof (mp_digit) * a->alloc);
237 * Simple function copies the input and fixes the sign to positive
240 mp_abs (const mp_int * a, mp_int * b)
246 if ((res = mp_copy (a, b)) != MP_OKAY) {
251 /* force the sign of b to positive */
257 /* computes the modular inverse via binary extended euclidean algorithm,
258 * that is c = 1/a mod b
260 * Based on slow invmod except this is optimized for the case where b is
261 * odd as per HAC Note 14.64 on pp. 610
264 fast_mp_invmod (const mp_int * a, mp_int * b, mp_int * c)
266 mp_int x, y, u, v, B, D;
269 /* 2. [modified] b must be odd */
270 if (mp_iseven (b) == 1) {
274 /* init all our temps */
275 if ((res = mp_init_multi(&x, &y, &u, &v, &B, &D, NULL)) != MP_OKAY) {
279 /* x == modulus, y == value to invert */
280 if ((res = mp_copy (b, &x)) != MP_OKAY) {
284 /* we need y = |a| */
285 if ((res = mp_abs (a, &y)) != MP_OKAY) {
289 /* 3. u=x, v=y, A=1, B=0, C=0,D=1 */
290 if ((res = mp_copy (&x, &u)) != MP_OKAY) {
293 if ((res = mp_copy (&y, &v)) != MP_OKAY) {
299 /* 4. while u is even do */
300 while (mp_iseven (&u) == 1) {
302 if ((res = mp_div_2 (&u, &u)) != MP_OKAY) {
305 /* 4.2 if B is odd then */
306 if (mp_isodd (&B) == 1) {
307 if ((res = mp_sub (&B, &x, &B)) != MP_OKAY) {
312 if ((res = mp_div_2 (&B, &B)) != MP_OKAY) {
317 /* 5. while v is even do */
318 while (mp_iseven (&v) == 1) {
320 if ((res = mp_div_2 (&v, &v)) != MP_OKAY) {
323 /* 5.2 if D is odd then */
324 if (mp_isodd (&D) == 1) {
326 if ((res = mp_sub (&D, &x, &D)) != MP_OKAY) {
331 if ((res = mp_div_2 (&D, &D)) != MP_OKAY) {
336 /* 6. if u >= v then */
337 if (mp_cmp (&u, &v) != MP_LT) {
338 /* u = u - v, B = B - D */
339 if ((res = mp_sub (&u, &v, &u)) != MP_OKAY) {
343 if ((res = mp_sub (&B, &D, &B)) != MP_OKAY) {
347 /* v - v - u, D = D - B */
348 if ((res = mp_sub (&v, &u, &v)) != MP_OKAY) {
352 if ((res = mp_sub (&D, &B, &D)) != MP_OKAY) {
357 /* if not zero goto step 4 */
358 if (mp_iszero (&u) == 0) {
362 /* now a = C, b = D, gcd == g*v */
364 /* if v != 1 then there is no inverse */
365 if (mp_cmp_d (&v, 1) != MP_EQ) {
370 /* b is now the inverse */
372 while (D.sign == MP_NEG) {
373 if ((res = mp_add (&D, b, &D)) != MP_OKAY) {
381 __ERR:mp_clear_multi (&x, &y, &u, &v, &B, &D, NULL);
385 /* computes xR**-1 == x (mod N) via Montgomery Reduction
387 * This is an optimized implementation of montgomery_reduce
388 * which uses the comba method to quickly calculate the columns of the
391 * Based on Algorithm 14.32 on pp.601 of HAC.
394 fast_mp_montgomery_reduce (mp_int * x, const mp_int * n, mp_digit rho)
397 mp_word W[MP_WARRAY];
399 /* get old used count */
402 /* grow a as required */
403 if (x->alloc < n->used + 1) {
404 if ((res = mp_grow (x, n->used + 1)) != MP_OKAY) {
409 /* first we have to get the digits of the input into
410 * an array of double precision words W[...]
413 register mp_word *_W;
414 register mp_digit *tmpx;
416 /* alias for the W[] array */
419 /* alias for the digits of x*/
422 /* copy the digits of a into W[0..a->used-1] */
423 for (ix = 0; ix < x->used; ix++) {
427 /* zero the high words of W[a->used..m->used*2] */
428 for (; ix < n->used * 2 + 1; ix++) {
433 /* now we proceed to zero successive digits
434 * from the least significant upwards
436 for (ix = 0; ix < n->used; ix++) {
437 /* mu = ai * m' mod b
439 * We avoid a double precision multiplication (which isn't required)
440 * by casting the value down to a mp_digit. Note this requires
441 * that W[ix-1] have the carry cleared (see after the inner loop)
443 register mp_digit mu;
444 mu = (mp_digit) (((W[ix] & MP_MASK) * rho) & MP_MASK);
446 /* a = a + mu * m * b**i
448 * This is computed in place and on the fly. The multiplication
449 * by b**i is handled by offsetting which columns the results
452 * Note the comba method normally doesn't handle carries in the
453 * inner loop In this case we fix the carry from the previous
454 * column since the Montgomery reduction requires digits of the
455 * result (so far) [see above] to work. This is
456 * handled by fixing up one carry after the inner loop. The
457 * carry fixups are done in order so after these loops the
458 * first m->used words of W[] have the carries fixed
462 register mp_digit *tmpn;
463 register mp_word *_W;
465 /* alias for the digits of the modulus */
468 /* Alias for the columns set by an offset of ix */
472 for (iy = 0; iy < n->used; iy++) {
473 *_W++ += ((mp_word)mu) * ((mp_word)*tmpn++);
477 /* now fix carry for next digit, W[ix+1] */
478 W[ix + 1] += W[ix] >> ((mp_word) DIGIT_BIT);
481 /* now we have to propagate the carries and
482 * shift the words downward [all those least
483 * significant digits we zeroed].
486 register mp_digit *tmpx;
487 register mp_word *_W, *_W1;
489 /* nox fix rest of carries */
491 /* alias for current word */
494 /* alias for next word, where the carry goes */
497 for (; ix <= n->used * 2 + 1; ix++) {
498 *_W++ += *_W1++ >> ((mp_word) DIGIT_BIT);
501 /* copy out, A = A/b**n
503 * The result is A/b**n but instead of converting from an
504 * array of mp_word to mp_digit than calling mp_rshd
505 * we just copy them in the right order
508 /* alias for destination word */
511 /* alias for shifted double precision result */
514 for (ix = 0; ix < n->used + 1; ix++) {
515 *tmpx++ = (mp_digit)(*_W++ & ((mp_word) MP_MASK));
518 /* zero oldused digits, if the input a was larger than
519 * m->used+1 we'll have to clear the digits
521 for (; ix < olduse; ix++) {
526 /* set the max used and clamp */
527 x->used = n->used + 1;
530 /* if A >= m then A = A - m */
531 if (mp_cmp_mag (x, n) != MP_LT) {
532 return s_mp_sub (x, n, x);
537 /* Fast (comba) multiplier
539 * This is the fast column-array [comba] multiplier. It is
540 * designed to compute the columns of the product first
541 * then handle the carries afterwards. This has the effect
542 * of making the nested loops that compute the columns very
543 * simple and schedulable on super-scalar processors.
545 * This has been modified to produce a variable number of
546 * digits of output so if say only a half-product is required
547 * you don't have to compute the upper half (a feature
548 * required for fast Barrett reduction).
550 * Based on Algorithm 14.12 on pp.595 of HAC.
554 fast_s_mp_mul_digs (const mp_int * a, const mp_int * b, mp_int * c, int digs)
556 int olduse, res, pa, ix, iz;
557 mp_digit W[MP_WARRAY];
560 /* grow the destination as required */
561 if (c->alloc < digs) {
562 if ((res = mp_grow (c, digs)) != MP_OKAY) {
567 /* number of output digits to produce */
568 pa = MIN(digs, a->used + b->used);
570 /* clear the carry */
572 for (ix = 0; ix <= pa; ix++) {
575 mp_digit *tmpx, *tmpy;
577 /* get offsets into the two bignums */
578 ty = MIN(b->used-1, ix);
581 /* setup temp aliases */
585 /* This is the number of times the loop will iterate, essentially it's
586 while (tx++ < a->used && ty-- >= 0) { ... }
588 iy = MIN(a->used-tx, ty+1);
591 for (iz = 0; iz < iy; ++iz) {
592 _W += ((mp_word)*tmpx++)*((mp_word)*tmpy--);
596 W[ix] = ((mp_digit)_W) & MP_MASK;
598 /* make next carry */
599 _W = _W >> ((mp_word)DIGIT_BIT);
607 register mp_digit *tmpc;
609 for (ix = 0; ix < digs; ix++) {
610 /* now extract the previous digit [below the carry] */
614 /* clear unused digits [that existed in the old copy of c] */
615 for (; ix < olduse; ix++) {
623 /* this is a modified version of fast_s_mul_digs that only produces
624 * output digits *above* digs. See the comments for fast_s_mul_digs
625 * to see how it works.
627 * This is used in the Barrett reduction since for one of the multiplications
628 * only the higher digits were needed. This essentially halves the work.
630 * Based on Algorithm 14.12 on pp.595 of HAC.
633 fast_s_mp_mul_high_digs (const mp_int * a, const mp_int * b, mp_int * c, int digs)
635 int olduse, res, pa, ix, iz;
636 mp_digit W[MP_WARRAY];
639 /* grow the destination as required */
640 pa = a->used + b->used;
642 if ((res = mp_grow (c, pa)) != MP_OKAY) {
647 /* number of output digits to produce */
648 pa = a->used + b->used;
650 for (ix = digs; ix <= pa; ix++) {
652 mp_digit *tmpx, *tmpy;
654 /* get offsets into the two bignums */
655 ty = MIN(b->used-1, ix);
658 /* setup temp aliases */
662 /* This is the number of times the loop will iterate, essentially it's
663 while (tx++ < a->used && ty-- >= 0) { ... }
665 iy = MIN(a->used-tx, ty+1);
668 for (iz = 0; iz < iy; iz++) {
669 _W += ((mp_word)*tmpx++)*((mp_word)*tmpy--);
673 W[ix] = ((mp_digit)_W) & MP_MASK;
675 /* make next carry */
676 _W = _W >> ((mp_word)DIGIT_BIT);
684 register mp_digit *tmpc;
687 for (ix = digs; ix <= pa; ix++) {
688 /* now extract the previous digit [below the carry] */
692 /* clear unused digits [that existed in the old copy of c] */
693 for (; ix < olduse; ix++) {
703 * This is the comba method where the columns of the product
704 * are computed first then the carries are computed. This
705 * has the effect of making a very simple inner loop that
706 * is executed the most
708 * W2 represents the outer products and W the inner.
710 * A further optimizations is made because the inner
711 * products are of the form "A * B * 2". The *2 part does
712 * not need to be computed until the end which is good
713 * because 64-bit shifts are slow!
715 * Based on Algorithm 14.16 on pp.597 of HAC.
718 /* the jist of squaring...
720 you do like mult except the offset of the tmpx [one that starts closer to zero]
721 can't equal the offset of tmpy. So basically you set up iy like before then you min it with
722 (ty-tx) so that it never happens. You double all those you add in the inner loop
724 After that loop you do the squares and add them in.
726 Remove W2 and don't memset W
730 static int fast_s_mp_sqr (const mp_int * a, mp_int * b)
732 int olduse, res, pa, ix, iz;
733 mp_digit W[MP_WARRAY], *tmpx;
736 /* grow the destination as required */
737 pa = a->used + a->used;
739 if ((res = mp_grow (b, pa)) != MP_OKAY) {
744 /* number of output digits to produce */
746 for (ix = 0; ix <= pa; ix++) {
754 /* get offsets into the two bignums */
755 ty = MIN(a->used-1, ix);
758 /* setup temp aliases */
762 /* This is the number of times the loop will iterate, essentially it's
763 while (tx++ < a->used && ty-- >= 0) { ... }
765 iy = MIN(a->used-tx, ty+1);
767 /* now for squaring tx can never equal ty
768 * we halve the distance since they approach at a rate of 2x
769 * and we have to round because odd cases need to be executed
771 iy = MIN(iy, (ty-tx+1)>>1);
774 for (iz = 0; iz < iy; iz++) {
775 _W += ((mp_word)*tmpx++)*((mp_word)*tmpy--);
778 /* double the inner product and add carry */
781 /* even columns have the square term in them */
783 _W += ((mp_word)a->dp[ix>>1])*((mp_word)a->dp[ix>>1]);
789 /* make next carry */
790 W1 = _W >> ((mp_word)DIGIT_BIT);
795 b->used = a->used+a->used;
800 for (ix = 0; ix < pa; ix++) {
801 *tmpb++ = W[ix] & MP_MASK;
804 /* clear unused digits [that existed in the old copy of c] */
805 for (; ix < olduse; ix++) {
815 * Simple algorithm which zeroes the int, grows it then just sets one bit
819 mp_2expt (mp_int * a, int b)
823 /* zero a as per default */
826 /* grow a to accommodate the single bit */
827 if ((res = mp_grow (a, b / DIGIT_BIT + 1)) != MP_OKAY) {
831 /* set the used count of where the bit will go */
832 a->used = b / DIGIT_BIT + 1;
834 /* put the single bit in its place */
835 a->dp[b / DIGIT_BIT] = ((mp_digit)1) << (b % DIGIT_BIT);
840 /* high level addition (handles signs) */
841 int mp_add (mp_int * a, mp_int * b, mp_int * c)
845 /* get sign of both inputs */
849 /* handle two cases, not four */
851 /* both positive or both negative */
852 /* add their magnitudes, copy the sign */
854 res = s_mp_add (a, b, c);
856 /* one positive, the other negative */
857 /* subtract the one with the greater magnitude from */
858 /* the one of the lesser magnitude. The result gets */
859 /* the sign of the one with the greater magnitude. */
860 if (mp_cmp_mag (a, b) == MP_LT) {
862 res = s_mp_sub (b, a, c);
865 res = s_mp_sub (a, b, c);
872 /* single digit addition */
874 mp_add_d (mp_int * a, mp_digit b, mp_int * c)
876 int res, ix, oldused;
877 mp_digit *tmpa, *tmpc, mu;
879 /* grow c as required */
880 if (c->alloc < a->used + 1) {
881 if ((res = mp_grow(c, a->used + 1)) != MP_OKAY) {
886 /* if a is negative and |a| >= b, call c = |a| - b */
887 if (a->sign == MP_NEG && (a->used > 1 || a->dp[0] >= b)) {
888 /* temporarily fix sign of a */
892 res = mp_sub_d(a, b, c);
895 a->sign = c->sign = MP_NEG;
900 /* old number of used digits in c */
903 /* sign always positive */
909 /* destination alias */
912 /* if a is positive */
913 if (a->sign == MP_ZPOS) {
914 /* add digit, after this we're propagating
918 mu = *tmpc >> DIGIT_BIT;
921 /* now handle rest of the digits */
922 for (ix = 1; ix < a->used; ix++) {
923 *tmpc = *tmpa++ + mu;
924 mu = *tmpc >> DIGIT_BIT;
927 /* set final carry */
932 c->used = a->used + 1;
934 /* a was negative and |a| < b */
937 /* the result is a single digit */
939 *tmpc++ = b - a->dp[0];
944 /* setup count so the clearing of oldused
945 * can fall through correctly
950 /* now zero to oldused */
951 while (ix++ < oldused) {
959 /* trim unused digits
961 * This is used to ensure that leading zero digits are
962 * trimed and the leading "used" digit will be non-zero
963 * Typically very fast. Also fixes the sign if there
964 * are no more leading digits
967 mp_clamp (mp_int * a)
969 /* decrease used while the most significant digit is
972 while (a->used > 0 && a->dp[a->used - 1] == 0) {
976 /* reset the sign flag if used == 0 */
982 void mp_clear_multi(mp_int *mp, ...)
984 mp_int* next_mp = mp;
987 while (next_mp != NULL) {
989 next_mp = va_arg(args, mp_int*);
994 /* compare two ints (signed)*/
996 mp_cmp (const mp_int * a, const mp_int * b)
998 /* compare based on sign */
999 if (a->sign != b->sign) {
1000 if (a->sign == MP_NEG) {
1007 /* compare digits */
1008 if (a->sign == MP_NEG) {
1009 /* if negative compare opposite direction */
1010 return mp_cmp_mag(b, a);
1012 return mp_cmp_mag(a, b);
1016 /* compare a digit */
1017 int mp_cmp_d(const mp_int * a, mp_digit b)
1019 /* compare based on sign */
1020 if (a->sign == MP_NEG) {
1024 /* compare based on magnitude */
1029 /* compare the only digit of a to b */
1032 } else if (a->dp[0] < b) {
1039 /* compare maginitude of two ints (unsigned) */
1040 int mp_cmp_mag (const mp_int * a, const mp_int * b)
1043 mp_digit *tmpa, *tmpb;
1045 /* compare based on # of non-zero digits */
1046 if (a->used > b->used) {
1050 if (a->used < b->used) {
1055 tmpa = a->dp + (a->used - 1);
1058 tmpb = b->dp + (a->used - 1);
1060 /* compare based on digits */
1061 for (n = 0; n < a->used; ++n, --tmpa, --tmpb) {
1062 if (*tmpa > *tmpb) {
1066 if (*tmpa < *tmpb) {
1073 static const int lnz[16] = {
1074 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0
1077 /* Counts the number of lsbs which are zero before the first zero bit */
1078 int mp_cnt_lsb(const mp_int *a)
1084 if (mp_iszero(a) == 1) {
1088 /* scan lower digits until non-zero */
1089 for (x = 0; x < a->used && a->dp[x] == 0; x++);
1093 /* now scan this digit until a 1 is found */
1106 mp_copy (const mp_int * a, mp_int * b)
1110 /* if dst == src do nothing */
1116 if (b->alloc < a->used) {
1117 if ((res = mp_grow (b, a->used)) != MP_OKAY) {
1122 /* zero b and copy the parameters over */
1124 register mp_digit *tmpa, *tmpb;
1126 /* pointer aliases */
1134 /* copy all the digits */
1135 for (n = 0; n < a->used; n++) {
1139 /* clear high digits */
1140 for (; n < b->used; n++) {
1145 /* copy used count and sign */
1151 /* returns the number of bits in an int */
1153 mp_count_bits (const mp_int * a)
1163 /* get number of digits and add that */
1164 r = (a->used - 1) * DIGIT_BIT;
1166 /* take the last digit and count the bits in it */
1167 q = a->dp[a->used - 1];
1170 q >>= ((mp_digit) 1);
1175 /* calc a value mod 2**b */
1177 mp_mod_2d (const mp_int * a, int b, mp_int * c)
1181 /* if b is <= 0 then zero the int */
1187 /* if the modulus is larger than the value than return */
1188 if (b > a->used * DIGIT_BIT) {
1189 res = mp_copy (a, c);
1194 if ((res = mp_copy (a, c)) != MP_OKAY) {
1198 /* zero digits above the last digit of the modulus */
1199 for (x = (b / DIGIT_BIT) + ((b % DIGIT_BIT) == 0 ? 0 : 1); x < c->used; x++) {
1202 /* clear the digit that is not completely outside/inside the modulus */
1203 c->dp[b / DIGIT_BIT] &= (1 << ((mp_digit)b % DIGIT_BIT)) - 1;
1208 /* shift right a certain amount of digits */
1209 static void mp_rshd (mp_int * a, int b)
1213 /* if b <= 0 then ignore it */
1218 /* if b > used then simply zero it and return */
1225 register mp_digit *bottom, *top;
1227 /* shift the digits down */
1232 /* top [offset into digits] */
1235 /* this is implemented as a sliding window where
1236 * the window is b-digits long and digits from
1237 * the top of the window are copied to the bottom
1241 b-2 | b-1 | b0 | b1 | b2 | ... | bb | ---->
1243 \-------------------/ ---->
1245 for (x = 0; x < (a->used - b); x++) {
1249 /* zero the top digits */
1250 for (; x < a->used; x++) {
1255 /* remove excess digits */
1259 /* shift right by a certain bit count (store quotient in c, optional remainder in d) */
1260 static int mp_div_2d (const mp_int * a, int b, mp_int * c, mp_int * d)
1267 /* if the shift count is <= 0 then we do no work */
1269 res = mp_copy (a, c);
1276 if ((res = mp_init (&t)) != MP_OKAY) {
1280 /* get the remainder */
1282 if ((res = mp_mod_2d (a, b, &t)) != MP_OKAY) {
1289 if ((res = mp_copy (a, c)) != MP_OKAY) {
1294 /* shift by as many digits in the bit count */
1295 if (b >= DIGIT_BIT) {
1296 mp_rshd (c, b / DIGIT_BIT);
1299 /* shift any bit count < DIGIT_BIT */
1300 D = (mp_digit) (b % DIGIT_BIT);
1302 register mp_digit *tmpc, mask, shift;
1305 mask = (((mp_digit)1) << D) - 1;
1308 shift = DIGIT_BIT - D;
1311 tmpc = c->dp + (c->used - 1);
1315 for (x = c->used - 1; x >= 0; x--) {
1316 /* get the lower bits of this word in a temp */
1319 /* shift the current word and mix in the carry bits from the previous word */
1320 *tmpc = (*tmpc >> D) | (r << shift);
1323 /* set the carry to the carry bits of the current word found above */
1335 /* shift left a certain amount of digits */
1336 static int mp_lshd (mp_int * a, int b)
1340 /* if its less than zero return */
1345 /* grow to fit the new digits */
1346 if (a->alloc < a->used + b) {
1347 if ((res = mp_grow (a, a->used + b)) != MP_OKAY) {
1353 register mp_digit *top, *bottom;
1355 /* increment the used by the shift amount then copy upwards */
1359 top = a->dp + a->used - 1;
1362 bottom = a->dp + a->used - 1 - b;
1364 /* much like mp_rshd this is implemented using a sliding window
1365 * except the window goes the otherway around. Copying from
1366 * the bottom to the top. see bn_mp_rshd.c for more info.
1368 for (x = a->used - 1; x >= b; x--) {
1372 /* zero the lower digits */
1374 for (x = 0; x < b; x++) {
1381 /* shift left by a certain bit count */
1382 static int mp_mul_2d (const mp_int * a, int b, mp_int * c)
1389 if ((res = mp_copy (a, c)) != MP_OKAY) {
1394 if (c->alloc < c->used + b/DIGIT_BIT + 1) {
1395 if ((res = mp_grow (c, c->used + b / DIGIT_BIT + 1)) != MP_OKAY) {
1400 /* shift by as many digits in the bit count */
1401 if (b >= DIGIT_BIT) {
1402 if ((res = mp_lshd (c, b / DIGIT_BIT)) != MP_OKAY) {
1407 /* shift any bit count < DIGIT_BIT */
1408 d = (mp_digit) (b % DIGIT_BIT);
1410 register mp_digit *tmpc, shift, mask, r, rr;
1413 /* bitmask for carries */
1414 mask = (((mp_digit)1) << d) - 1;
1416 /* shift for msbs */
1417 shift = DIGIT_BIT - d;
1424 for (x = 0; x < c->used; x++) {
1425 /* get the higher bits of the current word */
1426 rr = (*tmpc >> shift) & mask;
1428 /* shift the current word and OR in the carry */
1429 *tmpc = ((*tmpc << d) | r) & MP_MASK;
1432 /* set the carry to the carry bits of the current word */
1436 /* set final carry */
1438 c->dp[(c->used)++] = r;
1445 /* multiply by a digit */
1447 mp_mul_d (const mp_int * a, mp_digit b, mp_int * c)
1449 mp_digit u, *tmpa, *tmpc;
1451 int ix, res, olduse;
1453 /* make sure c is big enough to hold a*b */
1454 if (c->alloc < a->used + 1) {
1455 if ((res = mp_grow (c, a->used + 1)) != MP_OKAY) {
1460 /* get the original destinations used count */
1466 /* alias for a->dp [source] */
1469 /* alias for c->dp [dest] */
1475 /* compute columns */
1476 for (ix = 0; ix < a->used; ix++) {
1477 /* compute product and carry sum for this term */
1478 r = ((mp_word) u) + ((mp_word)*tmpa++) * ((mp_word)b);
1480 /* mask off higher bits to get a single digit */
1481 *tmpc++ = (mp_digit) (r & ((mp_word) MP_MASK));
1483 /* send carry into next iteration */
1484 u = (mp_digit) (r >> ((mp_word) DIGIT_BIT));
1487 /* store final carry [if any] */
1490 /* now zero digits above the top */
1491 while (ix++ < olduse) {
1495 /* set used count */
1496 c->used = a->used + 1;
1502 /* integer signed division.
1503 * c*b + d == a [e.g. a/b, c=quotient, d=remainder]
1504 * HAC pp.598 Algorithm 14.20
1506 * Note that the description in HAC is horribly
1507 * incomplete. For example, it doesn't consider
1508 * the case where digits are removed from 'x' in
1509 * the inner loop. It also doesn't consider the
1510 * case that y has fewer than three digits, etc..
1512 * The overall algorithm is as described as
1513 * 14.20 from HAC but fixed to treat these cases.
1515 static int mp_div (const mp_int * a, const mp_int * b, mp_int * c, mp_int * d)
1517 mp_int q, x, y, t1, t2;
1518 int res, n, t, i, norm, neg;
1520 /* is divisor zero ? */
1521 if (mp_iszero (b) == 1) {
1525 /* if a < b then q=0, r = a */
1526 if (mp_cmp_mag (a, b) == MP_LT) {
1528 res = mp_copy (a, d);
1538 if ((res = mp_init_size (&q, a->used + 2)) != MP_OKAY) {
1541 q.used = a->used + 2;
1543 if ((res = mp_init (&t1)) != MP_OKAY) {
1547 if ((res = mp_init (&t2)) != MP_OKAY) {
1551 if ((res = mp_init_copy (&x, a)) != MP_OKAY) {
1555 if ((res = mp_init_copy (&y, b)) != MP_OKAY) {
1560 neg = (a->sign == b->sign) ? MP_ZPOS : MP_NEG;
1561 x.sign = y.sign = MP_ZPOS;
1563 /* normalize both x and y, ensure that y >= b/2, [b == 2**DIGIT_BIT] */
1564 norm = mp_count_bits(&y) % DIGIT_BIT;
1565 if (norm < DIGIT_BIT-1) {
1566 norm = (DIGIT_BIT-1) - norm;
1567 if ((res = mp_mul_2d (&x, norm, &x)) != MP_OKAY) {
1570 if ((res = mp_mul_2d (&y, norm, &y)) != MP_OKAY) {
1577 /* note hac does 0 based, so if used==5 then its 0,1,2,3,4, e.g. use 4 */
1581 /* while (x >= y*b**n-t) do { q[n-t] += 1; x -= y*b**{n-t} } */
1582 if ((res = mp_lshd (&y, n - t)) != MP_OKAY) { /* y = y*b**{n-t} */
1586 while (mp_cmp (&x, &y) != MP_LT) {
1588 if ((res = mp_sub (&x, &y, &x)) != MP_OKAY) {
1593 /* reset y by shifting it back down */
1594 mp_rshd (&y, n - t);
1596 /* step 3. for i from n down to (t + 1) */
1597 for (i = n; i >= (t + 1); i--) {
1602 /* step 3.1 if xi == yt then set q{i-t-1} to b-1,
1603 * otherwise set q{i-t-1} to (xi*b + x{i-1})/yt */
1604 if (x.dp[i] == y.dp[t]) {
1605 q.dp[i - t - 1] = ((((mp_digit)1) << DIGIT_BIT) - 1);
1608 tmp = ((mp_word) x.dp[i]) << ((mp_word) DIGIT_BIT);
1609 tmp |= ((mp_word) x.dp[i - 1]);
1610 tmp /= ((mp_word) y.dp[t]);
1611 if (tmp > (mp_word) MP_MASK)
1613 q.dp[i - t - 1] = (mp_digit) (tmp & (mp_word) (MP_MASK));
1616 /* while (q{i-t-1} * (yt * b + y{t-1})) >
1617 xi * b**2 + xi-1 * b + xi-2
1621 q.dp[i - t - 1] = (q.dp[i - t - 1] + 1) & MP_MASK;
1623 q.dp[i - t - 1] = (q.dp[i - t - 1] - 1) & MP_MASK;
1625 /* find left hand */
1627 t1.dp[0] = (t - 1 < 0) ? 0 : y.dp[t - 1];
1630 if ((res = mp_mul_d (&t1, q.dp[i - t - 1], &t1)) != MP_OKAY) {
1634 /* find right hand */
1635 t2.dp[0] = (i - 2 < 0) ? 0 : x.dp[i - 2];
1636 t2.dp[1] = (i - 1 < 0) ? 0 : x.dp[i - 1];
1639 } while (mp_cmp_mag(&t1, &t2) == MP_GT);
1641 /* step 3.3 x = x - q{i-t-1} * y * b**{i-t-1} */
1642 if ((res = mp_mul_d (&y, q.dp[i - t - 1], &t1)) != MP_OKAY) {
1646 if ((res = mp_lshd (&t1, i - t - 1)) != MP_OKAY) {
1650 if ((res = mp_sub (&x, &t1, &x)) != MP_OKAY) {
1654 /* if x < 0 then { x = x + y*b**{i-t-1}; q{i-t-1} -= 1; } */
1655 if (x.sign == MP_NEG) {
1656 if ((res = mp_copy (&y, &t1)) != MP_OKAY) {
1659 if ((res = mp_lshd (&t1, i - t - 1)) != MP_OKAY) {
1662 if ((res = mp_add (&x, &t1, &x)) != MP_OKAY) {
1666 q.dp[i - t - 1] = (q.dp[i - t - 1] - 1UL) & MP_MASK;
1670 /* now q is the quotient and x is the remainder
1671 * [which we have to normalize]
1674 /* get sign before writing to c */
1675 x.sign = x.used == 0 ? MP_ZPOS : a->sign;
1684 mp_div_2d (&x, norm, &x, NULL);
1692 __T2:mp_clear (&t2);
1693 __T1:mp_clear (&t1);
1698 static int s_is_power_of_two(mp_digit b, int *p)
1702 for (x = 1; x < DIGIT_BIT; x++) {
1703 if (b == (((mp_digit)1)<<x)) {
1711 /* single digit division (based on routine from MPI) */
1712 static int mp_div_d (const mp_int * a, mp_digit b, mp_int * c, mp_digit * d)
1719 /* cannot divide by zero */
1725 if (b == 1 || mp_iszero(a) == 1) {
1730 return mp_copy(a, c);
1735 /* power of two ? */
1736 if (s_is_power_of_two(b, &ix) == 1) {
1738 *d = a->dp[0] & ((((mp_digit)1)<<ix) - 1);
1741 return mp_div_2d(a, ix, c, NULL);
1746 /* no easy answer [c'est la vie]. Just division */
1747 if ((res = mp_init_size(&q, a->used)) != MP_OKAY) {
1754 for (ix = a->used - 1; ix >= 0; ix--) {
1755 w = (w << ((mp_word)DIGIT_BIT)) | ((mp_word)a->dp[ix]);
1758 t = (mp_digit)(w / b);
1759 w -= ((mp_word)t) * ((mp_word)b);
1779 /* reduce "x" in place modulo "n" using the Diminished Radix algorithm.
1781 * Based on algorithm from the paper
1783 * "Generating Efficient Primes for Discrete Log Cryptosystems"
1784 * Chae Hoon Lim, Pil Loong Lee,
1785 * POSTECH Information Research Laboratories
1787 * The modulus must be of a special format [see manual]
1789 * Has been modified to use algorithm 7.10 from the LTM book instead
1791 * Input x must be in the range 0 <= x <= (n-1)**2
1794 mp_dr_reduce (mp_int * x, const mp_int * n, mp_digit k)
1798 mp_digit mu, *tmpx1, *tmpx2;
1800 /* m = digits in modulus */
1803 /* ensure that "x" has at least 2m digits */
1804 if (x->alloc < m + m) {
1805 if ((err = mp_grow (x, m + m)) != MP_OKAY) {
1810 /* top of loop, this is where the code resumes if
1811 * another reduction pass is required.
1814 /* aliases for digits */
1815 /* alias for lower half of x */
1818 /* alias for upper half of x, or x/B**m */
1821 /* set carry to zero */
1824 /* compute (x mod B**m) + k * [x/B**m] inline and inplace */
1825 for (i = 0; i < m; i++) {
1826 r = ((mp_word)*tmpx2++) * ((mp_word)k) + *tmpx1 + mu;
1827 *tmpx1++ = (mp_digit)(r & MP_MASK);
1828 mu = (mp_digit)(r >> ((mp_word)DIGIT_BIT));
1831 /* set final carry */
1834 /* zero words above m */
1835 for (i = m + 1; i < x->used; i++) {
1839 /* clamp, sub and return */
1842 /* if x >= n then subtract and reduce again
1843 * Each successive "recursion" makes the input smaller and smaller.
1845 if (mp_cmp_mag (x, n) != MP_LT) {
1852 /* sets the value of "d" required for mp_dr_reduce */
1853 static void mp_dr_setup(const mp_int *a, mp_digit *d)
1855 /* the casts are required if DIGIT_BIT is one less than
1856 * the number of bits in a mp_digit [e.g. DIGIT_BIT==31]
1858 *d = (mp_digit)((((mp_word)1) << ((mp_word)DIGIT_BIT)) -
1859 ((mp_word)a->dp[0]));
1862 /* this is a shell function that calls either the normal or Montgomery
1863 * exptmod functions. Originally the call to the montgomery code was
1864 * embedded in the normal function but that wasted a lot of stack space
1865 * for nothing (since 99% of the time the Montgomery code would be called)
1867 int mp_exptmod (const mp_int * G, const mp_int * X, mp_int * P, mp_int * Y)
1871 /* modulus P must be positive */
1872 if (P->sign == MP_NEG) {
1876 /* if exponent X is negative we have to recurse */
1877 if (X->sign == MP_NEG) {
1881 /* first compute 1/G mod P */
1882 if ((err = mp_init(&tmpG)) != MP_OKAY) {
1885 if ((err = mp_invmod(G, P, &tmpG)) != MP_OKAY) {
1891 if ((err = mp_init(&tmpX)) != MP_OKAY) {
1895 if ((err = mp_abs(X, &tmpX)) != MP_OKAY) {
1896 mp_clear_multi(&tmpG, &tmpX, NULL);
1900 /* and now compute (1/G)**|X| instead of G**X [X < 0] */
1901 err = mp_exptmod(&tmpG, &tmpX, P, Y);
1902 mp_clear_multi(&tmpG, &tmpX, NULL);
1908 /* if the modulus is odd or dr != 0 use the fast method */
1909 if (mp_isodd (P) == 1 || dr != 0) {
1910 return mp_exptmod_fast (G, X, P, Y, dr);
1912 /* otherwise use the generic Barrett reduction technique */
1913 return s_mp_exptmod (G, X, P, Y);
1917 /* computes Y == G**X mod P, HAC pp.616, Algorithm 14.85
1919 * Uses a left-to-right k-ary sliding window to compute the modular exponentiation.
1920 * The value of k changes based on the size of the exponent.
1922 * Uses Montgomery or Diminished Radix reduction [whichever appropriate]
1926 mp_exptmod_fast (const mp_int * G, const mp_int * X, mp_int * P, mp_int * Y, int redmode)
1930 int err, bitbuf, bitcpy, bitcnt, mode, digidx, x, y, winsize;
1932 /* use a pointer to the reduction algorithm. This allows us to use
1933 * one of many reduction algorithms without modding the guts of
1934 * the code with if statements everywhere.
1936 int (*redux)(mp_int*,const mp_int*,mp_digit);
1938 /* find window size */
1939 x = mp_count_bits (X);
1942 } else if (x <= 36) {
1944 } else if (x <= 140) {
1946 } else if (x <= 450) {
1948 } else if (x <= 1303) {
1950 } else if (x <= 3529) {
1957 /* init first cell */
1958 if ((err = mp_init(&M[1])) != MP_OKAY) {
1962 /* now init the second half of the array */
1963 for (x = 1<<(winsize-1); x < (1 << winsize); x++) {
1964 if ((err = mp_init(&M[x])) != MP_OKAY) {
1965 for (y = 1<<(winsize-1); y < x; y++) {
1973 /* determine and setup reduction code */
1975 /* now setup montgomery */
1976 if ((err = mp_montgomery_setup (P, &mp)) != MP_OKAY) {
1980 /* automatically pick the comba one if available (saves quite a few calls/ifs) */
1981 if (((P->used * 2 + 1) < MP_WARRAY) &&
1982 P->used < (1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) {
1983 redux = fast_mp_montgomery_reduce;
1985 /* use slower baseline Montgomery method */
1986 redux = mp_montgomery_reduce;
1988 } else if (redmode == 1) {
1989 /* setup DR reduction for moduli of the form B**k - b */
1990 mp_dr_setup(P, &mp);
1991 redux = mp_dr_reduce;
1993 /* setup DR reduction for moduli of the form 2**k - b */
1994 if ((err = mp_reduce_2k_setup(P, &mp)) != MP_OKAY) {
1997 redux = mp_reduce_2k;
2001 if ((err = mp_init (&res)) != MP_OKAY) {
2009 * The first half of the table is not computed though accept for M[0] and M[1]
2013 /* now we need R mod m */
2014 if ((err = mp_montgomery_calc_normalization (&res, P)) != MP_OKAY) {
2018 /* now set M[1] to G * R mod m */
2019 if ((err = mp_mulmod (G, &res, P, &M[1])) != MP_OKAY) {
2024 if ((err = mp_mod(G, P, &M[1])) != MP_OKAY) {
2029 /* compute the value at M[1<<(winsize-1)] by squaring M[1] (winsize-1) times */
2030 if ((err = mp_copy (&M[1], &M[1 << (winsize - 1)])) != MP_OKAY) {
2034 for (x = 0; x < (winsize - 1); x++) {
2035 if ((err = mp_sqr (&M[1 << (winsize - 1)], &M[1 << (winsize - 1)])) != MP_OKAY) {
2038 if ((err = redux (&M[1 << (winsize - 1)], P, mp)) != MP_OKAY) {
2043 /* create upper table */
2044 for (x = (1 << (winsize - 1)) + 1; x < (1 << winsize); x++) {
2045 if ((err = mp_mul (&M[x - 1], &M[1], &M[x])) != MP_OKAY) {
2048 if ((err = redux (&M[x], P, mp)) != MP_OKAY) {
2053 /* set initial mode and bit cnt */
2057 digidx = X->used - 1;
2062 /* grab next digit as required */
2063 if (--bitcnt == 0) {
2064 /* if digidx == -1 we are out of digits so break */
2068 /* read next digit and reset bitcnt */
2069 buf = X->dp[digidx--];
2073 /* grab the next msb from the exponent */
2074 y = (buf >> (DIGIT_BIT - 1)) & 1;
2075 buf <<= (mp_digit)1;
2077 /* if the bit is zero and mode == 0 then we ignore it
2078 * These represent the leading zero bits before the first 1 bit
2079 * in the exponent. Technically this opt is not required but it
2080 * does lower the # of trivial squaring/reductions used
2082 if (mode == 0 && y == 0) {
2086 /* if the bit is zero and mode == 1 then we square */
2087 if (mode == 1 && y == 0) {
2088 if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
2091 if ((err = redux (&res, P, mp)) != MP_OKAY) {
2097 /* else we add it to the window */
2098 bitbuf |= (y << (winsize - ++bitcpy));
2101 if (bitcpy == winsize) {
2102 /* ok window is filled so square as required and multiply */
2104 for (x = 0; x < winsize; x++) {
2105 if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
2108 if ((err = redux (&res, P, mp)) != MP_OKAY) {
2114 if ((err = mp_mul (&res, &M[bitbuf], &res)) != MP_OKAY) {
2117 if ((err = redux (&res, P, mp)) != MP_OKAY) {
2121 /* empty window and reset */
2128 /* if bits remain then square/multiply */
2129 if (mode == 2 && bitcpy > 0) {
2130 /* square then multiply if the bit is set */
2131 for (x = 0; x < bitcpy; x++) {
2132 if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
2135 if ((err = redux (&res, P, mp)) != MP_OKAY) {
2139 /* get next bit of the window */
2141 if ((bitbuf & (1 << winsize)) != 0) {
2143 if ((err = mp_mul (&res, &M[1], &res)) != MP_OKAY) {
2146 if ((err = redux (&res, P, mp)) != MP_OKAY) {
2154 /* fixup result if Montgomery reduction is used
2155 * recall that any value in a Montgomery system is
2156 * actually multiplied by R mod n. So we have
2157 * to reduce one more time to cancel out the factor
2160 if ((err = redux(&res, P, mp)) != MP_OKAY) {
2165 /* swap res with Y */
2168 __RES:mp_clear (&res);
2171 for (x = 1<<(winsize-1); x < (1 << winsize); x++) {
2177 /* Greatest Common Divisor using the binary method */
2178 int mp_gcd (const mp_int * a, const mp_int * b, mp_int * c)
2181 int k, u_lsb, v_lsb, res;
2183 /* either zero than gcd is the largest */
2184 if (mp_iszero (a) == 1 && mp_iszero (b) == 0) {
2185 return mp_abs (b, c);
2187 if (mp_iszero (a) == 0 && mp_iszero (b) == 1) {
2188 return mp_abs (a, c);
2191 /* optimized. At this point if a == 0 then
2192 * b must equal zero too
2194 if (mp_iszero (a) == 1) {
2199 /* get copies of a and b we can modify */
2200 if ((res = mp_init_copy (&u, a)) != MP_OKAY) {
2204 if ((res = mp_init_copy (&v, b)) != MP_OKAY) {
2208 /* must be positive for the remainder of the algorithm */
2209 u.sign = v.sign = MP_ZPOS;
2211 /* B1. Find the common power of two for u and v */
2212 u_lsb = mp_cnt_lsb(&u);
2213 v_lsb = mp_cnt_lsb(&v);
2214 k = MIN(u_lsb, v_lsb);
2217 /* divide the power of two out */
2218 if ((res = mp_div_2d(&u, k, &u, NULL)) != MP_OKAY) {
2222 if ((res = mp_div_2d(&v, k, &v, NULL)) != MP_OKAY) {
2227 /* divide any remaining factors of two out */
2229 if ((res = mp_div_2d(&u, u_lsb - k, &u, NULL)) != MP_OKAY) {
2235 if ((res = mp_div_2d(&v, v_lsb - k, &v, NULL)) != MP_OKAY) {
2240 while (mp_iszero(&v) == 0) {
2241 /* make sure v is the largest */
2242 if (mp_cmp_mag(&u, &v) == MP_GT) {
2243 /* swap u and v to make sure v is >= u */
2247 /* subtract smallest from largest */
2248 if ((res = s_mp_sub(&v, &u, &v)) != MP_OKAY) {
2252 /* Divide out all factors of two */
2253 if ((res = mp_div_2d(&v, mp_cnt_lsb(&v), &v, NULL)) != MP_OKAY) {
2258 /* multiply by 2**k which we divided out at the beginning */
2259 if ((res = mp_mul_2d (&u, k, c)) != MP_OKAY) {
2269 /* get the lower 32-bits of an mp_int */
2270 unsigned long mp_get_int(const mp_int * a)
2279 /* get number of digits of the lsb we have to read */
2280 i = MIN(a->used,(int)((sizeof(unsigned long)*CHAR_BIT+DIGIT_BIT-1)/DIGIT_BIT))-1;
2282 /* get most significant digit of result */
2286 res = (res << DIGIT_BIT) | DIGIT(a,i);
2289 /* force result to 32-bits always so it is consistent on non 32-bit platforms */
2290 return res & 0xFFFFFFFFUL;
2293 /* creates "a" then copies b into it */
2294 int mp_init_copy (mp_int * a, const mp_int * b)
2298 if ((res = mp_init (a)) != MP_OKAY) {
2301 return mp_copy (b, a);
2304 int mp_init_multi(mp_int *mp, ...)
2306 mp_err res = MP_OKAY; /* Assume ok until proven otherwise */
2307 int n = 0; /* Number of ok inits */
2308 mp_int* cur_arg = mp;
2311 va_start(args, mp); /* init args to next argument from caller */
2312 while (cur_arg != NULL) {
2313 if (mp_init(cur_arg) != MP_OKAY) {
2314 /* Oops - error! Back-track and mp_clear what we already
2315 succeeded in init-ing, then return error.
2319 /* end the current list */
2322 /* now start cleaning up */
2324 va_start(clean_args, mp);
2327 cur_arg = va_arg(clean_args, mp_int*);
2334 cur_arg = va_arg(args, mp_int*);
2337 return res; /* Assumed ok, if error flagged above. */
2340 /* hac 14.61, pp608 */
2341 int mp_invmod (const mp_int * a, mp_int * b, mp_int * c)
2343 /* b cannot be negative */
2344 if (b->sign == MP_NEG || mp_iszero(b) == 1) {
2348 /* if the modulus is odd we can use a faster routine instead */
2349 if (mp_isodd (b) == 1) {
2350 return fast_mp_invmod (a, b, c);
2353 return mp_invmod_slow(a, b, c);
2356 /* hac 14.61, pp608 */
2357 int mp_invmod_slow (const mp_int * a, mp_int * b, mp_int * c)
2359 mp_int x, y, u, v, A, B, C, D;
2362 /* b cannot be negative */
2363 if (b->sign == MP_NEG || mp_iszero(b) == 1) {
2368 if ((res = mp_init_multi(&x, &y, &u, &v,
2369 &A, &B, &C, &D, NULL)) != MP_OKAY) {
2374 if ((res = mp_copy (a, &x)) != MP_OKAY) {
2377 if ((res = mp_copy (b, &y)) != MP_OKAY) {
2381 /* 2. [modified] if x,y are both even then return an error! */
2382 if (mp_iseven (&x) == 1 && mp_iseven (&y) == 1) {
2387 /* 3. u=x, v=y, A=1, B=0, C=0,D=1 */
2388 if ((res = mp_copy (&x, &u)) != MP_OKAY) {
2391 if ((res = mp_copy (&y, &v)) != MP_OKAY) {
2398 /* 4. while u is even do */
2399 while (mp_iseven (&u) == 1) {
2401 if ((res = mp_div_2 (&u, &u)) != MP_OKAY) {
2404 /* 4.2 if A or B is odd then */
2405 if (mp_isodd (&A) == 1 || mp_isodd (&B) == 1) {
2406 /* A = (A+y)/2, B = (B-x)/2 */
2407 if ((res = mp_add (&A, &y, &A)) != MP_OKAY) {
2410 if ((res = mp_sub (&B, &x, &B)) != MP_OKAY) {
2414 /* A = A/2, B = B/2 */
2415 if ((res = mp_div_2 (&A, &A)) != MP_OKAY) {
2418 if ((res = mp_div_2 (&B, &B)) != MP_OKAY) {
2423 /* 5. while v is even do */
2424 while (mp_iseven (&v) == 1) {
2426 if ((res = mp_div_2 (&v, &v)) != MP_OKAY) {
2429 /* 5.2 if C or D is odd then */
2430 if (mp_isodd (&C) == 1 || mp_isodd (&D) == 1) {
2431 /* C = (C+y)/2, D = (D-x)/2 */
2432 if ((res = mp_add (&C, &y, &C)) != MP_OKAY) {
2435 if ((res = mp_sub (&D, &x, &D)) != MP_OKAY) {
2439 /* C = C/2, D = D/2 */
2440 if ((res = mp_div_2 (&C, &C)) != MP_OKAY) {
2443 if ((res = mp_div_2 (&D, &D)) != MP_OKAY) {
2448 /* 6. if u >= v then */
2449 if (mp_cmp (&u, &v) != MP_LT) {
2450 /* u = u - v, A = A - C, B = B - D */
2451 if ((res = mp_sub (&u, &v, &u)) != MP_OKAY) {
2455 if ((res = mp_sub (&A, &C, &A)) != MP_OKAY) {
2459 if ((res = mp_sub (&B, &D, &B)) != MP_OKAY) {
2463 /* v - v - u, C = C - A, D = D - B */
2464 if ((res = mp_sub (&v, &u, &v)) != MP_OKAY) {
2468 if ((res = mp_sub (&C, &A, &C)) != MP_OKAY) {
2472 if ((res = mp_sub (&D, &B, &D)) != MP_OKAY) {
2477 /* if not zero goto step 4 */
2478 if (mp_iszero (&u) == 0)
2481 /* now a = C, b = D, gcd == g*v */
2483 /* if v != 1 then there is no inverse */
2484 if (mp_cmp_d (&v, 1) != MP_EQ) {
2489 /* if its too low */
2490 while (mp_cmp_d(&C, 0) == MP_LT) {
2491 if ((res = mp_add(&C, b, &C)) != MP_OKAY) {
2497 while (mp_cmp_mag(&C, b) != MP_LT) {
2498 if ((res = mp_sub(&C, b, &C)) != MP_OKAY) {
2503 /* C is now the inverse */
2506 __ERR:mp_clear_multi (&x, &y, &u, &v, &A, &B, &C, &D, NULL);
2510 /* c = |a| * |b| using Karatsuba Multiplication using
2511 * three half size multiplications
2513 * Let B represent the radix [e.g. 2**DIGIT_BIT] and
2514 * let n represent half of the number of digits in
2517 * a = a1 * B**n + a0
2518 * b = b1 * B**n + b0
2521 a1b1 * B**2n + ((a1 - a0)(b1 - b0) + a0b0 + a1b1) * B + a0b0
2523 * Note that a1b1 and a0b0 are used twice and only need to be
2524 * computed once. So in total three half size (half # of
2525 * digit) multiplications are performed, a0b0, a1b1 and
2528 * Note that a multiplication of half the digits requires
2529 * 1/4th the number of single precision multiplications so in
2530 * total after one call 25% of the single precision multiplications
2531 * are saved. Note also that the call to mp_mul can end up back
2532 * in this function if the a0, a1, b0, or b1 are above the threshold.
2533 * This is known as divide-and-conquer and leads to the famous
2534 * O(N**lg(3)) or O(N**1.584) work which is asymptotically lower than
2535 * the standard O(N**2) that the baseline/comba methods use.
2536 * Generally though the overhead of this method doesn't pay off
2537 * until a certain size (N ~ 80) is reached.
2539 int mp_karatsuba_mul (const mp_int * a, const mp_int * b, mp_int * c)
2541 mp_int x0, x1, y0, y1, t1, x0y0, x1y1;
2544 /* default the return code to an error */
2547 /* min # of digits */
2548 B = MIN (a->used, b->used);
2550 /* now divide in two */
2553 /* init copy all the temps */
2554 if (mp_init_size (&x0, B) != MP_OKAY)
2556 if (mp_init_size (&x1, a->used - B) != MP_OKAY)
2558 if (mp_init_size (&y0, B) != MP_OKAY)
2560 if (mp_init_size (&y1, b->used - B) != MP_OKAY)
2564 if (mp_init_size (&t1, B * 2) != MP_OKAY)
2566 if (mp_init_size (&x0y0, B * 2) != MP_OKAY)
2568 if (mp_init_size (&x1y1, B * 2) != MP_OKAY)
2571 /* now shift the digits */
2572 x0.used = y0.used = B;
2573 x1.used = a->used - B;
2574 y1.used = b->used - B;
2578 register mp_digit *tmpa, *tmpb, *tmpx, *tmpy;
2580 /* we copy the digits directly instead of using higher level functions
2581 * since we also need to shift the digits
2588 for (x = 0; x < B; x++) {
2594 for (x = B; x < a->used; x++) {
2599 for (x = B; x < b->used; x++) {
2604 /* only need to clamp the lower words since by definition the
2605 * upper words x1/y1 must have a known number of digits
2610 /* now calc the products x0y0 and x1y1 */
2611 /* after this x0 is no longer required, free temp [x0==t2]! */
2612 if (mp_mul (&x0, &y0, &x0y0) != MP_OKAY)
2613 goto X1Y1; /* x0y0 = x0*y0 */
2614 if (mp_mul (&x1, &y1, &x1y1) != MP_OKAY)
2615 goto X1Y1; /* x1y1 = x1*y1 */
2617 /* now calc x1-x0 and y1-y0 */
2618 if (mp_sub (&x1, &x0, &t1) != MP_OKAY)
2619 goto X1Y1; /* t1 = x1 - x0 */
2620 if (mp_sub (&y1, &y0, &x0) != MP_OKAY)
2621 goto X1Y1; /* t2 = y1 - y0 */
2622 if (mp_mul (&t1, &x0, &t1) != MP_OKAY)
2623 goto X1Y1; /* t1 = (x1 - x0) * (y1 - y0) */
2626 if (mp_add (&x0y0, &x1y1, &x0) != MP_OKAY)
2627 goto X1Y1; /* t2 = x0y0 + x1y1 */
2628 if (mp_sub (&x0, &t1, &t1) != MP_OKAY)
2629 goto X1Y1; /* t1 = x0y0 + x1y1 - (x1-x0)*(y1-y0) */
2632 if (mp_lshd (&t1, B) != MP_OKAY)
2633 goto X1Y1; /* t1 = (x0y0 + x1y1 - (x1-x0)*(y1-y0))<<B */
2634 if (mp_lshd (&x1y1, B * 2) != MP_OKAY)
2635 goto X1Y1; /* x1y1 = x1y1 << 2*B */
2637 if (mp_add (&x0y0, &t1, &t1) != MP_OKAY)
2638 goto X1Y1; /* t1 = x0y0 + t1 */
2639 if (mp_add (&t1, &x1y1, c) != MP_OKAY)
2640 goto X1Y1; /* t1 = x0y0 + t1 + x1y1 */
2642 /* Algorithm succeeded set the return code to MP_OKAY */
2645 X1Y1:mp_clear (&x1y1);
2646 X0Y0:mp_clear (&x0y0);
2656 /* Karatsuba squaring, computes b = a*a using three
2657 * half size squarings
2659 * See comments of karatsuba_mul for details. It
2660 * is essentially the same algorithm but merely
2661 * tuned to perform recursive squarings.
2663 int mp_karatsuba_sqr (const mp_int * a, mp_int * b)
2665 mp_int x0, x1, t1, t2, x0x0, x1x1;
2670 /* min # of digits */
2673 /* now divide in two */
2676 /* init copy all the temps */
2677 if (mp_init_size (&x0, B) != MP_OKAY)
2679 if (mp_init_size (&x1, a->used - B) != MP_OKAY)
2683 if (mp_init_size (&t1, a->used * 2) != MP_OKAY)
2685 if (mp_init_size (&t2, a->used * 2) != MP_OKAY)
2687 if (mp_init_size (&x0x0, B * 2) != MP_OKAY)
2689 if (mp_init_size (&x1x1, (a->used - B) * 2) != MP_OKAY)
2694 register mp_digit *dst, *src;
2698 /* now shift the digits */
2700 for (x = 0; x < B; x++) {
2705 for (x = B; x < a->used; x++) {
2711 x1.used = a->used - B;
2715 /* now calc the products x0*x0 and x1*x1 */
2716 if (mp_sqr (&x0, &x0x0) != MP_OKAY)
2717 goto X1X1; /* x0x0 = x0*x0 */
2718 if (mp_sqr (&x1, &x1x1) != MP_OKAY)
2719 goto X1X1; /* x1x1 = x1*x1 */
2721 /* now calc (x1-x0)**2 */
2722 if (mp_sub (&x1, &x0, &t1) != MP_OKAY)
2723 goto X1X1; /* t1 = x1 - x0 */
2724 if (mp_sqr (&t1, &t1) != MP_OKAY)
2725 goto X1X1; /* t1 = (x1 - x0) * (x1 - x0) */
2728 if (s_mp_add (&x0x0, &x1x1, &t2) != MP_OKAY)
2729 goto X1X1; /* t2 = x0x0 + x1x1 */
2730 if (mp_sub (&t2, &t1, &t1) != MP_OKAY)
2731 goto X1X1; /* t1 = x0x0 + x1x1 - (x1-x0)*(x1-x0) */
2734 if (mp_lshd (&t1, B) != MP_OKAY)
2735 goto X1X1; /* t1 = (x0x0 + x1x1 - (x1-x0)*(x1-x0))<<B */
2736 if (mp_lshd (&x1x1, B * 2) != MP_OKAY)
2737 goto X1X1; /* x1x1 = x1x1 << 2*B */
2739 if (mp_add (&x0x0, &t1, &t1) != MP_OKAY)
2740 goto X1X1; /* t1 = x0x0 + t1 */
2741 if (mp_add (&t1, &x1x1, b) != MP_OKAY)
2742 goto X1X1; /* t1 = x0x0 + t1 + x1x1 */
2746 X1X1:mp_clear (&x1x1);
2747 X0X0:mp_clear (&x0x0);
2756 /* computes least common multiple as |a*b|/(a, b) */
2757 int mp_lcm (const mp_int * a, const mp_int * b, mp_int * c)
2763 if ((res = mp_init_multi (&t1, &t2, NULL)) != MP_OKAY) {
2767 /* t1 = get the GCD of the two inputs */
2768 if ((res = mp_gcd (a, b, &t1)) != MP_OKAY) {
2772 /* divide the smallest by the GCD */
2773 if (mp_cmp_mag(a, b) == MP_LT) {
2774 /* store quotient in t2 such that t2 * b is the LCM */
2775 if ((res = mp_div(a, &t1, &t2, NULL)) != MP_OKAY) {
2778 res = mp_mul(b, &t2, c);
2780 /* store quotient in t2 such that t2 * a is the LCM */
2781 if ((res = mp_div(b, &t1, &t2, NULL)) != MP_OKAY) {
2784 res = mp_mul(a, &t2, c);
2787 /* fix the sign to positive */
2791 mp_clear_multi (&t1, &t2, NULL);
2795 /* c = a mod b, 0 <= c < b */
2797 mp_mod (const mp_int * a, mp_int * b, mp_int * c)
2802 if ((res = mp_init (&t)) != MP_OKAY) {
2806 if ((res = mp_div (a, b, NULL, &t)) != MP_OKAY) {
2811 if (t.sign != b->sign) {
2812 res = mp_add (b, &t, c);
2823 mp_mod_d (const mp_int * a, mp_digit b, mp_digit * c)
2825 return mp_div_d(a, b, NULL, c);
2829 static int mp_mul_2(const mp_int * a, mp_int * b)
2831 int x, res, oldused;
2833 /* grow to accommodate result */
2834 if (b->alloc < a->used + 1) {
2835 if ((res = mp_grow (b, a->used + 1)) != MP_OKAY) {
2844 register mp_digit r, rr, *tmpa, *tmpb;
2846 /* alias for source */
2849 /* alias for dest */
2854 for (x = 0; x < a->used; x++) {
2856 /* get what will be the *next* carry bit from the
2857 * MSB of the current digit
2859 rr = *tmpa >> ((mp_digit)(DIGIT_BIT - 1));
2861 /* now shift up this digit, add in the carry [from the previous] */
2862 *tmpb++ = ((*tmpa++ << ((mp_digit)1)) | r) & MP_MASK;
2864 /* copy the carry that would be from the source
2865 * digit into the next iteration
2870 /* new leading digit? */
2872 /* add a MSB which is always 1 at this point */
2877 /* now zero any excess digits on the destination
2878 * that we didn't write to
2880 tmpb = b->dp + b->used;
2881 for (x = b->used; x < oldused; x++) {
2890 * shifts with subtractions when the result is greater than b.
2892 * The method is slightly modified to shift B unconditionally up to just under
2893 * the leading bit of b. This saves a lot of multiple precision shifting.
2895 int mp_montgomery_calc_normalization (mp_int * a, const mp_int * b)
2899 /* how many bits of last digit does b use */
2900 bits = mp_count_bits (b) % DIGIT_BIT;
2904 if ((res = mp_2expt (a, (b->used - 1) * DIGIT_BIT + bits - 1)) != MP_OKAY) {
2913 /* now compute C = A * B mod b */
2914 for (x = bits - 1; x < DIGIT_BIT; x++) {
2915 if ((res = mp_mul_2 (a, a)) != MP_OKAY) {
2918 if (mp_cmp_mag (a, b) != MP_LT) {
2919 if ((res = s_mp_sub (a, b, a)) != MP_OKAY) {
2928 /* computes xR**-1 == x (mod N) via Montgomery Reduction */
2930 mp_montgomery_reduce (mp_int * x, const mp_int * n, mp_digit rho)
2935 /* can the fast reduction [comba] method be used?
2937 * Note that unlike in mul you're safely allowed *less*
2938 * than the available columns [255 per default] since carries
2939 * are fixed up in the inner loop.
2941 digs = n->used * 2 + 1;
2942 if ((digs < MP_WARRAY) &&
2944 (1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) {
2945 return fast_mp_montgomery_reduce (x, n, rho);
2948 /* grow the input as required */
2949 if (x->alloc < digs) {
2950 if ((res = mp_grow (x, digs)) != MP_OKAY) {
2956 for (ix = 0; ix < n->used; ix++) {
2957 /* mu = ai * rho mod b
2959 * The value of rho must be precalculated via
2960 * montgomery_setup() such that
2961 * it equals -1/n0 mod b this allows the
2962 * following inner loop to reduce the
2963 * input one digit at a time
2965 mu = (mp_digit) (((mp_word)x->dp[ix]) * ((mp_word)rho) & MP_MASK);
2967 /* a = a + mu * m * b**i */
2970 register mp_digit *tmpn, *tmpx, u;
2973 /* alias for digits of the modulus */
2976 /* alias for the digits of x [the input] */
2979 /* set the carry to zero */
2982 /* Multiply and add in place */
2983 for (iy = 0; iy < n->used; iy++) {
2984 /* compute product and sum */
2985 r = ((mp_word)mu) * ((mp_word)*tmpn++) +
2986 ((mp_word) u) + ((mp_word) * tmpx);
2989 u = (mp_digit)(r >> ((mp_word) DIGIT_BIT));
2992 *tmpx++ = (mp_digit)(r & ((mp_word) MP_MASK));
2994 /* At this point the ix'th digit of x should be zero */
2997 /* propagate carries upwards as required*/
3000 u = *tmpx >> DIGIT_BIT;
3006 /* at this point the n.used'th least
3007 * significant digits of x are all zero
3008 * which means we can shift x to the
3009 * right by n.used digits and the
3010 * residue is unchanged.
3013 /* x = x/b**n.used */
3015 mp_rshd (x, n->used);
3017 /* if x >= n then x = x - n */
3018 if (mp_cmp_mag (x, n) != MP_LT) {
3019 return s_mp_sub (x, n, x);
3025 /* setups the montgomery reduction stuff */
3027 mp_montgomery_setup (const mp_int * n, mp_digit * rho)
3031 /* fast inversion mod 2**k
3033 * Based on the fact that
3035 * XA = 1 (mod 2**n) => (X(2-XA)) A = 1 (mod 2**2n)
3036 * => 2*X*A - X*X*A*A = 1
3037 * => 2*(1) - (1) = 1
3045 x = (((b + 2) & 4) << 1) + b; /* here x*a==1 mod 2**4 */
3046 x *= 2 - b * x; /* here x*a==1 mod 2**8 */
3047 x *= 2 - b * x; /* here x*a==1 mod 2**16 */
3048 x *= 2 - b * x; /* here x*a==1 mod 2**32 */
3050 /* rho = -1/m mod b */
3051 *rho = (((mp_word)1 << ((mp_word) DIGIT_BIT)) - x) & MP_MASK;
3056 /* high level multiplication (handles sign) */
3057 int mp_mul (const mp_int * a, const mp_int * b, mp_int * c)
3060 neg = (a->sign == b->sign) ? MP_ZPOS : MP_NEG;
3062 /* use Karatsuba? */
3063 if (MIN (a->used, b->used) >= KARATSUBA_MUL_CUTOFF) {
3064 res = mp_karatsuba_mul (a, b, c);
3067 /* can we use the fast multiplier?
3069 * The fast multiplier can be used if the output will
3070 * have less than MP_WARRAY digits and the number of
3071 * digits won't affect carry propagation
3073 int digs = a->used + b->used + 1;
3075 if ((digs < MP_WARRAY) &&
3076 MIN(a->used, b->used) <=
3077 (1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) {
3078 res = fast_s_mp_mul_digs (a, b, c, digs);
3080 res = s_mp_mul (a, b, c); /* uses s_mp_mul_digs */
3082 c->sign = (c->used > 0) ? neg : MP_ZPOS;
3086 /* d = a * b (mod c) */
3088 mp_mulmod (const mp_int * a, const mp_int * b, mp_int * c, mp_int * d)
3093 if ((res = mp_init (&t)) != MP_OKAY) {
3097 if ((res = mp_mul (a, b, &t)) != MP_OKAY) {
3101 res = mp_mod (&t, c, d);
3106 /* table of first PRIME_SIZE primes */
3107 static const mp_digit __prime_tab[] = {
3108 0x0002, 0x0003, 0x0005, 0x0007, 0x000B, 0x000D, 0x0011, 0x0013,
3109 0x0017, 0x001D, 0x001F, 0x0025, 0x0029, 0x002B, 0x002F, 0x0035,
3110 0x003B, 0x003D, 0x0043, 0x0047, 0x0049, 0x004F, 0x0053, 0x0059,
3111 0x0061, 0x0065, 0x0067, 0x006B, 0x006D, 0x0071, 0x007F, 0x0083,
3112 0x0089, 0x008B, 0x0095, 0x0097, 0x009D, 0x00A3, 0x00A7, 0x00AD,
3113 0x00B3, 0x00B5, 0x00BF, 0x00C1, 0x00C5, 0x00C7, 0x00D3, 0x00DF,
3114 0x00E3, 0x00E5, 0x00E9, 0x00EF, 0x00F1, 0x00FB, 0x0101, 0x0107,
3115 0x010D, 0x010F, 0x0115, 0x0119, 0x011B, 0x0125, 0x0133, 0x0137,
3117 0x0139, 0x013D, 0x014B, 0x0151, 0x015B, 0x015D, 0x0161, 0x0167,
3118 0x016F, 0x0175, 0x017B, 0x017F, 0x0185, 0x018D, 0x0191, 0x0199,
3119 0x01A3, 0x01A5, 0x01AF, 0x01B1, 0x01B7, 0x01BB, 0x01C1, 0x01C9,
3120 0x01CD, 0x01CF, 0x01D3, 0x01DF, 0x01E7, 0x01EB, 0x01F3, 0x01F7,
3121 0x01FD, 0x0209, 0x020B, 0x021D, 0x0223, 0x022D, 0x0233, 0x0239,
3122 0x023B, 0x0241, 0x024B, 0x0251, 0x0257, 0x0259, 0x025F, 0x0265,
3123 0x0269, 0x026B, 0x0277, 0x0281, 0x0283, 0x0287, 0x028D, 0x0293,
3124 0x0295, 0x02A1, 0x02A5, 0x02AB, 0x02B3, 0x02BD, 0x02C5, 0x02CF,
3126 0x02D7, 0x02DD, 0x02E3, 0x02E7, 0x02EF, 0x02F5, 0x02F9, 0x0301,
3127 0x0305, 0x0313, 0x031D, 0x0329, 0x032B, 0x0335, 0x0337, 0x033B,
3128 0x033D, 0x0347, 0x0355, 0x0359, 0x035B, 0x035F, 0x036D, 0x0371,
3129 0x0373, 0x0377, 0x038B, 0x038F, 0x0397, 0x03A1, 0x03A9, 0x03AD,
3130 0x03B3, 0x03B9, 0x03C7, 0x03CB, 0x03D1, 0x03D7, 0x03DF, 0x03E5,
3131 0x03F1, 0x03F5, 0x03FB, 0x03FD, 0x0407, 0x0409, 0x040F, 0x0419,
3132 0x041B, 0x0425, 0x0427, 0x042D, 0x043F, 0x0443, 0x0445, 0x0449,
3133 0x044F, 0x0455, 0x045D, 0x0463, 0x0469, 0x047F, 0x0481, 0x048B,
3135 0x0493, 0x049D, 0x04A3, 0x04A9, 0x04B1, 0x04BD, 0x04C1, 0x04C7,
3136 0x04CD, 0x04CF, 0x04D5, 0x04E1, 0x04EB, 0x04FD, 0x04FF, 0x0503,
3137 0x0509, 0x050B, 0x0511, 0x0515, 0x0517, 0x051B, 0x0527, 0x0529,
3138 0x052F, 0x0551, 0x0557, 0x055D, 0x0565, 0x0577, 0x0581, 0x058F,
3139 0x0593, 0x0595, 0x0599, 0x059F, 0x05A7, 0x05AB, 0x05AD, 0x05B3,
3140 0x05BF, 0x05C9, 0x05CB, 0x05CF, 0x05D1, 0x05D5, 0x05DB, 0x05E7,
3141 0x05F3, 0x05FB, 0x0607, 0x060D, 0x0611, 0x0617, 0x061F, 0x0623,
3142 0x062B, 0x062F, 0x063D, 0x0641, 0x0647, 0x0649, 0x064D, 0x0653
3145 /* determines if an integers is divisible by one
3146 * of the first PRIME_SIZE primes or not
3148 * sets result to 0 if not, 1 if yes
3150 static int mp_prime_is_divisible (const mp_int * a, int *result)
3155 /* default to not */
3158 for (ix = 0; ix < PRIME_SIZE; ix++) {
3159 /* what is a mod __prime_tab[ix] */
3160 if ((err = mp_mod_d (a, __prime_tab[ix], &res)) != MP_OKAY) {
3164 /* is the residue zero? */
3174 /* Miller-Rabin test of "a" to the base of "b" as described in
3175 * HAC pp. 139 Algorithm 4.24
3177 * Sets result to 0 if definitely composite or 1 if probably prime.
3178 * Randomly the chance of error is no more than 1/4 and often
3181 static int mp_prime_miller_rabin (mp_int * a, const mp_int * b, int *result)
3190 if (mp_cmp_d(b, 1) != MP_GT) {
3194 /* get n1 = a - 1 */
3195 if ((err = mp_init_copy (&n1, a)) != MP_OKAY) {
3198 if ((err = mp_sub_d (&n1, 1, &n1)) != MP_OKAY) {
3202 /* set 2**s * r = n1 */
3203 if ((err = mp_init_copy (&r, &n1)) != MP_OKAY) {
3207 /* count the number of least significant bits
3212 /* now divide n - 1 by 2**s */
3213 if ((err = mp_div_2d (&r, s, &r, NULL)) != MP_OKAY) {
3217 /* compute y = b**r mod a */
3218 if ((err = mp_init (&y)) != MP_OKAY) {
3221 if ((err = mp_exptmod (b, &r, a, &y)) != MP_OKAY) {
3225 /* if y != 1 and y != n1 do */
3226 if (mp_cmp_d (&y, 1) != MP_EQ && mp_cmp (&y, &n1) != MP_EQ) {
3228 /* while j <= s-1 and y != n1 */
3229 while ((j <= (s - 1)) && mp_cmp (&y, &n1) != MP_EQ) {
3230 if ((err = mp_sqrmod (&y, a, &y)) != MP_OKAY) {
3234 /* if y == 1 then composite */
3235 if (mp_cmp_d (&y, 1) == MP_EQ) {
3242 /* if y != n1 then composite */
3243 if (mp_cmp (&y, &n1) != MP_EQ) {
3248 /* probably prime now */
3252 __N1:mp_clear (&n1);
3256 /* performs a variable number of rounds of Miller-Rabin
3258 * Probability of error after t rounds is no more than
3261 * Sets result to 1 if probably prime, 0 otherwise
3263 static int mp_prime_is_prime (mp_int * a, int t, int *result)
3271 /* valid value of t? */
3272 if (t <= 0 || t > PRIME_SIZE) {
3276 /* is the input equal to one of the primes in the table? */
3277 for (ix = 0; ix < PRIME_SIZE; ix++) {
3278 if (mp_cmp_d(a, __prime_tab[ix]) == MP_EQ) {
3284 /* first perform trial division */
3285 if ((err = mp_prime_is_divisible (a, &res)) != MP_OKAY) {
3289 /* return if it was trivially divisible */
3290 if (res == MP_YES) {
3294 /* now perform the miller-rabin rounds */
3295 if ((err = mp_init (&b)) != MP_OKAY) {
3299 for (ix = 0; ix < t; ix++) {
3301 mp_set (&b, __prime_tab[ix]);
3303 if ((err = mp_prime_miller_rabin (a, &b, &res)) != MP_OKAY) {
3312 /* passed the test */
3318 static const struct {
3331 /* returns # of RM trials required for a given bit size */
3332 int mp_prime_rabin_miller_trials(int size)
3336 for (x = 0; x < (int)(sizeof(sizes)/(sizeof(sizes[0]))); x++) {
3337 if (sizes[x].k == size) {
3339 } else if (sizes[x].k > size) {
3340 return (x == 0) ? sizes[0].t : sizes[x - 1].t;
3343 return sizes[x-1].t + 1;
3346 /* makes a truly random prime of a given size (bits),
3348 * Flags are as follows:
3350 * LTM_PRIME_BBS - make prime congruent to 3 mod 4
3351 * LTM_PRIME_SAFE - make sure (p-1)/2 is prime as well (implies LTM_PRIME_BBS)
3352 * LTM_PRIME_2MSB_OFF - make the 2nd highest bit zero
3353 * LTM_PRIME_2MSB_ON - make the 2nd highest bit one
3355 * You have to supply a callback which fills in a buffer with random bytes. "dat" is a parameter you can
3356 * have passed to the callback (e.g. a state or something). This function doesn't use "dat" itself
3361 /* This is possibly the mother of all prime generation functions, muahahahahaha! */
3362 int mp_prime_random_ex(mp_int *a, int t, int size, int flags, ltm_prime_callback cb, void *dat)
3364 unsigned char *tmp, maskAND, maskOR_msb, maskOR_lsb;
3365 int res, err, bsize, maskOR_msb_offset;
3367 /* sanity check the input */
3368 if (size <= 1 || t <= 0) {
3372 /* LTM_PRIME_SAFE implies LTM_PRIME_BBS */
3373 if (flags & LTM_PRIME_SAFE) {
3374 flags |= LTM_PRIME_BBS;
3377 /* calc the byte size */
3378 bsize = (size>>3)+((size&7)?1:0);
3380 /* we need a buffer of bsize bytes */
3381 tmp = malloc(bsize);
3386 /* calc the maskAND value for the MSbyte*/
3387 maskAND = ((size&7) == 0) ? 0xFF : (0xFF >> (8 - (size & 7)));
3389 /* calc the maskOR_msb */
3391 maskOR_msb_offset = ((size & 7) == 1) ? 1 : 0;
3392 if (flags & LTM_PRIME_2MSB_ON) {
3393 maskOR_msb |= 1 << ((size - 2) & 7);
3394 } else if (flags & LTM_PRIME_2MSB_OFF) {
3395 maskAND &= ~(1 << ((size - 2) & 7));
3398 /* get the maskOR_lsb */
3400 if (flags & LTM_PRIME_BBS) {
3405 /* read the bytes */
3406 if (cb(tmp, bsize, dat) != bsize) {
3411 /* work over the MSbyte */
3413 tmp[0] |= 1 << ((size - 1) & 7);
3415 /* mix in the maskORs */
3416 tmp[maskOR_msb_offset] |= maskOR_msb;
3417 tmp[bsize-1] |= maskOR_lsb;
3420 if ((err = mp_read_unsigned_bin(a, tmp, bsize)) != MP_OKAY) { goto error; }
3423 if ((err = mp_prime_is_prime(a, t, &res)) != MP_OKAY) { goto error; }
3428 if (flags & LTM_PRIME_SAFE) {
3429 /* see if (a-1)/2 is prime */
3430 if ((err = mp_sub_d(a, 1, a)) != MP_OKAY) { goto error; }
3431 if ((err = mp_div_2(a, a)) != MP_OKAY) { goto error; }
3434 if ((err = mp_prime_is_prime(a, t, &res)) != MP_OKAY) { goto error; }
3436 } while (res == MP_NO);
3438 if (flags & LTM_PRIME_SAFE) {
3439 /* restore a to the original value */
3440 if ((err = mp_mul_2(a, a)) != MP_OKAY) { goto error; }
3441 if ((err = mp_add_d(a, 1, a)) != MP_OKAY) { goto error; }
3450 /* reads an unsigned char array, assumes the msb is stored first [big endian] */
3452 mp_read_unsigned_bin (mp_int * a, const unsigned char *b, int c)
3456 /* make sure there are at least two digits */
3458 if ((res = mp_grow(a, 2)) != MP_OKAY) {
3466 /* read the bytes in */
3468 if ((res = mp_mul_2d (a, 8, a)) != MP_OKAY) {
3479 /* reduces x mod m, assumes 0 < x < m**2, mu is
3480 * precomputed via mp_reduce_setup.
3481 * From HAC pp.604 Algorithm 14.42
3484 mp_reduce (mp_int * x, const mp_int * m, const mp_int * mu)
3487 int res, um = m->used;
3490 if ((res = mp_init_copy (&q, x)) != MP_OKAY) {
3494 /* q1 = x / b**(k-1) */
3495 mp_rshd (&q, um - 1);
3497 /* according to HAC this optimization is ok */
3498 if (((unsigned long) um) > (((mp_digit)1) << (DIGIT_BIT - 1))) {
3499 if ((res = mp_mul (&q, mu, &q)) != MP_OKAY) {
3503 if ((res = s_mp_mul_high_digs (&q, mu, &q, um - 1)) != MP_OKAY) {
3508 /* q3 = q2 / b**(k+1) */
3509 mp_rshd (&q, um + 1);
3511 /* x = x mod b**(k+1), quick (no division) */
3512 if ((res = mp_mod_2d (x, DIGIT_BIT * (um + 1), x)) != MP_OKAY) {
3516 /* q = q * m mod b**(k+1), quick (no division) */
3517 if ((res = s_mp_mul_digs (&q, m, &q, um + 1)) != MP_OKAY) {
3522 if ((res = mp_sub (x, &q, x)) != MP_OKAY) {
3526 /* If x < 0, add b**(k+1) to it */
3527 if (mp_cmp_d (x, 0) == MP_LT) {
3529 if ((res = mp_lshd (&q, um + 1)) != MP_OKAY)
3531 if ((res = mp_add (x, &q, x)) != MP_OKAY)
3535 /* Back off if it's too big */
3536 while (mp_cmp (x, m) != MP_LT) {
3537 if ((res = s_mp_sub (x, m, x)) != MP_OKAY) {
3548 /* reduces a modulo n where n is of the form 2**p - d */
3550 mp_reduce_2k(mp_int *a, const mp_int *n, mp_digit d)
3555 if ((res = mp_init(&q)) != MP_OKAY) {
3559 p = mp_count_bits(n);
3561 /* q = a/2**p, a = a mod 2**p */
3562 if ((res = mp_div_2d(a, p, &q, a)) != MP_OKAY) {
3568 if ((res = mp_mul_d(&q, d, &q)) != MP_OKAY) {
3574 if ((res = s_mp_add(a, &q, a)) != MP_OKAY) {
3578 if (mp_cmp_mag(a, n) != MP_LT) {
3588 /* determines the setup value */
3590 mp_reduce_2k_setup(const mp_int *a, mp_digit *d)
3595 if ((res = mp_init(&tmp)) != MP_OKAY) {
3599 p = mp_count_bits(a);
3600 if ((res = mp_2expt(&tmp, p)) != MP_OKAY) {
3605 if ((res = s_mp_sub(&tmp, a, &tmp)) != MP_OKAY) {
3615 /* pre-calculate the value required for Barrett reduction
3616 * For a given modulus "b" it calulates the value required in "a"
3618 int mp_reduce_setup (mp_int * a, const mp_int * b)
3622 if ((res = mp_2expt (a, b->used * 2 * DIGIT_BIT)) != MP_OKAY) {
3625 return mp_div (a, b, a, NULL);
3628 /* set to a digit */
3629 void mp_set (mp_int * a, mp_digit b)
3632 a->dp[0] = b & MP_MASK;
3633 a->used = (a->dp[0] != 0) ? 1 : 0;
3636 /* set a 32-bit const */
3637 int mp_set_int (mp_int * a, unsigned long b)
3643 /* set four bits at a time */
3644 for (x = 0; x < 8; x++) {
3645 /* shift the number up four bits */
3646 if ((res = mp_mul_2d (a, 4, a)) != MP_OKAY) {
3650 /* OR in the top four bits of the source */
3651 a->dp[0] |= (b >> 28) & 15;
3653 /* shift the source up to the next four bits */
3656 /* ensure that digits are not clamped off */
3663 /* shrink a bignum */
3664 int mp_shrink (mp_int * a)
3667 if (a->alloc != a->used && a->used > 0) {
3668 if ((tmp = realloc (a->dp, sizeof (mp_digit) * a->used)) == NULL) {
3677 /* get the size for an signed equivalent */
3678 int mp_signed_bin_size (const mp_int * a)
3680 return 1 + mp_unsigned_bin_size (a);
3683 /* computes b = a*a */
3685 mp_sqr (const mp_int * a, mp_int * b)
3689 if (a->used >= KARATSUBA_SQR_CUTOFF) {
3690 res = mp_karatsuba_sqr (a, b);
3693 /* can we use the fast comba multiplier? */
3694 if ((a->used * 2 + 1) < MP_WARRAY &&
3696 (1 << (sizeof(mp_word) * CHAR_BIT - 2*DIGIT_BIT - 1))) {
3697 res = fast_s_mp_sqr (a, b);
3699 res = s_mp_sqr (a, b);
3705 /* c = a * a (mod b) */
3707 mp_sqrmod (const mp_int * a, mp_int * b, mp_int * c)
3712 if ((res = mp_init (&t)) != MP_OKAY) {
3716 if ((res = mp_sqr (a, &t)) != MP_OKAY) {
3720 res = mp_mod (&t, b, c);
3725 /* high level subtraction (handles signs) */
3727 mp_sub (mp_int * a, mp_int * b, mp_int * c)
3735 /* subtract a negative from a positive, OR */
3736 /* subtract a positive from a negative. */
3737 /* In either case, ADD their magnitudes, */
3738 /* and use the sign of the first number. */
3740 res = s_mp_add (a, b, c);
3742 /* subtract a positive from a positive, OR */
3743 /* subtract a negative from a negative. */
3744 /* First, take the difference between their */
3745 /* magnitudes, then... */
3746 if (mp_cmp_mag (a, b) != MP_LT) {
3747 /* Copy the sign from the first */
3749 /* The first has a larger or equal magnitude */
3750 res = s_mp_sub (a, b, c);
3752 /* The result has the *opposite* sign from */
3753 /* the first number. */
3754 c->sign = (sa == MP_ZPOS) ? MP_NEG : MP_ZPOS;
3755 /* The second has a larger magnitude */
3756 res = s_mp_sub (b, a, c);
3762 /* single digit subtraction */
3764 mp_sub_d (mp_int * a, mp_digit b, mp_int * c)
3766 mp_digit *tmpa, *tmpc, mu;
3767 int res, ix, oldused;
3769 /* grow c as required */
3770 if (c->alloc < a->used + 1) {
3771 if ((res = mp_grow(c, a->used + 1)) != MP_OKAY) {
3776 /* if a is negative just do an unsigned
3777 * addition [with fudged signs]
3779 if (a->sign == MP_NEG) {
3781 res = mp_add_d(a, b, c);
3782 a->sign = c->sign = MP_NEG;
3791 /* if a <= b simply fix the single digit */
3792 if ((a->used == 1 && a->dp[0] <= b) || a->used == 0) {
3794 *tmpc++ = b - *tmpa;
3800 /* negative/1digit */
3808 /* subtract first digit */
3809 *tmpc = *tmpa++ - b;
3810 mu = *tmpc >> (sizeof(mp_digit) * CHAR_BIT - 1);
3813 /* handle rest of the digits */
3814 for (ix = 1; ix < a->used; ix++) {
3815 *tmpc = *tmpa++ - mu;
3816 mu = *tmpc >> (sizeof(mp_digit) * CHAR_BIT - 1);
3821 /* zero excess digits */
3822 while (ix++ < oldused) {
3829 /* store in unsigned [big endian] format */
3831 mp_to_unsigned_bin (const mp_int * a, unsigned char *b)
3836 if ((res = mp_init_copy (&t, a)) != MP_OKAY) {
3841 while (mp_iszero (&t) == 0) {
3842 b[x++] = (unsigned char) (t.dp[0] & 255);
3843 if ((res = mp_div_2d (&t, 8, &t, NULL)) != MP_OKAY) {
3853 /* get the size for an unsigned equivalent */
3855 mp_unsigned_bin_size (const mp_int * a)
3857 int size = mp_count_bits (a);
3858 return (size / 8 + ((size & 7) != 0 ? 1 : 0));
3861 /* reverse an array, used for radix code */
3863 bn_reverse (unsigned char *s, int len)
3879 /* low level addition, based on HAC pp.594, Algorithm 14.7 */
3881 s_mp_add (mp_int * a, mp_int * b, mp_int * c)
3884 int olduse, res, min, max;
3886 /* find sizes, we let |a| <= |b| which means we have to sort
3887 * them. "x" will point to the input with the most digits
3889 if (a->used > b->used) {
3900 if (c->alloc < max + 1) {
3901 if ((res = mp_grow (c, max + 1)) != MP_OKAY) {
3906 /* get old used digit count and set new one */
3911 register mp_digit u, *tmpa, *tmpb, *tmpc;
3914 /* alias for digit pointers */
3925 /* zero the carry */
3927 for (i = 0; i < min; i++) {
3928 /* Compute the sum at one digit, T[i] = A[i] + B[i] + U */
3929 *tmpc = *tmpa++ + *tmpb++ + u;
3931 /* U = carry bit of T[i] */
3932 u = *tmpc >> ((mp_digit)DIGIT_BIT);
3934 /* take away carry bit from T[i] */
3938 /* now copy higher words if any, that is in A+B
3939 * if A or B has more digits add those in
3942 for (; i < max; i++) {
3943 /* T[i] = X[i] + U */
3944 *tmpc = x->dp[i] + u;
3946 /* U = carry bit of T[i] */
3947 u = *tmpc >> ((mp_digit)DIGIT_BIT);
3949 /* take away carry bit from T[i] */
3957 /* clear digits above oldused */
3958 for (i = c->used; i < olduse; i++) {
3967 static int s_mp_exptmod (const mp_int * G, const mp_int * X, mp_int * P, mp_int * Y)
3969 mp_int M[256], res, mu;
3971 int err, bitbuf, bitcpy, bitcnt, mode, digidx, x, y, winsize;
3973 /* find window size */
3974 x = mp_count_bits (X);
3977 } else if (x <= 36) {
3979 } else if (x <= 140) {
3981 } else if (x <= 450) {
3983 } else if (x <= 1303) {
3985 } else if (x <= 3529) {
3992 /* init first cell */
3993 if ((err = mp_init(&M[1])) != MP_OKAY) {
3997 /* now init the second half of the array */
3998 for (x = 1<<(winsize-1); x < (1 << winsize); x++) {
3999 if ((err = mp_init(&M[x])) != MP_OKAY) {
4000 for (y = 1<<(winsize-1); y < x; y++) {
4008 /* create mu, used for Barrett reduction */
4009 if ((err = mp_init (&mu)) != MP_OKAY) {
4012 if ((err = mp_reduce_setup (&mu, P)) != MP_OKAY) {
4018 * The M table contains powers of the base,
4019 * e.g. M[x] = G**x mod P
4021 * The first half of the table is not
4022 * computed though accept for M[0] and M[1]
4024 if ((err = mp_mod (G, P, &M[1])) != MP_OKAY) {
4028 /* compute the value at M[1<<(winsize-1)] by squaring
4029 * M[1] (winsize-1) times
4031 if ((err = mp_copy (&M[1], &M[1 << (winsize - 1)])) != MP_OKAY) {
4035 for (x = 0; x < (winsize - 1); x++) {
4036 if ((err = mp_sqr (&M[1 << (winsize - 1)],
4037 &M[1 << (winsize - 1)])) != MP_OKAY) {
4040 if ((err = mp_reduce (&M[1 << (winsize - 1)], P, &mu)) != MP_OKAY) {
4045 /* create upper table, that is M[x] = M[x-1] * M[1] (mod P)
4046 * for x = (2**(winsize - 1) + 1) to (2**winsize - 1)
4048 for (x = (1 << (winsize - 1)) + 1; x < (1 << winsize); x++) {
4049 if ((err = mp_mul (&M[x - 1], &M[1], &M[x])) != MP_OKAY) {
4052 if ((err = mp_reduce (&M[x], P, &mu)) != MP_OKAY) {
4058 if ((err = mp_init (&res)) != MP_OKAY) {
4063 /* set initial mode and bit cnt */
4067 digidx = X->used - 1;
4072 /* grab next digit as required */
4073 if (--bitcnt == 0) {
4074 /* if digidx == -1 we are out of digits */
4078 /* read next digit and reset the bitcnt */
4079 buf = X->dp[digidx--];
4083 /* grab the next msb from the exponent */
4084 y = (buf >> (mp_digit)(DIGIT_BIT - 1)) & 1;
4085 buf <<= (mp_digit)1;
4087 /* if the bit is zero and mode == 0 then we ignore it
4088 * These represent the leading zero bits before the first 1 bit
4089 * in the exponent. Technically this opt is not required but it
4090 * does lower the # of trivial squaring/reductions used
4092 if (mode == 0 && y == 0) {
4096 /* if the bit is zero and mode == 1 then we square */
4097 if (mode == 1 && y == 0) {
4098 if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
4101 if ((err = mp_reduce (&res, P, &mu)) != MP_OKAY) {
4107 /* else we add it to the window */
4108 bitbuf |= (y << (winsize - ++bitcpy));
4111 if (bitcpy == winsize) {
4112 /* ok window is filled so square as required and multiply */
4114 for (x = 0; x < winsize; x++) {
4115 if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
4118 if ((err = mp_reduce (&res, P, &mu)) != MP_OKAY) {
4124 if ((err = mp_mul (&res, &M[bitbuf], &res)) != MP_OKAY) {
4127 if ((err = mp_reduce (&res, P, &mu)) != MP_OKAY) {
4131 /* empty window and reset */
4138 /* if bits remain then square/multiply */
4139 if (mode == 2 && bitcpy > 0) {
4140 /* square then multiply if the bit is set */
4141 for (x = 0; x < bitcpy; x++) {
4142 if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
4145 if ((err = mp_reduce (&res, P, &mu)) != MP_OKAY) {
4150 if ((bitbuf & (1 << winsize)) != 0) {
4152 if ((err = mp_mul (&res, &M[1], &res)) != MP_OKAY) {
4155 if ((err = mp_reduce (&res, P, &mu)) != MP_OKAY) {
4164 __RES:mp_clear (&res);
4165 __MU:mp_clear (&mu);
4168 for (x = 1<<(winsize-1); x < (1 << winsize); x++) {
4174 /* multiplies |a| * |b| and only computes up to digs digits of result
4175 * HAC pp. 595, Algorithm 14.12 Modified so you can control how
4176 * many digits of output are created.
4179 s_mp_mul_digs (const mp_int * a, const mp_int * b, mp_int * c, int digs)
4182 int res, pa, pb, ix, iy;
4185 mp_digit tmpx, *tmpt, *tmpy;
4187 /* can we use the fast multiplier? */
4188 if (((digs) < MP_WARRAY) &&
4189 MIN (a->used, b->used) <
4190 (1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) {
4191 return fast_s_mp_mul_digs (a, b, c, digs);
4194 if ((res = mp_init_size (&t, digs)) != MP_OKAY) {
4199 /* compute the digits of the product directly */
4201 for (ix = 0; ix < pa; ix++) {
4202 /* set the carry to zero */
4205 /* limit ourselves to making digs digits of output */
4206 pb = MIN (b->used, digs - ix);
4208 /* setup some aliases */
4209 /* copy of the digit from a used within the nested loop */
4212 /* an alias for the destination shifted ix places */
4215 /* an alias for the digits of b */
4218 /* compute the columns of the output and propagate the carry */
4219 for (iy = 0; iy < pb; iy++) {
4220 /* compute the column as a mp_word */
4221 r = ((mp_word)*tmpt) +
4222 ((mp_word)tmpx) * ((mp_word)*tmpy++) +
4225 /* the new column is the lower part of the result */
4226 *tmpt++ = (mp_digit) (r & ((mp_word) MP_MASK));
4228 /* get the carry word from the result */
4229 u = (mp_digit) (r >> ((mp_word) DIGIT_BIT));
4231 /* set carry if it is placed below digs */
4232 if (ix + iy < digs) {
4244 /* multiplies |a| * |b| and does not compute the lower digs digits
4245 * [meant to get the higher part of the product]
4248 s_mp_mul_high_digs (const mp_int * a, const mp_int * b, mp_int * c, int digs)
4251 int res, pa, pb, ix, iy;
4254 mp_digit tmpx, *tmpt, *tmpy;
4256 /* can we use the fast multiplier? */
4257 if (((a->used + b->used + 1) < MP_WARRAY)
4258 && MIN (a->used, b->used) < (1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) {
4259 return fast_s_mp_mul_high_digs (a, b, c, digs);
4262 if ((res = mp_init_size (&t, a->used + b->used + 1)) != MP_OKAY) {
4265 t.used = a->used + b->used + 1;
4269 for (ix = 0; ix < pa; ix++) {
4270 /* clear the carry */
4273 /* left hand side of A[ix] * B[iy] */
4276 /* alias to the address of where the digits will be stored */
4277 tmpt = &(t.dp[digs]);
4279 /* alias for where to read the right hand side from */
4280 tmpy = b->dp + (digs - ix);
4282 for (iy = digs - ix; iy < pb; iy++) {
4283 /* calculate the double precision result */
4284 r = ((mp_word)*tmpt) +
4285 ((mp_word)tmpx) * ((mp_word)*tmpy++) +
4288 /* get the lower part */
4289 *tmpt++ = (mp_digit) (r & ((mp_word) MP_MASK));
4291 /* carry the carry */
4292 u = (mp_digit) (r >> ((mp_word) DIGIT_BIT));
4302 /* low level squaring, b = a*a, HAC pp.596-597, Algorithm 14.16 */
4304 s_mp_sqr (const mp_int * a, mp_int * b)
4307 int res, ix, iy, pa;
4309 mp_digit u, tmpx, *tmpt;
4312 if ((res = mp_init_size (&t, 2*pa + 1)) != MP_OKAY) {
4316 /* default used is maximum possible size */
4319 for (ix = 0; ix < pa; ix++) {
4320 /* first calculate the digit at 2*ix */
4321 /* calculate double precision result */
4322 r = ((mp_word) t.dp[2*ix]) +
4323 ((mp_word)a->dp[ix])*((mp_word)a->dp[ix]);
4325 /* store lower part in result */
4326 t.dp[ix+ix] = (mp_digit) (r & ((mp_word) MP_MASK));
4329 u = (mp_digit)(r >> ((mp_word) DIGIT_BIT));
4331 /* left hand side of A[ix] * A[iy] */
4334 /* alias for where to store the results */
4335 tmpt = t.dp + (2*ix + 1);
4337 for (iy = ix + 1; iy < pa; iy++) {
4338 /* first calculate the product */
4339 r = ((mp_word)tmpx) * ((mp_word)a->dp[iy]);
4341 /* now calculate the double precision result, note we use
4342 * addition instead of *2 since it's easier to optimize
4344 r = ((mp_word) *tmpt) + r + r + ((mp_word) u);
4346 /* store lower part */
4347 *tmpt++ = (mp_digit) (r & ((mp_word) MP_MASK));
4350 u = (mp_digit)(r >> ((mp_word) DIGIT_BIT));
4352 /* propagate upwards */
4354 r = ((mp_word) *tmpt) + ((mp_word) u);
4355 *tmpt++ = (mp_digit) (r & ((mp_word) MP_MASK));
4356 u = (mp_digit)(r >> ((mp_word) DIGIT_BIT));
4366 /* low level subtraction (assumes |a| > |b|), HAC pp.595 Algorithm 14.9 */
4368 s_mp_sub (const mp_int * a, const mp_int * b, mp_int * c)
4370 int olduse, res, min, max;
4377 if (c->alloc < max) {
4378 if ((res = mp_grow (c, max)) != MP_OKAY) {
4386 register mp_digit u, *tmpa, *tmpb, *tmpc;
4389 /* alias for digit pointers */
4394 /* set carry to zero */
4396 for (i = 0; i < min; i++) {
4397 /* T[i] = A[i] - B[i] - U */
4398 *tmpc = *tmpa++ - *tmpb++ - u;
4400 /* U = carry bit of T[i]
4401 * Note this saves performing an AND operation since
4402 * if a carry does occur it will propagate all the way to the
4403 * MSB. As a result a single shift is enough to get the carry
4405 u = *tmpc >> ((mp_digit)(CHAR_BIT * sizeof (mp_digit) - 1));
4407 /* Clear carry from T[i] */
4411 /* now copy higher words if any, e.g. if A has more digits than B */
4412 for (; i < max; i++) {
4413 /* T[i] = A[i] - U */
4414 *tmpc = *tmpa++ - u;
4416 /* U = carry bit of T[i] */
4417 u = *tmpc >> ((mp_digit)(CHAR_BIT * sizeof (mp_digit) - 1));
4419 /* Clear carry from T[i] */
4423 /* clear digits above used (since we may not have grown result above) */
4424 for (i = c->used; i < olduse; i++) {