libtommath.c

Go to the documentation of this file.
00001 
00017 #ifndef CHAR_BIT
00018 #define CHAR_BIT 8
00019 #endif
00020 
00021 #define BN_MP_INVMOD_C
00022 #define BN_S_MP_EXPTMOD_C /* Note: #undef in tommath_superclass.h; this would
00023                            * require BN_MP_EXPTMOD_FAST_C instead */
00024 #define BN_S_MP_MUL_DIGS_C
00025 #define BN_MP_INVMOD_SLOW_C
00026 #define BN_S_MP_SQR_C
00027 #define BN_S_MP_MUL_HIGH_DIGS_C /* Note: #undef in tommath_superclass.h; this
00028                                  * would require other than mp_reduce */
00029 
00030 #ifdef LTM_FAST
00031 
00032 /* Use faster div at the cost of about 1 kB */
00033 #define BN_MP_MUL_D_C
00034 
00035 /* Include faster exptmod (Montgomery) at the cost of about 2.5 kB in code */
00036 #define BN_MP_EXPTMOD_FAST_C
00037 #define BN_MP_MONTGOMERY_SETUP_C
00038 #define BN_FAST_MP_MONTGOMERY_REDUCE_C
00039 #define BN_MP_MONTGOMERY_CALC_NORMALIZATION_C
00040 #define BN_MP_MUL_2_C
00041 
00042 /* Include faster sqr at the cost of about 0.5 kB in code */
00043 #define BN_FAST_S_MP_SQR_C
00044 
00045 #else /* LTM_FAST */
00046 
00047 #define BN_MP_DIV_SMALL
00048 #define BN_MP_INIT_MULTI_C
00049 #define BN_MP_CLEAR_MULTI_C
00050 #define BN_MP_ABS_C
00051 #endif /* LTM_FAST */
00052 
00053 /* Current uses do not require support for negative exponent in exptmod, so we
00054  * can save about 1.5 kB in leaving out invmod. */
00055 #define LTM_NO_NEG_EXP
00056 
00057 /* from tommath.h */
00058 
00059 #ifndef MIN
00060    #define MIN(x,y) ((x)<(y)?(x):(y))
00061 #endif
00062 
00063 #ifndef MAX
00064    #define MAX(x,y) ((x)>(y)?(x):(y))
00065 #endif
00066 
00067 #define  OPT_CAST(x)
00068 
00069 typedef unsigned long mp_digit;
00070 typedef u64 mp_word;
00071 
00072 #define DIGIT_BIT          28
00073 #define MP_28BIT
00074 
00075 
00076 #define XMALLOC  os_malloc
00077 #define XFREE    os_free
00078 #define XREALLOC os_realloc
00079 
00080 
00081 #define MP_MASK          ((((mp_digit)1)<<((mp_digit)DIGIT_BIT))-((mp_digit)1))
00082 
00083 #define MP_LT        -1   /* less than */
00084 #define MP_EQ         0   /* equal to */
00085 #define MP_GT         1   /* greater than */
00086 
00087 #define MP_ZPOS       0   /* positive integer */
00088 #define MP_NEG        1   /* negative */
00089 
00090 #define MP_OKAY       0   /* ok result */
00091 #define MP_MEM        -2  /* out of mem */
00092 #define MP_VAL        -3  /* invalid input */
00093 
00094 #define MP_YES        1   /* yes response */
00095 #define MP_NO         0   /* no response */
00096 
00097 typedef int           mp_err;
00098 
00099 /* define this to use lower memory usage routines (exptmods mostly) */
00100 #define MP_LOW_MEM
00101 
00102 /* default precision */
00103 #ifndef MP_PREC
00104    #ifndef MP_LOW_MEM
00105       #define MP_PREC                 32     /* default digits of precision */
00106    #else
00107       #define MP_PREC                 8      /* default digits of precision */
00108    #endif   
00109 #endif
00110 
00111 /* size of comba arrays, should be at least 2 * 2**(BITS_PER_WORD - BITS_PER_DIGIT*2) */
00112 #define MP_WARRAY               (1 << (sizeof(mp_word) * CHAR_BIT - 2 * DIGIT_BIT + 1))
00113 
00114 /* the infamous mp_int structure */
00115 typedef struct  {
00116     int used, alloc, sign;
00117     mp_digit *dp;
00118 } mp_int;
00119 
00120 
00121 /* ---> Basic Manipulations <--- */
00122 #define mp_iszero(a) (((a)->used == 0) ? MP_YES : MP_NO)
00123 #define mp_iseven(a) (((a)->used > 0 && (((a)->dp[0] & 1) == 0)) ? MP_YES : MP_NO)
00124 #define mp_isodd(a)  (((a)->used > 0 && (((a)->dp[0] & 1) == 1)) ? MP_YES : MP_NO)
00125 
00126 
00127 /* prototypes for copied functions */
00128 #define s_mp_mul(a, b, c) s_mp_mul_digs(a, b, c, (a)->used + (b)->used + 1)
00129 static int s_mp_exptmod(mp_int * G, mp_int * X, mp_int * P, mp_int * Y, int redmode);
00130 static int s_mp_mul_digs (mp_int * a, mp_int * b, mp_int * c, int digs);
00131 static int s_mp_sqr(mp_int * a, mp_int * b);
00132 static int s_mp_mul_high_digs(mp_int * a, mp_int * b, mp_int * c, int digs);
00133 
00134 static int fast_s_mp_mul_digs (mp_int * a, mp_int * b, mp_int * c, int digs);
00135 
00136 #ifdef BN_MP_INIT_MULTI_C
00137 static int mp_init_multi(mp_int *mp, ...);
00138 #endif
00139 #ifdef BN_MP_CLEAR_MULTI_C
00140 static void mp_clear_multi(mp_int *mp, ...);
00141 #endif
00142 static int mp_lshd(mp_int * a, int b);
00143 static void mp_set(mp_int * a, mp_digit b);
00144 static void mp_clamp(mp_int * a);
00145 static void mp_exch(mp_int * a, mp_int * b);
00146 static void mp_rshd(mp_int * a, int b);
00147 static void mp_zero(mp_int * a);
00148 static int mp_mod_2d(mp_int * a, int b, mp_int * c);
00149 static int mp_div_2d(mp_int * a, int b, mp_int * c, mp_int * d);
00150 static int mp_init_copy(mp_int * a, mp_int * b);
00151 static int mp_mul_2d(mp_int * a, int b, mp_int * c);
00152 #ifndef LTM_NO_NEG_EXP
00153 static int mp_div_2(mp_int * a, mp_int * b);
00154 static int mp_invmod(mp_int * a, mp_int * b, mp_int * c);
00155 static int mp_invmod_slow(mp_int * a, mp_int * b, mp_int * c);
00156 #endif /* LTM_NO_NEG_EXP */
00157 static int mp_copy(mp_int * a, mp_int * b);
00158 static int mp_count_bits(mp_int * a);
00159 static int mp_div(mp_int * a, mp_int * b, mp_int * c, mp_int * d);
00160 static int mp_mod(mp_int * a, mp_int * b, mp_int * c);
00161 static int mp_grow(mp_int * a, int size);
00162 static int mp_cmp_mag(mp_int * a, mp_int * b);
00163 #ifdef BN_MP_ABS_C
00164 static int mp_abs(mp_int * a, mp_int * b);
00165 #endif
00166 static int mp_sqr(mp_int * a, mp_int * b);
00167 static int mp_reduce_2k_l(mp_int *a, mp_int *n, mp_int *d);
00168 static int mp_reduce_2k_setup_l(mp_int *a, mp_int *d);
00169 static int mp_2expt(mp_int * a, int b);
00170 static int mp_reduce_setup(mp_int * a, mp_int * b);
00171 static int mp_reduce(mp_int * x, mp_int * m, mp_int * mu);
00172 static int mp_init_size(mp_int * a, int size);
00173 #ifdef BN_MP_EXPTMOD_FAST_C
00174 static int mp_exptmod_fast (mp_int * G, mp_int * X, mp_int * P, mp_int * Y, int redmode);
00175 #endif /* BN_MP_EXPTMOD_FAST_C */
00176 #ifdef BN_FAST_S_MP_SQR_C
00177 static int fast_s_mp_sqr (mp_int * a, mp_int * b);
00178 #endif /* BN_FAST_S_MP_SQR_C */
00179 #ifdef BN_MP_MUL_D_C
00180 static int mp_mul_d (mp_int * a, mp_digit b, mp_int * c);
00181 #endif /* BN_MP_MUL_D_C */
00182 
00183 
00184 
00185 /* functions from bn_<func name>.c */
00186 
00187 
00188 /* reverse an array, used for radix code */
00189 static void bn_reverse (unsigned char *s, int len)
00190 {
00191   int     ix, iy;
00192   unsigned char t;
00193 
00194   ix = 0;
00195   iy = len - 1;
00196   while (ix < iy) {
00197     t     = s[ix];
00198     s[ix] = s[iy];
00199     s[iy] = t;
00200     ++ix;
00201     --iy;
00202   }
00203 }
00204 
00205 
00206 /* low level addition, based on HAC pp.594, Algorithm 14.7 */
00207 static int s_mp_add (mp_int * a, mp_int * b, mp_int * c)
00208 {
00209   mp_int *x;
00210   int     olduse, res, min, max;
00211 
00212   /* find sizes, we let |a| <= |b| which means we have to sort
00213    * them.  "x" will point to the input with the most digits
00214    */
00215   if (a->used > b->used) {
00216     min = b->used;
00217     max = a->used;
00218     x = a;
00219   } else {
00220     min = a->used;
00221     max = b->used;
00222     x = b;
00223   }
00224 
00225   /* init result */
00226   if (c->alloc < max + 1) {
00227     if ((res = mp_grow (c, max + 1)) != MP_OKAY) {
00228       return res;
00229     }
00230   }
00231 
00232   /* get old used digit count and set new one */
00233   olduse = c->used;
00234   c->used = max + 1;
00235 
00236   {
00237     register mp_digit u, *tmpa, *tmpb, *tmpc;
00238     register int i;
00239 
00240     /* alias for digit pointers */
00241 
00242     /* first input */
00243     tmpa = a->dp;
00244 
00245     /* second input */
00246     tmpb = b->dp;
00247 
00248     /* destination */
00249     tmpc = c->dp;
00250 
00251     /* zero the carry */
00252     u = 0;
00253     for (i = 0; i < min; i++) {
00254       /* Compute the sum at one digit, T[i] = A[i] + B[i] + U */
00255       *tmpc = *tmpa++ + *tmpb++ + u;
00256 
00257       /* U = carry bit of T[i] */
00258       u = *tmpc >> ((mp_digit)DIGIT_BIT);
00259 
00260       /* take away carry bit from T[i] */
00261       *tmpc++ &= MP_MASK;
00262     }
00263 
00264     /* now copy higher words if any, that is in A+B 
00265      * if A or B has more digits add those in 
00266      */
00267     if (min != max) {
00268       for (; i < max; i++) {
00269         /* T[i] = X[i] + U */
00270         *tmpc = x->dp[i] + u;
00271 
00272         /* U = carry bit of T[i] */
00273         u = *tmpc >> ((mp_digit)DIGIT_BIT);
00274 
00275         /* take away carry bit from T[i] */
00276         *tmpc++ &= MP_MASK;
00277       }
00278     }
00279 
00280     /* add carry */
00281     *tmpc++ = u;
00282 
00283     /* clear digits above oldused */
00284     for (i = c->used; i < olduse; i++) {
00285       *tmpc++ = 0;
00286     }
00287   }
00288 
00289   mp_clamp (c);
00290   return MP_OKAY;
00291 }
00292 
00293 
00294 /* low level subtraction (assumes |a| > |b|), HAC pp.595 Algorithm 14.9 */
00295 static int s_mp_sub (mp_int * a, mp_int * b, mp_int * c)
00296 {
00297   int     olduse, res, min, max;
00298 
00299   /* find sizes */
00300   min = b->used;
00301   max = a->used;
00302 
00303   /* init result */
00304   if (c->alloc < max) {
00305     if ((res = mp_grow (c, max)) != MP_OKAY) {
00306       return res;
00307     }
00308   }
00309   olduse = c->used;
00310   c->used = max;
00311 
00312   {
00313     register mp_digit u, *tmpa, *tmpb, *tmpc;
00314     register int i;
00315 
00316     /* alias for digit pointers */
00317     tmpa = a->dp;
00318     tmpb = b->dp;
00319     tmpc = c->dp;
00320 
00321     /* set carry to zero */
00322     u = 0;
00323     for (i = 0; i < min; i++) {
00324       /* T[i] = A[i] - B[i] - U */
00325       *tmpc = *tmpa++ - *tmpb++ - u;
00326 
00327       /* U = carry bit of T[i]
00328        * Note this saves performing an AND operation since
00329        * if a carry does occur it will propagate all the way to the
00330        * MSB.  As a result a single shift is enough to get the carry
00331        */
00332       u = *tmpc >> ((mp_digit)(CHAR_BIT * sizeof (mp_digit) - 1));
00333 
00334       /* Clear carry from T[i] */
00335       *tmpc++ &= MP_MASK;
00336     }
00337 
00338     /* now copy higher words if any, e.g. if A has more digits than B  */
00339     for (; i < max; i++) {
00340       /* T[i] = A[i] - U */
00341       *tmpc = *tmpa++ - u;
00342 
00343       /* U = carry bit of T[i] */
00344       u = *tmpc >> ((mp_digit)(CHAR_BIT * sizeof (mp_digit) - 1));
00345 
00346       /* Clear carry from T[i] */
00347       *tmpc++ &= MP_MASK;
00348     }
00349 
00350     /* clear digits above used (since we may not have grown result above) */
00351     for (i = c->used; i < olduse; i++) {
00352       *tmpc++ = 0;
00353     }
00354   }
00355 
00356   mp_clamp (c);
00357   return MP_OKAY;
00358 }
00359 
00360 
00361 /* init a new mp_int */
00362 static int mp_init (mp_int * a)
00363 {
00364   int i;
00365 
00366   /* allocate memory required and clear it */
00367   a->dp = OPT_CAST(mp_digit) XMALLOC (sizeof (mp_digit) * MP_PREC);
00368   if (a->dp == NULL) {
00369     return MP_MEM;
00370   }
00371 
00372   /* set the digits to zero */
00373   for (i = 0; i < MP_PREC; i++) {
00374       a->dp[i] = 0;
00375   }
00376 
00377   /* set the used to zero, allocated digits to the default precision
00378    * and sign to positive */
00379   a->used  = 0;
00380   a->alloc = MP_PREC;
00381   a->sign  = MP_ZPOS;
00382 
00383   return MP_OKAY;
00384 }
00385 
00386 
00387 /* clear one (frees)  */
00388 static void mp_clear (mp_int * a)
00389 {
00390   int i;
00391 
00392   /* only do anything if a hasn't been freed previously */
00393   if (a->dp != NULL) {
00394     /* first zero the digits */
00395     for (i = 0; i < a->used; i++) {
00396         a->dp[i] = 0;
00397     }
00398 
00399     /* free ram */
00400     XFREE(a->dp);
00401 
00402     /* reset members to make debugging easier */
00403     a->dp    = NULL;
00404     a->alloc = a->used = 0;
00405     a->sign  = MP_ZPOS;
00406   }
00407 }
00408 
00409 
00410 /* high level addition (handles signs) */
00411 static int mp_add (mp_int * a, mp_int * b, mp_int * c)
00412 {
00413   int     sa, sb, res;
00414 
00415   /* get sign of both inputs */
00416   sa = a->sign;
00417   sb = b->sign;
00418 
00419   /* handle two cases, not four */
00420   if (sa == sb) {
00421     /* both positive or both negative */
00422     /* add their magnitudes, copy the sign */
00423     c->sign = sa;
00424     res = s_mp_add (a, b, c);
00425   } else {
00426     /* one positive, the other negative */
00427     /* subtract the one with the greater magnitude from */
00428     /* the one of the lesser magnitude.  The result gets */
00429     /* the sign of the one with the greater magnitude. */
00430     if (mp_cmp_mag (a, b) == MP_LT) {
00431       c->sign = sb;
00432       res = s_mp_sub (b, a, c);
00433     } else {
00434       c->sign = sa;
00435       res = s_mp_sub (a, b, c);
00436     }
00437   }
00438   return res;
00439 }
00440 
00441 
00442 /* high level subtraction (handles signs) */
00443 static int mp_sub (mp_int * a, mp_int * b, mp_int * c)
00444 {
00445   int     sa, sb, res;
00446 
00447   sa = a->sign;
00448   sb = b->sign;
00449 
00450   if (sa != sb) {
00451     /* subtract a negative from a positive, OR */
00452     /* subtract a positive from a negative. */
00453     /* In either case, ADD their magnitudes, */
00454     /* and use the sign of the first number. */
00455     c->sign = sa;
00456     res = s_mp_add (a, b, c);
00457   } else {
00458     /* subtract a positive from a positive, OR */
00459     /* subtract a negative from a negative. */
00460     /* First, take the difference between their */
00461     /* magnitudes, then... */
00462     if (mp_cmp_mag (a, b) != MP_LT) {
00463       /* Copy the sign from the first */
00464       c->sign = sa;
00465       /* The first has a larger or equal magnitude */
00466       res = s_mp_sub (a, b, c);
00467     } else {
00468       /* The result has the *opposite* sign from */
00469       /* the first number. */
00470       c->sign = (sa == MP_ZPOS) ? MP_NEG : MP_ZPOS;
00471       /* The second has a larger magnitude */
00472       res = s_mp_sub (b, a, c);
00473     }
00474   }
00475   return res;
00476 }
00477 
00478 
00479 /* high level multiplication (handles sign) */
00480 static int mp_mul (mp_int * a, mp_int * b, mp_int * c)
00481 {
00482   int     res, neg;
00483   neg = (a->sign == b->sign) ? MP_ZPOS : MP_NEG;
00484 
00485   /* use Toom-Cook? */
00486 #ifdef BN_MP_TOOM_MUL_C
00487   if (MIN (a->used, b->used) >= TOOM_MUL_CUTOFF) {
00488     res = mp_toom_mul(a, b, c);
00489   } else 
00490 #endif
00491 #ifdef BN_MP_KARATSUBA_MUL_C
00492   /* use Karatsuba? */
00493   if (MIN (a->used, b->used) >= KARATSUBA_MUL_CUTOFF) {
00494     res = mp_karatsuba_mul (a, b, c);
00495   } else 
00496 #endif
00497   {
00498     /* can we use the fast multiplier?
00499      *
00500      * The fast multiplier can be used if the output will 
00501      * have less than MP_WARRAY digits and the number of 
00502      * digits won't affect carry propagation
00503      */
00504 #ifdef BN_FAST_S_MP_MUL_DIGS_C
00505     int     digs = a->used + b->used + 1;
00506 
00507     if ((digs < MP_WARRAY) &&
00508         MIN(a->used, b->used) <= 
00509         (1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) {
00510       res = fast_s_mp_mul_digs (a, b, c, digs);
00511     } else 
00512 #endif
00513 #ifdef BN_S_MP_MUL_DIGS_C
00514       res = s_mp_mul (a, b, c); /* uses s_mp_mul_digs */
00515 #else
00516 #error mp_mul could fail
00517       res = MP_VAL;
00518 #endif
00519 
00520   }
00521   c->sign = (c->used > 0) ? neg : MP_ZPOS;
00522   return res;
00523 }
00524 
00525 
00526 /* d = a * b (mod c) */
00527 static int mp_mulmod (mp_int * a, mp_int * b, mp_int * c, mp_int * d)
00528 {
00529   int     res;
00530   mp_int  t;
00531 
00532   if ((res = mp_init (&t)) != MP_OKAY) {
00533     return res;
00534   }
00535 
00536   if ((res = mp_mul (a, b, &t)) != MP_OKAY) {
00537     mp_clear (&t);
00538     return res;
00539   }
00540   res = mp_mod (&t, c, d);
00541   mp_clear (&t);
00542   return res;
00543 }
00544 
00545 
00546 /* c = a mod b, 0 <= c < b */
00547 static int mp_mod (mp_int * a, mp_int * b, mp_int * c)
00548 {
00549   mp_int  t;
00550   int     res;
00551 
00552   if ((res = mp_init (&t)) != MP_OKAY) {
00553     return res;
00554   }
00555 
00556   if ((res = mp_div (a, b, NULL, &t)) != MP_OKAY) {
00557     mp_clear (&t);
00558     return res;
00559   }
00560 
00561   if (t.sign != b->sign) {
00562     res = mp_add (b, &t, c);
00563   } else {
00564     res = MP_OKAY;
00565     mp_exch (&t, c);
00566   }
00567 
00568   mp_clear (&t);
00569   return res;
00570 }
00571 
00572 
00573 /* this is a shell function that calls either the normal or Montgomery
00574  * exptmod functions.  Originally the call to the montgomery code was
00575  * embedded in the normal function but that wasted alot of stack space
00576  * for nothing (since 99% of the time the Montgomery code would be called)
00577  */
00578 static int mp_exptmod (mp_int * G, mp_int * X, mp_int * P, mp_int * Y)
00579 {
00580   int dr;
00581 
00582   /* modulus P must be positive */
00583   if (P->sign == MP_NEG) {
00584      return MP_VAL;
00585   }
00586 
00587   /* if exponent X is negative we have to recurse */
00588   if (X->sign == MP_NEG) {
00589 #ifdef LTM_NO_NEG_EXP
00590         return MP_VAL;
00591 #else /* LTM_NO_NEG_EXP */
00592 #ifdef BN_MP_INVMOD_C
00593      mp_int tmpG, tmpX;
00594      int err;
00595 
00596      /* first compute 1/G mod P */
00597      if ((err = mp_init(&tmpG)) != MP_OKAY) {
00598         return err;
00599      }
00600      if ((err = mp_invmod(G, P, &tmpG)) != MP_OKAY) {
00601         mp_clear(&tmpG);
00602         return err;
00603      }
00604 
00605      /* now get |X| */
00606      if ((err = mp_init(&tmpX)) != MP_OKAY) {
00607         mp_clear(&tmpG);
00608         return err;
00609      }
00610      if ((err = mp_abs(X, &tmpX)) != MP_OKAY) {
00611         mp_clear_multi(&tmpG, &tmpX, NULL);
00612         return err;
00613      }
00614 
00615      /* and now compute (1/G)**|X| instead of G**X [X < 0] */
00616      err = mp_exptmod(&tmpG, &tmpX, P, Y);
00617      mp_clear_multi(&tmpG, &tmpX, NULL);
00618      return err;
00619 #else 
00620 #error mp_exptmod would always fail
00621      /* no invmod */
00622      return MP_VAL;
00623 #endif
00624 #endif /* LTM_NO_NEG_EXP */
00625   }
00626 
00627 /* modified diminished radix reduction */
00628 #if defined(BN_MP_REDUCE_IS_2K_L_C) && defined(BN_MP_REDUCE_2K_L_C) && defined(BN_S_MP_EXPTMOD_C)
00629   if (mp_reduce_is_2k_l(P) == MP_YES) {
00630      return s_mp_exptmod(G, X, P, Y, 1);
00631   }
00632 #endif
00633 
00634 #ifdef BN_MP_DR_IS_MODULUS_C
00635   /* is it a DR modulus? */
00636   dr = mp_dr_is_modulus(P);
00637 #else
00638   /* default to no */
00639   dr = 0;
00640 #endif
00641 
00642 #ifdef BN_MP_REDUCE_IS_2K_C
00643   /* if not, is it a unrestricted DR modulus? */
00644   if (dr == 0) {
00645      dr = mp_reduce_is_2k(P) << 1;
00646   }
00647 #endif
00648     
00649   /* if the modulus is odd or dr != 0 use the montgomery method */
00650 #ifdef BN_MP_EXPTMOD_FAST_C
00651   if (mp_isodd (P) == 1 || dr !=  0) {
00652     return mp_exptmod_fast (G, X, P, Y, dr);
00653   } else {
00654 #endif
00655 #ifdef BN_S_MP_EXPTMOD_C
00656     /* otherwise use the generic Barrett reduction technique */
00657     return s_mp_exptmod (G, X, P, Y, 0);
00658 #else
00659 #error mp_exptmod could fail
00660     /* no exptmod for evens */
00661     return MP_VAL;
00662 #endif
00663 #ifdef BN_MP_EXPTMOD_FAST_C
00664   }
00665 #endif
00666 }
00667 
00668 
00669 /* compare two ints (signed)*/
00670 static int mp_cmp (mp_int * a, mp_int * b)
00671 {
00672   /* compare based on sign */
00673   if (a->sign != b->sign) {
00674      if (a->sign == MP_NEG) {
00675         return MP_LT;
00676      } else {
00677         return MP_GT;
00678      }
00679   }
00680   
00681   /* compare digits */
00682   if (a->sign == MP_NEG) {
00683      /* if negative compare opposite direction */
00684      return mp_cmp_mag(b, a);
00685   } else {
00686      return mp_cmp_mag(a, b);
00687   }
00688 }
00689 
00690 
00691 /* compare a digit */
00692 static int mp_cmp_d(mp_int * a, mp_digit b)
00693 {
00694   /* compare based on sign */
00695   if (a->sign == MP_NEG) {
00696     return MP_LT;
00697   }
00698 
00699   /* compare based on magnitude */
00700   if (a->used > 1) {
00701     return MP_GT;
00702   }
00703 
00704   /* compare the only digit of a to b */
00705   if (a->dp[0] > b) {
00706     return MP_GT;
00707   } else if (a->dp[0] < b) {
00708     return MP_LT;
00709   } else {
00710     return MP_EQ;
00711   }
00712 }
00713 
00714 
00715 #ifndef LTM_NO_NEG_EXP
00716 /* hac 14.61, pp608 */
00717 static int mp_invmod (mp_int * a, mp_int * b, mp_int * c)
00718 {
00719   /* b cannot be negative */
00720   if (b->sign == MP_NEG || mp_iszero(b) == 1) {
00721     return MP_VAL;
00722   }
00723 
00724 #ifdef BN_FAST_MP_INVMOD_C
00725   /* if the modulus is odd we can use a faster routine instead */
00726   if (mp_isodd (b) == 1) {
00727     return fast_mp_invmod (a, b, c);
00728   }
00729 #endif
00730 
00731 #ifdef BN_MP_INVMOD_SLOW_C
00732   return mp_invmod_slow(a, b, c);
00733 #endif
00734 
00735 #ifndef BN_FAST_MP_INVMOD_C
00736 #ifndef BN_MP_INVMOD_SLOW_C
00737 #error mp_invmod would always fail
00738 #endif
00739 #endif
00740   return MP_VAL;
00741 }
00742 #endif /* LTM_NO_NEG_EXP */
00743 
00744 
00745 /* get the size for an unsigned equivalent */
00746 static int mp_unsigned_bin_size (mp_int * a)
00747 {
00748   int     size = mp_count_bits (a);
00749   return (size / 8 + ((size & 7) != 0 ? 1 : 0));
00750 }
00751 
00752 
00753 #ifndef LTM_NO_NEG_EXP
00754 /* hac 14.61, pp608 */
00755 static int mp_invmod_slow (mp_int * a, mp_int * b, mp_int * c)
00756 {
00757   mp_int  x, y, u, v, A, B, C, D;
00758   int     res;
00759 
00760   /* b cannot be negative */
00761   if (b->sign == MP_NEG || mp_iszero(b) == 1) {
00762     return MP_VAL;
00763   }
00764 
00765   /* init temps */
00766   if ((res = mp_init_multi(&x, &y, &u, &v, 
00767                            &A, &B, &C, &D, NULL)) != MP_OKAY) {
00768      return res;
00769   }
00770 
00771   /* x = a, y = b */
00772   if ((res = mp_mod(a, b, &x)) != MP_OKAY) {
00773       goto LBL_ERR;
00774   }
00775   if ((res = mp_copy (b, &y)) != MP_OKAY) {
00776     goto LBL_ERR;
00777   }
00778 
00779   /* 2. [modified] if x,y are both even then return an error! */
00780   if (mp_iseven (&x) == 1 && mp_iseven (&y) == 1) {
00781     res = MP_VAL;
00782     goto LBL_ERR;
00783   }
00784 
00785   /* 3. u=x, v=y, A=1, B=0, C=0,D=1 */
00786   if ((res = mp_copy (&x, &u)) != MP_OKAY) {
00787     goto LBL_ERR;
00788   }
00789   if ((res = mp_copy (&y, &v)) != MP_OKAY) {
00790     goto LBL_ERR;
00791   }
00792   mp_set (&A, 1);
00793   mp_set (&D, 1);
00794 
00795 top:
00796   /* 4.  while u is even do */
00797   while (mp_iseven (&u) == 1) {
00798     /* 4.1 u = u/2 */
00799     if ((res = mp_div_2 (&u, &u)) != MP_OKAY) {
00800       goto LBL_ERR;
00801     }
00802     /* 4.2 if A or B is odd then */
00803     if (mp_isodd (&A) == 1 || mp_isodd (&B) == 1) {
00804       /* A = (A+y)/2, B = (B-x)/2 */
00805       if ((res = mp_add (&A, &y, &A)) != MP_OKAY) {
00806          goto LBL_ERR;
00807       }
00808       if ((res = mp_sub (&B, &x, &B)) != MP_OKAY) {
00809          goto LBL_ERR;
00810       }
00811     }
00812     /* A = A/2, B = B/2 */
00813     if ((res = mp_div_2 (&A, &A)) != MP_OKAY) {
00814       goto LBL_ERR;
00815     }
00816     if ((res = mp_div_2 (&B, &B)) != MP_OKAY) {
00817       goto LBL_ERR;
00818     }
00819   }
00820 
00821   /* 5.  while v is even do */
00822   while (mp_iseven (&v) == 1) {
00823     /* 5.1 v = v/2 */
00824     if ((res = mp_div_2 (&v, &v)) != MP_OKAY) {
00825       goto LBL_ERR;
00826     }
00827     /* 5.2 if C or D is odd then */
00828     if (mp_isodd (&C) == 1 || mp_isodd (&D) == 1) {
00829       /* C = (C+y)/2, D = (D-x)/2 */
00830       if ((res = mp_add (&C, &y, &C)) != MP_OKAY) {
00831          goto LBL_ERR;
00832       }
00833       if ((res = mp_sub (&D, &x, &D)) != MP_OKAY) {
00834          goto LBL_ERR;
00835       }
00836     }
00837     /* C = C/2, D = D/2 */
00838     if ((res = mp_div_2 (&C, &C)) != MP_OKAY) {
00839       goto LBL_ERR;
00840     }
00841     if ((res = mp_div_2 (&D, &D)) != MP_OKAY) {
00842       goto LBL_ERR;
00843     }
00844   }
00845 
00846   /* 6.  if u >= v then */
00847   if (mp_cmp (&u, &v) != MP_LT) {
00848     /* u = u - v, A = A - C, B = B - D */
00849     if ((res = mp_sub (&u, &v, &u)) != MP_OKAY) {
00850       goto LBL_ERR;
00851     }
00852 
00853     if ((res = mp_sub (&A, &C, &A)) != MP_OKAY) {
00854       goto LBL_ERR;
00855     }
00856 
00857     if ((res = mp_sub (&B, &D, &B)) != MP_OKAY) {
00858       goto LBL_ERR;
00859     }
00860   } else {
00861     /* v - v - u, C = C - A, D = D - B */
00862     if ((res = mp_sub (&v, &u, &v)) != MP_OKAY) {
00863       goto LBL_ERR;
00864     }
00865 
00866     if ((res = mp_sub (&C, &A, &C)) != MP_OKAY) {
00867       goto LBL_ERR;
00868     }
00869 
00870     if ((res = mp_sub (&D, &B, &D)) != MP_OKAY) {
00871       goto LBL_ERR;
00872     }
00873   }
00874 
00875   /* if not zero goto step 4 */
00876   if (mp_iszero (&u) == 0)
00877     goto top;
00878 
00879   /* now a = C, b = D, gcd == g*v */
00880 
00881   /* if v != 1 then there is no inverse */
00882   if (mp_cmp_d (&v, 1) != MP_EQ) {
00883     res = MP_VAL;
00884     goto LBL_ERR;
00885   }
00886 
00887   /* if its too low */
00888   while (mp_cmp_d(&C, 0) == MP_LT) {
00889       if ((res = mp_add(&C, b, &C)) != MP_OKAY) {
00890          goto LBL_ERR;
00891       }
00892   }
00893   
00894   /* too big */
00895   while (mp_cmp_mag(&C, b) != MP_LT) {
00896       if ((res = mp_sub(&C, b, &C)) != MP_OKAY) {
00897          goto LBL_ERR;
00898       }
00899   }
00900   
00901   /* C is now the inverse */
00902   mp_exch (&C, c);
00903   res = MP_OKAY;
00904 LBL_ERR:mp_clear_multi (&x, &y, &u, &v, &A, &B, &C, &D, NULL);
00905   return res;
00906 }
00907 #endif /* LTM_NO_NEG_EXP */
00908 
00909 
00910 /* compare maginitude of two ints (unsigned) */
00911 static int mp_cmp_mag (mp_int * a, mp_int * b)
00912 {
00913   int     n;
00914   mp_digit *tmpa, *tmpb;
00915 
00916   /* compare based on # of non-zero digits */
00917   if (a->used > b->used) {
00918     return MP_GT;
00919   }
00920   
00921   if (a->used < b->used) {
00922     return MP_LT;
00923   }
00924 
00925   /* alias for a */
00926   tmpa = a->dp + (a->used - 1);
00927 
00928   /* alias for b */
00929   tmpb = b->dp + (a->used - 1);
00930 
00931   /* compare based on digits  */
00932   for (n = 0; n < a->used; ++n, --tmpa, --tmpb) {
00933     if (*tmpa > *tmpb) {
00934       return MP_GT;
00935     }
00936 
00937     if (*tmpa < *tmpb) {
00938       return MP_LT;
00939     }
00940   }
00941   return MP_EQ;
00942 }
00943 
00944 
00945 /* reads a unsigned char array, assumes the msb is stored first [big endian] */
00946 static int mp_read_unsigned_bin (mp_int * a, const unsigned char *b, int c)
00947 {
00948   int     res;
00949 
00950   /* make sure there are at least two digits */
00951   if (a->alloc < 2) {
00952      if ((res = mp_grow(a, 2)) != MP_OKAY) {
00953         return res;
00954      }
00955   }
00956 
00957   /* zero the int */
00958   mp_zero (a);
00959 
00960   /* read the bytes in */
00961   while (c-- > 0) {
00962     if ((res = mp_mul_2d (a, 8, a)) != MP_OKAY) {
00963       return res;
00964     }
00965 
00966 #ifndef MP_8BIT
00967       a->dp[0] |= *b++;
00968       a->used += 1;
00969 #else
00970       a->dp[0] = (*b & MP_MASK);
00971       a->dp[1] |= ((*b++ >> 7U) & 1);
00972       a->used += 2;
00973 #endif
00974   }
00975   mp_clamp (a);
00976   return MP_OKAY;
00977 }
00978 
00979 
00980 /* store in unsigned [big endian] format */
00981 static int mp_to_unsigned_bin (mp_int * a, unsigned char *b)
00982 {
00983   int     x, res;
00984   mp_int  t;
00985 
00986   if ((res = mp_init_copy (&t, a)) != MP_OKAY) {
00987     return res;
00988   }
00989 
00990   x = 0;
00991   while (mp_iszero (&t) == 0) {
00992 #ifndef MP_8BIT
00993       b[x++] = (unsigned char) (t.dp[0] & 255);
00994 #else
00995       b[x++] = (unsigned char) (t.dp[0] | ((t.dp[1] & 0x01) << 7));
00996 #endif
00997     if ((res = mp_div_2d (&t, 8, &t, NULL)) != MP_OKAY) {
00998       mp_clear (&t);
00999       return res;
01000     }
01001   }
01002   bn_reverse (b, x);
01003   mp_clear (&t);
01004   return MP_OKAY;
01005 }
01006 
01007 
01008 /* shift right by a certain bit count (store quotient in c, optional remainder in d) */
01009 static int mp_div_2d (mp_int * a, int b, mp_int * c, mp_int * d)
01010 {
01011   mp_digit D, r, rr;
01012   int     x, res;
01013   mp_int  t;
01014 
01015 
01016   /* if the shift count is <= 0 then we do no work */
01017   if (b <= 0) {
01018     res = mp_copy (a, c);
01019     if (d != NULL) {
01020       mp_zero (d);
01021     }
01022     return res;
01023   }
01024 
01025   if ((res = mp_init (&t)) != MP_OKAY) {
01026     return res;
01027   }
01028 
01029   /* get the remainder */
01030   if (d != NULL) {
01031     if ((res = mp_mod_2d (a, b, &t)) != MP_OKAY) {
01032       mp_clear (&t);
01033       return res;
01034     }
01035   }
01036 
01037   /* copy */
01038   if ((res = mp_copy (a, c)) != MP_OKAY) {
01039     mp_clear (&t);
01040     return res;
01041   }
01042 
01043   /* shift by as many digits in the bit count */
01044   if (b >= (int)DIGIT_BIT) {
01045     mp_rshd (c, b / DIGIT_BIT);
01046   }
01047 
01048   /* shift any bit count < DIGIT_BIT */
01049   D = (mp_digit) (b % DIGIT_BIT);
01050   if (D != 0) {
01051     register mp_digit *tmpc, mask, shift;
01052 
01053     /* mask */
01054     mask = (((mp_digit)1) << D) - 1;
01055 
01056     /* shift for lsb */
01057     shift = DIGIT_BIT - D;
01058 
01059     /* alias */
01060     tmpc = c->dp + (c->used - 1);
01061 
01062     /* carry */
01063     r = 0;
01064     for (x = c->used - 1; x >= 0; x--) {
01065       /* get the lower  bits of this word in a temp */
01066       rr = *tmpc & mask;
01067 
01068       /* shift the current word and mix in the carry bits from the previous word */
01069       *tmpc = (*tmpc >> D) | (r << shift);
01070       --tmpc;
01071 
01072       /* set the carry to the carry bits of the current word found above */
01073       r = rr;
01074     }
01075   }
01076   mp_clamp (c);
01077   if (d != NULL) {
01078     mp_exch (&t, d);
01079   }
01080   mp_clear (&t);
01081   return MP_OKAY;
01082 }
01083 
01084 
01085 static int mp_init_copy (mp_int * a, mp_int * b)
01086 {
01087   int     res;
01088 
01089   if ((res = mp_init (a)) != MP_OKAY) {
01090     return res;
01091   }
01092   return mp_copy (b, a);
01093 }
01094 
01095 
01096 /* set to zero */
01097 static void mp_zero (mp_int * a)
01098 {
01099   int       n;
01100   mp_digit *tmp;
01101 
01102   a->sign = MP_ZPOS;
01103   a->used = 0;
01104 
01105   tmp = a->dp;
01106   for (n = 0; n < a->alloc; n++) {
01107      *tmp++ = 0;
01108   }
01109 }
01110 
01111 
01112 /* copy, b = a */
01113 static int mp_copy (mp_int * a, mp_int * b)
01114 {
01115   int     res, n;
01116 
01117   /* if dst == src do nothing */
01118   if (a == b) {
01119     return MP_OKAY;
01120   }
01121 
01122   /* grow dest */
01123   if (b->alloc < a->used) {
01124      if ((res = mp_grow (b, a->used)) != MP_OKAY) {
01125         return res;
01126      }
01127   }
01128 
01129   /* zero b and copy the parameters over */
01130   {
01131     register mp_digit *tmpa, *tmpb;
01132 
01133     /* pointer aliases */
01134 
01135     /* source */
01136     tmpa = a->dp;
01137 
01138     /* destination */
01139     tmpb = b->dp;
01140 
01141     /* copy all the digits */
01142     for (n = 0; n < a->used; n++) {
01143       *tmpb++ = *tmpa++;
01144     }
01145 
01146     /* clear high digits */
01147     for (; n < b->used; n++) {
01148       *tmpb++ = 0;
01149     }
01150   }
01151 
01152   /* copy used count and sign */
01153   b->used = a->used;
01154   b->sign = a->sign;
01155   return MP_OKAY;
01156 }
01157 
01158 
01159 /* shift right a certain amount of digits */
01160 static void mp_rshd (mp_int * a, int b)
01161 {
01162   int     x;
01163 
01164   /* if b <= 0 then ignore it */
01165   if (b <= 0) {
01166     return;
01167   }
01168 
01169   /* if b > used then simply zero it and return */
01170   if (a->used <= b) {
01171     mp_zero (a);
01172     return;
01173   }
01174 
01175   {
01176     register mp_digit *bottom, *top;
01177 
01178     /* shift the digits down */
01179 
01180     /* bottom */
01181     bottom = a->dp;
01182 
01183     /* top [offset into digits] */
01184     top = a->dp + b;
01185 
01186     /* this is implemented as a sliding window where 
01187      * the window is b-digits long and digits from 
01188      * the top of the window are copied to the bottom
01189      *
01190      * e.g.
01191 
01192      b-2 | b-1 | b0 | b1 | b2 | ... | bb |   ---->
01193                  /\                   |      ---->
01194                   \-------------------/      ---->
01195      */
01196     for (x = 0; x < (a->used - b); x++) {
01197       *bottom++ = *top++;
01198     }
01199 
01200     /* zero the top digits */
01201     for (; x < a->used; x++) {
01202       *bottom++ = 0;
01203     }
01204   }
01205   
01206   /* remove excess digits */
01207   a->used -= b;
01208 }
01209 
01210 
01211 /* swap the elements of two integers, for cases where you can't simply swap the 
01212  * mp_int pointers around
01213  */
01214 static void mp_exch (mp_int * a, mp_int * b)
01215 {
01216   mp_int  t;
01217 
01218   t  = *a;
01219   *a = *b;
01220   *b = t;
01221 }
01222 
01223 
01224 /* trim unused digits 
01225  *
01226  * This is used to ensure that leading zero digits are
01227  * trimed and the leading "used" digit will be non-zero
01228  * Typically very fast.  Also fixes the sign if there
01229  * are no more leading digits
01230  */
01231 static void mp_clamp (mp_int * a)
01232 {
01233   /* decrease used while the most significant digit is
01234    * zero.
01235    */
01236   while (a->used > 0 && a->dp[a->used - 1] == 0) {
01237     --(a->used);
01238   }
01239 
01240   /* reset the sign flag if used == 0 */
01241   if (a->used == 0) {
01242     a->sign = MP_ZPOS;
01243   }
01244 }
01245 
01246 
01247 /* grow as required */
01248 static int mp_grow (mp_int * a, int size)
01249 {
01250   int     i;
01251   mp_digit *tmp;
01252 
01253   /* if the alloc size is smaller alloc more ram */
01254   if (a->alloc < size) {
01255     /* ensure there are always at least MP_PREC digits extra on top */
01256     size += (MP_PREC * 2) - (size % MP_PREC);
01257 
01258     /* reallocate the array a->dp
01259      *
01260      * We store the return in a temporary variable
01261      * in case the operation failed we don't want
01262      * to overwrite the dp member of a.
01263      */
01264     tmp = OPT_CAST(mp_digit) XREALLOC (a->dp, sizeof (mp_digit) * size);
01265     if (tmp == NULL) {
01266       /* reallocation failed but "a" is still valid [can be freed] */
01267       return MP_MEM;
01268     }
01269 
01270     /* reallocation succeeded so set a->dp */
01271     a->dp = tmp;
01272 
01273     /* zero excess digits */
01274     i        = a->alloc;
01275     a->alloc = size;
01276     for (; i < a->alloc; i++) {
01277       a->dp[i] = 0;
01278     }
01279   }
01280   return MP_OKAY;
01281 }
01282 
01283 
01284 #ifdef BN_MP_ABS_C
01285 /* b = |a| 
01286  *
01287  * Simple function copies the input and fixes the sign to positive
01288  */
01289 static int mp_abs (mp_int * a, mp_int * b)
01290 {
01291   int     res;
01292 
01293   /* copy a to b */
01294   if (a != b) {
01295      if ((res = mp_copy (a, b)) != MP_OKAY) {
01296        return res;
01297      }
01298   }
01299 
01300   /* force the sign of b to positive */
01301   b->sign = MP_ZPOS;
01302 
01303   return MP_OKAY;
01304 }
01305 #endif
01306 
01307 
01308 /* set to a digit */
01309 static void mp_set (mp_int * a, mp_digit b)
01310 {
01311   mp_zero (a);
01312   a->dp[0] = b & MP_MASK;
01313   a->used  = (a->dp[0] != 0) ? 1 : 0;
01314 }
01315 
01316 
01317 #ifndef LTM_NO_NEG_EXP
01318 /* b = a/2 */
01319 static int mp_div_2(mp_int * a, mp_int * b)
01320 {
01321   int     x, res, oldused;
01322 
01323   /* copy */
01324   if (b->alloc < a->used) {
01325     if ((res = mp_grow (b, a->used)) != MP_OKAY) {
01326       return res;
01327     }
01328   }
01329 
01330   oldused = b->used;
01331   b->used = a->used;
01332   {
01333     register mp_digit r, rr, *tmpa, *tmpb;
01334 
01335     /* source alias */
01336     tmpa = a->dp + b->used - 1;
01337 
01338     /* dest alias */
01339     tmpb = b->dp + b->used - 1;
01340 
01341     /* carry */
01342     r = 0;
01343     for (x = b->used - 1; x >= 0; x--) {
01344       /* get the carry for the next iteration */
01345       rr = *tmpa & 1;
01346 
01347       /* shift the current digit, add in carry and store */
01348       *tmpb-- = (*tmpa-- >> 1) | (r << (DIGIT_BIT - 1));
01349 
01350       /* forward carry to next iteration */
01351       r = rr;
01352     }
01353 
01354     /* zero excess digits */
01355     tmpb = b->dp + b->used;
01356     for (x = b->used; x < oldused; x++) {
01357       *tmpb++ = 0;
01358     }
01359   }
01360   b->sign = a->sign;
01361   mp_clamp (b);
01362   return MP_OKAY;
01363 }
01364 #endif /* LTM_NO_NEG_EXP */
01365 
01366 
01367 /* shift left by a certain bit count */
01368 static int mp_mul_2d (mp_int * a, int b, mp_int * c)
01369 {
01370   mp_digit d;
01371   int      res;
01372 
01373   /* copy */
01374   if (a != c) {
01375      if ((res = mp_copy (a, c)) != MP_OKAY) {
01376        return res;
01377      }
01378   }
01379 
01380   if (c->alloc < (int)(c->used + b/DIGIT_BIT + 1)) {
01381      if ((res = mp_grow (c, c->used + b / DIGIT_BIT + 1)) != MP_OKAY) {
01382        return res;
01383      }
01384   }
01385 
01386   /* shift by as many digits in the bit count */
01387   if (b >= (int)DIGIT_BIT) {
01388     if ((res = mp_lshd (c, b / DIGIT_BIT)) != MP_OKAY) {
01389       return res;
01390     }
01391   }
01392 
01393   /* shift any bit count < DIGIT_BIT */
01394   d = (mp_digit) (b % DIGIT_BIT);
01395   if (d != 0) {
01396     register mp_digit *tmpc, shift, mask, r, rr;
01397     register int x;
01398 
01399     /* bitmask for carries */
01400     mask = (((mp_digit)1) << d) - 1;
01401 
01402     /* shift for msbs */
01403     shift = DIGIT_BIT - d;
01404 
01405     /* alias */
01406     tmpc = c->dp;
01407 
01408     /* carry */
01409     r    = 0;
01410     for (x = 0; x < c->used; x++) {
01411       /* get the higher bits of the current word */
01412       rr = (*tmpc >> shift) & mask;
01413 
01414       /* shift the current word and OR in the carry */
01415       *tmpc = ((*tmpc << d) | r) & MP_MASK;
01416       ++tmpc;
01417 
01418       /* set the carry to the carry bits of the current word */
01419       r = rr;
01420     }
01421     
01422     /* set final carry */
01423     if (r != 0) {
01424        c->dp[(c->used)++] = r;
01425     }
01426   }
01427   mp_clamp (c);
01428   return MP_OKAY;
01429 }
01430 
01431 
01432 #ifdef BN_MP_INIT_MULTI_C
01433 static int mp_init_multi(mp_int *mp, ...) 
01434 {
01435     mp_err res = MP_OKAY;      /* Assume ok until proven otherwise */
01436     int n = 0;                 /* Number of ok inits */
01437     mp_int* cur_arg = mp;
01438     va_list args;
01439 
01440     va_start(args, mp);        /* init args to next argument from caller */
01441     while (cur_arg != NULL) {
01442         if (mp_init(cur_arg) != MP_OKAY) {
01443             /* Oops - error! Back-track and mp_clear what we already
01444                succeeded in init-ing, then return error.
01445             */
01446             va_list clean_args;
01447             
01448             /* end the current list */
01449             va_end(args);
01450             
01451             /* now start cleaning up */            
01452             cur_arg = mp;
01453             va_start(clean_args, mp);
01454             while (n--) {
01455                 mp_clear(cur_arg);
01456                 cur_arg = va_arg(clean_args, mp_int*);
01457             }
01458             va_end(clean_args);
01459             res = MP_MEM;
01460             break;
01461         }
01462         n++;
01463         cur_arg = va_arg(args, mp_int*);
01464     }
01465     va_end(args);
01466     return res;                /* Assumed ok, if error flagged above. */
01467 }
01468 #endif
01469 
01470 
01471 #ifdef BN_MP_CLEAR_MULTI_C
01472 static void mp_clear_multi(mp_int *mp, ...) 
01473 {
01474     mp_int* next_mp = mp;
01475     va_list args;
01476     va_start(args, mp);
01477     while (next_mp != NULL) {
01478         mp_clear(next_mp);
01479         next_mp = va_arg(args, mp_int*);
01480     }
01481     va_end(args);
01482 }
01483 #endif
01484 
01485 
01486 /* shift left a certain amount of digits */
01487 static int mp_lshd (mp_int * a, int b)
01488 {
01489   int     x, res;
01490 
01491   /* if its less than zero return */
01492   if (b <= 0) {
01493     return MP_OKAY;
01494   }
01495 
01496   /* grow to fit the new digits */
01497   if (a->alloc < a->used + b) {
01498      if ((res = mp_grow (a, a->used + b)) != MP_OKAY) {
01499        return res;
01500      }
01501   }
01502 
01503   {
01504     register mp_digit *top, *bottom;
01505 
01506     /* increment the used by the shift amount then copy upwards */
01507     a->used += b;
01508 
01509     /* top */
01510     top = a->dp + a->used - 1;
01511 
01512     /* base */
01513     bottom = a->dp + a->used - 1 - b;
01514 
01515     /* much like mp_rshd this is implemented using a sliding window
01516      * except the window goes the otherway around.  Copying from
01517      * the bottom to the top.  see bn_mp_rshd.c for more info.
01518      */
01519     for (x = a->used - 1; x >= b; x--) {
01520       *top-- = *bottom--;
01521     }
01522 
01523     /* zero the lower digits */
01524     top = a->dp;
01525     for (x = 0; x < b; x++) {
01526       *top++ = 0;
01527     }
01528   }
01529   return MP_OKAY;
01530 }
01531 
01532 
01533 /* returns the number of bits in an int */
01534 static int mp_count_bits (mp_int * a)
01535 {
01536   int     r;
01537   mp_digit q;
01538 
01539   /* shortcut */
01540   if (a->used == 0) {
01541     return 0;
01542   }
01543 
01544   /* get number of digits and add that */
01545   r = (a->used - 1) * DIGIT_BIT;
01546   
01547   /* take the last digit and count the bits in it */
01548   q = a->dp[a->used - 1];
01549   while (q > ((mp_digit) 0)) {
01550     ++r;
01551     q >>= ((mp_digit) 1);
01552   }
01553   return r;
01554 }
01555 
01556 
01557 /* calc a value mod 2**b */
01558 static int mp_mod_2d (mp_int * a, int b, mp_int * c)
01559 {
01560   int     x, res;
01561 
01562   /* if b is <= 0 then zero the int */
01563   if (b <= 0) {
01564     mp_zero (c);
01565     return MP_OKAY;
01566   }
01567 
01568   /* if the modulus is larger than the value than return */
01569   if (b >= (int) (a->used * DIGIT_BIT)) {
01570     res = mp_copy (a, c);
01571     return res;
01572   }
01573 
01574   /* copy */
01575   if ((res = mp_copy (a, c)) != MP_OKAY) {
01576     return res;
01577   }
01578 
01579   /* zero digits above the last digit of the modulus */
01580   for (x = (b / DIGIT_BIT) + ((b % DIGIT_BIT) == 0 ? 0 : 1); x < c->used; x++) {
01581     c->dp[x] = 0;
01582   }
01583   /* clear the digit that is not completely outside/inside the modulus */
01584   c->dp[b / DIGIT_BIT] &=
01585     (mp_digit) ((((mp_digit) 1) << (((mp_digit) b) % DIGIT_BIT)) - ((mp_digit) 1));
01586   mp_clamp (c);
01587   return MP_OKAY;
01588 }
01589 
01590 
01591 #ifdef BN_MP_DIV_SMALL
01592 
01593 /* slower bit-bang division... also smaller */
01594 static int mp_div(mp_int * a, mp_int * b, mp_int * c, mp_int * d)
01595 {
01596    mp_int ta, tb, tq, q;
01597    int    res, n, n2;
01598 
01599   /* is divisor zero ? */
01600   if (mp_iszero (b) == 1) {
01601     return MP_VAL;
01602   }
01603 
01604   /* if a < b then q=0, r = a */
01605   if (mp_cmp_mag (a, b) == MP_LT) {
01606     if (d != NULL) {
01607       res = mp_copy (a, d);
01608     } else {
01609       res = MP_OKAY;
01610     }
01611     if (c != NULL) {
01612       mp_zero (c);
01613     }
01614     return res;
01615   }
01616         
01617   /* init our temps */
01618   if ((res = mp_init_multi(&ta, &tb, &tq, &q, NULL) != MP_OKAY)) {
01619      return res;
01620   }
01621 
01622 
01623   mp_set(&tq, 1);
01624   n = mp_count_bits(a) - mp_count_bits(b);
01625   if (((res = mp_abs(a, &ta)) != MP_OKAY) ||
01626       ((res = mp_abs(b, &tb)) != MP_OKAY) || 
01627       ((res = mp_mul_2d(&tb, n, &tb)) != MP_OKAY) ||
01628       ((res = mp_mul_2d(&tq, n, &tq)) != MP_OKAY)) {
01629       goto LBL_ERR;
01630   }
01631 
01632   while (n-- >= 0) {
01633      if (mp_cmp(&tb, &ta) != MP_GT) {
01634         if (((res = mp_sub(&ta, &tb, &ta)) != MP_OKAY) ||
01635             ((res = mp_add(&q, &tq, &q)) != MP_OKAY)) {
01636            goto LBL_ERR;
01637         }
01638      }
01639      if (((res = mp_div_2d(&tb, 1, &tb, NULL)) != MP_OKAY) ||
01640          ((res = mp_div_2d(&tq, 1, &tq, NULL)) != MP_OKAY)) {
01641            goto LBL_ERR;
01642      }
01643   }
01644 
01645   /* now q == quotient and ta == remainder */
01646   n  = a->sign;
01647   n2 = (a->sign == b->sign ? MP_ZPOS : MP_NEG);
01648   if (c != NULL) {
01649      mp_exch(c, &q);
01650      c->sign  = (mp_iszero(c) == MP_YES) ? MP_ZPOS : n2;
01651   }
01652   if (d != NULL) {
01653      mp_exch(d, &ta);
01654      d->sign = (mp_iszero(d) == MP_YES) ? MP_ZPOS : n;
01655   }
01656 LBL_ERR:
01657    mp_clear_multi(&ta, &tb, &tq, &q, NULL);
01658    return res;
01659 }
01660 
01661 #else
01662 
01663 /* integer signed division. 
01664  * c*b + d == a [e.g. a/b, c=quotient, d=remainder]
01665  * HAC pp.598 Algorithm 14.20
01666  *
01667  * Note that the description in HAC is horribly 
01668  * incomplete.  For example, it doesn't consider 
01669  * the case where digits are removed from 'x' in 
01670  * the inner loop.  It also doesn't consider the 
01671  * case that y has fewer than three digits, etc..
01672  *
01673  * The overall algorithm is as described as 
01674  * 14.20 from HAC but fixed to treat these cases.
01675 */
01676 static int mp_div (mp_int * a, mp_int * b, mp_int * c, mp_int * d)
01677 {
01678   mp_int  q, x, y, t1, t2;
01679   int     res, n, t, i, norm, neg;
01680 
01681   /* is divisor zero ? */
01682   if (mp_iszero (b) == 1) {
01683     return MP_VAL;
01684   }
01685 
01686   /* if a < b then q=0, r = a */
01687   if (mp_cmp_mag (a, b) == MP_LT) {
01688     if (d != NULL) {
01689       res = mp_copy (a, d);
01690     } else {
01691       res = MP_OKAY;
01692     }
01693     if (c != NULL) {
01694       mp_zero (c);
01695     }
01696     return res;
01697   }
01698 
01699   if ((res = mp_init_size (&q, a->used + 2)) != MP_OKAY) {
01700     return res;
01701   }
01702   q.used = a->used + 2;
01703 
01704   if ((res = mp_init (&t1)) != MP_OKAY) {
01705     goto LBL_Q;
01706   }
01707 
01708   if ((res = mp_init (&t2)) != MP_OKAY) {
01709     goto LBL_T1;
01710   }
01711 
01712   if ((res = mp_init_copy (&x, a)) != MP_OKAY) {
01713     goto LBL_T2;
01714   }
01715 
01716   if ((res = mp_init_copy (&y, b)) != MP_OKAY) {
01717     goto LBL_X;
01718   }
01719 
01720   /* fix the sign */
01721   neg = (a->sign == b->sign) ? MP_ZPOS : MP_NEG;
01722   x.sign = y.sign = MP_ZPOS;
01723 
01724   /* normalize both x and y, ensure that y >= b/2, [b == 2**DIGIT_BIT] */
01725   norm = mp_count_bits(&y) % DIGIT_BIT;
01726   if (norm < (int)(DIGIT_BIT-1)) {
01727      norm = (DIGIT_BIT-1) - norm;
01728      if ((res = mp_mul_2d (&x, norm, &x)) != MP_OKAY) {
01729        goto LBL_Y;
01730      }
01731      if ((res = mp_mul_2d (&y, norm, &y)) != MP_OKAY) {
01732        goto LBL_Y;
01733      }
01734   } else {
01735      norm = 0;
01736   }
01737 
01738   /* note hac does 0 based, so if used==5 then its 0,1,2,3,4, e.g. use 4 */
01739   n = x.used - 1;
01740   t = y.used - 1;
01741 
01742   /* while (x >= y*b**n-t) do { q[n-t] += 1; x -= y*b**{n-t} } */
01743   if ((res = mp_lshd (&y, n - t)) != MP_OKAY) { /* y = y*b**{n-t} */
01744     goto LBL_Y;
01745   }
01746 
01747   while (mp_cmp (&x, &y) != MP_LT) {
01748     ++(q.dp[n - t]);
01749     if ((res = mp_sub (&x, &y, &x)) != MP_OKAY) {
01750       goto LBL_Y;
01751     }
01752   }
01753 
01754   /* reset y by shifting it back down */
01755   mp_rshd (&y, n - t);
01756 
01757   /* step 3. for i from n down to (t + 1) */
01758   for (i = n; i >= (t + 1); i--) {
01759     if (i > x.used) {
01760       continue;
01761     }
01762 
01763     /* step 3.1 if xi == yt then set q{i-t-1} to b-1, 
01764      * otherwise set q{i-t-1} to (xi*b + x{i-1})/yt */
01765     if (x.dp[i] == y.dp[t]) {
01766       q.dp[i - t - 1] = ((((mp_digit)1) << DIGIT_BIT) - 1);
01767     } else {
01768       mp_word tmp;
01769       tmp = ((mp_word) x.dp[i]) << ((mp_word) DIGIT_BIT);
01770       tmp |= ((mp_word) x.dp[i - 1]);
01771       tmp /= ((mp_word) y.dp[t]);
01772       if (tmp > (mp_word) MP_MASK)
01773         tmp = MP_MASK;
01774       q.dp[i - t - 1] = (mp_digit) (tmp & (mp_word) (MP_MASK));
01775     }
01776 
01777     /* while (q{i-t-1} * (yt * b + y{t-1})) > 
01778              xi * b**2 + xi-1 * b + xi-2 
01779      
01780        do q{i-t-1} -= 1; 
01781     */
01782     q.dp[i - t - 1] = (q.dp[i - t - 1] + 1) & MP_MASK;
01783     do {
01784       q.dp[i - t - 1] = (q.dp[i - t - 1] - 1) & MP_MASK;
01785 
01786       /* find left hand */
01787       mp_zero (&t1);
01788       t1.dp[0] = (t - 1 < 0) ? 0 : y.dp[t - 1];
01789       t1.dp[1] = y.dp[t];
01790       t1.used = 2;
01791       if ((res = mp_mul_d (&t1, q.dp[i - t - 1], &t1)) != MP_OKAY) {
01792         goto LBL_Y;
01793       }
01794 
01795       /* find right hand */
01796       t2.dp[0] = (i - 2 < 0) ? 0 : x.dp[i - 2];
01797       t2.dp[1] = (i - 1 < 0) ? 0 : x.dp[i - 1];
01798       t2.dp[2] = x.dp[i];
01799       t2.used = 3;
01800     } while (mp_cmp_mag(&t1, &t2) == MP_GT);
01801 
01802     /* step 3.3 x = x - q{i-t-1} * y * b**{i-t-1} */
01803     if ((res = mp_mul_d (&y, q.dp[i - t - 1], &t1)) != MP_OKAY) {
01804       goto LBL_Y;
01805     }
01806 
01807     if ((res = mp_lshd (&t1, i - t - 1)) != MP_OKAY) {
01808       goto LBL_Y;
01809     }
01810 
01811     if ((res = mp_sub (&x, &t1, &x)) != MP_OKAY) {
01812       goto LBL_Y;
01813     }
01814 
01815     /* if x < 0 then { x = x + y*b**{i-t-1}; q{i-t-1} -= 1; } */
01816     if (x.sign == MP_NEG) {
01817       if ((res = mp_copy (&y, &t1)) != MP_OKAY) {
01818         goto LBL_Y;
01819       }
01820       if ((res = mp_lshd (&t1, i - t - 1)) != MP_OKAY) {
01821         goto LBL_Y;
01822       }
01823       if ((res = mp_add (&x, &t1, &x)) != MP_OKAY) {
01824         goto LBL_Y;
01825       }
01826 
01827       q.dp[i - t - 1] = (q.dp[i - t - 1] - 1UL) & MP_MASK;
01828     }
01829   }
01830 
01831   /* now q is the quotient and x is the remainder 
01832    * [which we have to normalize] 
01833    */
01834   
01835   /* get sign before writing to c */
01836   x.sign = x.used == 0 ? MP_ZPOS : a->sign;
01837 
01838   if (c != NULL) {
01839     mp_clamp (&q);
01840     mp_exch (&q, c);
01841     c->sign = neg;
01842   }
01843 
01844   if (d != NULL) {
01845     mp_div_2d (&x, norm, &x, NULL);
01846     mp_exch (&x, d);
01847   }
01848 
01849   res = MP_OKAY;
01850 
01851 LBL_Y:mp_clear (&y);
01852 LBL_X:mp_clear (&x);
01853 LBL_T2:mp_clear (&t2);
01854 LBL_T1:mp_clear (&t1);
01855 LBL_Q:mp_clear (&q);
01856   return res;
01857 }
01858 
01859 #endif
01860 
01861 
01862 #ifdef MP_LOW_MEM
01863    #define TAB_SIZE 32
01864 #else
01865    #define TAB_SIZE 256
01866 #endif
01867 
01868 static int s_mp_exptmod (mp_int * G, mp_int * X, mp_int * P, mp_int * Y, int redmode)
01869 {
01870   mp_int  M[TAB_SIZE], res, mu;
01871   mp_digit buf;
01872   int     err, bitbuf, bitcpy, bitcnt, mode, digidx, x, y, winsize;
01873   int (*redux)(mp_int*,mp_int*,mp_int*);
01874 
01875   /* find window size */
01876   x = mp_count_bits (X);
01877   if (x <= 7) {
01878     winsize = 2;
01879   } else if (x <= 36) {
01880     winsize = 3;
01881   } else if (x <= 140) {
01882     winsize = 4;
01883   } else if (x <= 450) {
01884     winsize = 5;
01885   } else if (x <= 1303) {
01886     winsize = 6;
01887   } else if (x <= 3529) {
01888     winsize = 7;
01889   } else {
01890     winsize = 8;
01891   }
01892 
01893 #ifdef MP_LOW_MEM
01894     if (winsize > 5) {
01895        winsize = 5;
01896     }
01897 #endif
01898 
01899   /* init M array */
01900   /* init first cell */
01901   if ((err = mp_init(&M[1])) != MP_OKAY) {
01902      return err; 
01903   }
01904 
01905   /* now init the second half of the array */
01906   for (x = 1<<(winsize-1); x < (1 << winsize); x++) {
01907     if ((err = mp_init(&M[x])) != MP_OKAY) {
01908       for (y = 1<<(winsize-1); y < x; y++) {
01909         mp_clear (&M[y]);
01910       }
01911       mp_clear(&M[1]);
01912       return err;
01913     }
01914   }
01915 
01916   /* create mu, used for Barrett reduction */
01917   if ((err = mp_init (&mu)) != MP_OKAY) {
01918     goto LBL_M;
01919   }
01920   
01921   if (redmode == 0) {
01922      if ((err = mp_reduce_setup (&mu, P)) != MP_OKAY) {
01923         goto LBL_MU;
01924      }
01925      redux = mp_reduce;
01926   } else {
01927      if ((err = mp_reduce_2k_setup_l (P, &mu)) != MP_OKAY) {
01928         goto LBL_MU;
01929      }
01930      redux = mp_reduce_2k_l;
01931   }    
01932 
01933   /* create M table
01934    *
01935    * The M table contains powers of the base, 
01936    * e.g. M[x] = G**x mod P
01937    *
01938    * The first half of the table is not 
01939    * computed though accept for M[0] and M[1]
01940    */
01941   if ((err = mp_mod (G, P, &M[1])) != MP_OKAY) {
01942     goto LBL_MU;
01943   }
01944 
01945   /* compute the value at M[1<<(winsize-1)] by squaring 
01946    * M[1] (winsize-1) times 
01947    */
01948   if ((err = mp_copy (&M[1], &M[1 << (winsize - 1)])) != MP_OKAY) {
01949     goto LBL_MU;
01950   }
01951 
01952   for (x = 0; x < (winsize - 1); x++) {
01953     /* square it */
01954     if ((err = mp_sqr (&M[1 << (winsize - 1)], 
01955                        &M[1 << (winsize - 1)])) != MP_OKAY) {
01956       goto LBL_MU;
01957     }
01958 
01959     /* reduce modulo P */
01960     if ((err = redux (&M[1 << (winsize - 1)], P, &mu)) != MP_OKAY) {
01961       goto LBL_MU;
01962     }
01963   }
01964 
01965   /* create upper table, that is M[x] = M[x-1] * M[1] (mod P)
01966    * for x = (2**(winsize - 1) + 1) to (2**winsize - 1)
01967    */
01968   for (x = (1 << (winsize - 1)) + 1; x < (1 << winsize); x++) {
01969     if ((err = mp_mul (&M[x - 1], &M[1], &M[x])) != MP_OKAY) {
01970       goto LBL_MU;
01971     }
01972     if ((err = redux (&M[x], P, &mu)) != MP_OKAY) {
01973       goto LBL_MU;
01974     }
01975   }
01976 
01977   /* setup result */
01978   if ((err = mp_init (&res)) != MP_OKAY) {
01979     goto LBL_MU;
01980   }
01981   mp_set (&res, 1);
01982 
01983   /* set initial mode and bit cnt */
01984   mode   = 0;
01985   bitcnt = 1;
01986   buf    = 0;
01987   digidx = X->used - 1;
01988   bitcpy = 0;
01989   bitbuf = 0;
01990 
01991   for (;;) {
01992     /* grab next digit as required */
01993     if (--bitcnt == 0) {
01994       /* if digidx == -1 we are out of digits */
01995       if (digidx == -1) {
01996         break;
01997       }
01998       /* read next digit and reset the bitcnt */
01999       buf    = X->dp[digidx--];
02000       bitcnt = (int) DIGIT_BIT;
02001     }
02002 
02003     /* grab the next msb from the exponent */
02004     y     = (buf >> (mp_digit)(DIGIT_BIT - 1)) & 1;
02005     buf <<= (mp_digit)1;
02006 
02007     /* if the bit is zero and mode == 0 then we ignore it
02008      * These represent the leading zero bits before the first 1 bit
02009      * in the exponent.  Technically this opt is not required but it
02010      * does lower the # of trivial squaring/reductions used
02011      */
02012     if (mode == 0 && y == 0) {
02013       continue;
02014     }
02015 
02016     /* if the bit is zero and mode == 1 then we square */
02017     if (mode == 1 && y == 0) {
02018       if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
02019         goto LBL_RES;
02020       }
02021       if ((err = redux (&res, P, &mu)) != MP_OKAY) {
02022         goto LBL_RES;
02023       }
02024       continue;
02025     }
02026 
02027     /* else we add it to the window */
02028     bitbuf |= (y << (winsize - ++bitcpy));
02029     mode    = 2;
02030 
02031     if (bitcpy == winsize) {
02032       /* ok window is filled so square as required and multiply  */
02033       /* square first */
02034       for (x = 0; x < winsize; x++) {
02035         if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
02036           goto LBL_RES;
02037         }
02038         if ((err = redux (&res, P, &mu)) != MP_OKAY) {
02039           goto LBL_RES;
02040         }
02041       }
02042 
02043       /* then multiply */
02044       if ((err = mp_mul (&res, &M[bitbuf], &res)) != MP_OKAY) {
02045         goto LBL_RES;
02046       }
02047       if ((err = redux (&res, P, &mu)) != MP_OKAY) {
02048         goto LBL_RES;
02049       }
02050 
02051       /* empty window and reset */
02052       bitcpy = 0;
02053       bitbuf = 0;
02054       mode   = 1;
02055     }
02056   }
02057 
02058   /* if bits remain then square/multiply */
02059   if (mode == 2 && bitcpy > 0) {
02060     /* square then multiply if the bit is set */
02061     for (x = 0; x < bitcpy; x++) {
02062       if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
02063         goto LBL_RES;
02064       }
02065       if ((err = redux (&res, P, &mu)) != MP_OKAY) {
02066         goto LBL_RES;
02067       }
02068 
02069       bitbuf <<= 1;
02070       if ((bitbuf & (1 << winsize)) != 0) {
02071         /* then multiply */
02072         if ((err = mp_mul (&res, &M[1], &res)) != MP_OKAY) {
02073           goto LBL_RES;
02074         }
02075         if ((err = redux (&res, P, &mu)) != MP_OKAY) {
02076           goto LBL_RES;
02077         }
02078       }
02079     }
02080   }
02081 
02082   mp_exch (&res, Y);
02083   err = MP_OKAY;
02084 LBL_RES:mp_clear (&res);
02085 LBL_MU:mp_clear (&mu);
02086 LBL_M:
02087   mp_clear(&M[1]);
02088   for (x = 1<<(winsize-1); x < (1 << winsize); x++) {
02089     mp_clear (&M[x]);
02090   }
02091   return err;
02092 }
02093 
02094 
02095 /* computes b = a*a */
02096 static int mp_sqr (mp_int * a, mp_int * b)
02097 {
02098   int     res;
02099 
02100 #ifdef BN_MP_TOOM_SQR_C
02101   /* use Toom-Cook? */
02102   if (a->used >= TOOM_SQR_CUTOFF) {
02103     res = mp_toom_sqr(a, b);
02104   /* Karatsuba? */
02105   } else 
02106 #endif
02107 #ifdef BN_MP_KARATSUBA_SQR_C
02108 if (a->used >= KARATSUBA_SQR_CUTOFF) {
02109     res = mp_karatsuba_sqr (a, b);
02110   } else 
02111 #endif
02112   {
02113 #ifdef BN_FAST_S_MP_SQR_C
02114     /* can we use the fast comba multiplier? */
02115     if ((a->used * 2 + 1) < MP_WARRAY && 
02116          a->used < 
02117          (1 << (sizeof(mp_word) * CHAR_BIT - 2*DIGIT_BIT - 1))) {
02118       res = fast_s_mp_sqr (a, b);
02119     } else
02120 #endif
02121 #ifdef BN_S_MP_SQR_C
02122       res = s_mp_sqr (a, b);
02123 #else
02124 #error mp_sqr could fail
02125       res = MP_VAL;
02126 #endif
02127   }
02128   b->sign = MP_ZPOS;
02129   return res;
02130 }
02131 
02132 
02133 /* reduces a modulo n where n is of the form 2**p - d 
02134    This differs from reduce_2k since "d" can be larger
02135    than a single digit.
02136 */
02137 static int mp_reduce_2k_l(mp_int *a, mp_int *n, mp_int *d)
02138 {
02139    mp_int q;
02140    int    p, res;
02141    
02142    if ((res = mp_init(&q)) != MP_OKAY) {
02143       return res;
02144    }
02145    
02146    p = mp_count_bits(n);    
02147 top:
02148    /* q = a/2**p, a = a mod 2**p */
02149    if ((res = mp_div_2d(a, p, &q, a)) != MP_OKAY) {
02150       goto ERR;
02151    }
02152    
02153    /* q = q * d */
02154    if ((res = mp_mul(&q, d, &q)) != MP_OKAY) { 
02155       goto ERR;
02156    }
02157    
02158    /* a = a + q */
02159    if ((res = s_mp_add(a, &q, a)) != MP_OKAY) {
02160       goto ERR;
02161    }
02162    
02163    if (mp_cmp_mag(a, n) != MP_LT) {
02164       s_mp_sub(a, n, a);
02165       goto top;
02166    }
02167    
02168 ERR:
02169    mp_clear(&q);
02170    return res;
02171 }
02172 
02173 
02174 /* determines the setup value */
02175 static int mp_reduce_2k_setup_l(mp_int *a, mp_int *d)
02176 {
02177    int    res;
02178    mp_int tmp;
02179    
02180    if ((res = mp_init(&tmp)) != MP_OKAY) {
02181       return res;
02182    }
02183    
02184    if ((res = mp_2expt(&tmp, mp_count_bits(a))) != MP_OKAY) {
02185       goto ERR;
02186    }
02187    
02188    if ((res = s_mp_sub(&tmp, a, d)) != MP_OKAY) {
02189       goto ERR;
02190    }
02191    
02192 ERR:
02193    mp_clear(&tmp);
02194    return res;
02195 }
02196 
02197 
02198 /* computes a = 2**b 
02199  *
02200  * Simple algorithm which zeroes the int, grows it then just sets one bit
02201  * as required.
02202  */
02203 static int mp_2expt (mp_int * a, int b)
02204 {
02205   int     res;
02206 
02207   /* zero a as per default */
02208   mp_zero (a);
02209 
02210   /* grow a to accomodate the single bit */
02211   if ((res = mp_grow (a, b / DIGIT_BIT + 1)) != MP_OKAY) {
02212     return res;
02213   }
02214 
02215   /* set the used count of where the bit will go */
02216   a->used = b / DIGIT_BIT + 1;
02217 
02218   /* put the single bit in its place */
02219   a->dp[b / DIGIT_BIT] = ((mp_digit)1) << (b % DIGIT_BIT);
02220 
02221   return MP_OKAY;
02222 }
02223 
02224 
02225 /* pre-calculate the value required for Barrett reduction
02226  * For a given modulus "b" it calulates the value required in "a"
02227  */
02228 static int mp_reduce_setup (mp_int * a, mp_int * b)
02229 {
02230   int     res;
02231   
02232   if ((res = mp_2expt (a, b->used * 2 * DIGIT_BIT)) != MP_OKAY) {
02233     return res;
02234   }
02235   return mp_div (a, b, a, NULL);
02236 }
02237 
02238 
02239 /* reduces x mod m, assumes 0 < x < m**2, mu is 
02240  * precomputed via mp_reduce_setup.
02241  * From HAC pp.604 Algorithm 14.42
02242  */
02243 static int mp_reduce (mp_int * x, mp_int * m, mp_int * mu)
02244 {
02245   mp_int  q;
02246   int     res, um = m->used;
02247 
02248   /* q = x */
02249   if ((res = mp_init_copy (&q, x)) != MP_OKAY) {
02250     return res;
02251   }
02252 
02253   /* q1 = x / b**(k-1)  */
02254   mp_rshd (&q, um - 1);         
02255 
02256   /* according to HAC this optimization is ok */
02257   if (((unsigned long) um) > (((mp_digit)1) << (DIGIT_BIT - 1))) {
02258     if ((res = mp_mul (&q, mu, &q)) != MP_OKAY) {
02259       goto CLEANUP;
02260     }
02261   } else {
02262 #ifdef BN_S_MP_MUL_HIGH_DIGS_C
02263     if ((res = s_mp_mul_high_digs (&q, mu, &q, um)) != MP_OKAY) {
02264       goto CLEANUP;
02265     }
02266 #elif defined(BN_FAST_S_MP_MUL_HIGH_DIGS_C)
02267     if ((res = fast_s_mp_mul_high_digs (&q, mu, &q, um)) != MP_OKAY) {
02268       goto CLEANUP;
02269     }
02270 #else 
02271     { 
02272 #error mp_reduce would always fail
02273       res = MP_VAL;
02274       goto CLEANUP;
02275     }
02276 #endif
02277   }
02278 
02279   /* q3 = q2 / b**(k+1) */
02280   mp_rshd (&q, um + 1);         
02281 
02282   /* x = x mod b**(k+1), quick (no division) */
02283   if ((res = mp_mod_2d (x, DIGIT_BIT * (um + 1), x)) != MP_OKAY) {
02284     goto CLEANUP;
02285   }
02286 
02287   /* q = q * m mod b**(k+1), quick (no division) */
02288   if ((res = s_mp_mul_digs (&q, m, &q, um + 1)) != MP_OKAY) {
02289     goto CLEANUP;
02290   }
02291 
02292   /* x = x - q */
02293   if ((res = mp_sub (x, &q, x)) != MP_OKAY) {
02294     goto CLEANUP;
02295   }
02296 
02297   /* If x < 0, add b**(k+1) to it */
02298   if (mp_cmp_d (x, 0) == MP_LT) {
02299     mp_set (&q, 1);
02300     if ((res = mp_lshd (&q, um + 1)) != MP_OKAY) {
02301       goto CLEANUP;
02302     }
02303     if ((res = mp_add (x, &q, x)) != MP_OKAY) {
02304       goto CLEANUP;
02305     }
02306   }
02307 
02308   /* Back off if it's too big */
02309   while (mp_cmp (x, m) != MP_LT) {
02310     if ((res = s_mp_sub (x, m, x)) != MP_OKAY) {
02311       goto CLEANUP;
02312     }
02313   }
02314   
02315 CLEANUP:
02316   mp_clear (&q);
02317 
02318   return res;
02319 }
02320 
02321 
02322 /* multiplies |a| * |b| and only computes upto digs digits of result
02323  * HAC pp. 595, Algorithm 14.12  Modified so you can control how 
02324  * many digits of output are created.
02325  */
02326 static int s_mp_mul_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
02327 {
02328   mp_int  t;
02329   int     res, pa, pb, ix, iy;
02330   mp_digit u;
02331   mp_word r;
02332   mp_digit tmpx, *tmpt, *tmpy;
02333 
02334   /* can we use the fast multiplier? */
02335   if (((digs) < MP_WARRAY) &&
02336       MIN (a->used, b->used) < 
02337           (1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) {
02338     return fast_s_mp_mul_digs (a, b, c, digs);
02339   }
02340 
02341   if ((res = mp_init_size (&t, digs)) != MP_OKAY) {
02342     return res;
02343   }
02344   t.used = digs;
02345 
02346   /* compute the digits of the product directly */
02347   pa = a->used;
02348   for (ix = 0; ix < pa; ix++) {
02349     /* set the carry to zero */
02350     u = 0;
02351 
02352     /* limit ourselves to making digs digits of output */
02353     pb = MIN (b->used, digs - ix);
02354 
02355     /* setup some aliases */
02356     /* copy of the digit from a used within the nested loop */
02357     tmpx = a->dp[ix];
02358     
02359     /* an alias for the destination shifted ix places */
02360     tmpt = t.dp + ix;
02361     
02362     /* an alias for the digits of b */
02363     tmpy = b->dp;
02364 
02365     /* compute the columns of the output and propagate the carry */
02366     for (iy = 0; iy < pb; iy++) {
02367       /* compute the column as a mp_word */
02368       r       = ((mp_word)*tmpt) +
02369                 ((mp_word)tmpx) * ((mp_word)*tmpy++) +
02370                 ((mp_word) u);
02371 
02372       /* the new column is the lower part of the result */
02373       *tmpt++ = (mp_digit) (r & ((mp_word) MP_MASK));
02374 
02375       /* get the carry word from the result */
02376       u       = (mp_digit) (r >> ((mp_word) DIGIT_BIT));
02377     }
02378     /* set carry if it is placed below digs */
02379     if (ix + iy < digs) {
02380       *tmpt = u;
02381     }
02382   }
02383 
02384   mp_clamp (&t);
02385   mp_exch (&t, c);
02386 
02387   mp_clear (&t);
02388   return MP_OKAY;
02389 }
02390 
02391 
02392 /* Fast (comba) multiplier
02393  *
02394  * This is the fast column-array [comba] multiplier.  It is 
02395  * designed to compute the columns of the product first 
02396  * then handle the carries afterwards.  This has the effect 
02397  * of making the nested loops that compute the columns very
02398  * simple and schedulable on super-scalar processors.
02399  *
02400  * This has been modified to produce a variable number of 
02401  * digits of output so if say only a half-product is required 
02402  * you don't have to compute the upper half (a feature 
02403  * required for fast Barrett reduction).
02404  *
02405  * Based on Algorithm 14.12 on pp.595 of HAC.
02406  *
02407  */
02408 static int fast_s_mp_mul_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
02409 {
02410   int     olduse, res, pa, ix, iz;
02411   mp_digit W[MP_WARRAY];
02412   register mp_word  _W;
02413 
02414   /* grow the destination as required */
02415   if (c->alloc < digs) {
02416     if ((res = mp_grow (c, digs)) != MP_OKAY) {
02417       return res;
02418     }
02419   }
02420 
02421   /* number of output digits to produce */
02422   pa = MIN(digs, a->used + b->used);
02423 
02424   /* clear the carry */
02425   _W = 0;
02426   for (ix = 0; ix < pa; ix++) { 
02427       int      tx, ty;
02428       int      iy;
02429       mp_digit *tmpx, *tmpy;
02430 
02431       /* get offsets into the two bignums */
02432       ty = MIN(b->used-1, ix);
02433       tx = ix - ty;
02434 
02435       /* setup temp aliases */
02436       tmpx = a->dp + tx;
02437       tmpy = b->dp + ty;
02438 
02439       /* this is the number of times the loop will iterrate, essentially 
02440          while (tx++ < a->used && ty-- >= 0) { ... }
02441        */
02442       iy = MIN(a->used-tx, ty+1);
02443 
02444       /* execute loop */
02445       for (iz = 0; iz < iy; ++iz) {
02446          _W += ((mp_word)*tmpx++)*((mp_word)*tmpy--);
02447 
02448       }
02449 
02450       /* store term */
02451       W[ix] = ((mp_digit)_W) & MP_MASK;
02452 
02453       /* make next carry */
02454       _W = _W >> ((mp_word)DIGIT_BIT);
02455  }
02456 
02457   /* setup dest */
02458   olduse  = c->used;
02459   c->used = pa;
02460 
02461   {
02462     register mp_digit *tmpc;
02463     tmpc = c->dp;
02464     for (ix = 0; ix < pa+1; ix++) {
02465       /* now extract the previous digit [below the carry] */
02466       *tmpc++ = W[ix];
02467     }
02468 
02469     /* clear unused digits [that existed in the old copy of c] */
02470     for (; ix < olduse; ix++) {
02471       *tmpc++ = 0;
02472     }
02473   }
02474   mp_clamp (c);
02475   return MP_OKAY;
02476 }
02477 
02478 
02479 /* init an mp_init for a given size */
02480 static int mp_init_size (mp_int * a, int size)
02481 {
02482   int x;
02483 
02484   /* pad size so there are always extra digits */
02485   size += (MP_PREC * 2) - (size % MP_PREC);     
02486   
02487   /* alloc mem */
02488   a->dp = OPT_CAST(mp_digit) XMALLOC (sizeof (mp_digit) * size);
02489   if (a->dp == NULL) {
02490     return MP_MEM;
02491   }
02492 
02493   /* set the members */
02494   a->used  = 0;
02495   a->alloc = size;
02496   a->sign  = MP_ZPOS;
02497 
02498   /* zero the digits */
02499   for (x = 0; x < size; x++) {
02500       a->dp[x] = 0;
02501   }
02502 
02503   return MP_OKAY;
02504 }
02505 
02506 
02507 /* low level squaring, b = a*a, HAC pp.596-597, Algorithm 14.16 */
02508 static int s_mp_sqr (mp_int * a, mp_int * b)
02509 {
02510   mp_int  t;
02511   int     res, ix, iy, pa;
02512   mp_word r;
02513   mp_digit u, tmpx, *tmpt;
02514 
02515   pa = a->used;
02516   if ((res = mp_init_size (&t, 2*pa + 1)) != MP_OKAY) {
02517     return res;
02518   }
02519 
02520   /* default used is maximum possible size */
02521   t.used = 2*pa + 1;
02522 
02523   for (ix = 0; ix < pa; ix++) {
02524     /* first calculate the digit at 2*ix */
02525     /* calculate double precision result */
02526     r = ((mp_word) t.dp[2*ix]) +
02527         ((mp_word)a->dp[ix])*((mp_word)a->dp[ix]);
02528 
02529     /* store lower part in result */
02530     t.dp[ix+ix] = (mp_digit) (r & ((mp_word) MP_MASK));
02531 
02532     /* get the carry */
02533     u           = (mp_digit)(r >> ((mp_word) DIGIT_BIT));
02534 
02535     /* left hand side of A[ix] * A[iy] */
02536     tmpx        = a->dp[ix];
02537 
02538     /* alias for where to store the results */
02539     tmpt        = t.dp + (2*ix + 1);
02540     
02541     for (iy = ix + 1; iy < pa; iy++) {
02542       /* first calculate the product */
02543       r       = ((mp_word)tmpx) * ((mp_word)a->dp[iy]);
02544 
02545       /* now calculate the double precision result, note we use
02546        * addition instead of *2 since it's easier to optimize
02547        */
02548       r       = ((mp_word) *tmpt) + r + r + ((mp_word) u);
02549 
02550       /* store lower part */
02551       *tmpt++ = (mp_digit) (r & ((mp_word) MP_MASK));
02552 
02553       /* get carry */
02554       u       = (mp_digit)(r >> ((mp_word) DIGIT_BIT));
02555     }
02556     /* propagate upwards */
02557     while (u != ((mp_digit) 0)) {
02558       r       = ((mp_word) *tmpt) + ((mp_word) u);
02559       *tmpt++ = (mp_digit) (r & ((mp_word) MP_MASK));
02560       u       = (mp_digit)(r >> ((mp_word) DIGIT_BIT));
02561     }
02562   }
02563 
02564   mp_clamp (&t);
02565   mp_exch (&t, b);
02566   mp_clear (&t);
02567   return MP_OKAY;
02568 }
02569 
02570 
02571 /* multiplies |a| * |b| and does not compute the lower digs digits
02572  * [meant to get the higher part of the product]
02573  */
02574 static int s_mp_mul_high_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
02575 {
02576   mp_int  t;
02577   int     res, pa, pb, ix, iy;
02578   mp_digit u;
02579   mp_word r;
02580   mp_digit tmpx, *tmpt, *tmpy;
02581 
02582   /* can we use the fast multiplier? */
02583 #ifdef BN_FAST_S_MP_MUL_HIGH_DIGS_C
02584   if (((a->used + b->used + 1) < MP_WARRAY)
02585       && MIN (a->used, b->used) < (1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) {
02586     return fast_s_mp_mul_high_digs (a, b, c, digs);
02587   }
02588 #endif
02589 
02590   if ((res = mp_init_size (&t, a->used + b->used + 1)) != MP_OKAY) {
02591     return res;
02592   }
02593   t.used = a->used + b->used + 1;
02594 
02595   pa = a->used;
02596   pb = b->used;
02597   for (ix = 0; ix < pa; ix++) {
02598     /* clear the carry */
02599     u = 0;
02600 
02601     /* left hand side of A[ix] * B[iy] */
02602     tmpx = a->dp[ix];
02603 
02604     /* alias to the address of where the digits will be stored */
02605     tmpt = &(t.dp[digs]);
02606 
02607     /* alias for where to read the right hand side from */
02608     tmpy = b->dp + (digs - ix);
02609 
02610     for (iy = digs - ix; iy < pb; iy++) {
02611       /* calculate the double precision result */
02612       r       = ((mp_word)*tmpt) +
02613                 ((mp_word)tmpx) * ((mp_word)*tmpy++) +
02614                 ((mp_word) u);
02615 
02616       /* get the lower part */
02617       *tmpt++ = (mp_digit) (r & ((mp_word) MP_MASK));
02618 
02619       /* carry the carry */
02620       u       = (mp_digit) (r >> ((mp_word) DIGIT_BIT));
02621     }
02622     *tmpt = u;
02623   }
02624   mp_clamp (&t);
02625   mp_exch (&t, c);
02626   mp_clear (&t);
02627   return MP_OKAY;
02628 }
02629 
02630 
02631 #ifdef BN_MP_MONTGOMERY_SETUP_C
02632 /* setups the montgomery reduction stuff */
02633 static int
02634 mp_montgomery_setup (mp_int * n, mp_digit * rho)
02635 {
02636   mp_digit x, b;
02637 
02638 /* fast inversion mod 2**k
02639  *
02640  * Based on the fact that
02641  *
02642  * XA = 1 (mod 2**n)  =>  (X(2-XA)) A = 1 (mod 2**2n)
02643  *                    =>  2*X*A - X*X*A*A = 1
02644  *                    =>  2*(1) - (1)     = 1
02645  */
02646   b = n->dp[0];
02647 
02648   if ((b & 1) == 0) {
02649     return MP_VAL;
02650   }
02651 
02652   x = (((b + 2) & 4) << 1) + b; /* here x*a==1 mod 2**4 */
02653   x *= 2 - b * x;               /* here x*a==1 mod 2**8 */
02654 #if !defined(MP_8BIT)
02655   x *= 2 - b * x;               /* here x*a==1 mod 2**16 */
02656 #endif
02657 #if defined(MP_64BIT) || !(defined(MP_8BIT) || defined(MP_16BIT))
02658   x *= 2 - b * x;               /* here x*a==1 mod 2**32 */
02659 #endif
02660 #ifdef MP_64BIT
02661   x *= 2 - b * x;               /* here x*a==1 mod 2**64 */
02662 #endif
02663 
02664   /* rho = -1/m mod b */
02665   *rho = (unsigned long)(((mp_word)1 << ((mp_word) DIGIT_BIT)) - x) & MP_MASK;
02666 
02667   return MP_OKAY;
02668 }
02669 #endif
02670 
02671 
02672 #ifdef BN_FAST_MP_MONTGOMERY_REDUCE_C
02673 /* computes xR**-1 == x (mod N) via Montgomery Reduction
02674  *
02675  * This is an optimized implementation of montgomery_reduce
02676  * which uses the comba method to quickly calculate the columns of the
02677  * reduction.
02678  *
02679  * Based on Algorithm 14.32 on pp.601 of HAC.
02680 */
02681 int fast_mp_montgomery_reduce (mp_int * x, mp_int * n, mp_digit rho)
02682 {
02683   int     ix, res, olduse;
02684   mp_word W[MP_WARRAY];
02685 
02686   /* get old used count */
02687   olduse = x->used;
02688 
02689   /* grow a as required */
02690   if (x->alloc < n->used + 1) {
02691     if ((res = mp_grow (x, n->used + 1)) != MP_OKAY) {
02692       return res;
02693     }
02694   }
02695 
02696   /* first we have to get the digits of the input into
02697    * an array of double precision words W[...]
02698    */
02699   {
02700     register mp_word *_W;
02701     register mp_digit *tmpx;
02702 
02703     /* alias for the W[] array */
02704     _W   = W;
02705 
02706     /* alias for the digits of  x*/
02707     tmpx = x->dp;
02708 
02709     /* copy the digits of a into W[0..a->used-1] */
02710     for (ix = 0; ix < x->used; ix++) {
02711       *_W++ = *tmpx++;
02712     }
02713 
02714     /* zero the high words of W[a->used..m->used*2] */
02715     for (; ix < n->used * 2 + 1; ix++) {
02716       *_W++ = 0;
02717     }
02718   }
02719 
02720   /* now we proceed to zero successive digits
02721    * from the least significant upwards
02722    */
02723   for (ix = 0; ix < n->used; ix++) {
02724     /* mu = ai * m' mod b
02725      *
02726      * We avoid a double precision multiplication (which isn't required)
02727      * by casting the value down to a mp_digit.  Note this requires
02728      * that W[ix-1] have  the carry cleared (see after the inner loop)
02729      */
02730     register mp_digit mu;
02731     mu = (mp_digit) (((W[ix] & MP_MASK) * rho) & MP_MASK);
02732 
02733     /* a = a + mu * m * b**i
02734      *
02735      * This is computed in place and on the fly.  The multiplication
02736      * by b**i is handled by offseting which columns the results
02737      * are added to.
02738      *
02739      * Note the comba method normally doesn't handle carries in the
02740      * inner loop In this case we fix the carry from the previous
02741      * column since the Montgomery reduction requires digits of the
02742      * result (so far) [see above] to work.  This is
02743      * handled by fixing up one carry after the inner loop.  The
02744      * carry fixups are done in order so after these loops the
02745      * first m->used words of W[] have the carries fixed
02746      */
02747     {
02748       register int iy;
02749       register mp_digit *tmpn;
02750       register mp_word *_W;
02751 
02752       /* alias for the digits of the modulus */
02753       tmpn = n->dp;
02754 
02755       /* Alias for the columns set by an offset of ix */
02756       _W = W + ix;
02757 
02758       /* inner loop */
02759       for (iy = 0; iy < n->used; iy++) {
02760           *_W++ += ((mp_word)mu) * ((mp_word)*tmpn++);
02761       }
02762     }
02763 
02764     /* now fix carry for next digit, W[ix+1] */
02765     W[ix + 1] += W[ix] >> ((mp_word) DIGIT_BIT);
02766   }
02767 
02768   /* now we have to propagate the carries and
02769    * shift the words downward [all those least
02770    * significant digits we zeroed].
02771    */
02772   {
02773     register mp_digit *tmpx;
02774     register mp_word *_W, *_W1;
02775 
02776     /* nox fix rest of carries */
02777 
02778     /* alias for current word */
02779     _W1 = W + ix;
02780 
02781     /* alias for next word, where the carry goes */
02782     _W = W + ++ix;
02783 
02784     for (; ix <= n->used * 2 + 1; ix++) {
02785       *_W++ += *_W1++ >> ((mp_word) DIGIT_BIT);
02786     }
02787 
02788     /* copy out, A = A/b**n
02789      *
02790      * The result is A/b**n but instead of converting from an
02791      * array of mp_word to mp_digit than calling mp_rshd
02792      * we just copy them in the right order
02793      */
02794 
02795     /* alias for destination word */
02796     tmpx = x->dp;
02797 
02798     /* alias for shifted double precision result */
02799     _W = W + n->used;
02800 
02801     for (ix = 0; ix < n->used + 1; ix++) {
02802       *tmpx++ = (mp_digit)(*_W++ & ((mp_word) MP_MASK));
02803     }
02804 
02805     /* zero oldused digits, if the input a was larger than
02806      * m->used+1 we'll have to clear the digits
02807      */
02808     for (; ix < olduse; ix++) {
02809       *tmpx++ = 0;
02810     }
02811   }
02812 
02813   /* set the max used and clamp */
02814   x->used = n->used + 1;
02815   mp_clamp (x);
02816 
02817   /* if A >= m then A = A - m */
02818   if (mp_cmp_mag (x, n) != MP_LT) {
02819     return s_mp_sub (x, n, x);
02820   }
02821   return MP_OKAY;
02822 }
02823 #endif
02824 
02825 
02826 #ifdef BN_MP_MUL_2_C
02827 /* b = a*2 */
02828 static int mp_mul_2(mp_int * a, mp_int * b)
02829 {
02830   int     x, res, oldused;
02831 
02832   /* grow to accomodate result */
02833   if (b->alloc < a->used + 1) {
02834     if ((res = mp_grow (b, a->used + 1)) != MP_OKAY) {
02835       return res;
02836     }
02837   }
02838 
02839   oldused = b->used;
02840   b->used = a->used;
02841 
02842   {
02843     register mp_digit r, rr, *tmpa, *tmpb;
02844 
02845     /* alias for source */
02846     tmpa = a->dp;
02847     
02848     /* alias for dest */
02849     tmpb = b->dp;
02850 
02851     /* carry */
02852     r = 0;
02853     for (x = 0; x < a->used; x++) {
02854     
02855       /* get what will be the *next* carry bit from the 
02856        * MSB of the current digit 
02857        */
02858       rr = *tmpa >> ((mp_digit)(DIGIT_BIT - 1));
02859       
02860       /* now shift up this digit, add in the carry [from the previous] */
02861       *tmpb++ = ((*tmpa++ << ((mp_digit)1)) | r) & MP_MASK;
02862       
02863       /* copy the carry that would be from the source 
02864        * digit into the next iteration 
02865        */
02866       r = rr;
02867     }
02868 
02869     /* new leading digit? */
02870     if (r != 0) {
02871       /* add a MSB which is always 1 at this point */
02872       *tmpb = 1;
02873       ++(b->used);
02874     }
02875 
02876     /* now zero any excess digits on the destination 
02877      * that we didn't write to 
02878      */
02879     tmpb = b->dp + b->used;
02880     for (x = b->used; x < oldused; x++) {
02881       *tmpb++ = 0;
02882     }
02883   }
02884   b->sign = a->sign;
02885   return MP_OKAY;
02886 }
02887 #endif
02888 
02889 
02890 #ifdef BN_MP_MONTGOMERY_CALC_NORMALIZATION_C
02891 /*
02892  * shifts with subtractions when the result is greater than b.
02893  *
02894  * The method is slightly modified to shift B unconditionally upto just under
02895  * the leading bit of b.  This saves alot of multiple precision shifting.
02896  */
02897 static int mp_montgomery_calc_normalization (mp_int * a, mp_int * b)
02898 {
02899   int     x, bits, res;
02900 
02901   /* how many bits of last digit does b use */
02902   bits = mp_count_bits (b) % DIGIT_BIT;
02903 
02904   if (b->used > 1) {
02905      if ((res = mp_2expt (a, (b->used - 1) * DIGIT_BIT + bits - 1)) != MP_OKAY) {
02906         return res;
02907      }
02908   } else {
02909      mp_set(a, 1);
02910      bits = 1;
02911   }
02912 
02913 
02914   /* now compute C = A * B mod b */
02915   for (x = bits - 1; x < (int)DIGIT_BIT; x++) {
02916     if ((res = mp_mul_2 (a, a)) != MP_OKAY) {
02917       return res;
02918     }
02919     if (mp_cmp_mag (a, b) != MP_LT) {
02920       if ((res = s_mp_sub (a, b, a)) != MP_OKAY) {
02921         return res;
02922       }
02923     }
02924   }
02925 
02926   return MP_OKAY;
02927 }
02928 #endif
02929 
02930 
02931 #ifdef BN_MP_EXPTMOD_FAST_C
02932 /* computes Y == G**X mod P, HAC pp.616, Algorithm 14.85
02933  *
02934  * Uses a left-to-right k-ary sliding window to compute the modular exponentiation.
02935  * The value of k changes based on the size of the exponent.
02936  *
02937  * Uses Montgomery or Diminished Radix reduction [whichever appropriate]
02938  */
02939 
02940 static int mp_exptmod_fast (mp_int * G, mp_int * X, mp_int * P, mp_int * Y, int redmode)
02941 {
02942   mp_int  M[TAB_SIZE], res;
02943   mp_digit buf, mp;
02944   int     err, bitbuf, bitcpy, bitcnt, mode, digidx, x, y, winsize;
02945 
02946   /* use a pointer to the reduction algorithm.  This allows us to use
02947    * one of many reduction algorithms without modding the guts of
02948    * the code with if statements everywhere.
02949    */
02950   int     (*redux)(mp_int*,mp_int*,mp_digit);
02951 
02952   /* find window size */
02953   x = mp_count_bits (X);
02954   if (x <= 7) {
02955     winsize = 2;
02956   } else if (x <= 36) {
02957     winsize = 3;
02958   } else if (x <= 140) {
02959     winsize = 4;
02960   } else if (x <= 450) {
02961     winsize = 5;
02962   } else if (x <= 1303) {
02963     winsize = 6;
02964   } else if (x <= 3529) {
02965     winsize = 7;
02966   } else {
02967     winsize = 8;
02968   }
02969 
02970 #ifdef MP_LOW_MEM
02971   if (winsize > 5) {
02972      winsize = 5;
02973   }
02974 #endif
02975 
02976   /* init M array */
02977   /* init first cell */
02978   if ((err = mp_init(&M[1])) != MP_OKAY) {
02979      return err;
02980   }
02981 
02982   /* now init the second half of the array */
02983   for (x = 1<<(winsize-1); x < (1 << winsize); x++) {
02984     if ((err = mp_init(&M[x])) != MP_OKAY) {
02985       for (y = 1<<(winsize-1); y < x; y++) {
02986         mp_clear (&M[y]);
02987       }
02988       mp_clear(&M[1]);
02989       return err;
02990     }
02991   }
02992 
02993   /* determine and setup reduction code */
02994   if (redmode == 0) {
02995 #ifdef BN_MP_MONTGOMERY_SETUP_C     
02996      /* now setup montgomery  */
02997      if ((err = mp_montgomery_setup (P, &mp)) != MP_OKAY) {
02998         goto LBL_M;
02999      }
03000 #else
03001      err = MP_VAL;
03002      goto LBL_M;
03003 #endif
03004 
03005      /* automatically pick the comba one if available (saves quite a few calls/ifs) */
03006 #ifdef BN_FAST_MP_MONTGOMERY_REDUCE_C
03007      if (((P->used * 2 + 1) < MP_WARRAY) &&
03008           P->used < (1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) {
03009         redux = fast_mp_montgomery_reduce;
03010      } else 
03011 #endif
03012      {
03013 #ifdef BN_MP_MONTGOMERY_REDUCE_C
03014         /* use slower baseline Montgomery method */
03015         redux = mp_montgomery_reduce;
03016 #else
03017         err = MP_VAL;
03018         goto LBL_M;
03019 #endif
03020      }
03021   } else if (redmode == 1) {
03022 #if defined(BN_MP_DR_SETUP_C) && defined(BN_MP_DR_REDUCE_C)
03023      /* setup DR reduction for moduli of the form B**k - b */
03024      mp_dr_setup(P, &mp);
03025      redux = mp_dr_reduce;
03026 #else
03027      err = MP_VAL;
03028      goto LBL_M;
03029 #endif
03030   } else {
03031 #if defined(BN_MP_REDUCE_2K_SETUP_C) && defined(BN_MP_REDUCE_2K_C)
03032      /* setup DR reduction for moduli of the form 2**k - b */
03033      if ((err = mp_reduce_2k_setup(P, &mp)) != MP_OKAY) {
03034         goto LBL_M;
03035      }
03036      redux = mp_reduce_2k;
03037 #else
03038      err = MP_VAL;
03039      goto LBL_M;
03040 #endif
03041   }
03042 
03043   /* setup result */
03044   if ((err = mp_init (&res)) != MP_OKAY) {
03045     goto LBL_M;
03046   }
03047 
03048   /* create M table
03049    *
03050 
03051    *
03052    * The first half of the table is not computed though accept for M[0] and M[1]
03053    */
03054 
03055   if (redmode == 0) {
03056 #ifdef BN_MP_MONTGOMERY_CALC_NORMALIZATION_C
03057      /* now we need R mod m */
03058      if ((err = mp_montgomery_calc_normalization (&res, P)) != MP_OKAY) {
03059        goto LBL_RES;
03060      }
03061 #else 
03062      err = MP_VAL;
03063      goto LBL_RES;
03064 #endif
03065 
03066      /* now set M[1] to G * R mod m */
03067      if ((err = mp_mulmod (G, &res, P, &M[1])) != MP_OKAY) {
03068        goto LBL_RES;
03069      }
03070   } else {
03071      mp_set(&res, 1);
03072      if ((err = mp_mod(G, P, &M[1])) != MP_OKAY) {
03073         goto LBL_RES;
03074      }
03075   }
03076 
03077   /* compute the value at M[1<<(winsize-1)] by squaring M[1] (winsize-1) times */
03078   if ((err = mp_copy (&M[1], &M[1 << (winsize - 1)])) != MP_OKAY) {
03079     goto LBL_RES;
03080   }
03081 
03082   for (x = 0; x < (winsize - 1); x++) {
03083     if ((err = mp_sqr (&M[1 << (winsize - 1)], &M[1 << (winsize - 1)])) != MP_OKAY) {
03084       goto LBL_RES;
03085     }
03086     if ((err = redux (&M[1 << (winsize - 1)], P, mp)) != MP_OKAY) {
03087       goto LBL_RES;
03088     }
03089   }
03090 
03091   /* create upper table */
03092   for (x = (1 << (winsize - 1)) + 1; x < (1 << winsize); x++) {
03093     if ((err = mp_mul (&M[x - 1], &M[1], &M[x])) != MP_OKAY) {
03094       goto LBL_RES;
03095     }
03096     if ((err = redux (&M[x], P, mp)) != MP_OKAY) {
03097       goto LBL_RES;
03098     }
03099   }
03100 
03101   /* set initial mode and bit cnt */
03102   mode   = 0;
03103   bitcnt = 1;
03104   buf    = 0;
03105   digidx = X->used - 1;
03106   bitcpy = 0;
03107   bitbuf = 0;
03108 
03109   for (;;) {
03110     /* grab next digit as required */
03111     if (--bitcnt == 0) {
03112       /* if digidx == -1 we are out of digits so break */
03113       if (digidx == -1) {
03114         break;
03115       }
03116       /* read next digit and reset bitcnt */
03117       buf    = X->dp[digidx--];
03118       bitcnt = (int)DIGIT_BIT;
03119     }
03120 
03121     /* grab the next msb from the exponent */
03122     y     = (mp_digit)(buf >> (DIGIT_BIT - 1)) & 1;
03123     buf <<= (mp_digit)1;
03124 
03125     /* if the bit is zero and mode == 0 then we ignore it
03126      * These represent the leading zero bits before the first 1 bit
03127      * in the exponent.  Technically this opt is not required but it
03128      * does lower the # of trivial squaring/reductions used
03129      */
03130     if (mode == 0 && y == 0) {
03131       continue;
03132     }
03133 
03134     /* if the bit is zero and mode == 1 then we square */
03135     if (mode == 1 && y == 0) {
03136       if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
03137         goto LBL_RES;
03138       }
03139       if ((err = redux (&res, P, mp)) != MP_OKAY) {
03140         goto LBL_RES;
03141       }
03142       continue;
03143     }
03144 
03145     /* else we add it to the window */
03146     bitbuf |= (y << (winsize - ++bitcpy));
03147     mode    = 2;
03148 
03149     if (bitcpy == winsize) {
03150       /* ok window is filled so square as required and multiply  */
03151       /* square first */
03152       for (x = 0; x < winsize; x++) {
03153         if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
03154           goto LBL_RES;
03155         }
03156         if ((err = redux (&res, P, mp)) != MP_OKAY) {
03157           goto LBL_RES;
03158         }
03159       }
03160 
03161       /* then multiply */
03162       if ((err = mp_mul (&res, &M[bitbuf], &res)) != MP_OKAY) {
03163         goto LBL_RES;
03164       }
03165       if ((err = redux (&res, P, mp)) != MP_OKAY) {
03166         goto LBL_RES;
03167       }
03168 
03169       /* empty window and reset */
03170       bitcpy = 0;
03171       bitbuf = 0;
03172       mode   = 1;
03173     }
03174   }
03175 
03176   /* if bits remain then square/multiply */
03177   if (mode == 2 && bitcpy > 0) {
03178     /* square then multiply if the bit is set */
03179     for (x = 0; x < bitcpy; x++) {
03180       if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
03181         goto LBL_RES;
03182       }
03183       if ((err = redux (&res, P, mp)) != MP_OKAY) {
03184         goto LBL_RES;
03185       }
03186 
03187       /* get next bit of the window */
03188       bitbuf <<= 1;
03189       if ((bitbuf & (1 << winsize)) != 0) {
03190         /* then multiply */
03191         if ((err = mp_mul (&res, &M[1], &res)) != MP_OKAY) {
03192           goto LBL_RES;
03193         }
03194         if ((err = redux (&res, P, mp)) != MP_OKAY) {
03195           goto LBL_RES;
03196         }
03197       }
03198     }
03199   }
03200 
03201   if (redmode == 0) {
03202      /* fixup result if Montgomery reduction is used
03203       * recall that any value in a Montgomery system is
03204       * actually multiplied by R mod n.  So we have
03205       * to reduce one more time to cancel out the factor
03206       * of R.
03207       */
03208      if ((err = redux(&res, P, mp)) != MP_OKAY) {
03209        goto LBL_RES;
03210      }
03211   }
03212 
03213   /* swap res with Y */
03214   mp_exch (&res, Y);
03215   err = MP_OKAY;
03216 LBL_RES:mp_clear (&res);
03217 LBL_M:
03218   mp_clear(&M[1]);
03219   for (x = 1<<(winsize-1); x < (1 << winsize); x++) {
03220     mp_clear (&M[x]);
03221   }
03222   return err;
03223 }
03224 #endif
03225 
03226 
03227 #ifdef BN_FAST_S_MP_SQR_C
03228 /* the jist of squaring...
03229  * you do like mult except the offset of the tmpx [one that 
03230  * starts closer to zero] can't equal the offset of tmpy.  
03231  * So basically you set up iy like before then you min it with
03232  * (ty-tx) so that it never happens.  You double all those 
03233  * you add in the inner loop
03234 
03235 After that loop you do the squares and add them in.
03236 */
03237 
03238 static int fast_s_mp_sqr (mp_int * a, mp_int * b)
03239 {
03240   int       olduse, res, pa, ix, iz;
03241   mp_digit   W[MP_WARRAY], *tmpx;
03242   mp_word   W1;
03243 
03244   /* grow the destination as required */
03245   pa = a->used + a->used;
03246   if (b->alloc < pa) {
03247     if ((res = mp_grow (b, pa)) != MP_OKAY) {
03248       return res;
03249     }
03250   }
03251 
03252   /* number of output digits to produce */
03253   W1 = 0;
03254   for (ix = 0; ix < pa; ix++) { 
03255       int      tx, ty, iy;
03256       mp_word  _W;
03257       mp_digit *tmpy;
03258 
03259       /* clear counter */
03260       _W = 0;
03261 
03262       /* get offsets into the two bignums */
03263       ty = MIN(a->used-1, ix);
03264       tx = ix - ty;
03265 
03266       /* setup temp aliases */
03267       tmpx = a->dp + tx;
03268       tmpy = a->dp + ty;
03269 
03270       /* this is the number of times the loop will iterrate, essentially
03271          while (tx++ < a->used && ty-- >= 0) { ... }
03272        */
03273       iy = MIN(a->used-tx, ty+1);
03274 
03275       /* now for squaring tx can never equal ty 
03276        * we halve the distance since they approach at a rate of 2x
03277        * and we have to round because odd cases need to be executed
03278        */
03279       iy = MIN(iy, (ty-tx+1)>>1);
03280 
03281       /* execute loop */
03282       for (iz = 0; iz < iy; iz++) {
03283          _W += ((mp_word)*tmpx++)*((mp_word)*tmpy--);
03284       }
03285 
03286       /* double the inner product and add carry */
03287       _W = _W + _W + W1;
03288 
03289       /* even columns have the square term in them */
03290       if ((ix&1) == 0) {
03291          _W += ((mp_word)a->dp[ix>>1])*((mp_word)a->dp[ix>>1]);
03292       }
03293 
03294       /* store it */
03295       W[ix] = (mp_digit)(_W & MP_MASK);
03296 
03297       /* make next carry */
03298       W1 = _W >> ((mp_word)DIGIT_BIT);
03299   }
03300 
03301   /* setup dest */
03302   olduse  = b->used;
03303   b->used = a->used+a->used;
03304 
03305   {
03306     mp_digit *tmpb;
03307     tmpb = b->dp;
03308     for (ix = 0; ix < pa; ix++) {
03309       *tmpb++ = W[ix] & MP_MASK;
03310     }
03311 
03312     /* clear unused digits [that existed in the old copy of c] */
03313     for (; ix < olduse; ix++) {
03314       *tmpb++ = 0;
03315     }
03316   }
03317   mp_clamp (b);
03318   return MP_OKAY;
03319 }
03320 #endif
03321 
03322 
03323 #ifdef BN_MP_MUL_D_C
03324 /* multiply by a digit */
03325 static int
03326 mp_mul_d (mp_int * a, mp_digit b, mp_int * c)
03327 {
03328   mp_digit u, *tmpa, *tmpc;
03329   mp_word  r;
03330   int      ix, res, olduse;
03331 
03332   /* make sure c is big enough to hold a*b */
03333   if (c->alloc < a->used + 1) {
03334     if ((res = mp_grow (c, a->used + 1)) != MP_OKAY) {
03335       return res;
03336     }
03337   }
03338 
03339   /* get the original destinations used count */
03340   olduse = c->used;
03341 
03342   /* set the sign */
03343   c->sign = a->sign;
03344 
03345   /* alias for a->dp [source] */
03346   tmpa = a->dp;
03347 
03348   /* alias for c->dp [dest] */
03349   tmpc = c->dp;
03350 
03351   /* zero carry */
03352   u = 0;
03353 
03354   /* compute columns */
03355   for (ix = 0; ix < a->used; ix++) {
03356     /* compute product and carry sum for this term */
03357     r       = ((mp_word) u) + ((mp_word)*tmpa++) * ((mp_word)b);
03358 
03359     /* mask off higher bits to get a single digit */
03360     *tmpc++ = (mp_digit) (r & ((mp_word) MP_MASK));
03361 
03362     /* send carry into next iteration */
03363     u       = (mp_digit) (r >> ((mp_word) DIGIT_BIT));
03364   }
03365 
03366   /* store final carry [if any] and increment ix offset  */
03367   *tmpc++ = u;
03368   ++ix;
03369 
03370   /* now zero digits above the top */
03371   while (ix++ < olduse) {
03372      *tmpc++ = 0;
03373   }
03374 
03375   /* set used count */
03376   c->used = a->used + 1;
03377   mp_clamp(c);
03378 
03379   return MP_OKAY;
03380 }
03381 #endif
03382