Blame mpn/generic/sec_powm.c

Packit 5c3484
/* mpn_sec_powm -- Compute R = U^E mod M.  Secure variant, side-channel silent
Packit 5c3484
   under the assumption that the multiply instruction is side channel silent.
Packit 5c3484
Packit 5c3484
   Contributed to the GNU project by Torbjörn Granlund.
Packit 5c3484
Packit 5c3484
Copyright 2007-2009, 2011-2014 Free Software Foundation, Inc.
Packit 5c3484
Packit 5c3484
This file is part of the GNU MP Library.
Packit 5c3484
Packit 5c3484
The GNU MP Library is free software; you can redistribute it and/or modify
Packit 5c3484
it under the terms of either:
Packit 5c3484
Packit 5c3484
  * the GNU Lesser General Public License as published by the Free
Packit 5c3484
    Software Foundation; either version 3 of the License, or (at your
Packit 5c3484
    option) any later version.
Packit 5c3484
Packit 5c3484
or
Packit 5c3484
Packit 5c3484
  * the GNU General Public License as published by the Free Software
Packit 5c3484
    Foundation; either version 2 of the License, or (at your option) any
Packit 5c3484
    later version.
Packit 5c3484
Packit 5c3484
or both in parallel, as here.
Packit 5c3484
Packit 5c3484
The GNU MP Library is distributed in the hope that it will be useful, but
Packit 5c3484
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
Packit 5c3484
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
Packit 5c3484
for more details.
Packit 5c3484
Packit 5c3484
You should have received copies of the GNU General Public License and the
Packit 5c3484
GNU Lesser General Public License along with the GNU MP Library.  If not,
Packit 5c3484
see https://www.gnu.org/licenses/.  */
Packit 5c3484
Packit 5c3484
Packit 5c3484
/*
Packit 5c3484
  BASIC ALGORITHM, Compute U^E mod M, where M < B^n is odd.
Packit 5c3484
Packit 5c3484
  1. T <- (B^n * U) mod M                Convert to REDC form
Packit 5c3484
Packit 5c3484
  2. Compute table U^0, U^1, U^2... of E-dependent size
Packit 5c3484
Packit 5c3484
  3. While there are more bits in E
Packit 5c3484
       W <- power left-to-right base-k
Packit 5c3484
Packit 5c3484
Packit 5c3484
  TODO:
Packit 5c3484
Packit 5c3484
   * Make getbits a macro, thereby allowing it to update the index operand.
Packit 5c3484
     That will simplify the code using getbits.  (Perhaps make getbits' sibling
Packit 5c3484
     getbit then have similar form, for symmetry.)
Packit 5c3484
Packit 5c3484
   * Choose window size without looping.  (Superoptimize or think(tm).)
Packit 5c3484
Packit 5c3484
   * REDC_1_TO_REDC_2_THRESHOLD might actually represent the cutoff between
Packit 5c3484
     redc_1 and redc_n.  On such systems, we will switch to redc_2 causing
Packit 5c3484
     slowdown.
Packit 5c3484
*/
Packit 5c3484
Packit 5c3484
#include "gmp.h"
Packit 5c3484
#include "gmp-impl.h"
Packit 5c3484
#include "longlong.h"
Packit 5c3484
Packit 5c3484
#undef MPN_REDC_1_SEC
Packit 5c3484
#define MPN_REDC_1_SEC(rp, up, mp, n, invm)				\
Packit 5c3484
  do {									\
Packit 5c3484
    mp_limb_t cy;							\
Packit 5c3484
    cy = mpn_redc_1 (rp, up, mp, n, invm);				\
Packit 5c3484
    mpn_cnd_sub_n (cy, rp, rp, mp, n);					\
Packit 5c3484
  } while (0)
Packit 5c3484
Packit 5c3484
#undef MPN_REDC_2_SEC
Packit 5c3484
#define MPN_REDC_2_SEC(rp, up, mp, n, mip)				\
Packit 5c3484
  do {									\
Packit 5c3484
    mp_limb_t cy;							\
Packit 5c3484
    cy = mpn_redc_2 (rp, up, mp, n, mip);				\
Packit 5c3484
    mpn_cnd_sub_n (cy, rp, rp, mp, n);					\
Packit 5c3484
  } while (0)
Packit 5c3484
Packit 5c3484
#if HAVE_NATIVE_mpn_addmul_2 || HAVE_NATIVE_mpn_redc_2
Packit 5c3484
#define WANT_REDC_2 1
Packit 5c3484
#endif
Packit 5c3484
Packit 5c3484
/* Define our own mpn squaring function.  We do this since we cannot use a
Packit 5c3484
   native mpn_sqr_basecase over TUNE_SQR_TOOM2_MAX, or a non-native one over
Packit 5c3484
   SQR_TOOM2_THRESHOLD.  This is so because of fixed size stack allocations
Packit 5c3484
   made inside mpn_sqr_basecase.  */
Packit 5c3484
Packit 5c3484
#if HAVE_NATIVE_mpn_sqr_diagonal
Packit 5c3484
#define MPN_SQR_DIAGONAL(rp, up, n)					\
Packit 5c3484
  mpn_sqr_diagonal (rp, up, n)
Packit 5c3484
#else
Packit 5c3484
#define MPN_SQR_DIAGONAL(rp, up, n)					\
Packit 5c3484
  do {									\
Packit 5c3484
    mp_size_t _i;							\
Packit 5c3484
    for (_i = 0; _i < (n); _i++)					\
Packit 5c3484
      {									\
Packit 5c3484
	mp_limb_t ul, lpl;						\
Packit 5c3484
	ul = (up)[_i];							\
Packit 5c3484
	umul_ppmm ((rp)[2 * _i + 1], lpl, ul, ul << GMP_NAIL_BITS);	\
Packit 5c3484
	(rp)[2 * _i] = lpl >> GMP_NAIL_BITS;				\
Packit 5c3484
      }									\
Packit 5c3484
  } while (0)
Packit 5c3484
#endif
Packit 5c3484
Packit 5c3484
Packit 5c3484
#if ! HAVE_NATIVE_mpn_sqr_basecase
Packit 5c3484
/* The limit of the generic code is SQR_TOOM2_THRESHOLD.  */
Packit 5c3484
#define SQR_BASECASE_LIM  SQR_TOOM2_THRESHOLD
Packit 5c3484
#endif
Packit 5c3484
Packit 5c3484
#if HAVE_NATIVE_mpn_sqr_basecase
Packit 5c3484
#ifdef TUNE_SQR_TOOM2_MAX
Packit 5c3484
/* We slightly abuse TUNE_SQR_TOOM2_MAX here.  If it is set for an assembly
Packit 5c3484
   mpn_sqr_basecase, it comes from SQR_TOOM2_THRESHOLD_MAX in the assembly
Packit 5c3484
   file.  An assembly mpn_sqr_basecase that does not define it should allow
Packit 5c3484
   any size.  */
Packit 5c3484
#define SQR_BASECASE_LIM  SQR_TOOM2_THRESHOLD
Packit 5c3484
#endif
Packit 5c3484
#endif
Packit 5c3484
Packit 5c3484
#ifdef WANT_FAT_BINARY
Packit 5c3484
/* For fat builds, we use SQR_TOOM2_THRESHOLD which will expand to a read from
Packit 5c3484
   __gmpn_cpuvec.  Perhaps any possible sqr_basecase.asm allow any size, and we
Packit 5c3484
   limit the use unnecessarily.  We cannot tell, so play it safe.  FIXME.  */
Packit 5c3484
#define SQR_BASECASE_LIM  SQR_TOOM2_THRESHOLD
Packit 5c3484
#endif
Packit 5c3484
Packit 5c3484
#ifndef SQR_BASECASE_LIM
Packit 5c3484
/* If SQR_BASECASE_LIM is now not defined, use mpn_sqr_basecase for any operand
Packit 5c3484
   size.  */
Packit 5c3484
#define mpn_local_sqr(rp,up,n,tp) mpn_sqr_basecase(rp,up,n)
Packit 5c3484
#else
Packit 5c3484
/* Else use mpn_sqr_basecase for its allowed sizes, else mpn_mul_basecase.  */
Packit 5c3484
#define mpn_local_sqr(rp,up,n,tp) \
Packit 5c3484
  do {									\
Packit 5c3484
    if (BELOW_THRESHOLD (n, SQR_BASECASE_LIM))				\
Packit 5c3484
      mpn_sqr_basecase (rp, up, n);					\
Packit 5c3484
    else								\
Packit 5c3484
      mpn_mul_basecase(rp, up, n, up, n);				\
Packit 5c3484
  } while (0)
Packit 5c3484
#endif
Packit 5c3484
Packit 5c3484
#define getbit(p,bi) \
Packit 5c3484
  ((p[(bi - 1) / GMP_NUMB_BITS] >> (bi - 1) % GMP_NUMB_BITS) & 1)
Packit 5c3484
Packit 5c3484
/* FIXME: Maybe some things would get simpler if all callers ensure
Packit 5c3484
   that bi >= nbits. As far as I understand, with the current code bi
Packit 5c3484
   < nbits can happen only for the final iteration. */
Packit 5c3484
static inline mp_limb_t
Packit 5c3484
getbits (const mp_limb_t *p, mp_bitcnt_t bi, int nbits)
Packit 5c3484
{
Packit 5c3484
  int nbits_in_r;
Packit 5c3484
  mp_limb_t r;
Packit 5c3484
  mp_size_t i;
Packit 5c3484
Packit 5c3484
  if (bi < nbits)
Packit 5c3484
    {
Packit 5c3484
      return p[0] & (((mp_limb_t) 1 << bi) - 1);
Packit 5c3484
    }
Packit 5c3484
  else
Packit 5c3484
    {
Packit 5c3484
      bi -= nbits;			/* bit index of low bit to extract */
Packit 5c3484
      i = bi / GMP_NUMB_BITS;		/* word index of low bit to extract */
Packit 5c3484
      bi %= GMP_NUMB_BITS;		/* bit index in low word */
Packit 5c3484
      r = p[i] >> bi;			/* extract (low) bits */
Packit 5c3484
      nbits_in_r = GMP_NUMB_BITS - bi;	/* number of bits now in r */
Packit 5c3484
      if (nbits_in_r < nbits)		/* did we get enough bits? */
Packit 5c3484
	r += p[i + 1] << nbits_in_r;	/* prepend bits from higher word */
Packit 5c3484
      return r & (((mp_limb_t ) 1 << nbits) - 1);
Packit 5c3484
    }
Packit 5c3484
}
Packit 5c3484
Packit 5c3484
#ifndef POWM_SEC_TABLE
Packit 5c3484
#if GMP_NUMB_BITS < 50
Packit 5c3484
#define POWM_SEC_TABLE  2,33,96,780,2741
Packit 5c3484
#else
Packit 5c3484
#define POWM_SEC_TABLE  2,130,524,2578
Packit 5c3484
#endif
Packit 5c3484
#endif
Packit 5c3484
Packit 5c3484
#if TUNE_PROGRAM_BUILD
Packit 5c3484
extern int win_size (mp_bitcnt_t);
Packit 5c3484
#else
Packit 5c3484
static inline int
Packit 5c3484
win_size (mp_bitcnt_t enb)
Packit 5c3484
{
Packit 5c3484
  int k;
Packit 5c3484
  /* Find k, such that x[k-1] < enb <= x[k].
Packit 5c3484
Packit 5c3484
     We require that x[k] >= k, then it follows that enb > x[k-1] >=
Packit 5c3484
     k-1, which implies k <= enb.
Packit 5c3484
  */
Packit 5c3484
  static const mp_bitcnt_t x[] = {0,POWM_SEC_TABLE,~(mp_bitcnt_t)0};
Packit 5c3484
  for (k = 1; enb > x[k]; k++)
Packit 5c3484
    ;
Packit 5c3484
  ASSERT (k <= enb);
Packit 5c3484
  return k;
Packit 5c3484
}
Packit 5c3484
#endif
Packit 5c3484
Packit 5c3484
/* Convert U to REDC form, U_r = B^n * U mod M.
Packit 5c3484
   Uses scratch space at tp of size 2un + n + 1.  */
Packit 5c3484
static void
Packit 5c3484
redcify (mp_ptr rp, mp_srcptr up, mp_size_t un, mp_srcptr mp, mp_size_t n, mp_ptr tp)
Packit 5c3484
{
Packit 5c3484
  MPN_ZERO (tp, n);
Packit 5c3484
  MPN_COPY (tp + n, up, un);
Packit 5c3484
Packit 5c3484
  mpn_sec_div_r (tp, un + n, mp, n, tp + un + n);
Packit 5c3484
  MPN_COPY (rp, tp, n);
Packit 5c3484
}
Packit 5c3484
Packit 5c3484
/* {rp, n} <-- {bp, bn} ^ {ep, en} mod {mp, n},
Packit 5c3484
   where en = ceil (enb / GMP_NUMB_BITS)
Packit 5c3484
   Requires that {mp, n} is odd (and hence also mp[0] odd).
Packit 5c3484
   Uses scratch space at tp as defined by mpn_sec_powm_itch.  */
Packit 5c3484
void
Packit 5c3484
mpn_sec_powm (mp_ptr rp, mp_srcptr bp, mp_size_t bn,
Packit 5c3484
	      mp_srcptr ep, mp_bitcnt_t enb,
Packit 5c3484
	      mp_srcptr mp, mp_size_t n, mp_ptr tp)
Packit 5c3484
{
Packit 5c3484
  mp_limb_t ip[2], *mip;
Packit 5c3484
  int windowsize, this_windowsize;
Packit 5c3484
  mp_limb_t expbits;
Packit 5c3484
  mp_ptr pp, this_pp;
Packit 5c3484
  long i;
Packit 5c3484
  int cnd;
Packit 5c3484
Packit 5c3484
  ASSERT (enb > 0);
Packit 5c3484
  ASSERT (n > 0);
Packit 5c3484
  /* The code works for bn = 0, but the defined scratch space is 2 limbs
Packit 5c3484
     greater than we supply, when converting 1 to redc form .  */
Packit 5c3484
  ASSERT (bn > 0);
Packit 5c3484
  ASSERT ((mp[0] & 1) != 0);
Packit 5c3484
Packit 5c3484
  windowsize = win_size (enb);
Packit 5c3484
Packit 5c3484
#if WANT_REDC_2
Packit 5c3484
  if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD))
Packit 5c3484
    {
Packit 5c3484
      mip = ip;
Packit 5c3484
      binvert_limb (mip[0], mp[0]);
Packit 5c3484
      mip[0] = -mip[0];
Packit 5c3484
    }
Packit 5c3484
  else
Packit 5c3484
    {
Packit 5c3484
      mip = ip;
Packit 5c3484
      mpn_binvert (mip, mp, 2, tp);
Packit 5c3484
      mip[0] = -mip[0]; mip[1] = ~mip[1];
Packit 5c3484
    }
Packit 5c3484
#else
Packit 5c3484
  mip = ip;
Packit 5c3484
  binvert_limb (mip[0], mp[0]);
Packit 5c3484
  mip[0] = -mip[0];
Packit 5c3484
#endif
Packit 5c3484
Packit 5c3484
  pp = tp;
Packit 5c3484
  tp += (n << windowsize);	/* put tp after power table */
Packit 5c3484
Packit 5c3484
  /* Compute pp[0] table entry */
Packit 5c3484
  /* scratch: |   n   | 1 |   n+2    |  */
Packit 5c3484
  /*          | pp[0] | 1 | redcify  |  */
Packit 5c3484
  this_pp = pp;
Packit 5c3484
  this_pp[n] = 1;
Packit 5c3484
  redcify (this_pp, this_pp + n, 1, mp, n, this_pp + n + 1);
Packit 5c3484
  this_pp += n;
Packit 5c3484
Packit 5c3484
  /* Compute pp[1] table entry.  To avoid excessive scratch usage in the
Packit 5c3484
     degenerate situation where B >> M, we let redcify use scratch space which
Packit 5c3484
     will later be used by the pp table (element 2 and up).  */
Packit 5c3484
  /* scratch: |   n   |   n   |  bn + n + 1  |  */
Packit 5c3484
  /*          | pp[0] | pp[1] |   redcify    |  */
Packit 5c3484
  redcify (this_pp, bp, bn, mp, n, this_pp + n);
Packit 5c3484
Packit 5c3484
  /* Precompute powers of b and put them in the temporary area at pp.  */
Packit 5c3484
  /* scratch: |   n   |   n   | ...  |                    |   2n      |  */
Packit 5c3484
  /*          | pp[0] | pp[1] | ...  | pp[2^windowsize-1] |  product  |  */
Packit 5c3484
  for (i = (1 << windowsize) - 2; i > 0; i--)
Packit 5c3484
    {
Packit 5c3484
      mpn_mul_basecase (tp, this_pp, n, pp + n, n);
Packit 5c3484
      this_pp += n;
Packit 5c3484
#if WANT_REDC_2
Packit 5c3484
      if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD))
Packit 5c3484
	MPN_REDC_1_SEC (this_pp, tp, mp, n, mip[0]);
Packit 5c3484
      else
Packit 5c3484
	MPN_REDC_2_SEC (this_pp, tp, mp, n, mip);
Packit 5c3484
#else
Packit 5c3484
      MPN_REDC_1_SEC (this_pp, tp, mp, n, mip[0]);
Packit 5c3484
#endif
Packit 5c3484
    }
Packit 5c3484
Packit 5c3484
  expbits = getbits (ep, enb, windowsize);
Packit 5c3484
  ASSERT_ALWAYS (enb >= windowsize);
Packit 5c3484
  enb -= windowsize;
Packit 5c3484
Packit 5c3484
  mpn_sec_tabselect (rp, pp, n, 1 << windowsize, expbits);
Packit 5c3484
Packit 5c3484
  /* Main exponentiation loop.  */
Packit 5c3484
  /* scratch: |   n   |   n   | ...  |                    |     3n-4n     |  */
Packit 5c3484
  /*          | pp[0] | pp[1] | ...  | pp[2^windowsize-1] |  loop scratch |  */
Packit 5c3484
Packit 5c3484
#define INNERLOOP							\
Packit 5c3484
  while (enb != 0)							\
Packit 5c3484
    {									\
Packit 5c3484
      expbits = getbits (ep, enb, windowsize);				\
Packit 5c3484
      this_windowsize = windowsize;					\
Packit 5c3484
      if (enb < windowsize)						\
Packit 5c3484
	{								\
Packit 5c3484
	  this_windowsize -= windowsize - enb;				\
Packit 5c3484
	  enb = 0;							\
Packit 5c3484
	}								\
Packit 5c3484
      else								\
Packit 5c3484
	enb -= windowsize;						\
Packit 5c3484
									\
Packit 5c3484
      do								\
Packit 5c3484
	{								\
Packit 5c3484
	  mpn_local_sqr (tp, rp, n, tp + 2 * n);			\
Packit 5c3484
	  MPN_REDUCE (rp, tp, mp, n, mip);				\
Packit 5c3484
	  this_windowsize--;						\
Packit 5c3484
	}								\
Packit 5c3484
      while (this_windowsize != 0);					\
Packit 5c3484
									\
Packit 5c3484
      mpn_sec_tabselect (tp + 2*n, pp, n, 1 << windowsize, expbits);	\
Packit 5c3484
      mpn_mul_basecase (tp, rp, n, tp + 2*n, n);			\
Packit 5c3484
									\
Packit 5c3484
      MPN_REDUCE (rp, tp, mp, n, mip);					\
Packit 5c3484
    }
Packit 5c3484
Packit 5c3484
#if WANT_REDC_2
Packit 5c3484
  if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD))
Packit 5c3484
    {
Packit 5c3484
#undef MPN_MUL_N
Packit 5c3484
#undef MPN_SQR
Packit 5c3484
#undef MPN_REDUCE
Packit 5c3484
#define MPN_MUL_N(r,a,b,n)		mpn_mul_basecase (r,a,n,b,n)
Packit 5c3484
#define MPN_SQR(r,a,n)			mpn_sqr_basecase (r,a,n)
Packit 5c3484
#define MPN_REDUCE(rp,tp,mp,n,mip)	MPN_REDC_1_SEC (rp, tp, mp, n, mip[0])
Packit 5c3484
      INNERLOOP;
Packit 5c3484
    }
Packit 5c3484
  else
Packit 5c3484
    {
Packit 5c3484
#undef MPN_MUL_N
Packit 5c3484
#undef MPN_SQR
Packit 5c3484
#undef MPN_REDUCE
Packit 5c3484
#define MPN_MUL_N(r,a,b,n)		mpn_mul_basecase (r,a,n,b,n)
Packit 5c3484
#define MPN_SQR(r,a,n)			mpn_sqr_basecase (r,a,n)
Packit 5c3484
#define MPN_REDUCE(rp,tp,mp,n,mip)	MPN_REDC_2_SEC (rp, tp, mp, n, mip)
Packit 5c3484
      INNERLOOP;
Packit 5c3484
    }
Packit 5c3484
#else
Packit 5c3484
#undef MPN_MUL_N
Packit 5c3484
#undef MPN_SQR
Packit 5c3484
#undef MPN_REDUCE
Packit 5c3484
#define MPN_MUL_N(r,a,b,n)		mpn_mul_basecase (r,a,n,b,n)
Packit 5c3484
#define MPN_SQR(r,a,n)			mpn_sqr_basecase (r,a,n)
Packit 5c3484
#define MPN_REDUCE(rp,tp,mp,n,mip)	MPN_REDC_1_SEC (rp, tp, mp, n, mip[0])
Packit 5c3484
  INNERLOOP;
Packit 5c3484
#endif
Packit 5c3484
Packit 5c3484
  MPN_COPY (tp, rp, n);
Packit 5c3484
  MPN_ZERO (tp + n, n);
Packit 5c3484
Packit 5c3484
#if WANT_REDC_2
Packit 5c3484
  if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD))
Packit 5c3484
    MPN_REDC_1_SEC (rp, tp, mp, n, mip[0]);
Packit 5c3484
  else
Packit 5c3484
    MPN_REDC_2_SEC (rp, tp, mp, n, mip);
Packit 5c3484
#else
Packit 5c3484
  MPN_REDC_1_SEC (rp, tp, mp, n, mip[0]);
Packit 5c3484
#endif
Packit 5c3484
  cnd = mpn_sub_n (tp, rp, mp, n);	/* we need just retval */
Packit 5c3484
  mpn_cnd_sub_n (!cnd, rp, rp, mp, n);
Packit 5c3484
}
Packit 5c3484
Packit 5c3484
mp_size_t
Packit 5c3484
mpn_sec_powm_itch (mp_size_t bn, mp_bitcnt_t enb, mp_size_t n)
Packit 5c3484
{
Packit 5c3484
  int windowsize;
Packit 5c3484
  mp_size_t redcify_itch, itch;
Packit 5c3484
Packit 5c3484
  /* The top scratch usage will either be when reducing B in the 2nd redcify
Packit 5c3484
     call, or more typically n*2^windowsize + 3n or 4n, in the main loop.  (It
Packit 5c3484
     is 3n or 4n depending on if we use mpn_local_sqr or a native
Packit 5c3484
     mpn_sqr_basecase.  We assume 4n always for now.) */
Packit 5c3484
Packit 5c3484
  windowsize = win_size (enb);
Packit 5c3484
Packit 5c3484
  /* The 2n term is due to pp[0] and pp[1] at the time of the 2nd redcify call,
Packit 5c3484
     the (bn + n) term is due to redcify's own usage, and the rest is due to
Packit 5c3484
     mpn_sec_div_r's usage when called from redcify.  */
Packit 5c3484
  redcify_itch = (2 * n) + (bn + n) + ((bn + n) + 2 * n + 2);
Packit 5c3484
Packit 5c3484
  /* The n * 2^windowsize term is due to the power table, the 4n term is due to
Packit 5c3484
     scratch needs of squaring/multiplication in the exponentiation loop.  */
Packit 5c3484
  itch = (n << windowsize) + (4 * n);
Packit 5c3484
Packit 5c3484
  return MAX (itch, redcify_itch);
Packit 5c3484
}