Blame mpn/generic/gcd.c

Packit 5c3484
/* mpn/gcd.c: mpn_gcd for gcd of two odd integers.
Packit 5c3484
Packit 5c3484
Copyright 1991, 1993-1998, 2000-2005, 2008, 2010, 2012 Free Software
Packit 5c3484
Foundation, Inc.
Packit 5c3484
Packit 5c3484
This file is part of the GNU MP Library.
Packit 5c3484
Packit 5c3484
The GNU MP Library is free software; you can redistribute it and/or modify
Packit 5c3484
it under the terms of either:
Packit 5c3484
Packit 5c3484
  * the GNU Lesser General Public License as published by the Free
Packit 5c3484
    Software Foundation; either version 3 of the License, or (at your
Packit 5c3484
    option) any later version.
Packit 5c3484
Packit 5c3484
or
Packit 5c3484
Packit 5c3484
  * the GNU General Public License as published by the Free Software
Packit 5c3484
    Foundation; either version 2 of the License, or (at your option) any
Packit 5c3484
    later version.
Packit 5c3484
Packit 5c3484
or both in parallel, as here.
Packit 5c3484
Packit 5c3484
The GNU MP Library is distributed in the hope that it will be useful, but
Packit 5c3484
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
Packit 5c3484
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
Packit 5c3484
for more details.
Packit 5c3484
Packit 5c3484
You should have received copies of the GNU General Public License and the
Packit 5c3484
GNU Lesser General Public License along with the GNU MP Library.  If not,
Packit 5c3484
see https://www.gnu.org/licenses/.  */
Packit 5c3484
Packit 5c3484
#include "gmp.h"
Packit 5c3484
#include "gmp-impl.h"
Packit 5c3484
#include "longlong.h"
Packit 5c3484
Packit 5c3484
/* Uses the HGCD operation described in
Packit 5c3484
Packit 5c3484
     N. Möller, On Schönhage's algorithm and subquadratic integer gcd
Packit 5c3484
     computation, Math. Comp. 77 (2008), 589-607.
Packit 5c3484
Packit 5c3484
  to reduce inputs until they are of size below GCD_DC_THRESHOLD, and
Packit 5c3484
  then uses Lehmer's algorithm.
Packit 5c3484
*/
Packit 5c3484
Packit 5c3484
/* Some reasonable choices are n / 2 (same as in hgcd), and p = (n +
Packit 5c3484
 * 2)/3, which gives a balanced multiplication in
Packit 5c3484
 * mpn_hgcd_matrix_adjust. However, p = 2 n/3 gives slightly better
Packit 5c3484
 * performance. The matrix-vector multiplication is then
Packit 5c3484
 * 4:1-unbalanced, with matrix elements of size n/6, and vector
Packit 5c3484
 * elements of size p = 2n/3. */
Packit 5c3484
Packit 5c3484
/* From analysis of the theoretical running time, it appears that when
Packit 5c3484
 * multiplication takes time O(n^alpha), p should be chosen so that
Packit 5c3484
 * the ratio of the time for the mpn_hgcd call, and the time for the
Packit 5c3484
 * multiplication in mpn_hgcd_matrix_adjust, is roughly 1/(alpha -
Packit 5c3484
 * 1). */
Packit 5c3484
#ifdef TUNE_GCD_P
Packit 5c3484
#define P_TABLE_SIZE 10000
Packit 5c3484
mp_size_t p_table[P_TABLE_SIZE];
Packit 5c3484
#define CHOOSE_P(n) ( (n) < P_TABLE_SIZE ? p_table[n] : 2*(n)/3)
Packit 5c3484
#else
Packit 5c3484
#define CHOOSE_P(n) (2*(n) / 3)
Packit 5c3484
#endif
Packit 5c3484
Packit 5c3484
struct gcd_ctx
Packit 5c3484
{
Packit 5c3484
  mp_ptr gp;
Packit 5c3484
  mp_size_t gn;
Packit 5c3484
};
Packit 5c3484
Packit 5c3484
static void
Packit 5c3484
gcd_hook (void *p, mp_srcptr gp, mp_size_t gn,
Packit 5c3484
	  mp_srcptr qp, mp_size_t qn, int d)
Packit 5c3484
{
Packit 5c3484
  struct gcd_ctx *ctx = (struct gcd_ctx *) p;
Packit 5c3484
  MPN_COPY (ctx->gp, gp, gn);
Packit 5c3484
  ctx->gn = gn;
Packit 5c3484
}
Packit 5c3484
Packit 5c3484
#if GMP_NAIL_BITS > 0
Packit 5c3484
/* Nail supports should be easy, replacing the sub_ddmmss with nails
Packit 5c3484
 * logic. */
Packit 5c3484
#error Nails not supported.
Packit 5c3484
#endif
Packit 5c3484
Packit 5c3484
/* Use binary algorithm to compute G <-- GCD (U, V) for usize, vsize == 2.
Packit 5c3484
   Both U and V must be odd. */
Packit 5c3484
static inline mp_size_t
Packit 5c3484
gcd_2 (mp_ptr gp, mp_srcptr up, mp_srcptr vp)
Packit 5c3484
{
Packit 5c3484
  mp_limb_t u0, u1, v0, v1;
Packit 5c3484
  mp_size_t gn;
Packit 5c3484
Packit 5c3484
  u0 = up[0];
Packit 5c3484
  u1 = up[1];
Packit 5c3484
  v0 = vp[0];
Packit 5c3484
  v1 = vp[1];
Packit 5c3484
Packit 5c3484
  ASSERT (u0 & 1);
Packit 5c3484
  ASSERT (v0 & 1);
Packit 5c3484
Packit 5c3484
  /* Check for u0 != v0 needed to ensure that argument to
Packit 5c3484
   * count_trailing_zeros is non-zero. */
Packit 5c3484
  while (u1 != v1 && u0 != v0)
Packit 5c3484
    {
Packit 5c3484
      unsigned long int r;
Packit 5c3484
      if (u1 > v1)
Packit 5c3484
	{
Packit 5c3484
	  sub_ddmmss (u1, u0, u1, u0, v1, v0);
Packit 5c3484
	  count_trailing_zeros (r, u0);
Packit 5c3484
	  u0 = ((u1 << (GMP_NUMB_BITS - r)) & GMP_NUMB_MASK) | (u0 >> r);
Packit 5c3484
	  u1 >>= r;
Packit 5c3484
	}
Packit 5c3484
      else  /* u1 < v1.  */
Packit 5c3484
	{
Packit 5c3484
	  sub_ddmmss (v1, v0, v1, v0, u1, u0);
Packit 5c3484
	  count_trailing_zeros (r, v0);
Packit 5c3484
	  v0 = ((v1 << (GMP_NUMB_BITS - r)) & GMP_NUMB_MASK) | (v0 >> r);
Packit 5c3484
	  v1 >>= r;
Packit 5c3484
	}
Packit 5c3484
    }
Packit 5c3484
Packit 5c3484
  gp[0] = u0, gp[1] = u1, gn = 1 + (u1 != 0);
Packit 5c3484
Packit 5c3484
  /* If U == V == GCD, done.  Otherwise, compute GCD (V, |U - V|).  */
Packit 5c3484
  if (u1 == v1 && u0 == v0)
Packit 5c3484
    return gn;
Packit 5c3484
Packit 5c3484
  v0 = (u0 == v0) ? ((u1 > v1) ? u1-v1 : v1-u1) : ((u0 > v0) ? u0-v0 : v0-u0);
Packit 5c3484
  gp[0] = mpn_gcd_1 (gp, gn, v0);
Packit 5c3484
Packit 5c3484
  return 1;
Packit 5c3484
}
Packit 5c3484
Packit 5c3484
mp_size_t
Packit 5c3484
mpn_gcd (mp_ptr gp, mp_ptr up, mp_size_t usize, mp_ptr vp, mp_size_t n)
Packit 5c3484
{
Packit 5c3484
  mp_size_t talloc;
Packit 5c3484
  mp_size_t scratch;
Packit 5c3484
  mp_size_t matrix_scratch;
Packit 5c3484
Packit 5c3484
  struct gcd_ctx ctx;
Packit 5c3484
  mp_ptr tp;
Packit 5c3484
  TMP_DECL;
Packit 5c3484
Packit 5c3484
  ASSERT (usize >= n);
Packit 5c3484
  ASSERT (n > 0);
Packit 5c3484
  ASSERT (vp[n-1] > 0);
Packit 5c3484
Packit 5c3484
  /* FIXME: Check for small sizes first, before setting up temporary
Packit 5c3484
     storage etc. */
Packit 5c3484
  talloc = MPN_GCD_SUBDIV_STEP_ITCH(n);
Packit 5c3484
Packit 5c3484
  /* For initial division */
Packit 5c3484
  scratch = usize - n + 1;
Packit 5c3484
  if (scratch > talloc)
Packit 5c3484
    talloc = scratch;
Packit 5c3484
Packit 5c3484
#if TUNE_GCD_P
Packit 5c3484
  if (CHOOSE_P (n) > 0)
Packit 5c3484
#else
Packit 5c3484
  if (ABOVE_THRESHOLD (n, GCD_DC_THRESHOLD))
Packit 5c3484
#endif
Packit 5c3484
    {
Packit 5c3484
      mp_size_t hgcd_scratch;
Packit 5c3484
      mp_size_t update_scratch;
Packit 5c3484
      mp_size_t p = CHOOSE_P (n);
Packit 5c3484
      mp_size_t scratch;
Packit 5c3484
#if TUNE_GCD_P
Packit 5c3484
      /* Worst case, since we don't guarantee that n - CHOOSE_P(n)
Packit 5c3484
	 is increasing */
Packit 5c3484
      matrix_scratch = MPN_HGCD_MATRIX_INIT_ITCH (n);
Packit 5c3484
      hgcd_scratch = mpn_hgcd_itch (n);
Packit 5c3484
      update_scratch = 2*(n - 1);
Packit 5c3484
#else
Packit 5c3484
      matrix_scratch = MPN_HGCD_MATRIX_INIT_ITCH (n - p);
Packit 5c3484
      hgcd_scratch = mpn_hgcd_itch (n - p);
Packit 5c3484
      update_scratch = p + n - 1;
Packit 5c3484
#endif
Packit 5c3484
      scratch = matrix_scratch + MAX(hgcd_scratch, update_scratch);
Packit 5c3484
      if (scratch > talloc)
Packit 5c3484
	talloc = scratch;
Packit 5c3484
    }
Packit 5c3484
Packit 5c3484
  TMP_MARK;
Packit 5c3484
  tp = TMP_ALLOC_LIMBS(talloc);
Packit 5c3484
Packit 5c3484
  if (usize > n)
Packit 5c3484
    {
Packit 5c3484
      mpn_tdiv_qr (tp, up, 0, up, usize, vp, n);
Packit 5c3484
Packit 5c3484
      if (mpn_zero_p (up, n))
Packit 5c3484
	{
Packit 5c3484
	  MPN_COPY (gp, vp, n);
Packit 5c3484
	  ctx.gn = n;
Packit 5c3484
	  goto done;
Packit 5c3484
	}
Packit 5c3484
    }
Packit 5c3484
Packit 5c3484
  ctx.gp = gp;
Packit 5c3484
Packit 5c3484
#if TUNE_GCD_P
Packit 5c3484
  while (CHOOSE_P (n) > 0)
Packit 5c3484
#else
Packit 5c3484
  while (ABOVE_THRESHOLD (n, GCD_DC_THRESHOLD))
Packit 5c3484
#endif
Packit 5c3484
    {
Packit 5c3484
      struct hgcd_matrix M;
Packit 5c3484
      mp_size_t p = CHOOSE_P (n);
Packit 5c3484
      mp_size_t matrix_scratch = MPN_HGCD_MATRIX_INIT_ITCH (n - p);
Packit 5c3484
      mp_size_t nn;
Packit 5c3484
      mpn_hgcd_matrix_init (&M, n - p, tp);
Packit 5c3484
      nn = mpn_hgcd (up + p, vp + p, n - p, &M, tp + matrix_scratch);
Packit 5c3484
      if (nn > 0)
Packit 5c3484
	{
Packit 5c3484
	  ASSERT (M.n <= (n - p - 1)/2);
Packit 5c3484
	  ASSERT (M.n + p <= (p + n - 1) / 2);
Packit 5c3484
	  /* Temporary storage 2 (p + M->n) <= p + n - 1. */
Packit 5c3484
	  n = mpn_hgcd_matrix_adjust (&M, p + nn, up, vp, p, tp + matrix_scratch);
Packit 5c3484
	}
Packit 5c3484
      else
Packit 5c3484
	{
Packit 5c3484
	  /* Temporary storage n */
Packit 5c3484
	  n = mpn_gcd_subdiv_step (up, vp, n, 0, gcd_hook, &ctx, tp);
Packit 5c3484
	  if (n == 0)
Packit 5c3484
	    goto done;
Packit 5c3484
	}
Packit 5c3484
    }
Packit 5c3484
Packit 5c3484
  while (n > 2)
Packit 5c3484
    {
Packit 5c3484
      struct hgcd_matrix1 M;
Packit 5c3484
      mp_limb_t uh, ul, vh, vl;
Packit 5c3484
      mp_limb_t mask;
Packit 5c3484
Packit 5c3484
      mask = up[n-1] | vp[n-1];
Packit 5c3484
      ASSERT (mask > 0);
Packit 5c3484
Packit 5c3484
      if (mask & GMP_NUMB_HIGHBIT)
Packit 5c3484
	{
Packit 5c3484
	  uh = up[n-1]; ul = up[n-2];
Packit 5c3484
	  vh = vp[n-1]; vl = vp[n-2];
Packit 5c3484
	}
Packit 5c3484
      else
Packit 5c3484
	{
Packit 5c3484
	  int shift;
Packit 5c3484
Packit 5c3484
	  count_leading_zeros (shift, mask);
Packit 5c3484
	  uh = MPN_EXTRACT_NUMB (shift, up[n-1], up[n-2]);
Packit 5c3484
	  ul = MPN_EXTRACT_NUMB (shift, up[n-2], up[n-3]);
Packit 5c3484
	  vh = MPN_EXTRACT_NUMB (shift, vp[n-1], vp[n-2]);
Packit 5c3484
	  vl = MPN_EXTRACT_NUMB (shift, vp[n-2], vp[n-3]);
Packit 5c3484
	}
Packit 5c3484
Packit 5c3484
      /* Try an mpn_hgcd2 step */
Packit 5c3484
      if (mpn_hgcd2 (uh, ul, vh, vl, &M))
Packit 5c3484
	{
Packit 5c3484
	  n = mpn_matrix22_mul1_inverse_vector (&M, tp, up, vp, n);
Packit 5c3484
	  MP_PTR_SWAP (up, tp);
Packit 5c3484
	}
Packit 5c3484
      else
Packit 5c3484
	{
Packit 5c3484
	  /* mpn_hgcd2 has failed. Then either one of a or b is very
Packit 5c3484
	     small, or the difference is very small. Perform one
Packit 5c3484
	     subtraction followed by one division. */
Packit 5c3484
Packit 5c3484
	  /* Temporary storage n */
Packit 5c3484
	  n = mpn_gcd_subdiv_step (up, vp, n, 0, &gcd_hook, &ctx, tp);
Packit 5c3484
	  if (n == 0)
Packit 5c3484
	    goto done;
Packit 5c3484
	}
Packit 5c3484
    }
Packit 5c3484
Packit 5c3484
  ASSERT(up[n-1] | vp[n-1]);
Packit 5c3484
Packit 5c3484
  if (n == 1)
Packit 5c3484
    {
Packit 5c3484
      *gp = mpn_gcd_1(up, 1, vp[0]);
Packit 5c3484
      ctx.gn = 1;
Packit 5c3484
      goto done;
Packit 5c3484
    }
Packit 5c3484
Packit 5c3484
  /* Due to the calling convention for mpn_gcd, at most one can be
Packit 5c3484
     even. */
Packit 5c3484
Packit 5c3484
  if (! (up[0] & 1))
Packit 5c3484
    MP_PTR_SWAP (up, vp);
Packit 5c3484
Packit 5c3484
  ASSERT (up[0] & 1);
Packit 5c3484
Packit 5c3484
  if (vp[0] == 0)
Packit 5c3484
    {
Packit 5c3484
      *gp = mpn_gcd_1 (up, 2, vp[1]);
Packit 5c3484
      ctx.gn = 1;
Packit 5c3484
      goto done;
Packit 5c3484
    }
Packit 5c3484
  else if (! (vp[0] & 1))
Packit 5c3484
    {
Packit 5c3484
      int r;
Packit 5c3484
      count_trailing_zeros (r, vp[0]);
Packit 5c3484
      vp[0] = ((vp[1] << (GMP_NUMB_BITS - r)) & GMP_NUMB_MASK) | (vp[0] >> r);
Packit 5c3484
      vp[1] >>= r;
Packit 5c3484
    }
Packit 5c3484
Packit 5c3484
  ctx.gn = gcd_2(gp, up, vp);
Packit 5c3484
Packit 5c3484
done:
Packit 5c3484
  TMP_FREE;
Packit 5c3484
  return ctx.gn;
Packit 5c3484
}