Blame mpn/generic/gcdext_lehmer.c

Packit 5c3484
/* mpn_gcdext -- Extended Greatest Common Divisor.
Packit 5c3484
Packit 5c3484
Copyright 1996, 1998, 2000-2005, 2008, 2009, 2012 Free Software Foundation,
Packit 5c3484
Inc.
Packit 5c3484
Packit 5c3484
This file is part of the GNU MP Library.
Packit 5c3484
Packit 5c3484
The GNU MP Library is free software; you can redistribute it and/or modify
Packit 5c3484
it under the terms of either:
Packit 5c3484
Packit 5c3484
  * the GNU Lesser General Public License as published by the Free
Packit 5c3484
    Software Foundation; either version 3 of the License, or (at your
Packit 5c3484
    option) any later version.
Packit 5c3484
Packit 5c3484
or
Packit 5c3484
Packit 5c3484
  * the GNU General Public License as published by the Free Software
Packit 5c3484
    Foundation; either version 2 of the License, or (at your option) any
Packit 5c3484
    later version.
Packit 5c3484
Packit 5c3484
or both in parallel, as here.
Packit 5c3484
Packit 5c3484
The GNU MP Library is distributed in the hope that it will be useful, but
Packit 5c3484
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
Packit 5c3484
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
Packit 5c3484
for more details.
Packit 5c3484
Packit 5c3484
You should have received copies of the GNU General Public License and the
Packit 5c3484
GNU Lesser General Public License along with the GNU MP Library.  If not,
Packit 5c3484
see https://www.gnu.org/licenses/.  */
Packit 5c3484
Packit 5c3484
#include "gmp.h"
Packit 5c3484
#include "gmp-impl.h"
Packit 5c3484
#include "longlong.h"
Packit 5c3484
Packit 5c3484
/* Here, d is the index of the cofactor to update. FIXME: Could use qn
Packit 5c3484
   = 0 for the common case q = 1. */
Packit 5c3484
/* Callback that keeps the cofactors stored in a struct gcdext_ctx in step
   with a gcd reduction.  mpn_gcdext_lehmer_n passes it, together with its
   context, to mpn_gcd_subdiv_step.

   p       the struct gcdext_ctx, received as void *.
   gp, gn  when gp is non-NULL: the final gcd {gp, gn}.  It is copied to
	   ctx->gp, its size is stored in ctx->gn, and one cofactor is
	   copied to ctx->up with its signed size in *ctx->usize.  In this
	   mode d < 0 asks for the smaller of u0 and u1 to be chosen;
	   otherwise d selects the cofactor directly.
   qp, qn  when gp is NULL: a quotient {qp, qn} whose top limb may be
	   zero.  The cofactor with index d is increased by q times the
	   other cofactor and ctx->un is updated.
   d       see above; must be >= 0 in the quotient mode.

   Nothing is returned; all results go through the context.  */
void
Packit 5c3484
mpn_gcdext_hook (void *p, mp_srcptr gp, mp_size_t gn,
Packit 5c3484
		 mp_srcptr qp, mp_size_t qn, int d)
Packit 5c3484
{
Packit 5c3484
  struct gcdext_ctx *ctx = (struct gcdext_ctx *) p;
Packit 5c3484
  mp_size_t un = ctx->un;
Packit 5c3484
Packit 5c3484
  /* Final call: the gcd is known, record it and pick the cofactor.  */
  if (gp)
Packit 5c3484
    {
Packit 5c3484
      mp_srcptr up;
Packit 5c3484
Packit 5c3484
      ASSERT (gn > 0);
Packit 5c3484
      ASSERT (gp[gn-1] > 0);
Packit 5c3484
Packit 5c3484
      MPN_COPY (ctx->gp, gp, gn);
Packit 5c3484
      ctx->gn = gn;
Packit 5c3484
Packit 5c3484
      if (d < 0)
Packit 5c3484
	{
Packit 5c3484
	  int c;
Packit 5c3484
Packit 5c3484
	  /* Must return the smallest cofactor, +u1 or -u0 */
Packit 5c3484
	  MPN_CMP (c, ctx->u0, ctx->u1, un);
Packit 5c3484
	  ASSERT (c != 0 || (un == 1 && ctx->u0[0] == 1 && ctx->u1[0] == 1));
Packit 5c3484
Packit 5c3484
	  /* d becomes 1 exactly when u0 compares smaller than u1.  */
	  d = c < 0;
Packit 5c3484
	}
Packit 5c3484
Packit 5c3484
      up = d ? ctx->u0 : ctx->u1;
Packit 5c3484
Packit 5c3484
      /* NOTE(review): MPN_NORMALIZE is defined outside this file; it is
	 expected to lower un past any high zero limbs of up -- confirm
	 in gmp-impl.h.  */
      MPN_NORMALIZE (up, un);
Packit 5c3484
      MPN_COPY (ctx->up, up, un);
Packit 5c3484
Packit 5c3484
      /* A negative size marks that u0 was stored, i.e. the cofactor is
	 -u0; a positive size means +u1.  */
      *ctx->usize = d ? -un : un;
Packit 5c3484
    }
Packit 5c3484
  else
Packit 5c3484
    {
Packit 5c3484
      mp_limb_t cy;
Packit 5c3484
      mp_ptr u0 = ctx->u0;
Packit 5c3484
      mp_ptr u1 = ctx->u1;
Packit 5c3484
Packit 5c3484
      ASSERT (d >= 0);
Packit 5c3484
Packit 5c3484
      /* Only the local pointers are exchanged, so the code below can
	 always be written as u0 += q * u1; ctx->u0 and ctx->u1 keep
	 their values.  */
      if (d)
Packit 5c3484
	MP_PTR_SWAP (u0, u1);
Packit 5c3484
Packit 5c3484
      /* Drop a single zero limb from the top of the quotient, if any.  */
      qn -= (qp[qn-1] == 0);
Packit 5c3484
Packit 5c3484
      /* Update u0 += q  * u1 */
Packit 5c3484
      if (qn == 1)
Packit 5c3484
	{
Packit 5c3484
	  mp_limb_t q = qp[0];
Packit 5c3484
Packit 5c3484
	  if (q == 1)
Packit 5c3484
	    /* A common case. */
Packit 5c3484
	    cy = mpn_add_n (u0, u0, u1, un);
Packit 5c3484
	  else
Packit 5c3484
	    cy = mpn_addmul_1 (u0, u1, un, q);
Packit 5c3484
	}
Packit 5c3484
      else
Packit 5c3484
	{
Packit 5c3484
	  mp_size_t u1n;
Packit 5c3484
	  mp_ptr tp;
Packit 5c3484
Packit 5c3484
	  u1n = un;
Packit 5c3484
	  MPN_NORMALIZE (u1, u1n);
Packit 5c3484
Packit 5c3484
	  /* u1 is zero, so q * u1 adds nothing: u0 and ctx->un are left
	     untouched.  */
	  if (u1n == 0)
Packit 5c3484
	    return;
Packit 5c3484
Packit 5c3484
	  /* Should always have u1n == un here, and u1 >= u0. The
Packit 5c3484
	     reason is that we alternate adding u0 to u1 and u1 to u0
Packit 5c3484
	     (corresponding to subtractions a - b and b - a), and we
Packit 5c3484
	     can get a large quotient only just after a switch, which
Packit 5c3484
	     means that we'll add (a multiple of) the larger u to the
Packit 5c3484
	     smaller. */
Packit 5c3484
Packit 5c3484
	  tp = ctx->tp;
Packit 5c3484
Packit 5c3484
	  /* The longer operand is passed first to mpn_mul.  */
	  if (qn > u1n)
Packit 5c3484
	    mpn_mul (tp, qp, qn, u1, u1n);
Packit 5c3484
	  else
Packit 5c3484
	    mpn_mul (tp, u1, u1n, qp, qn);
Packit 5c3484
Packit 5c3484
	  /* The product takes u1n + qn limbs, the top one possibly zero.  */
	  u1n += qn;
Packit 5c3484
	  u1n -= tp[u1n-1] == 0;
Packit 5c3484
Packit 5c3484
	  /* Add the product into u0, again with the longer operand first;
	     un becomes the length of the longer one.  */
	  if (u1n >= un)
Packit 5c3484
	    {
Packit 5c3484
	      cy = mpn_add (u0, tp, u1n, u0, un);
Packit 5c3484
	      un = u1n;
Packit 5c3484
	    }
Packit 5c3484
	  else
Packit 5c3484
	    /* Note: Unlikely case, maybe never happens? */
Packit 5c3484
	    cy = mpn_add (u0, u0, un, tp, u1n);
Packit 5c3484
Packit 5c3484
	}
Packit 5c3484
      /* The carry-out is always written as limb un of u0, but it only
	 counts towards the stored size when it is non-zero.  */
      u0[un] = cy;
Packit 5c3484
      ctx->un = un + (cy > 0);
Packit 5c3484
    }
Packit 5c3484
}
Packit 5c3484
Packit 5c3484
/* Temporary storage: 3*(n+1) for u. If hgcd2 succeeds, we need n for
Packit 5c3484
   the matrix-vector multiplication adjusting a, b. If hgcd2 fails, we
Packit 5c3484
   need at most n for the quotient and n+1 for the u update (reusing
Packit 5c3484
   the extra u). In all, 4n + 3. */
Packit 5c3484
Packit 5c3484
/* Extended gcd of the two n-limb numbers {ap, n} and {bp, n}.  The main
   loop reduces the operands with mpn_hgcd2 steps on their top two limbs,
   falling back to mpn_gcd_subdiv_step when mpn_hgcd2 fails, and finishes
   on single limbs.

   gp      receives the gcd.
   up      receives the magnitude of one cofactor.
   usize   *usize receives the limb count of the cofactor; it is negative
	   when the cofactor is the negative of the value stored in up.
   ap, bp  the operands; both areas are overwritten.
   n       common size of the operands in limbs.
   tp      scratch space.  The first 3*(n+1) limbs are zeroed and used
	   for the cofactor buffers u0, u1, u2; the rest is working space
	   for the reduction steps.

   Returns the number of limbs of the gcd.  */
mp_size_t
Packit 5c3484
mpn_gcdext_lehmer_n (mp_ptr gp, mp_ptr up, mp_size_t *usize,
Packit 5c3484
		     mp_ptr ap, mp_ptr bp, mp_size_t n,
Packit 5c3484
		     mp_ptr tp)
Packit 5c3484
{
Packit 5c3484
  mp_size_t ualloc = n + 1;
Packit 5c3484
Packit 5c3484
  /* Keeps track of the second row of the reduction matrix
Packit 5c3484
   *
Packit 5c3484
   *   M = (v0, v1 ; u0, u1)
Packit 5c3484
   *
Packit 5c3484
   * which correspond to the first column of the inverse
Packit 5c3484
   *
Packit 5c3484
   *   M^{-1} = (u1, -v1; -u0, v0)
Packit 5c3484
   *
Packit 5c3484
   * This implies that
Packit 5c3484
   *
Packit 5c3484
   *   a =  u1 A (mod B)
Packit 5c3484
   *   b = -u0 A (mod B)
Packit 5c3484
   *
Packit 5c3484
   * where A, B denotes the input values.
Packit 5c3484
   */
Packit 5c3484
Packit 5c3484
  struct gcdext_ctx ctx;
Packit 5c3484
  mp_size_t un;
Packit 5c3484
  mp_ptr u0;
Packit 5c3484
  mp_ptr u1;
Packit 5c3484
  mp_ptr u2;
Packit 5c3484
Packit 5c3484
  /* Carve three zeroed ualloc-limb buffers out of tp; tp then points at
     the remaining working space.  */
  MPN_ZERO (tp, 3*ualloc);
Packit 5c3484
  u0 = tp; tp += ualloc;
Packit 5c3484
  u1 = tp; tp += ualloc;
Packit 5c3484
  u2 = tp; tp += ualloc;
Packit 5c3484
Packit 5c3484
  /* Start from u0 = 0, u1 = 1, both of size one limb.  */
  u1[0] = 1; un = 1;
Packit 5c3484
Packit 5c3484
  /* Output locations used by mpn_gcdext_hook when it stores the result.  */
  ctx.gp = gp;
Packit 5c3484
  ctx.up = up;
Packit 5c3484
  ctx.usize = usize;
Packit 5c3484
Packit 5c3484
  /* FIXME: Handle n == 2 differently, after the loop? */
Packit 5c3484
  while (n >= 2)
Packit 5c3484
    {
Packit 5c3484
      struct hgcd_matrix1 M;
Packit 5c3484
      mp_limb_t ah, al, bh, bl;
Packit 5c3484
      mp_limb_t mask;
Packit 5c3484
Packit 5c3484
      /* Extract two leading limbs (ah, al) and (bh, bl) of a and b.  Both
	 are shifted left by the same amount, chosen from the OR of the
	 top limbs, so that at least one of ah, bh has its high bit set.  */
      mask = ap[n-1] | bp[n-1];
Packit 5c3484
      ASSERT (mask > 0);
Packit 5c3484
Packit 5c3484
      if (mask & GMP_NUMB_HIGHBIT)
Packit 5c3484
	{
Packit 5c3484
	  ah = ap[n-1]; al = ap[n-2];
Packit 5c3484
	  bh = bp[n-1]; bl = bp[n-2];
Packit 5c3484
	}
Packit 5c3484
      else if (n == 2)
Packit 5c3484
	{
Packit 5c3484
	  /* We use the full inputs without truncation, so we can
Packit 5c3484
	     safely shift left. */
Packit 5c3484
	  int shift;
Packit 5c3484
Packit 5c3484
	  count_leading_zeros (shift, mask);
Packit 5c3484
	  ah = MPN_EXTRACT_NUMB (shift, ap[1], ap[0]);
Packit 5c3484
	  al = ap[0] << shift;
Packit 5c3484
	  bh = MPN_EXTRACT_NUMB (shift, bp[1], bp[0]);
Packit 5c3484
	  bl = bp[0] << shift;
Packit 5c3484
	}
Packit 5c3484
      else
Packit 5c3484
	{
Packit 5c3484
	  int shift;
Packit 5c3484
Packit 5c3484
	  /* n >= 3 here, so a third limb is available to fill the low
	     bits of al and bl.  */
	  count_leading_zeros (shift, mask);
Packit 5c3484
	  ah = MPN_EXTRACT_NUMB (shift, ap[n-1], ap[n-2]);
Packit 5c3484
	  al = MPN_EXTRACT_NUMB (shift, ap[n-2], ap[n-3]);
Packit 5c3484
	  bh = MPN_EXTRACT_NUMB (shift, bp[n-1], bp[n-2]);
Packit 5c3484
	  bl = MPN_EXTRACT_NUMB (shift, bp[n-2], bp[n-3]);
Packit 5c3484
	}
Packit 5c3484
Packit 5c3484
      /* Try an mpn_hgcd2 step */
Packit 5c3484
      if (mpn_hgcd2 (ah, al, bh, bl, &M))
Packit 5c3484
	{
Packit 5c3484
	  /* Reduce (a, b) using M and take the new size.
	     NOTE(review): the swap below suggests the callee leaves the
	     new a in tp -- confirm against its definition.  */
	  n = mpn_matrix22_mul1_inverse_vector (&M, tp, ap, bp, n);
Packit 5c3484
	  MP_PTR_SWAP (ap, tp);
Packit 5c3484
	  /* Apply M to the cofactor pair as well; u2 is handed in as the
	     extra buffer and then takes over the role of u0.  */
	  un = mpn_hgcd_mul_matrix1_vector(&M, u2, u0, u1, un);
Packit 5c3484
	  MP_PTR_SWAP (u0, u2);
Packit 5c3484
	}
Packit 5c3484
      else
Packit 5c3484
	{
Packit 5c3484
	  /* mpn_hgcd2 has failed. Then either one of a or b is very
Packit 5c3484
	     small, or the difference is very small. Perform one
Packit 5c3484
	     subtraction followed by one division. */
Packit 5c3484
	  ctx.u0 = u0;
Packit 5c3484
	  ctx.u1 = u1;
Packit 5c3484
	  ctx.tp = u2;
Packit 5c3484
	  ctx.un = un;
Packit 5c3484
Packit 5c3484
	  /* Temporary storage n for the quotient and ualloc for the
Packit 5c3484
	     new cofactor. */
Packit 5c3484
	  n = mpn_gcd_subdiv_step (ap, bp, n, 0, mpn_gcdext_hook, &ctx, tp);
Packit 5c3484
	  /* A zero return ends the computation; ctx.gn is the gcd size
	     that mpn_gcdext_hook records when it is given the gcd.  */
	  if (n == 0)
Packit 5c3484
	    return ctx.gn;
Packit 5c3484
Packit 5c3484
	  /* Otherwise pick up the cofactor size updated by the hook.  */
	  un = ctx.un;
Packit 5c3484
	}
Packit 5c3484
    }
Packit 5c3484
  /* The loop has ended with n < 2; finish on the low limbs, which must
     both be non-zero.  */
  ASSERT_ALWAYS (ap[0] > 0);
Packit 5c3484
  ASSERT_ALWAYS (bp[0] > 0);
Packit 5c3484
Packit 5c3484
  if (ap[0] == bp[0])
Packit 5c3484
    {
Packit 5c3484
      int c;
Packit 5c3484
Packit 5c3484
      /* Which cofactor to return now? Candidates are +u1 and -u0,
Packit 5c3484
	 depending on which of a and b was most recently reduced,
Packit 5c3484
	 which we don't keep track of. So compare and get the smallest
Packit 5c3484
	 one. */
Packit 5c3484
Packit 5c3484
      gp[0] = ap[0];
Packit 5c3484
Packit 5c3484
      MPN_CMP (c, u0, u1, un);
Packit 5c3484
      ASSERT (c != 0 || (un == 1 && u0[0] == 1 && u1[0] == 1));
Packit 5c3484
      if (c < 0)
Packit 5c3484
	{
Packit 5c3484
	  /* u0 is the smaller one: return -u0.  */
	  MPN_NORMALIZE (u0, un);
Packit 5c3484
	  MPN_COPY (up, u0, un);
Packit 5c3484
	  *usize = -un;
Packit 5c3484
	}
Packit 5c3484
      else
Packit 5c3484
	{
Packit 5c3484
	  /* Otherwise return +u1.  */
	  MPN_NORMALIZE_NOT_ZERO (u1, un);
Packit 5c3484
	  MPN_COPY (up, u1, un);
Packit 5c3484
	  *usize = un;
Packit 5c3484
	}
Packit 5c3484
      return 1;
Packit 5c3484
    }
Packit 5c3484
  else
Packit 5c3484
    {
Packit 5c3484
      mp_limb_t uh, vh;
Packit 5c3484
      mp_limb_signed_t u;
Packit 5c3484
      mp_limb_signed_t v;
Packit 5c3484
      int negate;
Packit 5c3484
Packit 5c3484
      /* Single-limb extended gcd; u and v are the signed single-limb
	 cofactors it produces for ap[0] and bp[0].  */
      gp[0] = mpn_gcdext_1 (&u, &v, ap[0], bp[0]);
Packit 5c3484
Packit 5c3484
      /* Set up = u u1 - v u0. Keep track of size, un grows by one or
Packit 5c3484
	 two limbs. */
Packit 5c3484
Packit 5c3484
      if (u == 0)
Packit 5c3484
	{
Packit 5c3484
	  /* Only the u0 term remains: the result is -u0.  */
	  ASSERT (v == 1);
Packit 5c3484
	  MPN_NORMALIZE (u0, un);
Packit 5c3484
	  MPN_COPY (up, u0, un);
Packit 5c3484
	  *usize = -un;
Packit 5c3484
	  return 1;
Packit 5c3484
	}
Packit 5c3484
      else if (v == 0)
Packit 5c3484
	{
Packit 5c3484
	  /* Only the u1 term remains: the result is +u1.  */
	  ASSERT (u == 1);
Packit 5c3484
	  MPN_NORMALIZE (u1, un);
Packit 5c3484
	  MPN_COPY (up, u1, un);
Packit 5c3484
	  *usize = un;
Packit 5c3484
	  return 1;
Packit 5c3484
	}
Packit 5c3484
      else if (u > 0)
Packit 5c3484
	{
Packit 5c3484
	  negate = 0;
Packit 5c3484
	  ASSERT (v < 0);
Packit 5c3484
	  v = -v;
Packit 5c3484
	}
Packit 5c3484
      else
Packit 5c3484
	{
Packit 5c3484
	  negate = 1;
Packit 5c3484
	  ASSERT (v > 0);
Packit 5c3484
	  u = -u;
Packit 5c3484
	}
Packit 5c3484
Packit 5c3484
      /* u and v are both positive now, so the magnitude of the result is
	 u * u1 + v * u0; its sign is carried by negate.  */
      uh = mpn_mul_1 (up, u1, un, u);
Packit 5c3484
      vh = mpn_addmul_1 (up, u0, un, v);
Packit 5c3484
Packit 5c3484
      /* Append the sum of the two high limbs, and one more limb holding
	 1 if that sum wrapped around.  */
      if ( (uh | vh) > 0)
Packit 5c3484
	{
Packit 5c3484
	  uh += vh;
Packit 5c3484
	  up[un++] = uh;
Packit 5c3484
	  if (uh < vh)
Packit 5c3484
	    up[un++] = 1;
Packit 5c3484
	}
Packit 5c3484
Packit 5c3484
      MPN_NORMALIZE_NOT_ZERO (up, un);
Packit 5c3484
Packit 5c3484
      *usize = negate ? -un : un;
Packit 5c3484
      return 1;
Packit 5c3484
    }
Packit 5c3484
}