Blame mpn/generic/gcdext.c

Packit 5c3484
/* mpn_gcdext -- Extended Greatest Common Divisor.
Packit 5c3484
Packit 5c3484
Copyright 1996, 1998, 2000-2005, 2008, 2009, 2012 Free Software Foundation,
Packit 5c3484
Inc.
Packit 5c3484
Packit 5c3484
This file is part of the GNU MP Library.
Packit 5c3484
Packit 5c3484
The GNU MP Library is free software; you can redistribute it and/or modify
Packit 5c3484
it under the terms of either:
Packit 5c3484
Packit 5c3484
  * the GNU Lesser General Public License as published by the Free
Packit 5c3484
    Software Foundation; either version 3 of the License, or (at your
Packit 5c3484
    option) any later version.
Packit 5c3484
Packit 5c3484
or
Packit 5c3484
Packit 5c3484
  * the GNU General Public License as published by the Free Software
Packit 5c3484
    Foundation; either version 2 of the License, or (at your option) any
Packit 5c3484
    later version.
Packit 5c3484
Packit 5c3484
or both in parallel, as here.
Packit 5c3484
Packit 5c3484
The GNU MP Library is distributed in the hope that it will be useful, but
Packit 5c3484
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
Packit 5c3484
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
Packit 5c3484
for more details.
Packit 5c3484
Packit 5c3484
You should have received copies of the GNU General Public License and the
Packit 5c3484
GNU Lesser General Public License along with the GNU MP Library.  If not,
Packit 5c3484
see https://www.gnu.org/licenses/.  */
Packit 5c3484
Packit 5c3484
#include "gmp.h"
Packit 5c3484
#include "gmp-impl.h"
Packit 5c3484
#include "longlong.h"
Packit 5c3484
Packit 5c3484
/* Computes (r;b) = (a; b) M. Result is of size n + M->n +/- 1, and
Packit 5c3484
   the size is returned (if inputs are non-normalized, result may be
Packit 5c3484
   non-normalized too). Temporary space needed is M->n + n.
Packit 5c3484
 */
Packit 5c3484
static size_t
Packit 5c3484
hgcd_mul_matrix_vector (struct hgcd_matrix *M,
Packit 5c3484
			mp_ptr rp, mp_srcptr ap, mp_ptr bp, mp_size_t n, mp_ptr tp)
Packit 5c3484
{
Packit 5c3484
  mp_limb_t ah, bh;
Packit 5c3484
Packit 5c3484
  /* Compute (r,b) <-- (u00 a + u10 b, u01 a + u11 b) as
Packit 5c3484
Packit 5c3484
     t  = u00 * a
Packit 5c3484
     r  = u10 * b
Packit 5c3484
     r += t;
Packit 5c3484
Packit 5c3484
     t  = u11 * b
Packit 5c3484
     b  = u01 * a
Packit 5c3484
     b += t;
Packit 5c3484
  */
Packit 5c3484
Packit 5c3484
  if (M->n >= n)
Packit 5c3484
    {
Packit 5c3484
      mpn_mul (tp, M->p[0][0], M->n, ap, n);
Packit 5c3484
      mpn_mul (rp, M->p[1][0], M->n, bp, n);
Packit 5c3484
    }
Packit 5c3484
  else
Packit 5c3484
    {
Packit 5c3484
      mpn_mul (tp, ap, n, M->p[0][0], M->n);
Packit 5c3484
      mpn_mul (rp, bp, n, M->p[1][0], M->n);
Packit 5c3484
    }
Packit 5c3484
Packit 5c3484
  ah = mpn_add_n (rp, rp, tp, n + M->n);
Packit 5c3484
Packit 5c3484
  if (M->n >= n)
Packit 5c3484
    {
Packit 5c3484
      mpn_mul (tp, M->p[1][1], M->n, bp, n);
Packit 5c3484
      mpn_mul (bp, M->p[0][1], M->n, ap, n);
Packit 5c3484
    }
Packit 5c3484
  else
Packit 5c3484
    {
Packit 5c3484
      mpn_mul (tp, bp, n, M->p[1][1], M->n);
Packit 5c3484
      mpn_mul (bp, ap, n, M->p[0][1], M->n);
Packit 5c3484
    }
Packit 5c3484
  bh = mpn_add_n (bp, bp, tp, n + M->n);
Packit 5c3484
Packit 5c3484
  n += M->n;
Packit 5c3484
  if ( (ah | bh) > 0)
Packit 5c3484
    {
Packit 5c3484
      rp[n] = ah;
Packit 5c3484
      bp[n] = bh;
Packit 5c3484
      n++;
Packit 5c3484
    }
Packit 5c3484
  else
Packit 5c3484
    {
Packit 5c3484
      /* Normalize */
Packit 5c3484
      while ( (rp[n-1] | bp[n-1]) == 0)
Packit 5c3484
	n--;
Packit 5c3484
    }
Packit 5c3484
Packit 5c3484
  return n;
Packit 5c3484
}
Packit 5c3484
Packit 5c3484
#define COMPUTE_V_ITCH(n) (2*(n))
Packit 5c3484
Packit 5c3484
/* Computes |v| = |(g - u a)| / b, where u may be positive or
Packit 5c3484
   negative, and v is of the opposite sign. max(a, b) is of size n, u and
Packit 5c3484
   v at most size n, and v must have space for n+1 limbs. */
Packit 5c3484
static mp_size_t
Packit 5c3484
compute_v (mp_ptr vp,
Packit 5c3484
	   mp_srcptr ap, mp_srcptr bp, mp_size_t n,
Packit 5c3484
	   mp_srcptr gp, mp_size_t gn,
Packit 5c3484
	   mp_srcptr up, mp_size_t usize,
Packit 5c3484
	   mp_ptr tp)
Packit 5c3484
{
Packit 5c3484
  mp_size_t size;
Packit 5c3484
  mp_size_t an;
Packit 5c3484
  mp_size_t bn;
Packit 5c3484
  mp_size_t vn;
Packit 5c3484
Packit 5c3484
  ASSERT (n > 0);
Packit 5c3484
  ASSERT (gn > 0);
Packit 5c3484
  ASSERT (usize != 0);
Packit 5c3484
Packit 5c3484
  size = ABS (usize);
Packit 5c3484
  ASSERT (size <= n);
Packit 5c3484
  ASSERT (up[size-1] > 0);
Packit 5c3484
Packit 5c3484
  an = n;
Packit 5c3484
  MPN_NORMALIZE (ap, an);
Packit 5c3484
  ASSERT (gn <= an);
Packit 5c3484
Packit 5c3484
  if (an >= size)
Packit 5c3484
    mpn_mul (tp, ap, an, up, size);
Packit 5c3484
  else
Packit 5c3484
    mpn_mul (tp, up, size, ap, an);
Packit 5c3484
Packit 5c3484
  size += an;
Packit 5c3484
Packit 5c3484
  if (usize > 0)
Packit 5c3484
    {
Packit 5c3484
      /* |v| = -v = (u a - g) / b */
Packit 5c3484
Packit 5c3484
      ASSERT_NOCARRY (mpn_sub (tp, tp, size, gp, gn));
Packit 5c3484
      MPN_NORMALIZE (tp, size);
Packit 5c3484
      if (size == 0)
Packit 5c3484
	return 0;
Packit 5c3484
    }
Packit 5c3484
  else
Packit 5c3484
    { /* |v| = v = (g - u a) / b = (g + |u| a) / b. Since g <= a,
Packit 5c3484
	 (g + |u| a) always fits in (|usize| + an) limbs. */
Packit 5c3484
Packit 5c3484
      ASSERT_NOCARRY (mpn_add (tp, tp, size, gp, gn));
Packit 5c3484
      size -= (tp[size - 1] == 0);
Packit 5c3484
    }
Packit 5c3484
Packit 5c3484
  /* Now divide t / b. There must be no remainder */
Packit 5c3484
  bn = n;
Packit 5c3484
  MPN_NORMALIZE (bp, bn);
Packit 5c3484
  ASSERT (size >= bn);
Packit 5c3484
Packit 5c3484
  vn = size + 1 - bn;
Packit 5c3484
  ASSERT (vn <= n + 1);
Packit 5c3484
Packit 5c3484
  mpn_divexact (vp, tp, size, bp, bn);
Packit 5c3484
  vn -= (vp[vn-1] == 0);
Packit 5c3484
Packit 5c3484
  return vn;
Packit 5c3484
}
Packit 5c3484
Packit 5c3484
/* Temporary storage:
Packit 5c3484
Packit 5c3484
   Initial division: Quotient of at most an - n + 1 <= an limbs.
Packit 5c3484
Packit 5c3484
   Storage for u0 and u1: 2(n+1).
Packit 5c3484
Packit 5c3484
   Storage for hgcd matrix M, with input ceil(n/2): 5 * ceil(n/4)
Packit 5c3484
Packit 5c3484
   Storage for hgcd, input (n + 1)/2: 9 n/4 plus some.
Packit 5c3484
Packit 5c3484
   When hgcd succeeds: 1 + floor(3n/2) for adjusting a and b, and 2(n+1) for the cofactors.
Packit 5c3484
Packit 5c3484
   When hgcd fails: 2n + 1 for mpn_gcdext_subdiv_step, which is less.
Packit 5c3484
Packit 5c3484
   For the lehmer call after the loop, Let T denote
Packit 5c3484
   GCDEXT_DC_THRESHOLD. For the gcdext_lehmer call, we need T each for
Packit 5c3484
   u, a and b, and 4T+3 scratch space. Next, for compute_v, we need T
Packit 5c3484
   for u, T+1 for v and 2T scratch space. In all, 7T + 3 is
Packit 5c3484
   sufficient for both operations.
Packit 5c3484
Packit 5c3484
*/
Packit 5c3484
Packit 5c3484
/* Optimal choice of p seems difficult. In each iteration the division
Packit 5c3484
 * of work between hgcd and the updates of u0 and u1 depends on the
Packit 5c3484
 * current size of the u. It may be desirable to use a different
Packit 5c3484
 * choice of p in each iteration. Also the input size seems to matter;
Packit 5c3484
 * choosing p = n / 3 in the first iteration seems to improve
Packit 5c3484
 * performance slightly for input size just above the threshold, but
Packit 5c3484
 * degrade performance for larger inputs. */
Packit 5c3484
#define CHOOSE_P_1(n) ((n) / 2)
Packit 5c3484
#define CHOOSE_P_2(n) ((n) / 3)
Packit 5c3484
Packit 5c3484
mp_size_t
Packit 5c3484
mpn_gcdext (mp_ptr gp, mp_ptr up, mp_size_t *usizep,
Packit 5c3484
	    mp_ptr ap, mp_size_t an, mp_ptr bp, mp_size_t n)
Packit 5c3484
{
Packit 5c3484
  mp_size_t talloc;
Packit 5c3484
  mp_size_t scratch;
Packit 5c3484
  mp_size_t matrix_scratch;
Packit 5c3484
  mp_size_t ualloc = n + 1;
Packit 5c3484
Packit 5c3484
  struct gcdext_ctx ctx;
Packit 5c3484
  mp_size_t un;
Packit 5c3484
  mp_ptr u0;
Packit 5c3484
  mp_ptr u1;
Packit 5c3484
Packit 5c3484
  mp_ptr tp;
Packit 5c3484
Packit 5c3484
  TMP_DECL;
Packit 5c3484
Packit 5c3484
  ASSERT (an >= n);
Packit 5c3484
  ASSERT (n > 0);
Packit 5c3484
  ASSERT (bp[n-1] > 0);
Packit 5c3484
Packit 5c3484
  TMP_MARK;
Packit 5c3484
Packit 5c3484
  /* FIXME: Check for small sizes first, before setting up temporary
Packit 5c3484
     storage etc. */
Packit 5c3484
  talloc = MPN_GCDEXT_LEHMER_N_ITCH(n);
Packit 5c3484
Packit 5c3484
  /* For initial division */
Packit 5c3484
  scratch = an - n + 1;
Packit 5c3484
  if (scratch > talloc)
Packit 5c3484
    talloc = scratch;
Packit 5c3484
Packit 5c3484
  if (ABOVE_THRESHOLD (n, GCDEXT_DC_THRESHOLD))
Packit 5c3484
    {
Packit 5c3484
      /* For hgcd loop. */
Packit 5c3484
      mp_size_t hgcd_scratch;
Packit 5c3484
      mp_size_t update_scratch;
Packit 5c3484
      mp_size_t p1 = CHOOSE_P_1 (n);
Packit 5c3484
      mp_size_t p2 = CHOOSE_P_2 (n);
Packit 5c3484
      mp_size_t min_p = MIN(p1, p2);
Packit 5c3484
      mp_size_t max_p = MAX(p1, p2);
Packit 5c3484
      matrix_scratch = MPN_HGCD_MATRIX_INIT_ITCH (n - min_p);
Packit 5c3484
      hgcd_scratch = mpn_hgcd_itch (n - min_p);
Packit 5c3484
      update_scratch = max_p + n - 1;
Packit 5c3484
Packit 5c3484
      scratch = matrix_scratch + MAX(hgcd_scratch, update_scratch);
Packit 5c3484
      if (scratch > talloc)
Packit 5c3484
	talloc = scratch;
Packit 5c3484
Packit 5c3484
      /* Final mpn_gcdext_lehmer_n call. Need space for u and for
Packit 5c3484
	 copies of a and b. */
Packit 5c3484
      scratch = MPN_GCDEXT_LEHMER_N_ITCH (GCDEXT_DC_THRESHOLD)
Packit 5c3484
	+ 3*GCDEXT_DC_THRESHOLD;
Packit 5c3484
Packit 5c3484
      if (scratch > talloc)
Packit 5c3484
	talloc = scratch;
Packit 5c3484
Packit 5c3484
      /* Cofactors u0 and u1 */
Packit 5c3484
      talloc += 2*(n+1);
Packit 5c3484
    }
Packit 5c3484
Packit 5c3484
  tp = TMP_ALLOC_LIMBS(talloc);
Packit 5c3484
Packit 5c3484
  if (an > n)
Packit 5c3484
    {
Packit 5c3484
      mpn_tdiv_qr (tp, ap, 0, ap, an, bp, n);
Packit 5c3484
Packit 5c3484
      if (mpn_zero_p (ap, n))
Packit 5c3484
	{
Packit 5c3484
	  MPN_COPY (gp, bp, n);
Packit 5c3484
	  *usizep = 0;
Packit 5c3484
	  TMP_FREE;
Packit 5c3484
	  return n;
Packit 5c3484
	}
Packit 5c3484
    }
Packit 5c3484
Packit 5c3484
  if (BELOW_THRESHOLD (n, GCDEXT_DC_THRESHOLD))
Packit 5c3484
    {
Packit 5c3484
      mp_size_t gn = mpn_gcdext_lehmer_n(gp, up, usizep, ap, bp, n, tp);
Packit 5c3484
Packit 5c3484
      TMP_FREE;
Packit 5c3484
      return gn;
Packit 5c3484
    }
Packit 5c3484
Packit 5c3484
  MPN_ZERO (tp, 2*ualloc);
Packit 5c3484
  u0 = tp; tp += ualloc;
Packit 5c3484
  u1 = tp; tp += ualloc;
Packit 5c3484
Packit 5c3484
  ctx.gp = gp;
Packit 5c3484
  ctx.up = up;
Packit 5c3484
  ctx.usize = usizep;
Packit 5c3484
Packit 5c3484
  {
Packit 5c3484
    /* For the first hgcd call, there are no u updates, and it makes
Packit 5c3484
       some sense to use a different choice for p. */
Packit 5c3484
Packit 5c3484
    /* FIXME: We could trim use of temporary storage, since u0 and u1
Packit 5c3484
       are not used yet. For the hgcd call, we could swap in the u0
Packit 5c3484
       and u1 pointers for the relevant matrix elements. */
Packit 5c3484
Packit 5c3484
    struct hgcd_matrix M;
Packit 5c3484
    mp_size_t p = CHOOSE_P_1 (n);
Packit 5c3484
    mp_size_t nn;
Packit 5c3484
Packit 5c3484
    mpn_hgcd_matrix_init (&M, n - p, tp);
Packit 5c3484
    nn = mpn_hgcd (ap + p, bp + p, n - p, &M, tp + matrix_scratch);
Packit 5c3484
    if (nn > 0)
Packit 5c3484
      {
Packit 5c3484
	ASSERT (M.n <= (n - p - 1)/2);
Packit 5c3484
	ASSERT (M.n + p <= (p + n - 1) / 2);
Packit 5c3484
Packit 5c3484
	/* Temporary storage 2 (p + M->n) <= p + n - 1 */
Packit 5c3484
	n = mpn_hgcd_matrix_adjust (&M, p + nn, ap, bp, p, tp + matrix_scratch);
Packit 5c3484
Packit 5c3484
	MPN_COPY (u0, M.p[1][0], M.n);
Packit 5c3484
	MPN_COPY (u1, M.p[1][1], M.n);
Packit 5c3484
	un = M.n;
Packit 5c3484
	while ( (u0[un-1] | u1[un-1] ) == 0)
Packit 5c3484
	  un--;
Packit 5c3484
      }
Packit 5c3484
    else
Packit 5c3484
      {
Packit 5c3484
	/* mpn_hgcd has failed. Then either one of a or b is very
Packit 5c3484
	   small, or the difference is very small. Perform one
Packit 5c3484
	   subtraction followed by one division. */
Packit 5c3484
	u1[0] = 1;
Packit 5c3484
Packit 5c3484
	ctx.u0 = u0;
Packit 5c3484
	ctx.u1 = u1;
Packit 5c3484
	ctx.tp = tp + n; /* ualloc */
Packit 5c3484
	ctx.un = 1;
Packit 5c3484
Packit 5c3484
	/* Temporary storage n */
Packit 5c3484
	n = mpn_gcd_subdiv_step (ap, bp, n, 0, mpn_gcdext_hook, &ctx, tp);
Packit 5c3484
	if (n == 0)
Packit 5c3484
	  {
Packit 5c3484
	    TMP_FREE;
Packit 5c3484
	    return ctx.gn;
Packit 5c3484
	  }
Packit 5c3484
Packit 5c3484
	un = ctx.un;
Packit 5c3484
	ASSERT (un < ualloc);
Packit 5c3484
      }
Packit 5c3484
  }
Packit 5c3484
Packit 5c3484
  while (ABOVE_THRESHOLD (n, GCDEXT_DC_THRESHOLD))
Packit 5c3484
    {
Packit 5c3484
      struct hgcd_matrix M;
Packit 5c3484
      mp_size_t p = CHOOSE_P_2 (n);
Packit 5c3484
      mp_size_t nn;
Packit 5c3484
Packit 5c3484
      mpn_hgcd_matrix_init (&M, n - p, tp);
Packit 5c3484
      nn = mpn_hgcd (ap + p, bp + p, n - p, &M, tp + matrix_scratch);
Packit 5c3484
      if (nn > 0)
Packit 5c3484
	{
Packit 5c3484
	  mp_ptr t0;
Packit 5c3484
Packit 5c3484
	  t0 = tp + matrix_scratch;
Packit 5c3484
	  ASSERT (M.n <= (n - p - 1)/2);
Packit 5c3484
	  ASSERT (M.n + p <= (p + n - 1) / 2);
Packit 5c3484
Packit 5c3484
	  /* Temporary storage 2 (p + M->n) <= p + n - 1 */
Packit 5c3484
	  n = mpn_hgcd_matrix_adjust (&M, p + nn, ap, bp, p, t0);
Packit 5c3484
Packit 5c3484
	  /* By the same analysis as for mpn_hgcd_matrix_mul */
Packit 5c3484
	  ASSERT (M.n + un <= ualloc);
Packit 5c3484
Packit 5c3484
	  /* FIXME: This copying could be avoided by some swapping of
Packit 5c3484
	   * pointers. May need more temporary storage, though. */
Packit 5c3484
	  MPN_COPY (t0, u0, un);
Packit 5c3484
Packit 5c3484
	  /* Temporary storage ualloc */
Packit 5c3484
	  un = hgcd_mul_matrix_vector (&M, u0, t0, u1, un, t0 + un);
Packit 5c3484
Packit 5c3484
	  ASSERT (un < ualloc);
Packit 5c3484
	  ASSERT ( (u0[un-1] | u1[un-1]) > 0);
Packit 5c3484
	}
Packit 5c3484
      else
Packit 5c3484
	{
Packit 5c3484
	  /* mpn_hgcd has failed. Then either one of a or b is very
Packit 5c3484
	     small, or the difference is very small. Perform one
Packit 5c3484
	     subtraction followed by one division. */
Packit 5c3484
	  ctx.u0 = u0;
Packit 5c3484
	  ctx.u1 = u1;
Packit 5c3484
	  ctx.tp = tp + n; /* ualloc */
Packit 5c3484
	  ctx.un = un;
Packit 5c3484
Packit 5c3484
	  /* Temporary storage n */
Packit 5c3484
	  n = mpn_gcd_subdiv_step (ap, bp, n, 0, mpn_gcdext_hook, &ctx, tp);
Packit 5c3484
	  if (n == 0)
Packit 5c3484
	    {
Packit 5c3484
	      TMP_FREE;
Packit 5c3484
	      return ctx.gn;
Packit 5c3484
	    }
Packit 5c3484
Packit 5c3484
	  un = ctx.un;
Packit 5c3484
	  ASSERT (un < ualloc);
Packit 5c3484
	}
Packit 5c3484
    }
Packit 5c3484
  /* We have A = ... a + ... b
Packit 5c3484
	     B =  u0 a +  u1 b
Packit 5c3484
Packit 5c3484
	     a = u1  A + ... B
Packit 5c3484
	     b = -u0 A + ... B
Packit 5c3484
Packit 5c3484
     with bounds
Packit 5c3484
Packit 5c3484
       |u0|, |u1| <= B / min(a, b)
Packit 5c3484
Packit 5c3484
     We always have u1 > 0, and u0 == 0 is possible only if u1 == 1,
Packit 5c3484
     in which case the only reduction done so far is a = A - k B for
Packit 5c3484
     some k.
Packit 5c3484
Packit 5c3484
     Compute g = u a + v b = (u u1 - v u0) A + (...) B
Packit 5c3484
     Here, u, v are bounded by
Packit 5c3484
Packit 5c3484
       |u| <= b,
Packit 5c3484
       |v| <= a
Packit 5c3484
  */
Packit 5c3484
Packit 5c3484
  ASSERT ( (ap[n-1] | bp[n-1]) > 0);
Packit 5c3484
Packit 5c3484
  if (UNLIKELY (mpn_cmp (ap, bp, n) == 0))
Packit 5c3484
    {
Packit 5c3484
      /* Must return the smallest cofactor, +u1 or -u0 */
Packit 5c3484
      int c;
Packit 5c3484
Packit 5c3484
      MPN_COPY (gp, ap, n);
Packit 5c3484
Packit 5c3484
      MPN_CMP (c, u0, u1, un);
Packit 5c3484
      /* c == 0 can happen only when A = (2k+1) G, B = 2 G. And in
Packit 5c3484
	 this case we choose the cofactor + 1, corresponding to G = A
Packit 5c3484
	 - k B, rather than -1, corresponding to G = - A + (k+1) B. */
Packit 5c3484
      ASSERT (c != 0 || (un == 1 && u0[0] == 1 && u1[0] == 1));
Packit 5c3484
      if (c < 0)
Packit 5c3484
	{
Packit 5c3484
	  MPN_NORMALIZE (u0, un);
Packit 5c3484
	  MPN_COPY (up, u0, un);
Packit 5c3484
	  *usizep = -un;
Packit 5c3484
	}
Packit 5c3484
      else
Packit 5c3484
	{
Packit 5c3484
	  MPN_NORMALIZE_NOT_ZERO (u1, un);
Packit 5c3484
	  MPN_COPY (up, u1, un);
Packit 5c3484
	  *usizep = un;
Packit 5c3484
	}
Packit 5c3484
Packit 5c3484
      TMP_FREE;
Packit 5c3484
      return n;
Packit 5c3484
    }
Packit 5c3484
  else if (UNLIKELY (u0[0] == 0) && un == 1)
Packit 5c3484
    {
Packit 5c3484
      mp_size_t gn;
Packit 5c3484
      ASSERT (u1[0] == 1);
Packit 5c3484
Packit 5c3484
      /* g = u a + v b = (u u1 - v u0) A + (...) B = u A + (...) B */
Packit 5c3484
      gn = mpn_gcdext_lehmer_n (gp, up, usizep, ap, bp, n, tp);
Packit 5c3484
Packit 5c3484
      TMP_FREE;
Packit 5c3484
      return gn;
Packit 5c3484
    }
Packit 5c3484
  else
Packit 5c3484
    {
Packit 5c3484
      mp_size_t u0n;
Packit 5c3484
      mp_size_t u1n;
Packit 5c3484
      mp_size_t lehmer_un;
Packit 5c3484
      mp_size_t lehmer_vn;
Packit 5c3484
      mp_size_t gn;
Packit 5c3484
Packit 5c3484
      mp_ptr lehmer_up;
Packit 5c3484
      mp_ptr lehmer_vp;
Packit 5c3484
      int negate;
Packit 5c3484
Packit 5c3484
      lehmer_up = tp; tp += n;
Packit 5c3484
Packit 5c3484
      /* Call mpn_gcdext_lehmer_n with copies of a and b. */
Packit 5c3484
      MPN_COPY (tp, ap, n);
Packit 5c3484
      MPN_COPY (tp + n, bp, n);
Packit 5c3484
      gn = mpn_gcdext_lehmer_n (gp, lehmer_up, &lehmer_un, tp, tp + n, n, tp + 2*n);
Packit 5c3484
Packit 5c3484
      u0n = un;
Packit 5c3484
      MPN_NORMALIZE (u0, u0n);
Packit 5c3484
      ASSERT (u0n > 0);
Packit 5c3484
Packit 5c3484
      if (lehmer_un == 0)
Packit 5c3484
	{
Packit 5c3484
	  /* u == 0  ==>  v = g / b == 1  ==> g = - u0 A + (...) B */
Packit 5c3484
	  MPN_COPY (up, u0, u0n);
Packit 5c3484
	  *usizep = -u0n;
Packit 5c3484
Packit 5c3484
	  TMP_FREE;
Packit 5c3484
	  return gn;
Packit 5c3484
	}
Packit 5c3484
Packit 5c3484
      lehmer_vp = tp;
Packit 5c3484
      /* Compute v = (g - u a) / b */
Packit 5c3484
      lehmer_vn = compute_v (lehmer_vp,
Packit 5c3484
			     ap, bp, n, gp, gn, lehmer_up, lehmer_un, tp + n + 1);
Packit 5c3484
Packit 5c3484
      if (lehmer_un > 0)
Packit 5c3484
	negate = 0;
Packit 5c3484
      else
Packit 5c3484
	{
Packit 5c3484
	  lehmer_un = -lehmer_un;
Packit 5c3484
	  negate = 1;
Packit 5c3484
	}
Packit 5c3484
Packit 5c3484
      u1n = un;
Packit 5c3484
      MPN_NORMALIZE (u1, u1n);
Packit 5c3484
      ASSERT (u1n > 0);
Packit 5c3484
Packit 5c3484
      ASSERT (lehmer_un + u1n <= ualloc);
Packit 5c3484
      ASSERT (lehmer_vn + u0n <= ualloc);
Packit 5c3484
Packit 5c3484
      /* We may still have v == 0 */
Packit 5c3484
Packit 5c3484
      /* Compute u u0 */
Packit 5c3484
      if (lehmer_un <= u1n)
Packit 5c3484
	/* Should be the common case */
Packit 5c3484
	mpn_mul (up, u1, u1n, lehmer_up, lehmer_un);
Packit 5c3484
      else
Packit 5c3484
	mpn_mul (up, lehmer_up, lehmer_un, u1, u1n);
Packit 5c3484
Packit 5c3484
      un = u1n + lehmer_un;
Packit 5c3484
      un -= (up[un - 1] == 0);
Packit 5c3484
Packit 5c3484
      if (lehmer_vn > 0)
Packit 5c3484
	{
Packit 5c3484
	  mp_limb_t cy;
Packit 5c3484
Packit 5c3484
	  /* Overwrites old u1 value */
Packit 5c3484
	  if (lehmer_vn <= u0n)
Packit 5c3484
	    /* Should be the common case */
Packit 5c3484
	    mpn_mul (u1, u0, u0n, lehmer_vp, lehmer_vn);
Packit 5c3484
	  else
Packit 5c3484
	    mpn_mul (u1, lehmer_vp, lehmer_vn, u0, u0n);
Packit 5c3484
Packit 5c3484
	  u1n = u0n + lehmer_vn;
Packit 5c3484
	  u1n -= (u1[u1n - 1] == 0);
Packit 5c3484
Packit 5c3484
	  if (u1n <= un)
Packit 5c3484
	    {
Packit 5c3484
	      cy = mpn_add (up, up, un, u1, u1n);
Packit 5c3484
	    }
Packit 5c3484
	  else
Packit 5c3484
	    {
Packit 5c3484
	      cy = mpn_add (up, u1, u1n, up, un);
Packit 5c3484
	      un = u1n;
Packit 5c3484
	    }
Packit 5c3484
	  up[un] = cy;
Packit 5c3484
	  un += (cy != 0);
Packit 5c3484
Packit 5c3484
	  ASSERT (un < ualloc);
Packit 5c3484
	}
Packit 5c3484
      *usizep = negate ? -un : un;
Packit 5c3484
Packit 5c3484
      TMP_FREE;
Packit 5c3484
      return gn;
Packit 5c3484
    }
Packit 5c3484
}