Blame mpn/generic/hgcd2.c

Packit 5c3484
/* hgcd2.c
Packit 5c3484
Packit 5c3484
   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
Packit 5c3484
   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
Packit 5c3484
   GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
Packit 5c3484
Packit 5c3484
Copyright 1996, 1998, 2000-2004, 2008, 2012 Free Software Foundation, Inc.
Packit 5c3484
Packit 5c3484
This file is part of the GNU MP Library.
Packit 5c3484
Packit 5c3484
The GNU MP Library is free software; you can redistribute it and/or modify
Packit 5c3484
it under the terms of either:
Packit 5c3484
Packit 5c3484
  * the GNU Lesser General Public License as published by the Free
Packit 5c3484
    Software Foundation; either version 3 of the License, or (at your
Packit 5c3484
    option) any later version.
Packit 5c3484
Packit 5c3484
or
Packit 5c3484
Packit 5c3484
  * the GNU General Public License as published by the Free Software
Packit 5c3484
    Foundation; either version 2 of the License, or (at your option) any
Packit 5c3484
    later version.
Packit 5c3484
Packit 5c3484
or both in parallel, as here.
Packit 5c3484
Packit 5c3484
The GNU MP Library is distributed in the hope that it will be useful, but
Packit 5c3484
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
Packit 5c3484
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
Packit 5c3484
for more details.
Packit 5c3484
Packit 5c3484
You should have received copies of the GNU General Public License and the
Packit 5c3484
GNU Lesser General Public License along with the GNU MP Library.  If not,
Packit 5c3484
see https://www.gnu.org/licenses/.  */
Packit 5c3484
Packit 5c3484
#include "gmp.h"
Packit 5c3484
#include "gmp-impl.h"
Packit 5c3484
#include "longlong.h"
Packit 5c3484
Packit 5c3484
#if GMP_NAIL_BITS == 0
Packit 5c3484
Packit 5c3484
/* Copied from the old mpn/generic/gcdext.c, and modified slightly to return
Packit 5c3484
   the remainder. */
Packit 5c3484
Packit 5c3484
/* Single-limb division optimized for small quotients. */
Packit 5c3484
static inline mp_limb_t
Packit 5c3484
div1 (mp_ptr rp,
Packit 5c3484
      mp_limb_t n0,
Packit 5c3484
      mp_limb_t d0)
Packit 5c3484
{
Packit 5c3484
  mp_limb_t q = 0;
Packit 5c3484
Packit 5c3484
  if ((mp_limb_signed_t) n0 < 0)
Packit 5c3484
    {
Packit 5c3484
      int cnt;
Packit 5c3484
      for (cnt = 1; (mp_limb_signed_t) d0 >= 0; cnt++)
Packit 5c3484
	{
Packit 5c3484
	  d0 = d0 << 1;
Packit 5c3484
	}
Packit 5c3484
Packit 5c3484
      q = 0;
Packit 5c3484
      while (cnt)
Packit 5c3484
	{
Packit 5c3484
	  q <<= 1;
Packit 5c3484
	  if (n0 >= d0)
Packit 5c3484
	    {
Packit 5c3484
	      n0 = n0 - d0;
Packit 5c3484
	      q |= 1;
Packit 5c3484
	    }
Packit 5c3484
	  d0 = d0 >> 1;
Packit 5c3484
	  cnt--;
Packit 5c3484
	}
Packit 5c3484
    }
Packit 5c3484
  else
Packit 5c3484
    {
Packit 5c3484
      int cnt;
Packit 5c3484
      for (cnt = 0; n0 >= d0; cnt++)
Packit 5c3484
	{
Packit 5c3484
	  d0 = d0 << 1;
Packit 5c3484
	}
Packit 5c3484
Packit 5c3484
      q = 0;
Packit 5c3484
      while (cnt)
Packit 5c3484
	{
Packit 5c3484
	  d0 = d0 >> 1;
Packit 5c3484
	  q <<= 1;
Packit 5c3484
	  if (n0 >= d0)
Packit 5c3484
	    {
Packit 5c3484
	      n0 = n0 - d0;
Packit 5c3484
	      q |= 1;
Packit 5c3484
	    }
Packit 5c3484
	  cnt--;
Packit 5c3484
	}
Packit 5c3484
    }
Packit 5c3484
  *rp = n0;
Packit 5c3484
  return q;
Packit 5c3484
}
Packit 5c3484
Packit 5c3484
/* Two-limb division optimized for small quotients.  */
Packit 5c3484
static inline mp_limb_t
Packit 5c3484
div2 (mp_ptr rp,
Packit 5c3484
      mp_limb_t nh, mp_limb_t nl,
Packit 5c3484
      mp_limb_t dh, mp_limb_t dl)
Packit 5c3484
{
Packit 5c3484
  mp_limb_t q = 0;
Packit 5c3484
Packit 5c3484
  if ((mp_limb_signed_t) nh < 0)
Packit 5c3484
    {
Packit 5c3484
      int cnt;
Packit 5c3484
      for (cnt = 1; (mp_limb_signed_t) dh >= 0; cnt++)
Packit 5c3484
	{
Packit 5c3484
	  dh = (dh << 1) | (dl >> (GMP_LIMB_BITS - 1));
Packit 5c3484
	  dl = dl << 1;
Packit 5c3484
	}
Packit 5c3484
Packit 5c3484
      while (cnt)
Packit 5c3484
	{
Packit 5c3484
	  q <<= 1;
Packit 5c3484
	  if (nh > dh || (nh == dh && nl >= dl))
Packit 5c3484
	    {
Packit 5c3484
	      sub_ddmmss (nh, nl, nh, nl, dh, dl);
Packit 5c3484
	      q |= 1;
Packit 5c3484
	    }
Packit 5c3484
	  dl = (dh << (GMP_LIMB_BITS - 1)) | (dl >> 1);
Packit 5c3484
	  dh = dh >> 1;
Packit 5c3484
	  cnt--;
Packit 5c3484
	}
Packit 5c3484
    }
Packit 5c3484
  else
Packit 5c3484
    {
Packit 5c3484
      int cnt;
Packit 5c3484
      for (cnt = 0; nh > dh || (nh == dh && nl >= dl); cnt++)
Packit 5c3484
	{
Packit 5c3484
	  dh = (dh << 1) | (dl >> (GMP_LIMB_BITS - 1));
Packit 5c3484
	  dl = dl << 1;
Packit 5c3484
	}
Packit 5c3484
Packit 5c3484
      while (cnt)
Packit 5c3484
	{
Packit 5c3484
	  dl = (dh << (GMP_LIMB_BITS - 1)) | (dl >> 1);
Packit 5c3484
	  dh = dh >> 1;
Packit 5c3484
	  q <<= 1;
Packit 5c3484
	  if (nh > dh || (nh == dh && nl >= dl))
Packit 5c3484
	    {
Packit 5c3484
	      sub_ddmmss (nh, nl, nh, nl, dh, dl);
Packit 5c3484
	      q |= 1;
Packit 5c3484
	    }
Packit 5c3484
	  cnt--;
Packit 5c3484
	}
Packit 5c3484
    }
Packit 5c3484
Packit 5c3484
  rp[0] = nl;
Packit 5c3484
  rp[1] = nh;
Packit 5c3484
Packit 5c3484
  return q;
Packit 5c3484
}
Packit 5c3484
Packit 5c3484
#if 0
Packit 5c3484
/* This div2 uses less branches, but it seems to nevertheless be
Packit 5c3484
   slightly slower than the above code. */
Packit 5c3484
static inline mp_limb_t
Packit 5c3484
div2 (mp_ptr rp,
Packit 5c3484
      mp_limb_t nh, mp_limb_t nl,
Packit 5c3484
      mp_limb_t dh, mp_limb_t dl)
Packit 5c3484
{
Packit 5c3484
  mp_limb_t q = 0;
Packit 5c3484
  int ncnt;
Packit 5c3484
  int dcnt;
Packit 5c3484
Packit 5c3484
  count_leading_zeros (ncnt, nh);
Packit 5c3484
  count_leading_zeros (dcnt, dh);
Packit 5c3484
  dcnt -= ncnt;
Packit 5c3484
Packit 5c3484
  dh = (dh << dcnt) + (-(dcnt > 0) & (dl >> (GMP_LIMB_BITS - dcnt)));
Packit 5c3484
  dl <<= dcnt;
Packit 5c3484
Packit 5c3484
  do
Packit 5c3484
    {
Packit 5c3484
      mp_limb_t bit;
Packit 5c3484
      q <<= 1;
Packit 5c3484
      if (UNLIKELY (nh == dh))
Packit 5c3484
	bit = (nl >= dl);
Packit 5c3484
      else
Packit 5c3484
	bit = (nh > dh);
Packit 5c3484
Packit 5c3484
      q |= bit;
Packit 5c3484
Packit 5c3484
      sub_ddmmss (nh, nl, nh, nl, (-bit) & dh, (-bit) & dl);
Packit 5c3484
Packit 5c3484
      dl = (dh << (GMP_LIMB_BITS - 1)) | (dl >> 1);
Packit 5c3484
      dh = dh >> 1;
Packit 5c3484
    }
Packit 5c3484
  while (dcnt--);
Packit 5c3484
Packit 5c3484
  rp[0] = nl;
Packit 5c3484
  rp[1] = nh;
Packit 5c3484
Packit 5c3484
  return q;
Packit 5c3484
}
Packit 5c3484
#endif
Packit 5c3484
Packit 5c3484
#else /* GMP_NAIL_BITS != 0 */
Packit 5c3484
/* Check all functions for nail support. */
Packit 5c3484
/* hgcd2 should be defined to take inputs including nail bits, and
Packit 5c3484
   produce a matrix with elements also including nail bits. This is
Packit 5c3484
   necessary, for the matrix elements to be useful with mpn_mul_1,
Packit 5c3484
   mpn_addmul_1 and friends. */
Packit 5c3484
#error Not implemented
Packit 5c3484
#endif /* GMP_NAIL_BITS != 0 */
Packit 5c3484
Packit 5c3484
/* Reduces a,b until |a-b| (almost) fits in one limb + 1 bit. Constructs
Packit 5c3484
   matrix M. Returns 1 if we make progress, i.e. can perform at least
Packit 5c3484
   one subtraction. Otherwise returns zero. */
Packit 5c3484
Packit 5c3484
/* FIXME: Possible optimizations:
Packit 5c3484
Packit 5c3484
   The div2 function starts with checking the most significant bit of
Packit 5c3484
   the numerator. We can maintained normalized operands here, call
Packit 5c3484
   hgcd with normalized operands only, which should make the code
Packit 5c3484
   simpler and possibly faster.
Packit 5c3484
Packit 5c3484
   Experiment with table lookups on the most significant bits.
Packit 5c3484
Packit 5c3484
   This function is also a candidate for assembler implementation.
Packit 5c3484
*/
Packit 5c3484
int
Packit 5c3484
mpn_hgcd2 (mp_limb_t ah, mp_limb_t al, mp_limb_t bh, mp_limb_t bl,
Packit 5c3484
	   struct hgcd_matrix1 *M)
Packit 5c3484
{
Packit 5c3484
  mp_limb_t u00, u01, u10, u11;
Packit 5c3484
Packit 5c3484
  if (ah < 2 || bh < 2)
Packit 5c3484
    return 0;
Packit 5c3484
Packit 5c3484
  if (ah > bh || (ah == bh && al > bl))
Packit 5c3484
    {
Packit 5c3484
      sub_ddmmss (ah, al, ah, al, bh, bl);
Packit 5c3484
      if (ah < 2)
Packit 5c3484
	return 0;
Packit 5c3484
Packit 5c3484
      u00 = u01 = u11 = 1;
Packit 5c3484
      u10 = 0;
Packit 5c3484
    }
Packit 5c3484
  else
Packit 5c3484
    {
Packit 5c3484
      sub_ddmmss (bh, bl, bh, bl, ah, al);
Packit 5c3484
      if (bh < 2)
Packit 5c3484
	return 0;
Packit 5c3484
Packit 5c3484
      u00 = u10 = u11 = 1;
Packit 5c3484
      u01 = 0;
Packit 5c3484
    }
Packit 5c3484
Packit 5c3484
  if (ah < bh)
Packit 5c3484
    goto subtract_a;
Packit 5c3484
Packit 5c3484
  for (;;)
Packit 5c3484
    {
Packit 5c3484
      ASSERT (ah >= bh);
Packit 5c3484
      if (ah == bh)
Packit 5c3484
	goto done;
Packit 5c3484
Packit 5c3484
      if (ah < (CNST_LIMB(1) << (GMP_LIMB_BITS / 2)))
Packit 5c3484
	{
Packit 5c3484
	  ah = (ah << (GMP_LIMB_BITS / 2) ) + (al >> (GMP_LIMB_BITS / 2));
Packit 5c3484
	  bh = (bh << (GMP_LIMB_BITS / 2) ) + (bl >> (GMP_LIMB_BITS / 2));
Packit 5c3484
Packit 5c3484
	  break;
Packit 5c3484
	}
Packit 5c3484
Packit 5c3484
      /* Subtract a -= q b, and multiply M from the right by (1 q ; 0
Packit 5c3484
	 1), affecting the second column of M. */
Packit 5c3484
      ASSERT (ah > bh);
Packit 5c3484
      sub_ddmmss (ah, al, ah, al, bh, bl);
Packit 5c3484
Packit 5c3484
      if (ah < 2)
Packit 5c3484
	goto done;
Packit 5c3484
Packit 5c3484
      if (ah <= bh)
Packit 5c3484
	{
Packit 5c3484
	  /* Use q = 1 */
Packit 5c3484
	  u01 += u00;
Packit 5c3484
	  u11 += u10;
Packit 5c3484
	}
Packit 5c3484
      else
Packit 5c3484
	{
Packit 5c3484
	  mp_limb_t r[2];
Packit 5c3484
	  mp_limb_t q = div2 (r, ah, al, bh, bl);
Packit 5c3484
	  al = r[0]; ah = r[1];
Packit 5c3484
	  if (ah < 2)
Packit 5c3484
	    {
Packit 5c3484
	      /* A is too small, but q is correct. */
Packit 5c3484
	      u01 += q * u00;
Packit 5c3484
	      u11 += q * u10;
Packit 5c3484
	      goto done;
Packit 5c3484
	    }
Packit 5c3484
	  q++;
Packit 5c3484
	  u01 += q * u00;
Packit 5c3484
	  u11 += q * u10;
Packit 5c3484
	}
Packit 5c3484
    subtract_a:
Packit 5c3484
      ASSERT (bh >= ah);
Packit 5c3484
      if (ah == bh)
Packit 5c3484
	goto done;
Packit 5c3484
Packit 5c3484
      if (bh < (CNST_LIMB(1) << (GMP_LIMB_BITS / 2)))
Packit 5c3484
	{
Packit 5c3484
	  ah = (ah << (GMP_LIMB_BITS / 2) ) + (al >> (GMP_LIMB_BITS / 2));
Packit 5c3484
	  bh = (bh << (GMP_LIMB_BITS / 2) ) + (bl >> (GMP_LIMB_BITS / 2));
Packit 5c3484
Packit 5c3484
	  goto subtract_a1;
Packit 5c3484
	}
Packit 5c3484
Packit 5c3484
      /* Subtract b -= q a, and multiply M from the right by (1 0 ; q
Packit 5c3484
	 1), affecting the first column of M. */
Packit 5c3484
      sub_ddmmss (bh, bl, bh, bl, ah, al);
Packit 5c3484
Packit 5c3484
      if (bh < 2)
Packit 5c3484
	goto done;
Packit 5c3484
Packit 5c3484
      if (bh <= ah)
Packit 5c3484
	{
Packit 5c3484
	  /* Use q = 1 */
Packit 5c3484
	  u00 += u01;
Packit 5c3484
	  u10 += u11;
Packit 5c3484
	}
Packit 5c3484
      else
Packit 5c3484
	{
Packit 5c3484
	  mp_limb_t r[2];
Packit 5c3484
	  mp_limb_t q = div2 (r, bh, bl, ah, al);
Packit 5c3484
	  bl = r[0]; bh = r[1];
Packit 5c3484
	  if (bh < 2)
Packit 5c3484
	    {
Packit 5c3484
	      /* B is too small, but q is correct. */
Packit 5c3484
	      u00 += q * u01;
Packit 5c3484
	      u10 += q * u11;
Packit 5c3484
	      goto done;
Packit 5c3484
	    }
Packit 5c3484
	  q++;
Packit 5c3484
	  u00 += q * u01;
Packit 5c3484
	  u10 += q * u11;
Packit 5c3484
	}
Packit 5c3484
    }
Packit 5c3484
Packit 5c3484
  /* NOTE: Since we discard the least significant half limb, we don't
Packit 5c3484
     get a truly maximal M (corresponding to |a - b| <
Packit 5c3484
     2^{GMP_LIMB_BITS +1}). */
Packit 5c3484
  /* Single precision loop */
Packit 5c3484
  for (;;)
Packit 5c3484
    {
Packit 5c3484
      ASSERT (ah >= bh);
Packit 5c3484
Packit 5c3484
      ah -= bh;
Packit 5c3484
      if (ah < (CNST_LIMB (1) << (GMP_LIMB_BITS / 2 + 1)))
Packit 5c3484
	break;
Packit 5c3484
Packit 5c3484
      if (ah <= bh)
Packit 5c3484
	{
Packit 5c3484
	  /* Use q = 1 */
Packit 5c3484
	  u01 += u00;
Packit 5c3484
	  u11 += u10;
Packit 5c3484
	}
Packit 5c3484
      else
Packit 5c3484
	{
Packit 5c3484
	  mp_limb_t r;
Packit 5c3484
	  mp_limb_t q = div1 (&r, ah, bh);
Packit 5c3484
	  ah = r;
Packit 5c3484
	  if (ah < (CNST_LIMB(1) << (GMP_LIMB_BITS / 2 + 1)))
Packit 5c3484
	    {
Packit 5c3484
	      /* A is too small, but q is correct. */
Packit 5c3484
	      u01 += q * u00;
Packit 5c3484
	      u11 += q * u10;
Packit 5c3484
	      break;
Packit 5c3484
	    }
Packit 5c3484
	  q++;
Packit 5c3484
	  u01 += q * u00;
Packit 5c3484
	  u11 += q * u10;
Packit 5c3484
	}
Packit 5c3484
    subtract_a1:
Packit 5c3484
      ASSERT (bh >= ah);
Packit 5c3484
Packit 5c3484
      bh -= ah;
Packit 5c3484
      if (bh < (CNST_LIMB (1) << (GMP_LIMB_BITS / 2 + 1)))
Packit 5c3484
	break;
Packit 5c3484
Packit 5c3484
      if (bh <= ah)
Packit 5c3484
	{
Packit 5c3484
	  /* Use q = 1 */
Packit 5c3484
	  u00 += u01;
Packit 5c3484
	  u10 += u11;
Packit 5c3484
	}
Packit 5c3484
      else
Packit 5c3484
	{
Packit 5c3484
	  mp_limb_t r;
Packit 5c3484
	  mp_limb_t q = div1 (&r, bh, ah);
Packit 5c3484
	  bh = r;
Packit 5c3484
	  if (bh < (CNST_LIMB(1) << (GMP_LIMB_BITS / 2 + 1)))
Packit 5c3484
	    {
Packit 5c3484
	      /* B is too small, but q is correct. */
Packit 5c3484
	      u00 += q * u01;
Packit 5c3484
	      u10 += q * u11;
Packit 5c3484
	      break;
Packit 5c3484
	    }
Packit 5c3484
	  q++;
Packit 5c3484
	  u00 += q * u01;
Packit 5c3484
	  u10 += q * u11;
Packit 5c3484
	}
Packit 5c3484
    }
Packit 5c3484
Packit 5c3484
 done:
Packit 5c3484
  M->u[0][0] = u00; M->u[0][1] = u01;
Packit 5c3484
  M->u[1][0] = u10; M->u[1][1] = u11;
Packit 5c3484
Packit 5c3484
  return 1;
Packit 5c3484
}
Packit 5c3484
Packit 5c3484
/* Sets (r;b) = (a;b) M, with M = (u00, u01; u10, u11). Vector must
Packit 5c3484
 * have space for n + 1 limbs. Uses three buffers to avoid a copy*/
Packit 5c3484
mp_size_t
Packit 5c3484
mpn_hgcd_mul_matrix1_vector (const struct hgcd_matrix1 *M,
Packit 5c3484
			     mp_ptr rp, mp_srcptr ap, mp_ptr bp, mp_size_t n)
Packit 5c3484
{
Packit 5c3484
  mp_limb_t ah, bh;
Packit 5c3484
Packit 5c3484
  /* Compute (r,b) <-- (u00 a + u10 b, u01 a + u11 b) as
Packit 5c3484
Packit 5c3484
     r  = u00 * a
Packit 5c3484
     r += u10 * b
Packit 5c3484
     b *= u11
Packit 5c3484
     b += u01 * a
Packit 5c3484
  */
Packit 5c3484
Packit 5c3484
#if HAVE_NATIVE_mpn_addaddmul_1msb0
Packit 5c3484
  ah = mpn_addaddmul_1msb0 (rp, ap, bp, n, M->u[0][0], M->u[1][0]);
Packit 5c3484
  bh = mpn_addaddmul_1msb0 (bp, bp, ap, n, M->u[1][1], M->u[0][1]);
Packit 5c3484
#else
Packit 5c3484
  ah =     mpn_mul_1 (rp, ap, n, M->u[0][0]);
Packit 5c3484
  ah += mpn_addmul_1 (rp, bp, n, M->u[1][0]);
Packit 5c3484
Packit 5c3484
  bh =     mpn_mul_1 (bp, bp, n, M->u[1][1]);
Packit 5c3484
  bh += mpn_addmul_1 (bp, ap, n, M->u[0][1]);
Packit 5c3484
#endif
Packit 5c3484
  rp[n] = ah;
Packit 5c3484
  bp[n] = bh;
Packit 5c3484
Packit 5c3484
  n += (ah | bh) > 0;
Packit 5c3484
  return n;
Packit 5c3484
}