Blame mpn/generic/hgcd_reduce.c

Packit 5c3484
/* hgcd_reduce.c.
Packit 5c3484
Packit 5c3484
   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
Packit 5c3484
   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
Packit 5c3484
   GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
Packit 5c3484
Packit 5c3484
Copyright 2011, 2012 Free Software Foundation, Inc.
Packit 5c3484
Packit 5c3484
This file is part of the GNU MP Library.
Packit 5c3484
Packit 5c3484
The GNU MP Library is free software; you can redistribute it and/or modify
Packit 5c3484
it under the terms of either:
Packit 5c3484
Packit 5c3484
  * the GNU Lesser General Public License as published by the Free
Packit 5c3484
    Software Foundation; either version 3 of the License, or (at your
Packit 5c3484
    option) any later version.
Packit 5c3484
Packit 5c3484
or
Packit 5c3484
Packit 5c3484
  * the GNU General Public License as published by the Free Software
Packit 5c3484
    Foundation; either version 2 of the License, or (at your option) any
Packit 5c3484
    later version.
Packit 5c3484
Packit 5c3484
or both in parallel, as here.
Packit 5c3484
Packit 5c3484
The GNU MP Library is distributed in the hope that it will be useful, but
Packit 5c3484
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
Packit 5c3484
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
Packit 5c3484
for more details.
Packit 5c3484
Packit 5c3484
You should have received copies of the GNU General Public License and the
Packit 5c3484
GNU Lesser General Public License along with the GNU MP Library.  If not,
Packit 5c3484
see https://www.gnu.org/licenses/.  */
Packit 5c3484
Packit 5c3484
#include "gmp.h"
Packit 5c3484
#include "gmp-impl.h"
Packit 5c3484
#include "longlong.h"
Packit 5c3484
Packit 5c3484
/* Computes R -= A * B. Result must be non-negative. Normalized down
Packit 5c3484
   to size an, and resulting size is returned. */
Packit 5c3484
static mp_size_t
Packit 5c3484
submul (mp_ptr rp, mp_size_t rn,
Packit 5c3484
	mp_srcptr ap, mp_size_t an, mp_srcptr bp, mp_size_t bn)
Packit 5c3484
{
Packit 5c3484
  mp_ptr tp;
Packit 5c3484
  TMP_DECL;
Packit 5c3484
Packit 5c3484
  ASSERT (bn > 0);
Packit 5c3484
  ASSERT (an >= bn);
Packit 5c3484
  ASSERT (rn >= an);
Packit 5c3484
  ASSERT (an + bn <= rn + 1);
Packit 5c3484
Packit 5c3484
  TMP_MARK;
Packit 5c3484
  tp = TMP_ALLOC_LIMBS (an + bn);
Packit 5c3484
Packit 5c3484
  mpn_mul (tp, ap, an, bp, bn);
Packit 5c3484
  ASSERT ((an + bn <= rn) || (tp[rn] == 0));
Packit 5c3484
  ASSERT_NOCARRY (mpn_sub (rp, rp, rn, tp, an + bn - (an + bn > rn)));
Packit 5c3484
  TMP_FREE;
Packit 5c3484
Packit 5c3484
  while (rn > an && (rp[rn-1] == 0))
Packit 5c3484
    rn--;
Packit 5c3484
Packit 5c3484
  return rn;
Packit 5c3484
}
Packit 5c3484
Packit 5c3484
/* Computes (a, b)  <--  M^{-1} (a; b) */
Packit 5c3484
/* FIXME:
Packit 5c3484
    x Take scratch parameter, and figure out scratch need.
Packit 5c3484
Packit 5c3484
    x Use some fallback for small M->n?
Packit 5c3484
*/
Packit 5c3484
static mp_size_t
Packit 5c3484
hgcd_matrix_apply (const struct hgcd_matrix *M,
Packit 5c3484
		   mp_ptr ap, mp_ptr bp,
Packit 5c3484
		   mp_size_t n)
Packit 5c3484
{
Packit 5c3484
  mp_size_t an, bn, un, vn, nn;
Packit 5c3484
  mp_size_t mn[2][2];
Packit 5c3484
  mp_size_t modn;
Packit 5c3484
  mp_ptr tp, sp, scratch;
Packit 5c3484
  mp_limb_t cy;
Packit 5c3484
  unsigned i, j;
Packit 5c3484
Packit 5c3484
  TMP_DECL;
Packit 5c3484
Packit 5c3484
  ASSERT ( (ap[n-1] | bp[n-1]) > 0);
Packit 5c3484
Packit 5c3484
  an = n;
Packit 5c3484
  MPN_NORMALIZE (ap, an);
Packit 5c3484
  bn = n;
Packit 5c3484
  MPN_NORMALIZE (bp, bn);
Packit 5c3484
Packit 5c3484
  for (i = 0; i < 2; i++)
Packit 5c3484
    for (j = 0; j < 2; j++)
Packit 5c3484
      {
Packit 5c3484
	mp_size_t k;
Packit 5c3484
	k = M->n;
Packit 5c3484
	MPN_NORMALIZE (M->p[i][j], k);
Packit 5c3484
	mn[i][j] = k;
Packit 5c3484
      }
Packit 5c3484
Packit 5c3484
  ASSERT (mn[0][0] > 0);
Packit 5c3484
  ASSERT (mn[1][1] > 0);
Packit 5c3484
  ASSERT ( (mn[0][1] | mn[1][0]) > 0);
Packit 5c3484
Packit 5c3484
  TMP_MARK;
Packit 5c3484
Packit 5c3484
  if (mn[0][1] == 0)
Packit 5c3484
    {
Packit 5c3484
      /* A unchanged, M = (1, 0; q, 1) */
Packit 5c3484
      ASSERT (mn[0][0] == 1);
Packit 5c3484
      ASSERT (M->p[0][0][0] == 1);
Packit 5c3484
      ASSERT (mn[1][1] == 1);
Packit 5c3484
      ASSERT (M->p[1][1][0] == 1);
Packit 5c3484
Packit 5c3484
      /* Put B <-- B - q A */
Packit 5c3484
      nn = submul (bp, bn, ap, an, M->p[1][0], mn[1][0]);
Packit 5c3484
    }
Packit 5c3484
  else if (mn[1][0] == 0)
Packit 5c3484
    {
Packit 5c3484
      /* B unchanged, M = (1, q; 0, 1) */
Packit 5c3484
      ASSERT (mn[0][0] == 1);
Packit 5c3484
      ASSERT (M->p[0][0][0] == 1);
Packit 5c3484
      ASSERT (mn[1][1] == 1);
Packit 5c3484
      ASSERT (M->p[1][1][0] == 1);
Packit 5c3484
Packit 5c3484
      /* Put A  <-- A - q * B */
Packit 5c3484
      nn = submul (ap, an, bp, bn, M->p[0][1], mn[0][1]);
Packit 5c3484
    }
Packit 5c3484
  else
Packit 5c3484
    {
Packit 5c3484
      /* A = m00 a + m01 b  ==> a <= A / m00, b <= A / m01.
Packit 5c3484
	 B = m10 a + m11 b  ==> a <= B / m10, b <= B / m11. */
Packit 5c3484
      un = MIN (an - mn[0][0], bn - mn[1][0]) + 1;
Packit 5c3484
      vn = MIN (an - mn[0][1], bn - mn[1][1]) + 1;
Packit 5c3484
Packit 5c3484
      nn = MAX (un, vn);
Packit 5c3484
      /* In the range of interest, mulmod_bnm1 should always beat mullo. */
Packit 5c3484
      modn = mpn_mulmod_bnm1_next_size (nn + 1);
Packit 5c3484
Packit 5c3484
      TMP_ALLOC_LIMBS_3 (tp, modn,
Packit 5c3484
			 sp, modn,
Packit 5c3484
			 scratch, mpn_mulmod_bnm1_itch (modn, modn, M->n));
Packit 5c3484
Packit 5c3484
      ASSERT (n <= 2*modn);
Packit 5c3484
Packit 5c3484
      if (n > modn)
Packit 5c3484
	{
Packit 5c3484
	  cy = mpn_add (ap, ap, modn, ap + modn, n - modn);
Packit 5c3484
	  MPN_INCR_U (ap, modn, cy);
Packit 5c3484
Packit 5c3484
	  cy = mpn_add (bp, bp, modn, bp + modn, n - modn);
Packit 5c3484
	  MPN_INCR_U (bp, modn, cy);
Packit 5c3484
Packit 5c3484
	  n = modn;
Packit 5c3484
	}
Packit 5c3484
Packit 5c3484
      mpn_mulmod_bnm1 (tp, modn, ap, n, M->p[1][1], mn[1][1], scratch);
Packit 5c3484
      mpn_mulmod_bnm1 (sp, modn, bp, n, M->p[0][1], mn[0][1], scratch);
Packit 5c3484
Packit 5c3484
      /* FIXME: Handle the small n case in some better way. */
Packit 5c3484
      if (n + mn[1][1] < modn)
Packit 5c3484
	MPN_ZERO (tp + n + mn[1][1], modn - n - mn[1][1]);
Packit 5c3484
      if (n + mn[0][1] < modn)
Packit 5c3484
	MPN_ZERO (sp + n + mn[0][1], modn - n - mn[0][1]);
Packit 5c3484
Packit 5c3484
      cy = mpn_sub_n (tp, tp, sp, modn);
Packit 5c3484
      MPN_DECR_U (tp, modn, cy);
Packit 5c3484
Packit 5c3484
      ASSERT (mpn_zero_p (tp + nn, modn - nn));
Packit 5c3484
Packit 5c3484
      mpn_mulmod_bnm1 (sp, modn, ap, n, M->p[1][0], mn[1][0], scratch);
Packit 5c3484
      MPN_COPY (ap, tp, nn);
Packit 5c3484
      mpn_mulmod_bnm1 (tp, modn, bp, n, M->p[0][0], mn[0][0], scratch);
Packit 5c3484
Packit 5c3484
      if (n + mn[1][0] < modn)
Packit 5c3484
	MPN_ZERO (sp + n + mn[1][0], modn - n - mn[1][0]);
Packit 5c3484
      if (n + mn[0][0] < modn)
Packit 5c3484
	MPN_ZERO (tp + n + mn[0][0], modn - n - mn[0][0]);
Packit 5c3484
Packit 5c3484
      cy = mpn_sub_n (tp, tp, sp, modn);
Packit 5c3484
      MPN_DECR_U (tp, modn, cy);
Packit 5c3484
Packit 5c3484
      ASSERT (mpn_zero_p (tp + nn, modn - nn));
Packit 5c3484
      MPN_COPY (bp, tp, nn);
Packit 5c3484
Packit 5c3484
      while ( (ap[nn-1] | bp[nn-1]) == 0)
Packit 5c3484
	{
Packit 5c3484
	  nn--;
Packit 5c3484
	  ASSERT (nn > 0);
Packit 5c3484
	}
Packit 5c3484
    }
Packit 5c3484
  TMP_FREE;
Packit 5c3484
Packit 5c3484
  return nn;
Packit 5c3484
}
Packit 5c3484
Packit 5c3484
mp_size_t
Packit 5c3484
mpn_hgcd_reduce_itch (mp_size_t n, mp_size_t p)
Packit 5c3484
{
Packit 5c3484
  mp_size_t itch;
Packit 5c3484
  if (BELOW_THRESHOLD (n, HGCD_REDUCE_THRESHOLD))
Packit 5c3484
    {
Packit 5c3484
      itch = mpn_hgcd_itch (n-p);
Packit 5c3484
Packit 5c3484
      /* For arbitrary p, the storage for _adjust is 2*(p + M->n) = 2 *
Packit 5c3484
	 (p + ceil((n-p)/2) - 1 <= n + p - 1 */
Packit 5c3484
      if (itch < n + p - 1)
Packit 5c3484
	itch = n + p - 1;
Packit 5c3484
    }
Packit 5c3484
  else
Packit 5c3484
    {
Packit 5c3484
      itch = 2*(n-p) + mpn_hgcd_itch (n-p);
Packit 5c3484
      /* Currently, hgcd_matrix_apply allocates its own storage. */
Packit 5c3484
    }
Packit 5c3484
  return itch;
Packit 5c3484
}
Packit 5c3484
Packit 5c3484
/* FIXME: Document storage need. */
Packit 5c3484
mp_size_t
Packit 5c3484
mpn_hgcd_reduce (struct hgcd_matrix *M,
Packit 5c3484
		 mp_ptr ap, mp_ptr bp, mp_size_t n, mp_size_t p,
Packit 5c3484
		 mp_ptr tp)
Packit 5c3484
{
Packit 5c3484
  mp_size_t nn;
Packit 5c3484
  if (BELOW_THRESHOLD (n, HGCD_REDUCE_THRESHOLD))
Packit 5c3484
    {
Packit 5c3484
      nn = mpn_hgcd (ap + p, bp + p, n - p, M, tp);
Packit 5c3484
      if (nn > 0)
Packit 5c3484
	/* Needs 2*(p + M->n) <= 2*(floor(n/2) + ceil(n/2) - 1)
Packit 5c3484
	   = 2 (n - 1) */
Packit 5c3484
	return mpn_hgcd_matrix_adjust (M, p + nn, ap, bp, p, tp);
Packit 5c3484
    }
Packit 5c3484
  else
Packit 5c3484
    {
Packit 5c3484
      MPN_COPY (tp, ap + p, n - p);
Packit 5c3484
      MPN_COPY (tp + n - p, bp + p, n - p);
Packit 5c3484
      if (mpn_hgcd_appr (tp, tp + n - p, n - p, M, tp + 2*(n-p)))
Packit 5c3484
	return hgcd_matrix_apply (M, ap, bp, n);
Packit 5c3484
    }
Packit 5c3484
  return 0;
Packit 5c3484
}