Blame mpn/generic/jacobi.c

Packit 5c3484
/* jacobi.c
Packit 5c3484
Packit 5c3484
   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES.  IT IS ONLY
Packit 5c3484
   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
Packit 5c3484
   GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
Packit 5c3484
Packit 5c3484
Copyright 1996, 1998, 2000-2004, 2008, 2010, 2011 Free Software Foundation,
Packit 5c3484
Inc.
Packit 5c3484
Packit 5c3484
This file is part of the GNU MP Library.
Packit 5c3484
Packit 5c3484
The GNU MP Library is free software; you can redistribute it and/or modify
Packit 5c3484
it under the terms of either:
Packit 5c3484
Packit 5c3484
  * the GNU Lesser General Public License as published by the Free
Packit 5c3484
    Software Foundation; either version 3 of the License, or (at your
Packit 5c3484
    option) any later version.
Packit 5c3484
Packit 5c3484
or
Packit 5c3484
Packit 5c3484
  * the GNU General Public License as published by the Free Software
Packit 5c3484
    Foundation; either version 2 of the License, or (at your option) any
Packit 5c3484
    later version.
Packit 5c3484
Packit 5c3484
or both in parallel, as here.
Packit 5c3484
Packit 5c3484
The GNU MP Library is distributed in the hope that it will be useful, but
Packit 5c3484
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
Packit 5c3484
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
Packit 5c3484
for more details.
Packit 5c3484
Packit 5c3484
You should have received copies of the GNU General Public License and the
Packit 5c3484
GNU Lesser General Public License along with the GNU MP Library.  If not,
Packit 5c3484
see https://www.gnu.org/licenses/.  */
Packit 5c3484
Packit 5c3484
#include "gmp.h"
Packit 5c3484
#include "gmp-impl.h"
Packit 5c3484
#include "longlong.h"
Packit 5c3484
Packit 5c3484
#ifndef JACOBI_DC_THRESHOLD
Packit 5c3484
#define JACOBI_DC_THRESHOLD GCD_DC_THRESHOLD
Packit 5c3484
#endif
Packit 5c3484
Packit 5c3484
/* Schönhage's rules:
Packit 5c3484
 *
Packit 5c3484
 * Assume r0 = r1 q1 + r2, with r0 odd, and r1 = q2 r2 + r3
Packit 5c3484
 *
Packit 5c3484
 * If r1 is odd, then
Packit 5c3484
 *
Packit 5c3484
 *   (r1 | r0) = s(r1, r0) (r0 | r1) = s(r1, r0) (r2, r1)
Packit 5c3484
 *
Packit 5c3484
 * where s(x,y) = (-1)^{(x-1)(y-1)/4} = (-1)^[x = y = 3 (mod 4)].
Packit 5c3484
 *
Packit 5c3484
 * If r1 is even, r2 must be odd. We have
Packit 5c3484
 *
Packit 5c3484
 *   (r1 | r0) = (r1 - r0 | r0) = (-1)^(r0-1)/2 (r0 - r1 | r0)
Packit 5c3484
 *             = (-1)^(r0-1)/2 s(r0, r0 - r1) (r0 | r0 - r1)
Packit 5c3484
 *             = (-1)^(r0-1)/2 s(r0, r0 - r1) (r1 | r0 - r1)
Packit 5c3484
 *
Packit 5c3484
 * Now, if r1 = 0 (mod 4), then the sign factor is +1, and repeating
Packit 5c3484
 * q1 times gives
Packit 5c3484
 *
Packit 5c3484
 *   (r1 | r0) = (r1 | r2) = (r3 | r2)
Packit 5c3484
 *
Packit 5c3484
 * On the other hand, if r1 = 2 (mod 4), the sign factor is
Packit 5c3484
 * (-1)^{(r0-1)/2}, and repeating q1 times gives the exponent
Packit 5c3484
 *
Packit 5c3484
 *   (r0-1)/2 + (r0-r1-1)/2 + ... + (r0 - (q1-1) r1)/2
Packit 5c3484
 *   = q1 (r0-1)/2 + q1 (q1-1)/2
Packit 5c3484
 *
Packit 5c3484
 * and we can summarize the even case as
Packit 5c3484
 *
Packit 5c3484
 *   (r1 | r0) = t(r1, r0, q1) (r3 | r2)
Packit 5c3484
 *
Packit 5c3484
 * where t(x,y,q) = (-1)^{[x = 2 (mod 4)] (q(y-1)/2 + y(q-1)/2)}
Packit 5c3484
 *
Packit 5c3484
 * What about termination? The remainder sequence ends with (0|1) = 1
Packit 5c3484
 * (or (0 | r) = 0 if r != 1). What are the possible cases? If r1 is
Packit 5c3484
 * odd, r2 may be zero. If r1 is even, then r2 = r0 - q1 r1 is odd and
Packit 5c3484
 * hence non-zero. We may have r3 = r1 - q2 r2 = 0.
Packit 5c3484
 *
Packit 5c3484
 * Examples: (11|15) = - (15|11) = - (4|11)
Packit 5c3484
 *            (4|11) =    (4| 3) =   (1| 3)
Packit 5c3484
 *            (1| 3) = (3|1) = (0|1) = 1
Packit 5c3484
 *
Packit 5c3484
 *             (2|7) = (2|1) = (0|1) = 1
Packit 5c3484
 *
Packit 5c3484
 * Detail:     (2|7) = (2-7|7) = (-1|7)(5|7) = -(7|5) = -(2|5)
Packit 5c3484
 *             (2|5) = (2-5|5) = (-1|5)(3|5) =  (5|3) =  (2|3)
Packit 5c3484
 *             (2|3) = (2-3|3) = (-1|3)(1|3) = -(3|1) = -(2|1)
Packit 5c3484
 *
Packit 5c3484
 */
Packit 5c3484
Packit 5c3484
/* In principle, the state consists of four variables: e (one bit), a,
Packit 5c3484
   b (two bits each), d (one bit). Collected factors are (-1)^e. a and
Packit 5c3484
   b are the least significant bits of the current remainders. d
Packit 5c3484
   (denominator) is 0 if we're currently subtracting multiplies of a
Packit 5c3484
   from b, and 1 if we're subtracting b from a.
Packit 5c3484
Packit 5c3484
   e is stored in the least significant bit, while a, b and d are
Packit 5c3484
   coded as only 13 distinct values in bits 1-4, according to the
Packit 5c3484
   following table. For rows not mentioning d, the value is either
Packit 5c3484
   implied, or it doesn't matter. */
Packit 5c3484
Packit 5c3484
#if WANT_ASSERT
Packit 5c3484
static const struct
Packit 5c3484
{
Packit 5c3484
  unsigned char a;
Packit 5c3484
  unsigned char b;
Packit 5c3484
} decode_table[13] = {
Packit 5c3484
  /*  0 */ { 0, 1 },
Packit 5c3484
  /*  1 */ { 0, 3 },
Packit 5c3484
  /*  2 */ { 1, 1 },
Packit 5c3484
  /*  3 */ { 1, 3 },
Packit 5c3484
  /*  4 */ { 2, 1 },
Packit 5c3484
  /*  5 */ { 2, 3 },
Packit 5c3484
  /*  6 */ { 3, 1 },
Packit 5c3484
  /*  7 */ { 3, 3 }, /* d = 1 */
Packit 5c3484
  /*  8 */ { 1, 0 },
Packit 5c3484
  /*  9 */ { 1, 2 },
Packit 5c3484
  /* 10 */ { 3, 0 },
Packit 5c3484
  /* 11 */ { 3, 2 },
Packit 5c3484
  /* 12 */ { 3, 3 }, /* d = 0 */
Packit 5c3484
};
Packit 5c3484
#define JACOBI_A(bits) (decode_table[(bits)>>1].a)
Packit 5c3484
#define JACOBI_B(bits) (decode_table[(bits)>>1].b)
Packit 5c3484
#endif /* WANT_ASSERT */
Packit 5c3484
Packit 5c3484
const unsigned char jacobi_table[208] = {
Packit 5c3484
#include "jacobitab.h"
Packit 5c3484
};
Packit 5c3484
Packit 5c3484
#define BITS_FAIL 31
Packit 5c3484
Packit 5c3484
static void
Packit 5c3484
jacobi_hook (void *p, mp_srcptr gp, mp_size_t gn,
Packit 5c3484
	     mp_srcptr qp, mp_size_t qn, int d)
Packit 5c3484
{
Packit 5c3484
  unsigned *bitsp = (unsigned *) p;
Packit 5c3484
Packit 5c3484
  if (gp)
Packit 5c3484
    {
Packit 5c3484
      ASSERT (gn > 0);
Packit 5c3484
      if (gn != 1 || gp[0] != 1)
Packit 5c3484
	{
Packit 5c3484
	  *bitsp = BITS_FAIL;
Packit 5c3484
	  return;
Packit 5c3484
	}
Packit 5c3484
    }
Packit 5c3484
Packit 5c3484
  if (qp)
Packit 5c3484
    {
Packit 5c3484
      ASSERT (qn > 0);
Packit 5c3484
      ASSERT (d >= 0);
Packit 5c3484
      *bitsp = mpn_jacobi_update (*bitsp, d, qp[0] & 3);
Packit 5c3484
    }
Packit 5c3484
}
Packit 5c3484
Packit 5c3484
#define CHOOSE_P(n) (2*(n) / 3)
Packit 5c3484
Packit 5c3484
int
Packit 5c3484
mpn_jacobi_n (mp_ptr ap, mp_ptr bp, mp_size_t n, unsigned bits)
Packit 5c3484
{
Packit 5c3484
  mp_size_t scratch;
Packit 5c3484
  mp_size_t matrix_scratch;
Packit 5c3484
  mp_ptr tp;
Packit 5c3484
Packit 5c3484
  TMP_DECL;
Packit 5c3484
Packit 5c3484
  ASSERT (n > 0);
Packit 5c3484
  ASSERT ( (ap[n-1] | bp[n-1]) > 0);
Packit 5c3484
  ASSERT ( (bp[0] | ap[0]) & 1);
Packit 5c3484
Packit 5c3484
  /* FIXME: Check for small sizes first, before setting up temporary
Packit 5c3484
     storage etc. */
Packit 5c3484
  scratch = MPN_GCD_SUBDIV_STEP_ITCH(n);
Packit 5c3484
Packit 5c3484
  if (ABOVE_THRESHOLD (n, GCD_DC_THRESHOLD))
Packit 5c3484
    {
Packit 5c3484
      mp_size_t hgcd_scratch;
Packit 5c3484
      mp_size_t update_scratch;
Packit 5c3484
      mp_size_t p = CHOOSE_P (n);
Packit 5c3484
      mp_size_t dc_scratch;
Packit 5c3484
Packit 5c3484
      matrix_scratch = MPN_HGCD_MATRIX_INIT_ITCH (n - p);
Packit 5c3484
      hgcd_scratch = mpn_hgcd_itch (n - p);
Packit 5c3484
      update_scratch = p + n - 1;
Packit 5c3484
Packit 5c3484
      dc_scratch = matrix_scratch + MAX(hgcd_scratch, update_scratch);
Packit 5c3484
      if (dc_scratch > scratch)
Packit 5c3484
	scratch = dc_scratch;
Packit 5c3484
    }
Packit 5c3484
Packit 5c3484
  TMP_MARK;
Packit 5c3484
  tp = TMP_ALLOC_LIMBS(scratch);
Packit 5c3484
Packit 5c3484
  while (ABOVE_THRESHOLD (n, JACOBI_DC_THRESHOLD))
Packit 5c3484
    {
Packit 5c3484
      struct hgcd_matrix M;
Packit 5c3484
      mp_size_t p = 2*n/3;
Packit 5c3484
      mp_size_t matrix_scratch = MPN_HGCD_MATRIX_INIT_ITCH (n - p);
Packit 5c3484
      mp_size_t nn;
Packit 5c3484
      mpn_hgcd_matrix_init (&M, n - p, tp);
Packit 5c3484
Packit 5c3484
      nn = mpn_hgcd_jacobi (ap + p, bp + p, n - p, &M, &bits,
Packit 5c3484
			    tp + matrix_scratch);
Packit 5c3484
      if (nn > 0)
Packit 5c3484
	{
Packit 5c3484
	  ASSERT (M.n <= (n - p - 1)/2);
Packit 5c3484
	  ASSERT (M.n + p <= (p + n - 1) / 2);
Packit 5c3484
	  /* Temporary storage 2 (p + M->n) <= p + n - 1. */
Packit 5c3484
	  n = mpn_hgcd_matrix_adjust (&M, p + nn, ap, bp, p, tp + matrix_scratch);
Packit 5c3484
	}
Packit 5c3484
      else
Packit 5c3484
	{
Packit 5c3484
	  /* Temporary storage n */
Packit 5c3484
	  n = mpn_gcd_subdiv_step (ap, bp, n, 0, jacobi_hook, &bits, tp);
Packit 5c3484
	  if (!n)
Packit 5c3484
	    {
Packit 5c3484
	      TMP_FREE;
Packit 5c3484
	      return bits == BITS_FAIL ? 0 : mpn_jacobi_finish (bits);
Packit 5c3484
	    }
Packit 5c3484
	}
Packit 5c3484
    }
Packit 5c3484
Packit 5c3484
  while (n > 2)
Packit 5c3484
    {
Packit 5c3484
      struct hgcd_matrix1 M;
Packit 5c3484
      mp_limb_t ah, al, bh, bl;
Packit 5c3484
      mp_limb_t mask;
Packit 5c3484
Packit 5c3484
      mask = ap[n-1] | bp[n-1];
Packit 5c3484
      ASSERT (mask > 0);
Packit 5c3484
Packit 5c3484
      if (mask & GMP_NUMB_HIGHBIT)
Packit 5c3484
	{
Packit 5c3484
	  ah = ap[n-1]; al = ap[n-2];
Packit 5c3484
	  bh = bp[n-1]; bl = bp[n-2];
Packit 5c3484
	}
Packit 5c3484
      else
Packit 5c3484
	{
Packit 5c3484
	  int shift;
Packit 5c3484
Packit 5c3484
	  count_leading_zeros (shift, mask);
Packit 5c3484
	  ah = MPN_EXTRACT_NUMB (shift, ap[n-1], ap[n-2]);
Packit 5c3484
	  al = MPN_EXTRACT_NUMB (shift, ap[n-2], ap[n-3]);
Packit 5c3484
	  bh = MPN_EXTRACT_NUMB (shift, bp[n-1], bp[n-2]);
Packit 5c3484
	  bl = MPN_EXTRACT_NUMB (shift, bp[n-2], bp[n-3]);
Packit 5c3484
	}
Packit 5c3484
Packit 5c3484
      /* Try an mpn_nhgcd2 step */
Packit 5c3484
      if (mpn_hgcd2_jacobi (ah, al, bh, bl, &M, &bits))
Packit 5c3484
	{
Packit 5c3484
	  n = mpn_matrix22_mul1_inverse_vector (&M, tp, ap, bp, n);
Packit 5c3484
	  MP_PTR_SWAP (ap, tp);
Packit 5c3484
	}
Packit 5c3484
      else
Packit 5c3484
	{
Packit 5c3484
	  /* mpn_hgcd2 has failed. Then either one of a or b is very
Packit 5c3484
	     small, or the difference is very small. Perform one
Packit 5c3484
	     subtraction followed by one division. */
Packit 5c3484
	  n = mpn_gcd_subdiv_step (ap, bp, n, 0, &jacobi_hook, &bits, tp);
Packit 5c3484
	  if (!n)
Packit 5c3484
	    {
Packit 5c3484
	      TMP_FREE;
Packit 5c3484
	      return bits == BITS_FAIL ? 0 : mpn_jacobi_finish (bits);
Packit 5c3484
	    }
Packit 5c3484
	}
Packit 5c3484
    }
Packit 5c3484
Packit 5c3484
  if (bits >= 16)
Packit 5c3484
    MP_PTR_SWAP (ap, bp);
Packit 5c3484
Packit 5c3484
  ASSERT (bp[0] & 1);
Packit 5c3484
Packit 5c3484
  if (n == 1)
Packit 5c3484
    {
Packit 5c3484
      mp_limb_t al, bl;
Packit 5c3484
      al = ap[0];
Packit 5c3484
      bl = bp[0];
Packit 5c3484
Packit 5c3484
      TMP_FREE;
Packit 5c3484
      if (bl == 1)
Packit 5c3484
	return 1 - 2*(bits & 1);
Packit 5c3484
      else
Packit 5c3484
	return mpn_jacobi_base (al, bl, bits << 1);
Packit 5c3484
    }
Packit 5c3484
Packit 5c3484
  else
Packit 5c3484
    {
Packit 5c3484
      int res = mpn_jacobi_2 (ap, bp, bits & 1);
Packit 5c3484
      TMP_FREE;
Packit 5c3484
      return res;
Packit 5c3484
    }
Packit 5c3484
}