Blame mpn/generic/toom_interpolate_6pts.c

Packit 5c3484
/* mpn_toom_interpolate_6pts -- Interpolate for toom43, 52
Packit 5c3484
Packit 5c3484
   Contributed to the GNU project by Marco Bodrato.
Packit 5c3484
Packit 5c3484
   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
Packit 5c3484
   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
Packit 5c3484
   GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
Packit 5c3484
Packit 5c3484
Copyright 2009, 2010, 2012 Free Software Foundation, Inc.
Packit 5c3484
Packit 5c3484
This file is part of the GNU MP Library.
Packit 5c3484
Packit 5c3484
The GNU MP Library is free software; you can redistribute it and/or modify
Packit 5c3484
it under the terms of either:
Packit 5c3484
Packit 5c3484
  * the GNU Lesser General Public License as published by the Free
Packit 5c3484
    Software Foundation; either version 3 of the License, or (at your
Packit 5c3484
    option) any later version.
Packit 5c3484
Packit 5c3484
or
Packit 5c3484
Packit 5c3484
  * the GNU General Public License as published by the Free Software
Packit 5c3484
    Foundation; either version 2 of the License, or (at your option) any
Packit 5c3484
    later version.
Packit 5c3484
Packit 5c3484
or both in parallel, as here.
Packit 5c3484
Packit 5c3484
The GNU MP Library is distributed in the hope that it will be useful, but
Packit 5c3484
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
Packit 5c3484
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
Packit 5c3484
for more details.
Packit 5c3484
Packit 5c3484
You should have received copies of the GNU General Public License and the
Packit 5c3484
GNU Lesser General Public License along with the GNU MP Library.  If not,
Packit 5c3484
see https://www.gnu.org/licenses/.  */
Packit 5c3484
Packit 5c3484
#include "gmp.h"
Packit 5c3484
#include "gmp-impl.h"
Packit 5c3484
Packit 5c3484
/* For odd divisors, mpn_divexact_1 works fine with two's complement. */
Packit 5c3484
#ifndef mpn_divexact_by3
Packit 5c3484
#if HAVE_NATIVE_mpn_pi1_bdiv_q_1 && MODLIMB_INVERSE_3
Packit 5c3484
#define mpn_divexact_by3(dst,src,size) mpn_pi1_bdiv_q_1(dst,src,size,3,MODLIMB_INVERSE_3,0)
Packit 5c3484
#else
Packit 5c3484
#define mpn_divexact_by3(dst,src,size) mpn_divexact_1(dst,src,size,3)
Packit 5c3484
#endif
Packit 5c3484
#endif
Packit 5c3484
Packit 5c3484
/* Interpolation for Toom-3.5, using the evaluation points: infinity,
Packit 5c3484
   1, -1, 2, -2. More precisely, we want to compute
Packit 5c3484
   f(2^(GMP_NUMB_BITS * n)) for a polynomial f of degree 5, given the
Packit 5c3484
   six values
Packit 5c3484
Packit 5c3484
     w5 = f(0),
Packit 5c3484
     w4 = f(-1),
Packit 5c3484
     w3 = f(1)
Packit 5c3484
     w2 = f(-2),
Packit 5c3484
     w1 = f(2),
Packit 5c3484
     w0 = limit at infinity of f(x) / x^5,
Packit 5c3484
Packit 5c3484
   The result is stored in {pp, 5*n + w0n}. At entry, w5 is stored at
Packit 5c3484
   {pp, 2n}, w3 is stored at {pp + 2n, 2n+1}, and w0 is stored at
Packit 5c3484
   {pp + 5n, w0n}. The other values are 2n + 1 limbs each (with most
Packit 5c3484
   significant limbs small). f(-1) and f(-2) may be negative, signs
Packit 5c3484
   determined by the flag bits. All intermediate results are positive.
Packit 5c3484
   Inputs are destroyed.
Packit 5c3484
Packit 5c3484
   Interpolation sequence was taken from the paper: "Integer and
Packit 5c3484
   Polynomial Multiplication: Towards Optimal Toom-Cook Matrices".
Packit 5c3484
   Some slight variations were introduced: adaptation to "gmp
Packit 5c3484
   instruction set", and a final saving of an operation by interlacing
Packit 5c3484
   interpolation and recomposition phases.
Packit 5c3484
*/
Packit 5c3484
Packit 5c3484
void
Packit 5c3484
mpn_toom_interpolate_6pts (mp_ptr pp, mp_size_t n, enum toom6_flags flags,
Packit 5c3484
			   mp_ptr w4, mp_ptr w2, mp_ptr w1,
Packit 5c3484
			   mp_size_t w0n)
Packit 5c3484
{
Packit 5c3484
  mp_limb_t cy;
Packit 5c3484
  /* cy6 can be stored in w1[2*n], cy4 in w4[0], embankment in w2[0] */
Packit 5c3484
  mp_limb_t cy4, cy6, embankment;
Packit 5c3484
Packit 5c3484
  ASSERT( n > 0 );
Packit 5c3484
  ASSERT( 2*n >= w0n && w0n > 0 );
Packit 5c3484
Packit 5c3484
#define w5  pp					/* 2n   */
Packit 5c3484
#define w3  (pp + 2 * n)			/* 2n+1 */
Packit 5c3484
#define w0  (pp + 5 * n)			/* w0n  */
Packit 5c3484
Packit 5c3484
  /* Interpolate with sequence:
Packit 5c3484
     W2 =(W1 - W2)>>2
Packit 5c3484
     W1 =(W1 - W5)>>1
Packit 5c3484
     W1 =(W1 - W2)>>1
Packit 5c3484
     W4 =(W3 - W4)>>1
Packit 5c3484
     W2 =(W2 - W4)/3
Packit 5c3484
     W3 = W3 - W4 - W5
Packit 5c3484
     W1 =(W1 - W3)/3
Packit 5c3484
     // Last steps are mixed with recomposition...
Packit 5c3484
     W2 = W2 - W0<<2
Packit 5c3484
     W4 = W4 - W2
Packit 5c3484
     W3 = W3 - W1
Packit 5c3484
     W2 = W2 - W0
Packit 5c3484
  */
Packit 5c3484
Packit 5c3484
  /* W2 =(W1 - W2)>>2 */
Packit 5c3484
  if (flags & toom6_vm2_neg)
Packit 5c3484
    mpn_add_n (w2, w1, w2, 2 * n + 1);
Packit 5c3484
  else
Packit 5c3484
    mpn_sub_n (w2, w1, w2, 2 * n + 1);
Packit 5c3484
  mpn_rshift (w2, w2, 2 * n + 1, 2);
Packit 5c3484
Packit 5c3484
  /* W1 =(W1 - W5)>>1 */
Packit 5c3484
  w1[2*n] -= mpn_sub_n (w1, w1, w5, 2*n);
Packit 5c3484
  mpn_rshift (w1, w1, 2 * n + 1, 1);
Packit 5c3484
Packit 5c3484
  /* W1 =(W1 - W2)>>1 */
Packit 5c3484
#if HAVE_NATIVE_mpn_rsh1sub_n
Packit 5c3484
  mpn_rsh1sub_n (w1, w1, w2, 2 * n + 1);
Packit 5c3484
#else
Packit 5c3484
  mpn_sub_n (w1, w1, w2, 2 * n + 1);
Packit 5c3484
  mpn_rshift (w1, w1, 2 * n + 1, 1);
Packit 5c3484
#endif
Packit 5c3484
Packit 5c3484
  /* W4 =(W3 - W4)>>1 */
Packit 5c3484
  if (flags & toom6_vm1_neg)
Packit 5c3484
    {
Packit 5c3484
#if HAVE_NATIVE_mpn_rsh1add_n
Packit 5c3484
      mpn_rsh1add_n (w4, w3, w4, 2 * n + 1);
Packit 5c3484
#else
Packit 5c3484
      mpn_add_n (w4, w3, w4, 2 * n + 1);
Packit 5c3484
      mpn_rshift (w4, w4, 2 * n + 1, 1);
Packit 5c3484
#endif
Packit 5c3484
    }
Packit 5c3484
  else
Packit 5c3484
    {
Packit 5c3484
#if HAVE_NATIVE_mpn_rsh1sub_n
Packit 5c3484
      mpn_rsh1sub_n (w4, w3, w4, 2 * n + 1);
Packit 5c3484
#else
Packit 5c3484
      mpn_sub_n (w4, w3, w4, 2 * n + 1);
Packit 5c3484
      mpn_rshift (w4, w4, 2 * n + 1, 1);
Packit 5c3484
#endif
Packit 5c3484
    }
Packit 5c3484
Packit 5c3484
  /* W2 =(W2 - W4)/3 */
Packit 5c3484
  mpn_sub_n (w2, w2, w4, 2 * n + 1);
Packit 5c3484
  mpn_divexact_by3 (w2, w2, 2 * n + 1);
Packit 5c3484
Packit 5c3484
  /* W3 = W3 - W4 - W5 */
Packit 5c3484
  mpn_sub_n (w3, w3, w4, 2 * n + 1);
Packit 5c3484
  w3[2 * n] -= mpn_sub_n (w3, w3, w5, 2 * n);
Packit 5c3484
Packit 5c3484
  /* W1 =(W1 - W3)/3 */
Packit 5c3484
  mpn_sub_n (w1, w1, w3, 2 * n + 1);
Packit 5c3484
  mpn_divexact_by3 (w1, w1, 2 * n + 1);
Packit 5c3484
Packit 5c3484
  /*
Packit 5c3484
    [1 0 0 0 0 0;
Packit 5c3484
     0 1 0 0 0 0;
Packit 5c3484
     1 0 1 0 0 0;
Packit 5c3484
     0 1 0 1 0 0;
Packit 5c3484
     1 0 1 0 1 0;
Packit 5c3484
     0 0 0 0 0 1]
Packit 5c3484
Packit 5c3484
    pp[] prior to operations:
Packit 5c3484
     |_H w0__|_L w0__|______||_H w3__|_L w3__|_H w5__|_L w5__|
Packit 5c3484
Packit 5c3484
    summation scheme for remaining operations:
Packit 5c3484
     |______________5|n_____4|n_____3|n_____2|n______|n______|pp
Packit 5c3484
     |_H w0__|_L w0__|______||_H w3__|_L w3__|_H w5__|_L w5__|
Packit 5c3484
				    || H w4  | L w4  |
Packit 5c3484
		    || H w2  | L w2  |
Packit 5c3484
	    || H w1  | L w1  |
Packit 5c3484
			    ||-H w1  |-L w1  |
Packit 5c3484
		     |-H w0  |-L w0 ||-H w2  |-L w2  |
Packit 5c3484
  */
Packit 5c3484
  cy = mpn_add_n (pp + n, pp + n, w4, 2 * n + 1);
Packit 5c3484
  MPN_INCR_U (pp + 3 * n + 1, n, cy);
Packit 5c3484
Packit 5c3484
  /* W2 -= W0<<2 */
Packit 5c3484
#if HAVE_NATIVE_mpn_sublsh_n || HAVE_NATIVE_mpn_sublsh2_n_ip1
Packit 5c3484
#if HAVE_NATIVE_mpn_sublsh2_n_ip1
Packit 5c3484
  cy = mpn_sublsh2_n_ip1 (w2, w0, w0n);
Packit 5c3484
#else
Packit 5c3484
  cy = mpn_sublsh_n (w2, w2, w0, w0n, 2);
Packit 5c3484
#endif
Packit 5c3484
#else
Packit 5c3484
  /* {W4,2*n+1} is now free and can be overwritten. */
Packit 5c3484
  cy = mpn_lshift(w4, w0, w0n, 2);
Packit 5c3484
  cy+= mpn_sub_n(w2, w2, w4, w0n);
Packit 5c3484
#endif
Packit 5c3484
  MPN_DECR_U (w2 + w0n, 2 * n + 1 - w0n, cy);
Packit 5c3484
Packit 5c3484
  /* W4L = W4L - W2L */
Packit 5c3484
  cy = mpn_sub_n (pp + n, pp + n, w2, n);
Packit 5c3484
  MPN_DECR_U (w3, 2 * n + 1, cy);
Packit 5c3484
Packit 5c3484
  /* W3H = W3H + W2L */
Packit 5c3484
  cy4 = w3[2 * n] + mpn_add_n (pp + 3 * n, pp + 3 * n, w2, n);
Packit 5c3484
  /* W1L + W2H */
Packit 5c3484
  cy = w2[2 * n] + mpn_add_n (pp + 4 * n, w1, w2 + n, n);
Packit 5c3484
  MPN_INCR_U (w1 + n, n + 1, cy);
Packit 5c3484
Packit 5c3484
  /* W0 = W0 + W1H */
Packit 5c3484
  if (LIKELY (w0n > n))
Packit 5c3484
    cy6 = w1[2 * n] + mpn_add_n (w0, w0, w1 + n, n);
Packit 5c3484
  else
Packit 5c3484
    cy6 = mpn_add_n (w0, w0, w1 + n, w0n);
Packit 5c3484
Packit 5c3484
  /*
Packit 5c3484
    summation scheme for the next operation:
Packit 5c3484
     |...____5|n_____4|n_____3|n_____2|n______|n______|pp
Packit 5c3484
     |...w0___|_w1_w2_|_H w3__|_L w3__|_H w5__|_L w5__|
Packit 5c3484
		     ...-w0___|-w1_w2 |
Packit 5c3484
  */
Packit 5c3484
  /* if(LIKELY(w0n>n)) the two operands below DO overlap! */
Packit 5c3484
  cy = mpn_sub_n (pp + 2 * n, pp + 2 * n, pp + 4 * n, n + w0n);
Packit 5c3484
Packit 5c3484
  /* embankment is a "dirty trick" to avoid carry/borrow propagation
Packit 5c3484
     beyond allocated memory */
Packit 5c3484
  embankment = w0[w0n - 1] - 1;
Packit 5c3484
  w0[w0n - 1] = 1;
Packit 5c3484
  if (LIKELY (w0n > n)) {
Packit 5c3484
    if (cy4 > cy6)
Packit 5c3484
      MPN_INCR_U (pp + 4 * n, w0n + n, cy4 - cy6);
Packit 5c3484
    else
Packit 5c3484
      MPN_DECR_U (pp + 4 * n, w0n + n, cy6 - cy4);
Packit 5c3484
    MPN_DECR_U (pp + 3 * n + w0n, 2 * n, cy);
Packit 5c3484
    MPN_INCR_U (w0 + n, w0n - n, cy6);
Packit 5c3484
  } else {
Packit 5c3484
    MPN_INCR_U (pp + 4 * n, w0n + n, cy4);
Packit 5c3484
    MPN_DECR_U (pp + 3 * n + w0n, 2 * n, cy + cy6);
Packit 5c3484
  }
Packit 5c3484
  w0[w0n - 1] += embankment;
Packit 5c3484
Packit 5c3484
#undef w5
Packit 5c3484
#undef w3
Packit 5c3484
#undef w0
Packit 5c3484
Packit 5c3484
}