Blame mpn/generic/toom_interpolate_5pts.c

Packit 5c3484
/* mpn_toom_interpolate_5pts -- Interpolate for toom3, 33, 42.
Packit 5c3484
Packit 5c3484
   Contributed to the GNU project by Robert Harley.
Packit 5c3484
   Improvements by Paul Zimmermann and Marco Bodrato.
Packit 5c3484
Packit 5c3484
   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
Packit 5c3484
   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
Packit 5c3484
   GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
Packit 5c3484
Packit 5c3484
Copyright 2000-2003, 2005-2007, 2009 Free Software Foundation, Inc.
Packit 5c3484
Packit 5c3484
This file is part of the GNU MP Library.
Packit 5c3484
Packit 5c3484
The GNU MP Library is free software; you can redistribute it and/or modify
Packit 5c3484
it under the terms of either:
Packit 5c3484
Packit 5c3484
  * the GNU Lesser General Public License as published by the Free
Packit 5c3484
    Software Foundation; either version 3 of the License, or (at your
Packit 5c3484
    option) any later version.
Packit 5c3484
Packit 5c3484
or
Packit 5c3484
Packit 5c3484
  * the GNU General Public License as published by the Free Software
Packit 5c3484
    Foundation; either version 2 of the License, or (at your option) any
Packit 5c3484
    later version.
Packit 5c3484
Packit 5c3484
or both in parallel, as here.
Packit 5c3484
Packit 5c3484
The GNU MP Library is distributed in the hope that it will be useful, but
Packit 5c3484
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
Packit 5c3484
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
Packit 5c3484
for more details.
Packit 5c3484
Packit 5c3484
You should have received copies of the GNU General Public License and the
Packit 5c3484
GNU Lesser General Public License along with the GNU MP Library.  If not,
Packit 5c3484
see https://www.gnu.org/licenses/.  */
Packit 5c3484
Packit 5c3484
#include "gmp.h"
Packit 5c3484
#include "gmp-impl.h"
Packit 5c3484
Packit 5c3484
void
Packit 5c3484
mpn_toom_interpolate_5pts (mp_ptr c, mp_ptr v2, mp_ptr vm1,
Packit 5c3484
			   mp_size_t k, mp_size_t twor, int sa,
Packit 5c3484
			   mp_limb_t vinf0)
Packit 5c3484
{
Packit 5c3484
  mp_limb_t cy, saved;
Packit 5c3484
  mp_size_t twok;
Packit 5c3484
  mp_size_t kk1;
Packit 5c3484
  mp_ptr c1, v1, c3, vinf;
Packit 5c3484
Packit 5c3484
  twok = k + k;
Packit 5c3484
  kk1 = twok + 1;
Packit 5c3484
Packit 5c3484
  c1 = c  + k;
Packit 5c3484
  v1 = c1 + k;
Packit 5c3484
  c3 = v1 + k;
Packit 5c3484
  vinf = c3 + k;
Packit 5c3484
Packit 5c3484
#define v0 (c)
Packit 5c3484
  /* (1) v2 <- v2-vm1 < v2+|vm1|,       (16 8 4 2 1) - (1 -1 1 -1  1) =
Packit 5c3484
     thus 0 <= v2 < 50*B^(2k) < 2^6*B^(2k)             (15 9 3  3  0)
Packit 5c3484
  */
Packit 5c3484
  if (sa)
Packit 5c3484
    ASSERT_NOCARRY (mpn_add_n (v2, v2, vm1, kk1));
Packit 5c3484
  else
Packit 5c3484
    ASSERT_NOCARRY (mpn_sub_n (v2, v2, vm1, kk1));
Packit 5c3484
Packit 5c3484
  /* {c,2k} {c+2k,2k+1} {c+4k+1,2r-1} {t,2k+1} {t+2k+1,2k+1} {t+4k+2,2r}
Packit 5c3484
       v0       v1       hi(vinf)       |vm1|     v2-vm1      EMPTY */
Packit 5c3484
Packit 5c3484
  ASSERT_NOCARRY (mpn_divexact_by3 (v2, v2, kk1));    /* v2 <- v2 / 3 */
Packit 5c3484
						      /* (5 3 1 1 0)*/
Packit 5c3484
Packit 5c3484
  /* {c,2k} {c+2k,2k+1} {c+4k+1,2r-1} {t,2k+1} {t+2k+1,2k+1} {t+4k+2,2r}
Packit 5c3484
       v0       v1      hi(vinf)       |vm1|     (v2-vm1)/3    EMPTY */
Packit 5c3484
Packit 5c3484
  /* (2) vm1 <- tm1 := (v1 - vm1) / 2  [(1 1 1 1 1) - (1 -1 1 -1 1)] / 2 =
Packit 5c3484
     tm1 >= 0                                         (0  1 0  1 0)
Packit 5c3484
     No carry comes out from {v1, kk1} +/- {vm1, kk1},
Packit 5c3484
     and the division by two is exact.
Packit 5c3484
     If (sa!=0) the sign of vm1 is negative */
Packit 5c3484
  if (sa)
Packit 5c3484
    {
Packit 5c3484
#ifdef HAVE_NATIVE_mpn_rsh1add_n
Packit 5c3484
      mpn_rsh1add_n (vm1, v1, vm1, kk1);
Packit 5c3484
#else
Packit 5c3484
      ASSERT_NOCARRY (mpn_add_n (vm1, v1, vm1, kk1));
Packit 5c3484
      ASSERT_NOCARRY (mpn_rshift (vm1, vm1, kk1, 1));
Packit 5c3484
#endif
Packit 5c3484
    }
Packit 5c3484
  else
Packit 5c3484
    {
Packit 5c3484
#ifdef HAVE_NATIVE_mpn_rsh1sub_n
Packit 5c3484
      mpn_rsh1sub_n (vm1, v1, vm1, kk1);
Packit 5c3484
#else
Packit 5c3484
      ASSERT_NOCARRY (mpn_sub_n (vm1, v1, vm1, kk1));
Packit 5c3484
      ASSERT_NOCARRY (mpn_rshift (vm1, vm1, kk1, 1));
Packit 5c3484
#endif
Packit 5c3484
    }
Packit 5c3484
Packit 5c3484
  /* {c,2k} {c+2k,2k+1} {c+4k+1,2r-1} {t,2k+1} {t+2k+1,2k+1} {t+4k+2,2r}
Packit 5c3484
       v0       v1        hi(vinf)       tm1     (v2-vm1)/3    EMPTY */
Packit 5c3484
Packit 5c3484
  /* (3) v1 <- t1 := v1 - v0    (1 1 1 1 1) - (0 0 0 0 1) = (1 1 1 1 0)
Packit 5c3484
     t1 >= 0
Packit 5c3484
  */
Packit 5c3484
  vinf[0] -= mpn_sub_n (v1, v1, c, twok);
Packit 5c3484
Packit 5c3484
  /* {c,2k} {c+2k,2k+1} {c+4k+1,2r-1} {t,2k+1} {t+2k+1,2k+1} {t+4k+2,2r}
Packit 5c3484
       v0     v1-v0        hi(vinf)       tm1     (v2-vm1)/3    EMPTY */
Packit 5c3484
Packit 5c3484
  /* (4) v2 <- t2 := ((v2-vm1)/3-t1)/2 = (v2-vm1-3*t1)/6
Packit 5c3484
     t2 >= 0                  [(5 3 1 1 0) - (1 1 1 1 0)]/2 = (2 1 0 0 0)
Packit 5c3484
  */
Packit 5c3484
#ifdef HAVE_NATIVE_mpn_rsh1sub_n
Packit 5c3484
  mpn_rsh1sub_n (v2, v2, v1, kk1);
Packit 5c3484
#else
Packit 5c3484
  ASSERT_NOCARRY (mpn_sub_n (v2, v2, v1, kk1));
Packit 5c3484
  ASSERT_NOCARRY (mpn_rshift (v2, v2, kk1, 1));
Packit 5c3484
#endif
Packit 5c3484
Packit 5c3484
  /* {c,2k} {c+2k,2k+1} {c+4k+1,2r-1} {t,2k+1} {t+2k+1,2k+1} {t+4k+2,2r}
Packit 5c3484
       v0     v1-v0        hi(vinf)     tm1    (v2-vm1-3t1)/6    EMPTY */
Packit 5c3484
Packit 5c3484
  /* (5) v1 <- t1-tm1           (1 1 1 1 0) - (0 1 0 1 0) = (1 0 1 0 0)
Packit 5c3484
     result is v1 >= 0
Packit 5c3484
  */
Packit 5c3484
  ASSERT_NOCARRY (mpn_sub_n (v1, v1, vm1, kk1));
Packit 5c3484
Packit 5c3484
  /* We do not need to read the value in vm1, so we add it in {c+k, ...} */
Packit 5c3484
  cy = mpn_add_n (c1, c1, vm1, kk1);
Packit 5c3484
  MPN_INCR_U (c3 + 1, twor + k - 1, cy); /* 2n-(3k+1) = 2r+k-1 */
Packit 5c3484
  /* Memory allocated for vm1 is now free, it can be recycled ...*/
Packit 5c3484
Packit 5c3484
  /* (6) v2 <- v2 - 2*vinf,     (2 1 0 0 0) - 2*(1 0 0 0 0) = (0 1 0 0 0)
Packit 5c3484
     result is v2 >= 0 */
Packit 5c3484
  saved = vinf[0];       /* Remember v1's highest byte (will be overwritten). */
Packit 5c3484
  vinf[0] = vinf0;       /* Set the right value for vinf0                     */
Packit 5c3484
#ifdef HAVE_NATIVE_mpn_sublsh1_n_ip1
Packit 5c3484
  cy = mpn_sublsh1_n_ip1 (v2, vinf, twor);
Packit 5c3484
#else
Packit 5c3484
  /* Overwrite unused vm1 */
Packit 5c3484
  cy = mpn_lshift (vm1, vinf, twor, 1);
Packit 5c3484
  cy += mpn_sub_n (v2, v2, vm1, twor);
Packit 5c3484
#endif
Packit 5c3484
  MPN_DECR_U (v2 + twor, kk1 - twor, cy);
Packit 5c3484
Packit 5c3484
  /* Current matrix is
Packit 5c3484
     [1 0 0 0 0; vinf
Packit 5c3484
      0 1 0 0 0; v2
Packit 5c3484
      1 0 1 0 0; v1
Packit 5c3484
      0 1 0 1 0; vm1
Packit 5c3484
      0 0 0 0 1] v0
Packit 5c3484
     Some values already are in-place (we added vm1 in the correct position)
Packit 5c3484
     | vinf|  v1 |  v0 |
Packit 5c3484
	      | vm1 |
Packit 5c3484
     One still is in a separated area
Packit 5c3484
	| +v2 |
Packit 5c3484
     We have to compute v1-=vinf; vm1 -= v2,
Packit 5c3484
	   |-vinf|
Packit 5c3484
	      | -v2 |
Packit 5c3484
     Carefully reordering operations we can avoid to compute twice the sum
Packit 5c3484
     of the high half of v2 plus the low half of vinf.
Packit 5c3484
  */
Packit 5c3484
Packit 5c3484
  /* Add the high half of t2 in {vinf} */
Packit 5c3484
  if ( LIKELY(twor > k + 1) ) { /* This is the expected flow  */
Packit 5c3484
    cy = mpn_add_n (vinf, vinf, v2 + k, k + 1);
Packit 5c3484
    MPN_INCR_U (c3 + kk1, twor - k - 1, cy); /* 2n-(5k+1) = 2r-k-1 */
Packit 5c3484
  } else { /* triggered only by very unbalanced cases like
Packit 5c3484
	      (k+k+(k-2))x(k+k+1) , should be handled by toom32 */
Packit 5c3484
    ASSERT_NOCARRY (mpn_add_n (vinf, vinf, v2 + k, twor));
Packit 5c3484
  }
Packit 5c3484
  /* (7) v1 <- v1 - vinf,       (1 0 1 0 0) - (1 0 0 0 0) = (0 0 1 0 0)
Packit 5c3484
     result is >= 0 */
Packit 5c3484
  /* Side effect: we also subtracted (high half) vm1 -= v2 */
Packit 5c3484
  cy = mpn_sub_n (v1, v1, vinf, twor);          /* vinf is at most twor long.  */
Packit 5c3484
  vinf0 = vinf[0];                     /* Save again the right value for vinf0 */
Packit 5c3484
  vinf[0] = saved;
Packit 5c3484
  MPN_DECR_U (v1 + twor, kk1 - twor, cy);       /* Treat the last bytes.       */
Packit 5c3484
Packit 5c3484
  /* (8) vm1 <- vm1-v2          (0 1 0 1 0) - (0 1 0 0 0) = (0 0 0 1 0)
Packit 5c3484
     Operate only on the low half.
Packit 5c3484
  */
Packit 5c3484
  cy = mpn_sub_n (c1, c1, v2, k);
Packit 5c3484
  MPN_DECR_U (v1, kk1, cy);
Packit 5c3484
Packit 5c3484
  /********************* Beginning the final phase **********************/
Packit 5c3484
Packit 5c3484
  /* Most of the recomposition was done */
Packit 5c3484
Packit 5c3484
  /* add t2 in {c+3k, ...}, but only the low half */
Packit 5c3484
  cy = mpn_add_n (c3, c3, v2, k);
Packit 5c3484
  vinf[0] += cy;
Packit 5c3484
  ASSERT(vinf[0] >= cy); /* No carry */
Packit 5c3484
  MPN_INCR_U (vinf, twor, vinf0); /* Add vinf0, propagate carry. */
Packit 5c3484
Packit 5c3484
#undef v0
Packit 5c3484
}