Blame mpn/generic/toom_interpolate_8pts.c

Packit 5c3484
/* mpn_toom_interpolate_8pts -- Interpolate for toom54, 63, 72.
Packit 5c3484
Packit 5c3484
   Contributed to the GNU project by Marco Bodrato.
Packit 5c3484
Packit 5c3484
   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
Packit 5c3484
   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
Packit 5c3484
   GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
Packit 5c3484
Packit 5c3484
Copyright 2009, 2011, 2012 Free Software Foundation, Inc.
Packit 5c3484
Packit 5c3484
This file is part of the GNU MP Library.
Packit 5c3484
Packit 5c3484
The GNU MP Library is free software; you can redistribute it and/or modify
Packit 5c3484
it under the terms of either:
Packit 5c3484
Packit 5c3484
  * the GNU Lesser General Public License as published by the Free
Packit 5c3484
    Software Foundation; either version 3 of the License, or (at your
Packit 5c3484
    option) any later version.
Packit 5c3484
Packit 5c3484
or
Packit 5c3484
Packit 5c3484
  * the GNU General Public License as published by the Free Software
Packit 5c3484
    Foundation; either version 2 of the License, or (at your option) any
Packit 5c3484
    later version.
Packit 5c3484
Packit 5c3484
or both in parallel, as here.
Packit 5c3484
Packit 5c3484
The GNU MP Library is distributed in the hope that it will be useful, but
Packit 5c3484
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
Packit 5c3484
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
Packit 5c3484
for more details.
Packit 5c3484
Packit 5c3484
You should have received copies of the GNU General Public License and the
Packit 5c3484
GNU Lesser General Public License along with the GNU MP Library.  If not,
Packit 5c3484
see https://www.gnu.org/licenses/.  */
Packit 5c3484
Packit 5c3484
#include "gmp.h"
Packit 5c3484
#include "gmp-impl.h"
Packit 5c3484
Packit 5c3484
#define BINVERT_3 MODLIMB_INVERSE_3
Packit 5c3484
Packit 5c3484
#define BINVERT_15 \
Packit 5c3484
  ((((GMP_NUMB_MAX >> (GMP_NUMB_BITS % 4)) / 15) * 14 * 16 & GMP_NUMB_MAX) + 15)
Packit 5c3484
Packit 5c3484
#define BINVERT_45 ((BINVERT_15 * BINVERT_3) & GMP_NUMB_MASK)
Packit 5c3484
Packit 5c3484
#ifndef mpn_divexact_by3
Packit 5c3484
#if HAVE_NATIVE_mpn_pi1_bdiv_q_1
Packit 5c3484
#define mpn_divexact_by3(dst,src,size) mpn_pi1_bdiv_q_1(dst,src,size,3,BINVERT_3,0)
Packit 5c3484
#else
Packit 5c3484
#define mpn_divexact_by3(dst,src,size) mpn_divexact_1(dst,src,size,3)
Packit 5c3484
#endif
Packit 5c3484
#endif
Packit 5c3484
Packit 5c3484
#ifndef mpn_divexact_by45
Packit 5c3484
#if GMP_NUMB_BITS % 12 == 0
Packit 5c3484
#define mpn_divexact_by45(dst,src,size) \
Packit 5c3484
  (63 & 19 * mpn_bdiv_dbm1 (dst, src, size, __GMP_CAST (mp_limb_t, GMP_NUMB_MASK / 45)))
Packit 5c3484
#else
Packit 5c3484
#if HAVE_NATIVE_mpn_pi1_bdiv_q_1
Packit 5c3484
#define mpn_divexact_by45(dst,src,size) mpn_pi1_bdiv_q_1(dst,src,size,45,BINVERT_45,0)
Packit 5c3484
#else
Packit 5c3484
#define mpn_divexact_by45(dst,src,size) mpn_divexact_1(dst,src,size,45)
Packit 5c3484
#endif
Packit 5c3484
#endif
Packit 5c3484
#endif
Packit 5c3484
Packit 5c3484
#if HAVE_NATIVE_mpn_sublsh2_n_ip1
Packit 5c3484
#define DO_mpn_sublsh2_n(dst,src,n,ws) mpn_sublsh2_n_ip1(dst,src,n)
Packit 5c3484
#else
Packit 5c3484
#define DO_mpn_sublsh2_n(dst,src,n,ws) DO_mpn_sublsh_n(dst,src,n,2,ws)
Packit 5c3484
#endif
Packit 5c3484
Packit 5c3484
#if HAVE_NATIVE_mpn_sublsh_n
Packit 5c3484
#define DO_mpn_sublsh_n(dst,src,n,s,ws) mpn_sublsh_n (dst,dst,src,n,s)
Packit 5c3484
#else
Packit 5c3484
static mp_limb_t
Packit 5c3484
DO_mpn_sublsh_n (mp_ptr dst, mp_srcptr src, mp_size_t n, unsigned int s, mp_ptr ws)
Packit 5c3484
{
Packit 5c3484
#if USE_MUL_1 && 0
Packit 5c3484
  return mpn_submul_1(dst,src,n,CNST_LIMB(1) <<(s));
Packit 5c3484
#else
Packit 5c3484
  mp_limb_t __cy;
Packit 5c3484
  __cy = mpn_lshift (ws,src,n,s);
Packit 5c3484
  return __cy + mpn_sub_n (dst,dst,ws,n);
Packit 5c3484
#endif
Packit 5c3484
}
Packit 5c3484
#endif
Packit 5c3484
Packit 5c3484
Packit 5c3484
#if HAVE_NATIVE_mpn_subrsh
Packit 5c3484
#define DO_mpn_subrsh(dst,nd,src,ns,s,ws) mpn_subrsh (dst,nd,src,ns,s)
Packit 5c3484
#else
Packit 5c3484
/* This is not a correct definition, it assumes no carry */
Packit 5c3484
#define DO_mpn_subrsh(dst,nd,src,ns,s,ws)				\
Packit 5c3484
do {									\
Packit 5c3484
  mp_limb_t __cy;							\
Packit 5c3484
  MPN_DECR_U (dst, nd, src[0] >> s);					\
Packit 5c3484
  __cy = DO_mpn_sublsh_n (dst, src + 1, ns - 1, GMP_NUMB_BITS - s, ws);	\
Packit 5c3484
  MPN_DECR_U (dst + ns - 1, nd - ns + 1, __cy);				\
Packit 5c3484
} while (0)
Packit 5c3484
#endif
Packit 5c3484
Packit 5c3484
/* Interpolation for Toom-4.5 (or Toom-4), using the evaluation
Packit 5c3484
   points: infinity(4.5 only), 4, -4, 2, -2, 1, -1, 0. More precisely,
Packit 5c3484
   we want to compute f(2^(GMP_NUMB_BITS * n)) for a polynomial f of
Packit 5c3484
   degree 7 (or 6), given the 8 (rsp. 7) values:
Packit 5c3484
Packit 5c3484
     r1 = limit at infinity of f(x) / x^7,
Packit 5c3484
     r2 = f(4),
Packit 5c3484
     r3 = f(-4),
Packit 5c3484
     r4 = f(2),
Packit 5c3484
     r5 = f(-2),
Packit 5c3484
     r6 = f(1),
Packit 5c3484
     r7 = f(-1),
Packit 5c3484
     r8 = f(0).
Packit 5c3484
Packit 5c3484
   All couples of the form f(n),f(-n) must be already mixed with
Packit 5c3484
   toom_couple_handling(f(n),...,f(-n),...)
Packit 5c3484
Packit 5c3484
   The result is stored in {pp, spt + 7*n (or 6*n)}.
Packit 5c3484
   At entry, r8 is stored at {pp, 2n},
Packit 5c3484
   r5 is stored at {pp + 3n, 3n + 1}.
Packit 5c3484
Packit 5c3484
   The other values are 2n+... limbs each (with most significant limbs small).
Packit 5c3484
Packit 5c3484
   All intermediate results are positive.
Packit 5c3484
   Inputs are destroyed.
Packit 5c3484
*/
Packit 5c3484
Packit 5c3484
void
Packit 5c3484
mpn_toom_interpolate_8pts (mp_ptr pp, mp_size_t n,
Packit 5c3484
			   mp_ptr r3, mp_ptr r7,
Packit 5c3484
			   mp_size_t spt, mp_ptr ws)
Packit 5c3484
{
Packit 5c3484
  mp_limb_signed_t cy;
Packit 5c3484
  mp_ptr r5, r1;
Packit 5c3484
  r5 = (pp + 3 * n);			/* 3n+1 */
Packit 5c3484
  r1 = (pp + 7 * n);			/* spt */
Packit 5c3484
Packit 5c3484
  /******************************* interpolation *****************************/
Packit 5c3484
Packit 5c3484
  DO_mpn_subrsh(r3+n, 2 * n + 1, pp, 2 * n, 4, ws);
Packit 5c3484
  cy = DO_mpn_sublsh_n (r3, r1, spt, 12, ws);
Packit 5c3484
  MPN_DECR_U (r3 + spt, 3 * n + 1 - spt, cy);
Packit 5c3484
Packit 5c3484
  DO_mpn_subrsh(r5+n, 2 * n + 1, pp, 2 * n, 2, ws);
Packit 5c3484
  cy = DO_mpn_sublsh_n (r5, r1, spt, 6, ws);
Packit 5c3484
  MPN_DECR_U (r5 + spt, 3 * n + 1 - spt, cy);
Packit 5c3484
Packit 5c3484
  r7[3*n] -= mpn_sub_n (r7+n, r7+n, pp, 2 * n);
Packit 5c3484
  cy = mpn_sub_n (r7, r7, r1, spt);
Packit 5c3484
  MPN_DECR_U (r7 + spt, 3 * n + 1 - spt, cy);
Packit 5c3484
Packit 5c3484
  ASSERT_NOCARRY(mpn_sub_n (r3, r3, r5, 3 * n + 1));
Packit 5c3484
  ASSERT_NOCARRY(mpn_rshift(r3, r3, 3 * n + 1, 2));
Packit 5c3484
Packit 5c3484
  ASSERT_NOCARRY(mpn_sub_n (r5, r5, r7, 3 * n + 1));
Packit 5c3484
Packit 5c3484
  ASSERT_NOCARRY(mpn_sub_n (r3, r3, r5, 3 * n + 1));
Packit 5c3484
Packit 5c3484
  mpn_divexact_by45 (r3, r3, 3 * n + 1);
Packit 5c3484
Packit 5c3484
  ASSERT_NOCARRY(mpn_divexact_by3 (r5, r5, 3 * n + 1));
Packit 5c3484
Packit 5c3484
  ASSERT_NOCARRY(DO_mpn_sublsh2_n (r5, r3, 3 * n + 1, ws));
Packit 5c3484
Packit 5c3484
  /* last interpolation steps... */
Packit 5c3484
  /* ... are mixed with recomposition */
Packit 5c3484
Packit 5c3484
  /***************************** recomposition *******************************/
Packit 5c3484
  /*
Packit 5c3484
    pp[] prior to operations:
Packit 5c3484
     |_H r1|_L r1|____||_H r5|_M_r5|_L r5|_____|_H r8|_L r8|pp
Packit 5c3484
Packit 5c3484
    summation scheme for remaining operations:
Packit 5c3484
     |____8|n___7|n___6|n___5|n___4|n___3|n___2|n____|n____|pp
Packit 5c3484
     |_H r1|_L r1|____||_H*r5|_M r5|_L r5|_____|_H_r8|_L r8|pp
Packit 5c3484
	  ||_H r3|_M r3|_L*r3|
Packit 5c3484
				  ||_H_r7|_M_r7|_L_r7|
Packit 5c3484
		      ||-H r3|-M r3|-L*r3|
Packit 5c3484
				  ||-H*r5|-M_r5|-L_r5|
Packit 5c3484
  */
Packit 5c3484
Packit 5c3484
  cy = mpn_add_n (pp + n, pp + n, r7, n); /* Hr8+Lr7-Lr5 */
Packit 5c3484
  cy-= mpn_sub_n (pp + n, pp + n, r5, n);
Packit 5c3484
  if (0 > cy)
Packit 5c3484
    MPN_DECR_U (r7 + n, 2*n + 1, 1);
Packit 5c3484
  else
Packit 5c3484
    MPN_INCR_U (r7 + n, 2*n + 1, cy);
Packit 5c3484
Packit 5c3484
  cy = mpn_sub_n (pp + 2*n, r7 + n, r5 + n, n); /* Mr7-Mr5 */
Packit 5c3484
  MPN_DECR_U (r7 + 2*n, n + 1, cy);
Packit 5c3484
Packit 5c3484
  cy = mpn_add_n (pp + 3*n, r5, r7+ 2*n, n+1); /* Hr7+Lr5 */
Packit 5c3484
  r5[3*n]+= mpn_add_n (r5 + 2*n, r5 + 2*n, r3, n); /* Hr5+Lr3 */
Packit 5c3484
  cy-= mpn_sub_n (pp + 3*n, pp + 3*n, r5 + 2*n, n+1); /* Hr7-Hr5+Lr5-Lr3 */
Packit 5c3484
  if (UNLIKELY(0 > cy))
Packit 5c3484
    MPN_DECR_U (r5 + n + 1, 2*n, 1);
Packit 5c3484
  else
Packit 5c3484
    MPN_INCR_U (r5 + n + 1, 2*n, cy);
Packit 5c3484
Packit 5c3484
  ASSERT_NOCARRY(mpn_sub_n(pp + 4*n, r5 + n, r3 + n, 2*n +1)); /* Mr5-Mr3,Hr5-Hr3 */
Packit 5c3484
Packit 5c3484
  cy = mpn_add_1 (pp + 6*n, r3 + n, n, pp[6*n]);
Packit 5c3484
  MPN_INCR_U (r3 + 2*n, n + 1, cy);
Packit 5c3484
  cy = mpn_add_n (pp + 7*n, pp + 7*n, r3 + 2*n, n);
Packit 5c3484
  if (LIKELY(spt != n))
Packit 5c3484
    MPN_INCR_U (pp + 8*n, spt - n, cy + r3[3*n]);
Packit 5c3484
  else
Packit 5c3484
    ASSERT (r3[3*n] + cy == 0);
Packit 5c3484
}