Tree - source-git/gmp - CentOS Git server

source-git / gmp

Blame mpn/generic/toom_interpolate_7pts.c

Blob History Raw

Packit	5c3484	`/* mpn_toom_interpolate_7pts -- Interpolate for toom44, 53, 62.`
Packit	5c3484
Packit	5c3484	`Contributed to the GNU project by Niels Möller.`
Packit	5c3484	`Improvements by Marco Bodrato.`
Packit	5c3484
Packit	5c3484	`THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE. IT IS ONLY`
Packit	5c3484	`SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST`
Packit	5c3484	`GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.`
Packit	5c3484
Packit	5c3484	`Copyright 2006, 2007, 2009, 2014 Free Software Foundation, Inc.`
Packit	5c3484
Packit	5c3484	`This file is part of the GNU MP Library.`
Packit	5c3484
Packit	5c3484	`The GNU MP Library is free software; you can redistribute it and/or modify`
Packit	5c3484	`it under the terms of either:`
Packit	5c3484
Packit	5c3484	`* the GNU Lesser General Public License as published by the Free`
Packit	5c3484	`Software Foundation; either version 3 of the License, or (at your`
Packit	5c3484	`option) any later version.`
Packit	5c3484
Packit	5c3484	`or`
Packit	5c3484
Packit	5c3484	`* the GNU General Public License as published by the Free Software`
Packit	5c3484	`Foundation; either version 2 of the License, or (at your option) any`
Packit	5c3484	`later version.`
Packit	5c3484
Packit	5c3484	`or both in parallel, as here.`
Packit	5c3484
Packit	5c3484	`The GNU MP Library is distributed in the hope that it will be useful, but`
Packit	5c3484	`WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY`
Packit	5c3484	`or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License`
Packit	5c3484	`for more details.`
Packit	5c3484
Packit	5c3484	`You should have received copies of the GNU General Public License and the`
Packit	5c3484	`GNU Lesser General Public License along with the GNU MP Library. If not,`
Packit	5c3484	`see https://www.gnu.org/licenses/. */`
Packit	5c3484
Packit	5c3484	`#include "gmp.h"`
Packit	5c3484	`#include "gmp-impl.h"`
Packit	5c3484
Packit	5c3484	`#define BINVERT_3 MODLIMB_INVERSE_3`
Packit	5c3484
Packit	5c3484	`#define BINVERT_9 \`
Packit	5c3484	`((((GMP_NUMB_MAX / 9) << (6 - GMP_NUMB_BITS % 6)) * 8 & GMP_NUMB_MAX) \| 0x39)`
Packit	5c3484
Packit	5c3484	`#define BINVERT_15 \`
Packit	5c3484	`((((GMP_NUMB_MAX >> (GMP_NUMB_BITS % 4)) / 15) * 14 * 16 & GMP_NUMB_MAX) + 15)`
Packit	5c3484
Packit	5c3484	`/* For the various mpn_divexact_byN here, fall back to using either`
Packit	5c3484	`mpn_pi1_bdiv_q_1 or mpn_divexact_1. The former has less overhead and is`
Packit	5c3484	`many faster if it is native. For now, since mpn_divexact_1 is native on`
Packit	5c3484	`several platforms where mpn_pi1_bdiv_q_1 does not yet exist, do not use`
Packit	5c3484	`mpn_pi1_bdiv_q_1 unconditionally. FIXME. */`
Packit	5c3484
Packit	5c3484	`/* For odd divisors, mpn_divexact_1 works fine with two's complement. */`
Packit	5c3484	`#ifndef mpn_divexact_by3`
Packit	5c3484	`#if HAVE_NATIVE_mpn_pi1_bdiv_q_1`
Packit	5c3484	`#define mpn_divexact_by3(dst,src,size) mpn_pi1_bdiv_q_1(dst,src,size,3,BINVERT_3,0)`
Packit	5c3484	`#else`
Packit	5c3484	`#define mpn_divexact_by3(dst,src,size) mpn_divexact_1(dst,src,size,3)`
Packit	5c3484	`#endif`
Packit	5c3484	`#endif`
Packit	5c3484
Packit	5c3484	`#ifndef mpn_divexact_by9`
Packit	5c3484	`#if HAVE_NATIVE_mpn_pi1_bdiv_q_1`
Packit	5c3484	`#define mpn_divexact_by9(dst,src,size) mpn_pi1_bdiv_q_1(dst,src,size,9,BINVERT_9,0)`
Packit	5c3484	`#else`
Packit	5c3484	`#define mpn_divexact_by9(dst,src,size) mpn_divexact_1(dst,src,size,9)`
Packit	5c3484	`#endif`
Packit	5c3484	`#endif`
Packit	5c3484
Packit	5c3484	`#ifndef mpn_divexact_by15`
Packit	5c3484	`#if HAVE_NATIVE_mpn_pi1_bdiv_q_1`
Packit	5c3484	`#define mpn_divexact_by15(dst,src,size) mpn_pi1_bdiv_q_1(dst,src,size,15,BINVERT_15,0)`
Packit	5c3484	`#else`
Packit	5c3484	`#define mpn_divexact_by15(dst,src,size) mpn_divexact_1(dst,src,size,15)`
Packit	5c3484	`#endif`
Packit	5c3484	`#endif`
Packit	5c3484
Packit	5c3484	`/* Interpolation for toom4, using the evaluation points 0, infinity,`
Packit	5c3484	`1, -1, 2, -2, 1/2. More precisely, we want to compute`
Packit	5c3484	`f(2^(GMP_NUMB_BITS * n)) for a polynomial f of degree 6, given the`
Packit	5c3484	`seven values`
Packit	5c3484
Packit	5c3484	`w0 = f(0),`
Packit	5c3484	`w1 = f(-2),`
Packit	5c3484	`w2 = f(1),`
Packit	5c3484	`w3 = f(-1),`
Packit	5c3484	`w4 = f(2)`
Packit	5c3484	`w5 = 64 * f(1/2)`
Packit	5c3484	`w6 = limit at infinity of f(x) / x^6,`
Packit	5c3484
Packit	5c3484	`The result is 6*n + w6n limbs. At entry, w0 is stored at {rp, 2n },`
Packit	5c3484	`w2 is stored at { rp + 2n, 2n+1 }, and w6 is stored at { rp + 6n,`
Packit	5c3484	`w6n }. The other values are 2n + 1 limbs each (with most`
Packit	5c3484	`significant limbs small). f(-1) and f(-1/2) may be negative, signs`
Packit	5c3484	`determined by the flag bits. Inputs are destroyed.`
Packit	5c3484
Packit	5c3484	`Needs (2*n + 1) limbs of temporary storage.`
Packit	5c3484	`*/`
Packit	5c3484
Packit	5c3484	`void`
Packit	5c3484	`mpn_toom_interpolate_7pts (mp_ptr rp, mp_size_t n, enum toom7_flags flags,`
Packit	5c3484	`mp_ptr w1, mp_ptr w3, mp_ptr w4, mp_ptr w5,`
Packit	5c3484	`mp_size_t w6n, mp_ptr tp)`
Packit	5c3484	`{`
Packit	5c3484	`mp_size_t m;`
Packit	5c3484	`mp_limb_t cy;`
Packit	5c3484
Packit	5c3484	`m = 2*n + 1;`
Packit	5c3484	`#define w0 rp`
Packit	5c3484	`#define w2 (rp + 2*n)`
Packit	5c3484	`#define w6 (rp + 6*n)`
Packit	5c3484
Packit	5c3484	`ASSERT (w6n > 0);`
Packit	5c3484	`ASSERT (w6n <= 2*n);`
Packit	5c3484
Packit	5c3484	`/* Using formulas similar to Marco Bodrato's`
Packit	5c3484
Packit	5c3484	`W5 = W5 + W4`
Packit	5c3484	`W1 =(W4 - W1)/2`
Packit	5c3484	`W4 = W4 - W0`
Packit	5c3484	`W4 =(W4 - W1)/4 - W6*16`
Packit	5c3484	`W3 =(W2 - W3)/2`
Packit	5c3484	`W2 = W2 - W3`
Packit	5c3484
Packit	5c3484	`W5 = W5 - W2*65 May be negative.`
Packit	5c3484	`W2 = W2 - W6 - W0`
Packit	5c3484	`W5 =(W5 + W2*45)/2 Now >= 0 again.`
Packit	5c3484	`W4 =(W4 - W2)/3`
Packit	5c3484	`W2 = W2 - W4`
Packit	5c3484
Packit	5c3484	`W1 = W5 - W1 May be negative.`
Packit	5c3484	`W5 =(W5 - W3*8)/9`
Packit	5c3484	`W3 = W3 - W5`
Packit	5c3484	`W1 =(W1/15 + W5)/2 Now >= 0 again.`
Packit	5c3484	`W5 = W5 - W1`
Packit	5c3484
Packit	5c3484	`where W0 = f(0), W1 = f(-2), W2 = f(1), W3 = f(-1),`
Packit	5c3484	`W4 = f(2), W5 = f(1/2), W6 = f(oo),`
Packit	5c3484
Packit	5c3484	`Note that most intermediate results are positive; the ones that`
Packit	5c3484	`may be negative are represented in two's complement. We must`
Packit	5c3484	`never shift right a value that may be negative, since that would`
Packit	5c3484	`invalidate the sign bit. On the other hand, divexact by odd`
Packit	5c3484	`numbers work fine with two's complement.`
Packit	5c3484	`*/`
Packit	5c3484
Packit	5c3484	`mpn_add_n (w5, w5, w4, m);`
Packit	5c3484	`if (flags & toom7_w1_neg)`
Packit	5c3484	`{`
Packit	5c3484	`#ifdef HAVE_NATIVE_mpn_rsh1add_n`
Packit	5c3484	`mpn_rsh1add_n (w1, w1, w4, m);`
Packit	5c3484	`#else`
Packit	5c3484	`mpn_add_n (w1, w1, w4, m); ASSERT (!(w1[0] & 1));`
Packit	5c3484	`mpn_rshift (w1, w1, m, 1);`
Packit	5c3484	`#endif`
Packit	5c3484	`}`
Packit	5c3484	`else`
Packit	5c3484	`{`
Packit	5c3484	`#ifdef HAVE_NATIVE_mpn_rsh1sub_n`
Packit	5c3484	`mpn_rsh1sub_n (w1, w4, w1, m);`
Packit	5c3484	`#else`
Packit	5c3484	`mpn_sub_n (w1, w4, w1, m); ASSERT (!(w1[0] & 1));`
Packit	5c3484	`mpn_rshift (w1, w1, m, 1);`
Packit	5c3484	`#endif`
Packit	5c3484	`}`
Packit	5c3484	`mpn_sub (w4, w4, m, w0, 2*n);`
Packit	5c3484	`mpn_sub_n (w4, w4, w1, m); ASSERT (!(w4[0] & 3));`
Packit	5c3484	`mpn_rshift (w4, w4, m, 2); /* w4>=0 */`
Packit	5c3484
Packit	5c3484	`tp[w6n] = mpn_lshift (tp, w6, w6n, 4);`
Packit	5c3484	`mpn_sub (w4, w4, m, tp, w6n+1);`
Packit	5c3484
Packit	5c3484	`if (flags & toom7_w3_neg)`
Packit	5c3484	`{`
Packit	5c3484	`#ifdef HAVE_NATIVE_mpn_rsh1add_n`
Packit	5c3484	`mpn_rsh1add_n (w3, w3, w2, m);`
Packit	5c3484	`#else`
Packit	5c3484	`mpn_add_n (w3, w3, w2, m); ASSERT (!(w3[0] & 1));`
Packit	5c3484	`mpn_rshift (w3, w3, m, 1);`
Packit	5c3484	`#endif`
Packit	5c3484	`}`
Packit	5c3484	`else`
Packit	5c3484	`{`
Packit	5c3484	`#ifdef HAVE_NATIVE_mpn_rsh1sub_n`
Packit	5c3484	`mpn_rsh1sub_n (w3, w2, w3, m);`
Packit	5c3484	`#else`
Packit	5c3484	`mpn_sub_n (w3, w2, w3, m); ASSERT (!(w3[0] & 1));`
Packit	5c3484	`mpn_rshift (w3, w3, m, 1);`
Packit	5c3484	`#endif`
Packit	5c3484	`}`
Packit	5c3484
Packit	5c3484	`mpn_sub_n (w2, w2, w3, m);`
Packit	5c3484
Packit	5c3484	`mpn_submul_1 (w5, w2, m, 65);`
Packit	5c3484	`mpn_sub (w2, w2, m, w6, w6n);`
Packit	5c3484	`mpn_sub (w2, w2, m, w0, 2*n);`
Packit	5c3484
Packit	5c3484	`mpn_addmul_1 (w5, w2, m, 45); ASSERT (!(w5[0] & 1));`
Packit	5c3484	`mpn_rshift (w5, w5, m, 1);`
Packit	5c3484	`mpn_sub_n (w4, w4, w2, m);`
Packit	5c3484
Packit	5c3484	`mpn_divexact_by3 (w4, w4, m);`
Packit	5c3484	`mpn_sub_n (w2, w2, w4, m);`
Packit	5c3484
Packit	5c3484	`mpn_sub_n (w1, w5, w1, m);`
Packit	5c3484	`mpn_lshift (tp, w3, m, 3);`
Packit	5c3484	`mpn_sub_n (w5, w5, tp, m);`
Packit	5c3484	`mpn_divexact_by9 (w5, w5, m);`
Packit	5c3484	`mpn_sub_n (w3, w3, w5, m);`
Packit	5c3484
Packit	5c3484	`mpn_divexact_by15 (w1, w1, m);`
Packit	5c3484	`mpn_add_n (w1, w1, w5, m); ASSERT (!(w1[0] & 1));`
Packit	5c3484	`mpn_rshift (w1, w1, m, 1); /* w1>=0 now */`
Packit	5c3484	`mpn_sub_n (w5, w5, w1, m);`
Packit	5c3484
Packit	5c3484	`/* These bounds are valid for the 4x4 polynomial product of toom44,`
Packit	5c3484	`* and they are conservative for toom53 and toom62. */`
Packit	5c3484	`ASSERT (w1[2*n] < 2);`
Packit	5c3484	`ASSERT (w2[2*n] < 3);`
Packit	5c3484	`ASSERT (w3[2*n] < 4);`
Packit	5c3484	`ASSERT (w4[2*n] < 3);`
Packit	5c3484	`ASSERT (w5[2*n] < 2);`
Packit	5c3484
Packit	5c3484	`/* Addition chain. Note carries and the 2n'th limbs that need to be`
Packit	5c3484	`* added in.`
Packit	5c3484	`*`
Packit	5c3484	`* Special care is needed for w2[2n] and the corresponding carry,`
Packit	5c3484	`* since the "simple" way of adding it all together would overwrite`
Packit	5c3484	`* the limb at wp[2n] and rp[4n] (same location) with the sum of`
Packit	5c3484	`* the high half of w3 and the low half of w4.`
Packit	5c3484	`*`
Packit	5c3484	`* 7 6 5 4 3 2 1 0`
Packit	5c3484	`* \| \| \| \| \| \| \| \| \|`
Packit	5c3484	`* \|\|w3 (2n+1)\|`
Packit	5c3484	`* \|\|w4 (2n+1)\|`
Packit	5c3484	`* \|\|w5 (2n+1)\| \|\|w1 (2n+1)\|`
Packit	5c3484	`* + \| w6 (w6n)\| \|\|w2 (2n+1)\| w0 (2n) \| (share storage with r)`
Packit	5c3484	`* -----------------------------------------------`
Packit	5c3484	`* r \| \| \| \| \| \| \| \| \|`
Packit	5c3484	`* c7 c6 c5 c4 c3 Carries to propagate`
Packit	5c3484	`*/`
Packit	5c3484
Packit	5c3484	`cy = mpn_add_n (rp + n, rp + n, w1, m);`
Packit	5c3484	`MPN_INCR_U (w2 + n + 1, n , cy);`
Packit	5c3484	`cy = mpn_add_n (rp + 3n, rp + 3n, w3, n);`
Packit	5c3484	`MPN_INCR_U (w3 + n, n + 1, w2[2*n] + cy);`
Packit	5c3484	`cy = mpn_add_n (rp + 4*n, w3 + n, w4, n);`
Packit	5c3484	`MPN_INCR_U (w4 + n, n + 1, w3[2*n] + cy);`
Packit	5c3484	`cy = mpn_add_n (rp + 5*n, w4 + n, w5, n);`
Packit	5c3484	`MPN_INCR_U (w5 + n, n + 1, w4[2*n] + cy);`
Packit	5c3484	`if (w6n > n + 1)`
Packit	5c3484	`{`
Packit	5c3484	`cy = mpn_add_n (rp + 6n, rp + 6n, w5 + n, n + 1);`
Packit	5c3484	`MPN_INCR_U (rp + 7*n + 1, w6n - n - 1, cy);`
Packit	5c3484	`}`
Packit	5c3484	`else`
Packit	5c3484	`{`
Packit	5c3484	`ASSERT_NOCARRY (mpn_add_n (rp + 6n, rp + 6n, w5 + n, w6n));`
Packit	5c3484	`#if WANT_ASSERT`
Packit	5c3484	`{`
Packit	5c3484	`mp_size_t i;`
Packit	5c3484	`for (i = w6n; i <= n; i++)`
Packit	5c3484	`ASSERT (w5[n + i] == 0);`
Packit	5c3484	`}`
Packit	5c3484	`#endif`
Packit	5c3484	`}`
Packit	5c3484	`}`

source-git / gmp

Source Code

Blame mpn/generic/toom_interpolate_7pts.c