|
Packit |
5c3484 |
/* mpn_toom_interpolate_5pts -- Interpolate for toom3, 33, 42.
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
Contributed to the GNU project by Robert Harley.
|
|
Packit |
5c3484 |
Improvements by Paul Zimmermann and Marco Bodrato.
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE. IT IS ONLY
|
|
Packit |
5c3484 |
SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
|
|
Packit |
5c3484 |
GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
Copyright 2000-2003, 2005-2007, 2009 Free Software Foundation, Inc.
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
This file is part of the GNU MP Library.
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
The GNU MP Library is free software; you can redistribute it and/or modify
|
|
Packit |
5c3484 |
it under the terms of either:
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
* the GNU Lesser General Public License as published by the Free
|
|
Packit |
5c3484 |
Software Foundation; either version 3 of the License, or (at your
|
|
Packit |
5c3484 |
option) any later version.
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
or
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
* the GNU General Public License as published by the Free Software
|
|
Packit |
5c3484 |
Foundation; either version 2 of the License, or (at your option) any
|
|
Packit |
5c3484 |
later version.
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
or both in parallel, as here.
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
The GNU MP Library is distributed in the hope that it will be useful, but
|
|
Packit |
5c3484 |
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
|
Packit |
5c3484 |
or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
Packit |
5c3484 |
for more details.
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
You should have received copies of the GNU General Public License and the
|
|
Packit |
5c3484 |
GNU Lesser General Public License along with the GNU MP Library. If not,
|
|
Packit |
5c3484 |
see https://www.gnu.org/licenses/. */
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
#include "gmp.h"
|
|
Packit |
5c3484 |
#include "gmp-impl.h"
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
void
|
|
Packit |
5c3484 |
mpn_toom_interpolate_5pts (mp_ptr c, mp_ptr v2, mp_ptr vm1,
|
|
Packit |
5c3484 |
mp_size_t k, mp_size_t twor, int sa,
|
|
Packit |
5c3484 |
mp_limb_t vinf0)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
mp_limb_t cy, saved;
|
|
Packit |
5c3484 |
mp_size_t twok;
|
|
Packit |
5c3484 |
mp_size_t kk1;
|
|
Packit |
5c3484 |
mp_ptr c1, v1, c3, vinf;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
twok = k + k;
|
|
Packit |
5c3484 |
kk1 = twok + 1;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
c1 = c + k;
|
|
Packit |
5c3484 |
v1 = c1 + k;
|
|
Packit |
5c3484 |
c3 = v1 + k;
|
|
Packit |
5c3484 |
vinf = c3 + k;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
#define v0 (c)
|
|
Packit |
5c3484 |
/* (1) v2 <- v2-vm1 < v2+|vm1|, (16 8 4 2 1) - (1 -1 1 -1 1) =
|
|
Packit |
5c3484 |
thus 0 <= v2 < 50*B^(2k) < 2^6*B^(2k) (15 9 3 3 0)
|
|
Packit |
5c3484 |
*/
|
|
Packit |
5c3484 |
if (sa)
|
|
Packit |
5c3484 |
ASSERT_NOCARRY (mpn_add_n (v2, v2, vm1, kk1));
|
|
Packit |
5c3484 |
else
|
|
Packit |
5c3484 |
ASSERT_NOCARRY (mpn_sub_n (v2, v2, vm1, kk1));
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
/* {c,2k} {c+2k,2k+1} {c+4k+1,2r-1} {t,2k+1} {t+2k+1,2k+1} {t+4k+2,2r}
|
|
Packit |
5c3484 |
v0 v1 hi(vinf) |vm1| v2-vm1 EMPTY */
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
ASSERT_NOCARRY (mpn_divexact_by3 (v2, v2, kk1)); /* v2 <- v2 / 3 */
|
|
Packit |
5c3484 |
/* (5 3 1 1 0)*/
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
/* {c,2k} {c+2k,2k+1} {c+4k+1,2r-1} {t,2k+1} {t+2k+1,2k+1} {t+4k+2,2r}
|
|
Packit |
5c3484 |
v0 v1 hi(vinf) |vm1| (v2-vm1)/3 EMPTY */
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
/* (2) vm1 <- tm1 := (v1 - vm1) / 2 [(1 1 1 1 1) - (1 -1 1 -1 1)] / 2 =
|
|
Packit |
5c3484 |
tm1 >= 0 (0 1 0 1 0)
|
|
Packit |
5c3484 |
No carry comes out from {v1, kk1} +/- {vm1, kk1},
|
|
Packit |
5c3484 |
and the division by two is exact.
|
|
Packit |
5c3484 |
If (sa!=0) the sign of vm1 is negative */
|
|
Packit |
5c3484 |
if (sa)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
#ifdef HAVE_NATIVE_mpn_rsh1add_n
|
|
Packit |
5c3484 |
mpn_rsh1add_n (vm1, v1, vm1, kk1);
|
|
Packit |
5c3484 |
#else
|
|
Packit |
5c3484 |
ASSERT_NOCARRY (mpn_add_n (vm1, v1, vm1, kk1));
|
|
Packit |
5c3484 |
ASSERT_NOCARRY (mpn_rshift (vm1, vm1, kk1, 1));
|
|
Packit |
5c3484 |
#endif
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
else
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
#ifdef HAVE_NATIVE_mpn_rsh1sub_n
|
|
Packit |
5c3484 |
mpn_rsh1sub_n (vm1, v1, vm1, kk1);
|
|
Packit |
5c3484 |
#else
|
|
Packit |
5c3484 |
ASSERT_NOCARRY (mpn_sub_n (vm1, v1, vm1, kk1));
|
|
Packit |
5c3484 |
ASSERT_NOCARRY (mpn_rshift (vm1, vm1, kk1, 1));
|
|
Packit |
5c3484 |
#endif
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
/* {c,2k} {c+2k,2k+1} {c+4k+1,2r-1} {t,2k+1} {t+2k+1,2k+1} {t+4k+2,2r}
|
|
Packit |
5c3484 |
v0 v1 hi(vinf) tm1 (v2-vm1)/3 EMPTY */
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
/* (3) v1 <- t1 := v1 - v0 (1 1 1 1 1) - (0 0 0 0 1) = (1 1 1 1 0)
|
|
Packit |
5c3484 |
t1 >= 0
|
|
Packit |
5c3484 |
*/
|
|
Packit |
5c3484 |
vinf[0] -= mpn_sub_n (v1, v1, c, twok);
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
/* {c,2k} {c+2k,2k+1} {c+4k+1,2r-1} {t,2k+1} {t+2k+1,2k+1} {t+4k+2,2r}
|
|
Packit |
5c3484 |
v0 v1-v0 hi(vinf) tm1 (v2-vm1)/3 EMPTY */
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
/* (4) v2 <- t2 := ((v2-vm1)/3-t1)/2 = (v2-vm1-3*t1)/6
|
|
Packit |
5c3484 |
t2 >= 0 [(5 3 1 1 0) - (1 1 1 1 0)]/2 = (2 1 0 0 0)
|
|
Packit |
5c3484 |
*/
|
|
Packit |
5c3484 |
#ifdef HAVE_NATIVE_mpn_rsh1sub_n
|
|
Packit |
5c3484 |
mpn_rsh1sub_n (v2, v2, v1, kk1);
|
|
Packit |
5c3484 |
#else
|
|
Packit |
5c3484 |
ASSERT_NOCARRY (mpn_sub_n (v2, v2, v1, kk1));
|
|
Packit |
5c3484 |
ASSERT_NOCARRY (mpn_rshift (v2, v2, kk1, 1));
|
|
Packit |
5c3484 |
#endif
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
/* {c,2k} {c+2k,2k+1} {c+4k+1,2r-1} {t,2k+1} {t+2k+1,2k+1} {t+4k+2,2r}
|
|
Packit |
5c3484 |
v0 v1-v0 hi(vinf) tm1 (v2-vm1-3t1)/6 EMPTY */
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
/* (5) v1 <- t1-tm1 (1 1 1 1 0) - (0 1 0 1 0) = (1 0 1 0 0)
|
|
Packit |
5c3484 |
result is v1 >= 0
|
|
Packit |
5c3484 |
*/
|
|
Packit |
5c3484 |
ASSERT_NOCARRY (mpn_sub_n (v1, v1, vm1, kk1));
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
/* We do not need to read the value in vm1, so we add it in {c+k, ...} */
|
|
Packit |
5c3484 |
cy = mpn_add_n (c1, c1, vm1, kk1);
|
|
Packit |
5c3484 |
MPN_INCR_U (c3 + 1, twor + k - 1, cy); /* 2n-(3k+1) = 2r+k-1 */
|
|
Packit |
5c3484 |
/* Memory allocated for vm1 is now free, it can be recycled ...*/
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
/* (6) v2 <- v2 - 2*vinf, (2 1 0 0 0) - 2*(1 0 0 0 0) = (0 1 0 0 0)
|
|
Packit |
5c3484 |
result is v2 >= 0 */
|
|
Packit |
5c3484 |
saved = vinf[0]; /* Remember v1's highest byte (will be overwritten). */
|
|
Packit |
5c3484 |
vinf[0] = vinf0; /* Set the right value for vinf0 */
|
|
Packit |
5c3484 |
#ifdef HAVE_NATIVE_mpn_sublsh1_n_ip1
|
|
Packit |
5c3484 |
cy = mpn_sublsh1_n_ip1 (v2, vinf, twor);
|
|
Packit |
5c3484 |
#else
|
|
Packit |
5c3484 |
/* Overwrite unused vm1 */
|
|
Packit |
5c3484 |
cy = mpn_lshift (vm1, vinf, twor, 1);
|
|
Packit |
5c3484 |
cy += mpn_sub_n (v2, v2, vm1, twor);
|
|
Packit |
5c3484 |
#endif
|
|
Packit |
5c3484 |
MPN_DECR_U (v2 + twor, kk1 - twor, cy);
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
/* Current matrix is
|
|
Packit |
5c3484 |
[1 0 0 0 0; vinf
|
|
Packit |
5c3484 |
0 1 0 0 0; v2
|
|
Packit |
5c3484 |
1 0 1 0 0; v1
|
|
Packit |
5c3484 |
0 1 0 1 0; vm1
|
|
Packit |
5c3484 |
0 0 0 0 1] v0
|
|
Packit |
5c3484 |
Some values already are in-place (we added vm1 in the correct position)
|
|
Packit |
5c3484 |
| vinf| v1 | v0 |
|
|
Packit |
5c3484 |
| vm1 |
|
|
Packit |
5c3484 |
One still is in a separated area
|
|
Packit |
5c3484 |
| +v2 |
|
|
Packit |
5c3484 |
We have to compute v1-=vinf; vm1 -= v2,
|
|
Packit |
5c3484 |
|-vinf|
|
|
Packit |
5c3484 |
| -v2 |
|
|
Packit |
5c3484 |
Carefully reordering operations we can avoid to compute twice the sum
|
|
Packit |
5c3484 |
of the high half of v2 plus the low half of vinf.
|
|
Packit |
5c3484 |
*/
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
/* Add the high half of t2 in {vinf} */
|
|
Packit |
5c3484 |
if ( LIKELY(twor > k + 1) ) { /* This is the expected flow */
|
|
Packit |
5c3484 |
cy = mpn_add_n (vinf, vinf, v2 + k, k + 1);
|
|
Packit |
5c3484 |
MPN_INCR_U (c3 + kk1, twor - k - 1, cy); /* 2n-(5k+1) = 2r-k-1 */
|
|
Packit |
5c3484 |
} else { /* triggered only by very unbalanced cases like
|
|
Packit |
5c3484 |
(k+k+(k-2))x(k+k+1) , should be handled by toom32 */
|
|
Packit |
5c3484 |
ASSERT_NOCARRY (mpn_add_n (vinf, vinf, v2 + k, twor));
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
/* (7) v1 <- v1 - vinf, (1 0 1 0 0) - (1 0 0 0 0) = (0 0 1 0 0)
|
|
Packit |
5c3484 |
result is >= 0 */
|
|
Packit |
5c3484 |
/* Side effect: we also subtracted (high half) vm1 -= v2 */
|
|
Packit |
5c3484 |
cy = mpn_sub_n (v1, v1, vinf, twor); /* vinf is at most twor long. */
|
|
Packit |
5c3484 |
vinf0 = vinf[0]; /* Save again the right value for vinf0 */
|
|
Packit |
5c3484 |
vinf[0] = saved;
|
|
Packit |
5c3484 |
MPN_DECR_U (v1 + twor, kk1 - twor, cy); /* Treat the last bytes. */
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
/* (8) vm1 <- vm1-v2 (0 1 0 1 0) - (0 1 0 0 0) = (0 0 0 1 0)
|
|
Packit |
5c3484 |
Operate only on the low half.
|
|
Packit |
5c3484 |
*/
|
|
Packit |
5c3484 |
cy = mpn_sub_n (c1, c1, v2, k);
|
|
Packit |
5c3484 |
MPN_DECR_U (v1, kk1, cy);
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
/********************* Beginning the final phase **********************/
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
/* Most of the recomposition was done */
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
/* add t2 in {c+3k, ...}, but only the low half */
|
|
Packit |
5c3484 |
cy = mpn_add_n (c3, c3, v2, k);
|
|
Packit |
5c3484 |
vinf[0] += cy;
|
|
Packit |
5c3484 |
ASSERT(vinf[0] >= cy); /* No carry */
|
|
Packit |
5c3484 |
MPN_INCR_U (vinf, twor, vinf0); /* Add vinf0, propagate carry. */
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
#undef v0
|
|
Packit |
5c3484 |
}
|