|
Packit |
5c3484 |
/* mpn_mulmid -- middle product
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
Contributed by David Harvey.
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE. IT IS ONLY
|
|
Packit |
5c3484 |
SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
|
|
Packit |
5c3484 |
GUARANTEED THAT IT'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
Copyright 2011 Free Software Foundation, Inc.
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
This file is part of the GNU MP Library.
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
The GNU MP Library is free software; you can redistribute it and/or modify
|
|
Packit |
5c3484 |
it under the terms of either:
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
* the GNU Lesser General Public License as published by the Free
|
|
Packit |
5c3484 |
Software Foundation; either version 3 of the License, or (at your
|
|
Packit |
5c3484 |
option) any later version.
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
or
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
* the GNU General Public License as published by the Free Software
|
|
Packit |
5c3484 |
Foundation; either version 2 of the License, or (at your option) any
|
|
Packit |
5c3484 |
later version.
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
or both in parallel, as here.
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
The GNU MP Library is distributed in the hope that it will be useful, but
|
|
Packit |
5c3484 |
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
|
Packit |
5c3484 |
or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
Packit |
5c3484 |
for more details.
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
You should have received copies of the GNU General Public License and the
|
|
Packit |
5c3484 |
GNU Lesser General Public License along with the GNU MP Library. If not,
|
|
Packit |
5c3484 |
see https://www.gnu.org/licenses/. */
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
#include "gmp.h"
|
|
Packit |
5c3484 |
#include "gmp-impl.h"
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
#define CHUNK (200 + MULMID_TOOM42_THRESHOLD)
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
void
|
|
Packit |
5c3484 |
mpn_mulmid (mp_ptr rp,
|
|
Packit |
5c3484 |
mp_srcptr ap, mp_size_t an,
|
|
Packit |
5c3484 |
mp_srcptr bp, mp_size_t bn)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
mp_size_t rn, k;
|
|
Packit |
5c3484 |
mp_ptr scratch, temp;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
ASSERT (an >= bn);
|
|
Packit |
5c3484 |
ASSERT (bn >= 1);
|
|
Packit |
5c3484 |
ASSERT (! MPN_OVERLAP_P (rp, an - bn + 3, ap, an));
|
|
Packit |
5c3484 |
ASSERT (! MPN_OVERLAP_P (rp, an - bn + 3, bp, bn));
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
if (bn < MULMID_TOOM42_THRESHOLD)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
/* region not tall enough to make toom42 worthwhile for any portion */
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
if (an < CHUNK)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
/* region not too wide either, just call basecase directly */
|
|
Packit |
5c3484 |
mpn_mulmid_basecase (rp, ap, an, bp, bn);
|
|
Packit |
5c3484 |
return;
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
/* Region quite wide. For better locality, use basecase on chunks:
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
AAABBBCC..
|
|
Packit |
5c3484 |
.AAABBBCC.
|
|
Packit |
5c3484 |
..AAABBBCC
|
|
Packit |
5c3484 |
*/
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
k = CHUNK - bn + 1; /* number of diagonals per chunk */
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
/* first chunk (marked A in the above diagram) */
|
|
Packit |
5c3484 |
mpn_mulmid_basecase (rp, ap, CHUNK, bp, bn);
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
/* remaining chunks (B, C, etc) */
|
|
Packit |
5c3484 |
an -= k;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
while (an >= CHUNK)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
mp_limb_t t0, t1, cy;
|
|
Packit |
5c3484 |
ap += k, rp += k;
|
|
Packit |
5c3484 |
t0 = rp[0], t1 = rp[1];
|
|
Packit |
5c3484 |
mpn_mulmid_basecase (rp, ap, CHUNK, bp, bn);
|
|
Packit |
5c3484 |
ADDC_LIMB (cy, rp[0], rp[0], t0); /* add back saved limbs */
|
|
Packit |
5c3484 |
MPN_INCR_U (rp + 1, k + 1, t1 + cy);
|
|
Packit |
5c3484 |
an -= k;
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
if (an >= bn)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
/* last remaining chunk */
|
|
Packit |
5c3484 |
mp_limb_t t0, t1, cy;
|
|
Packit |
5c3484 |
ap += k, rp += k;
|
|
Packit |
5c3484 |
t0 = rp[0], t1 = rp[1];
|
|
Packit |
5c3484 |
mpn_mulmid_basecase (rp, ap, an, bp, bn);
|
|
Packit |
5c3484 |
ADDC_LIMB (cy, rp[0], rp[0], t0);
|
|
Packit |
5c3484 |
MPN_INCR_U (rp + 1, an - bn + 2, t1 + cy);
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
return;
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
/* region is tall enough for toom42 */
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
rn = an - bn + 1;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
if (rn < MULMID_TOOM42_THRESHOLD)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
/* region not wide enough to make toom42 worthwhile for any portion */
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
TMP_DECL;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
if (bn < CHUNK)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
/* region not too tall either, just call basecase directly */
|
|
Packit |
5c3484 |
mpn_mulmid_basecase (rp, ap, an, bp, bn);
|
|
Packit |
5c3484 |
return;
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
/* Region quite tall. For better locality, use basecase on chunks:
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
AAAAA....
|
|
Packit |
5c3484 |
.AAAAA...
|
|
Packit |
5c3484 |
..BBBBB..
|
|
Packit |
5c3484 |
...BBBBB.
|
|
Packit |
5c3484 |
....CCCCC
|
|
Packit |
5c3484 |
*/
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
TMP_MARK;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
temp = TMP_ALLOC_LIMBS (rn + 2);
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
/* first chunk (marked A in the above diagram) */
|
|
Packit |
5c3484 |
bp += bn - CHUNK, an -= bn - CHUNK;
|
|
Packit |
5c3484 |
mpn_mulmid_basecase (rp, ap, an, bp, CHUNK);
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
/* remaining chunks (B, C, etc) */
|
|
Packit |
5c3484 |
bn -= CHUNK;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
while (bn >= CHUNK)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
ap += CHUNK, bp -= CHUNK;
|
|
Packit |
5c3484 |
mpn_mulmid_basecase (temp, ap, an, bp, CHUNK);
|
|
Packit |
5c3484 |
mpn_add_n (rp, rp, temp, rn + 2);
|
|
Packit |
5c3484 |
bn -= CHUNK;
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
if (bn)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
/* last remaining chunk */
|
|
Packit |
5c3484 |
ap += CHUNK, bp -= bn;
|
|
Packit |
5c3484 |
mpn_mulmid_basecase (temp, ap, rn + bn - 1, bp, bn);
|
|
Packit |
5c3484 |
mpn_add_n (rp, rp, temp, rn + 2);
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
TMP_FREE;
|
|
Packit |
5c3484 |
return;
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
/* we're definitely going to use toom42 somewhere */
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
if (bn > rn)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
/* slice region into chunks, use toom42 on all chunks except possibly
|
|
Packit |
5c3484 |
the last:
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
AA....
|
|
Packit |
5c3484 |
.AA...
|
|
Packit |
5c3484 |
..BB..
|
|
Packit |
5c3484 |
...BB.
|
|
Packit |
5c3484 |
....CC
|
|
Packit |
5c3484 |
*/
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
TMP_DECL;
|
|
Packit |
5c3484 |
TMP_MARK;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
temp = TMP_ALLOC_LIMBS (rn + 2 + mpn_toom42_mulmid_itch (rn));
|
|
Packit |
5c3484 |
scratch = temp + rn + 2;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
/* first chunk (marked A in the above diagram) */
|
|
Packit |
5c3484 |
bp += bn - rn;
|
|
Packit |
5c3484 |
mpn_toom42_mulmid (rp, ap, bp, rn, scratch);
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
/* remaining chunks (B, C, etc) */
|
|
Packit |
5c3484 |
bn -= rn;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
while (bn >= rn)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
ap += rn, bp -= rn;
|
|
Packit |
5c3484 |
mpn_toom42_mulmid (temp, ap, bp, rn, scratch);
|
|
Packit |
5c3484 |
mpn_add_n (rp, rp, temp, rn + 2);
|
|
Packit |
5c3484 |
bn -= rn;
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
if (bn)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
/* last remaining chunk */
|
|
Packit |
5c3484 |
ap += rn, bp -= bn;
|
|
Packit |
5c3484 |
mpn_mulmid (temp, ap, rn + bn - 1, bp, bn);
|
|
Packit |
5c3484 |
mpn_add_n (rp, rp, temp, rn + 2);
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
TMP_FREE;
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
else
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
/* slice region into chunks, use toom42 on all chunks except possibly
|
|
Packit |
5c3484 |
the last:
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
AAABBBCC..
|
|
Packit |
5c3484 |
.AAABBBCC.
|
|
Packit |
5c3484 |
..AAABBBCC
|
|
Packit |
5c3484 |
*/
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
TMP_DECL;
|
|
Packit |
5c3484 |
TMP_MARK;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
scratch = TMP_ALLOC_LIMBS (mpn_toom42_mulmid_itch (bn));
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
/* first chunk (marked A in the above diagram) */
|
|
Packit |
5c3484 |
mpn_toom42_mulmid (rp, ap, bp, bn, scratch);
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
/* remaining chunks (B, C, etc) */
|
|
Packit |
5c3484 |
rn -= bn;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
while (rn >= bn)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
mp_limb_t t0, t1, cy;
|
|
Packit |
5c3484 |
ap += bn, rp += bn;
|
|
Packit |
5c3484 |
t0 = rp[0], t1 = rp[1];
|
|
Packit |
5c3484 |
mpn_toom42_mulmid (rp, ap, bp, bn, scratch);
|
|
Packit |
5c3484 |
ADDC_LIMB (cy, rp[0], rp[0], t0); /* add back saved limbs */
|
|
Packit |
5c3484 |
MPN_INCR_U (rp + 1, bn + 1, t1 + cy);
|
|
Packit |
5c3484 |
rn -= bn;
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
TMP_FREE;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
if (rn)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
/* last remaining chunk */
|
|
Packit |
5c3484 |
mp_limb_t t0, t1, cy;
|
|
Packit |
5c3484 |
ap += bn, rp += bn;
|
|
Packit |
5c3484 |
t0 = rp[0], t1 = rp[1];
|
|
Packit |
5c3484 |
mpn_mulmid (rp, ap, rn + bn - 1, bp, bn);
|
|
Packit |
5c3484 |
ADDC_LIMB (cy, rp[0], rp[0], t0);
|
|
Packit |
5c3484 |
MPN_INCR_U (rp + 1, rn + 1, t1 + cy);
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
}
|