|
Packit |
5c3484 |
/* mpn_sqrlo_basecase -- Internal routine to square a natural number
|
|
Packit |
5c3484 |
of length n.
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
THIS IS AN INTERNAL FUNCTION WITH A MUTABLE INTERFACE. IT IS ONLY
|
|
Packit |
5c3484 |
SAFE TO REACH THIS FUNCTION THROUGH DOCUMENTED INTERFACES.
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
Copyright 1991-1994, 1996, 1997, 2000-2005, 2008, 2010, 2011, 2015
|
|
Packit |
5c3484 |
Free Software Foundation, Inc.
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
This file is part of the GNU MP Library.
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
The GNU MP Library is free software; you can redistribute it and/or modify
|
|
Packit |
5c3484 |
it under the terms of either:
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
* the GNU Lesser General Public License as published by the Free
|
|
Packit |
5c3484 |
Software Foundation; either version 3 of the License, or (at your
|
|
Packit |
5c3484 |
option) any later version.
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
or
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
* the GNU General Public License as published by the Free Software
|
|
Packit |
5c3484 |
Foundation; either version 2 of the License, or (at your option) any
|
|
Packit |
5c3484 |
later version.
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
or both in parallel, as here.
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
The GNU MP Library is distributed in the hope that it will be useful, but
|
|
Packit |
5c3484 |
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
|
Packit |
5c3484 |
or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
Packit |
5c3484 |
for more details.
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
You should have received copies of the GNU General Public License and the
|
|
Packit |
5c3484 |
GNU Lesser General Public License along with the GNU MP Library. If not,
|
|
Packit |
5c3484 |
see https://www.gnu.org/licenses/. */
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
#include "gmp.h"
|
|
Packit |
5c3484 |
#include "gmp-impl.h"
|
|
Packit |
5c3484 |
#include "longlong.h"
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
#ifndef SQRLO_SHORTCUT_MULTIPLICATIONS
|
|
Packit |
5c3484 |
#if HAVE_NATIVE_mpn_addmul_1
|
|
Packit |
5c3484 |
#define SQRLO_SHORTCUT_MULTIPLICATIONS 0
|
|
Packit |
5c3484 |
#else
|
|
Packit |
5c3484 |
#define SQRLO_SHORTCUT_MULTIPLICATIONS 1
|
|
Packit |
5c3484 |
#endif
|
|
Packit |
5c3484 |
#endif
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
#if HAVE_NATIVE_mpn_sqr_diagonal
|
|
Packit |
5c3484 |
#define MPN_SQR_DIAGONAL(rp, up, n) \
|
|
Packit |
5c3484 |
mpn_sqr_diagonal (rp, up, n)
|
|
Packit |
5c3484 |
#else
|
|
Packit |
5c3484 |
#define MPN_SQR_DIAGONAL(rp, up, n) \
|
|
Packit |
5c3484 |
do { \
|
|
Packit |
5c3484 |
mp_size_t _i; \
|
|
Packit |
5c3484 |
for (_i = 0; _i < (n); _i++) \
|
|
Packit |
5c3484 |
{ \
|
|
Packit |
5c3484 |
mp_limb_t ul, lpl; \
|
|
Packit |
5c3484 |
ul = (up)[_i]; \
|
|
Packit |
5c3484 |
umul_ppmm ((rp)[2 * _i + 1], lpl, ul, ul << GMP_NAIL_BITS); \
|
|
Packit |
5c3484 |
(rp)[2 * _i] = lpl >> GMP_NAIL_BITS; \
|
|
Packit |
5c3484 |
} \
|
|
Packit |
5c3484 |
} while (0)
|
|
Packit |
5c3484 |
#endif
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
#define MPN_SQRLO_DIAGONAL(rp, up, n) \
|
|
Packit |
5c3484 |
do { \
|
|
Packit |
5c3484 |
mp_size_t nhalf; \
|
|
Packit |
5c3484 |
nhalf = (n) >> 1; \
|
|
Packit |
5c3484 |
MPN_SQR_DIAGONAL ((rp), (up), nhalf); \
|
|
Packit |
5c3484 |
if (((n) & 1) != 0) \
|
|
Packit |
5c3484 |
{ \
|
|
Packit |
5c3484 |
mp_limb_t op; \
|
|
Packit |
5c3484 |
op = (up)[nhalf]; \
|
|
Packit |
5c3484 |
(rp)[(n) - 1] = (op * op) & GMP_NUMB_MASK; \
|
|
Packit |
5c3484 |
} \
|
|
Packit |
5c3484 |
} while (0)
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
#if HAVE_NATIVE_mpn_addlsh1_n_ip1
|
|
Packit |
5c3484 |
#define MPN_SQRLO_DIAG_ADDLSH1(rp, tp, up, n) \
|
|
Packit |
5c3484 |
do { \
|
|
Packit |
5c3484 |
MPN_SQRLO_DIAGONAL((rp), (up), (n)); \
|
|
Packit |
5c3484 |
mpn_addlsh1_n_ip1 ((rp) + 1, (tp), (n) - 1); \
|
|
Packit |
5c3484 |
} while (0)
|
|
Packit |
5c3484 |
#else
|
|
Packit |
5c3484 |
#define MPN_SQRLO_DIAG_ADDLSH1(rp, tp, up, n) \
|
|
Packit |
5c3484 |
do { \
|
|
Packit |
5c3484 |
MPN_SQRLO_DIAGONAL((rp), (up), (n)); \
|
|
Packit |
5c3484 |
mpn_lshift ((tp), (tp), (n) - 1, 1); \
|
|
Packit |
5c3484 |
mpn_add_n ((rp) + 1, (rp) + 1, (tp), (n) - 1); \
|
|
Packit |
5c3484 |
} while (0)
|
|
Packit |
5c3484 |
#endif
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
/* Avoid zero allocations when SQRLO_LO_THRESHOLD is 0 (this code not used). */
|
|
Packit |
5c3484 |
#define SQRLO_BASECASE_ALLOC \
|
|
Packit |
5c3484 |
(SQRLO_DC_THRESHOLD_LIMIT < 2 ? 1 : SQRLO_DC_THRESHOLD_LIMIT - 1)
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
/* Default mpn_sqrlo_basecase using mpn_addmul_1. */
|
|
Packit |
5c3484 |
#ifndef SQRLO_SPECIAL_CASES
|
|
Packit |
5c3484 |
#define SQRLO_SPECIAL_CASES 2
|
|
Packit |
5c3484 |
#endif
|
|
Packit |
5c3484 |
void
|
|
Packit |
5c3484 |
mpn_sqrlo_basecase (mp_ptr rp, mp_srcptr up, mp_size_t n)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
mp_limb_t ul;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
ASSERT (n >= 1);
|
|
Packit |
5c3484 |
ASSERT (! MPN_OVERLAP_P (rp, n, up, n));
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
ul = up[0];
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
if (n <= SQRLO_SPECIAL_CASES)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
#if SQRLO_SPECIAL_CASES == 1
|
|
Packit |
5c3484 |
rp[0] = (ul * ul) & GMP_NUMB_MASK;
|
|
Packit |
5c3484 |
#else
|
|
Packit |
5c3484 |
if (n == 1)
|
|
Packit |
5c3484 |
rp[0] = (ul * ul) & GMP_NUMB_MASK;
|
|
Packit |
5c3484 |
else
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
mp_limb_t hi, lo, ul1;
|
|
Packit |
5c3484 |
umul_ppmm (hi, lo, ul, ul << GMP_NAIL_BITS);
|
|
Packit |
5c3484 |
rp[0] = lo >> GMP_NAIL_BITS;
|
|
Packit |
5c3484 |
ul1 = up[1];
|
|
Packit |
5c3484 |
#if SQRLO_SPECIAL_CASES == 2
|
|
Packit |
5c3484 |
rp[1] = (hi + ul * ul1 * 2) & GMP_NUMB_MASK;
|
|
Packit |
5c3484 |
#else
|
|
Packit |
5c3484 |
if (n == 2)
|
|
Packit |
5c3484 |
rp[1] = (hi + ul * ul1 * 2) & GMP_NUMB_MASK;
|
|
Packit |
5c3484 |
else
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
mp_limb_t hi1;
|
|
Packit |
5c3484 |
#if GMP_NAIL_BITS != 0
|
|
Packit |
5c3484 |
ul <<= 1;
|
|
Packit |
5c3484 |
#endif
|
|
Packit |
5c3484 |
umul_ppmm (hi1, lo, ul1 << GMP_NAIL_BITS, ul);
|
|
Packit |
5c3484 |
hi1 += ul * up[2];
|
|
Packit |
5c3484 |
#if GMP_NAIL_BITS == 0
|
|
Packit |
5c3484 |
hi1 = (hi1 << 1) | (lo >> (GMP_LIMB_BITS - 1));
|
|
Packit |
5c3484 |
add_ssaaaa(rp[2], rp[1], hi1, lo << 1, ul1 * ul1, hi);
|
|
Packit |
5c3484 |
#else
|
|
Packit |
5c3484 |
hi += lo >> GMP_NAIL_BITS;
|
|
Packit |
5c3484 |
rp[1] = hi & GMP_NUMB_MASK;
|
|
Packit |
5c3484 |
rp[2] = (hi1 + ul1 * ul1 + (hi >> GMP_NUMB_BITS)) & GMP_NUMB_MASK;
|
|
Packit |
5c3484 |
#endif
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
#endif
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
#endif
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
else
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
mp_limb_t tp[SQRLO_BASECASE_ALLOC];
|
|
Packit |
5c3484 |
mp_size_t i;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
/* must fit n-1 limbs in tp */
|
|
Packit |
5c3484 |
ASSERT (n <= SQRLO_DC_THRESHOLD_LIMIT);
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
--n;
|
|
Packit |
5c3484 |
#if SQRLO_SHORTCUT_MULTIPLICATIONS
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
mp_limb_t cy;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
cy = ul * up[n] + mpn_mul_1 (tp, up + 1, n - 1, ul);
|
|
Packit |
5c3484 |
for (i = 1; 2 * i + 1 < n; ++i)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
ul = up[i];
|
|
Packit |
5c3484 |
cy += ul * up[n - i] + mpn_addmul_1 (tp + 2 * i, up + i + 1, n - 2 * i - 1, ul);
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
tp [n-1] = (cy + ((n & 1)?up[i] * up[i + 1]:0)) & GMP_NUMB_MASK;
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
#else
|
|
Packit |
5c3484 |
mpn_mul_1 (tp, up + 1, n, ul);
|
|
Packit |
5c3484 |
for (i = 1; 2 * i < n; ++i)
|
|
Packit |
5c3484 |
mpn_addmul_1 (tp + 2 * i, up + i + 1, n - 2 * i, up[i]);
|
|
Packit |
5c3484 |
#endif
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
MPN_SQRLO_DIAG_ADDLSH1 (rp, tp, up, n + 1);
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
#undef SQRLO_SPECIAL_CASES
|