|
Packit |
5c3484 |
/* mpn_perfect_square_p(u,usize) -- Return non-zero if U is a perfect square,
|
|
Packit |
5c3484 |
zero otherwise.
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
Copyright 1991, 1993, 1994, 1996, 1997, 2000-2002, 2005, 2012 Free Software
|
|
Packit |
5c3484 |
Foundation, Inc.
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
This file is part of the GNU MP Library.
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
The GNU MP Library is free software; you can redistribute it and/or modify
|
|
Packit |
5c3484 |
it under the terms of either:
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
* the GNU Lesser General Public License as published by the Free
|
|
Packit |
5c3484 |
Software Foundation; either version 3 of the License, or (at your
|
|
Packit |
5c3484 |
option) any later version.
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
or
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
* the GNU General Public License as published by the Free Software
|
|
Packit |
5c3484 |
Foundation; either version 2 of the License, or (at your option) any
|
|
Packit |
5c3484 |
later version.
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
or both in parallel, as here.
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
The GNU MP Library is distributed in the hope that it will be useful, but
|
|
Packit |
5c3484 |
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
|
Packit |
5c3484 |
or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
Packit |
5c3484 |
for more details.
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
You should have received copies of the GNU General Public License and the
|
|
Packit |
5c3484 |
GNU Lesser General Public License along with the GNU MP Library. If not,
|
|
Packit |
5c3484 |
see https://www.gnu.org/licenses/. */
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
#include <stdio.h> /* for NULL */
|
|
Packit |
5c3484 |
#include "gmp.h"
|
|
Packit |
5c3484 |
#include "gmp-impl.h"
|
|
Packit |
5c3484 |
#include "longlong.h"
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
#include "perfsqr.h"
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
/* change this to "#define TRACE(x) x" for diagnostics */
|
|
Packit |
5c3484 |
#define TRACE(x)
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
/* PERFSQR_MOD_* detects non-squares using residue tests.
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
A macro PERFSQR_MOD_TEST is setup by gen-psqr.c in perfsqr.h. It takes
|
|
Packit |
5c3484 |
{up,usize} modulo a selected modulus to get a remainder r. For 32-bit or
|
|
Packit |
5c3484 |
64-bit limbs this modulus will be 2^24-1 or 2^48-1 using PERFSQR_MOD_34,
|
|
Packit |
5c3484 |
or for other limb or nail sizes a PERFSQR_PP is chosen and PERFSQR_MOD_PP
|
|
Packit |
5c3484 |
used. PERFSQR_PP_NORM and PERFSQR_PP_INVERTED are pre-calculated in this
|
|
Packit |
5c3484 |
case too.
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
PERFSQR_MOD_TEST then makes various calls to PERFSQR_MOD_1 or
|
|
Packit |
5c3484 |
PERFSQR_MOD_2 with divisors d which are factors of the modulus, and table
|
|
Packit |
5c3484 |
data indicating residues and non-residues modulo those divisors. The
|
|
Packit |
5c3484 |
table data is in 1 or 2 limbs worth of bits respectively, per the size of
|
|
Packit |
5c3484 |
each d.
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
A "modexact" style remainder is taken to reduce r modulo d.
|
|
Packit |
5c3484 |
PERFSQR_MOD_IDX implements this, producing an index "idx" for use with
|
|
Packit |
5c3484 |
the table data. Notice there's just one multiplication by a constant
|
|
Packit |
5c3484 |
"inv", for each d.
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
The modexact doesn't produce a true r%d remainder, instead idx satisfies
|
|
Packit |
5c3484 |
"-(idx<
|
|
Packit |
5c3484 |
-2^PERFSQR_MOD_BITS is a one-to-one mapping between r and idx, and is
|
|
Packit |
5c3484 |
accounted for by having the table data suitably permuted.
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
The remainder r fits within PERFSQR_MOD_BITS which is less than a limb.
|
|
Packit |
5c3484 |
In fact the GMP_LIMB_BITS - PERFSQR_MOD_BITS spare bits are enough to fit
|
|
Packit |
5c3484 |
each divisor d meaning the modexact multiply can take place entirely
|
|
Packit |
5c3484 |
within one limb, giving the compiler the chance to optimize it, in a way
|
|
Packit |
5c3484 |
that say umul_ppmm would not give.
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
There's no need for the divisors d to be prime, in fact gen-psqr.c makes
|
|
Packit |
5c3484 |
a deliberate effort to combine factors so as to reduce the number of
|
|
Packit |
5c3484 |
separate tests done on r. But such combining is limited to d <=
|
|
Packit |
5c3484 |
2*GMP_LIMB_BITS so that the table data fits in at most 2 limbs.
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
Alternatives:
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
It'd be possible to use bigger divisors d, and more than 2 limbs of table
|
|
Packit |
5c3484 |
data, but this doesn't look like it would be of much help to the prime
|
|
Packit |
5c3484 |
factors in the usual moduli 2^24-1 or 2^48-1.
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
The moduli 2^24-1 or 2^48-1 are nothing particularly special, they're
|
|
Packit |
5c3484 |
just easy to calculate (see mpn_mod_34lsub1) and have a nice set of prime
|
|
Packit |
5c3484 |
factors. 2^32-1 and 2^64-1 would be equally easy to calculate, but have
|
|
Packit |
5c3484 |
fewer prime factors.
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
The nails case usually ends up using mpn_mod_1, which is a lot slower
|
|
Packit |
5c3484 |
than mpn_mod_34lsub1. Perhaps other such special moduli could be found
|
|
Packit |
5c3484 |
for the nails case. Two-term things like 2^30-2^15-1 might be
|
|
Packit |
5c3484 |
candidates. Or at worst some on-the-fly de-nailing would allow the plain
|
|
Packit |
5c3484 |
2^24-1 to be used. Currently nails are too preliminary to be worried
|
|
Packit |
5c3484 |
about.
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
*/
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
#define PERFSQR_MOD_MASK ((CNST_LIMB(1) << PERFSQR_MOD_BITS) - 1)
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
#define MOD34_BITS (GMP_NUMB_BITS / 4 * 3)
|
|
Packit |
5c3484 |
#define MOD34_MASK ((CNST_LIMB(1) << MOD34_BITS) - 1)
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
#define PERFSQR_MOD_34(r, up, usize) \
|
|
Packit |
5c3484 |
do { \
|
|
Packit |
5c3484 |
(r) = mpn_mod_34lsub1 (up, usize); \
|
|
Packit |
5c3484 |
(r) = ((r) & MOD34_MASK) + ((r) >> MOD34_BITS); \
|
|
Packit |
5c3484 |
} while (0)
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
/* FIXME: The %= here isn't good, and might destroy any savings from keeping
|
|
Packit |
5c3484 |
the PERFSQR_MOD_IDX stuff within a limb (rather than needing umul_ppmm).
|
|
Packit |
5c3484 |
Maybe a new sort of mpn_preinv_mod_1 could accept an unnormalized divisor
|
|
Packit |
5c3484 |
and a shift count, like mpn_preinv_divrem_1. But mod_34lsub1 is our
|
|
Packit |
5c3484 |
normal case, so lets not worry too much about mod_1. */
|
|
Packit |
5c3484 |
#define PERFSQR_MOD_PP(r, up, usize) \
|
|
Packit |
5c3484 |
do { \
|
|
Packit |
5c3484 |
if (BELOW_THRESHOLD (usize, PREINV_MOD_1_TO_MOD_1_THRESHOLD)) \
|
|
Packit |
5c3484 |
{ \
|
|
Packit |
5c3484 |
(r) = mpn_preinv_mod_1 (up, usize, PERFSQR_PP_NORM, \
|
|
Packit |
5c3484 |
PERFSQR_PP_INVERTED); \
|
|
Packit |
5c3484 |
(r) %= PERFSQR_PP; \
|
|
Packit |
5c3484 |
} \
|
|
Packit |
5c3484 |
else \
|
|
Packit |
5c3484 |
{ \
|
|
Packit |
5c3484 |
(r) = mpn_mod_1 (up, usize, PERFSQR_PP); \
|
|
Packit |
5c3484 |
} \
|
|
Packit |
5c3484 |
} while (0)
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
#define PERFSQR_MOD_IDX(idx, r, d, inv) \
|
|
Packit |
5c3484 |
do { \
|
|
Packit |
5c3484 |
mp_limb_t q; \
|
|
Packit |
5c3484 |
ASSERT ((r) <= PERFSQR_MOD_MASK); \
|
|
Packit |
5c3484 |
ASSERT ((((inv) * (d)) & PERFSQR_MOD_MASK) == 1); \
|
|
Packit |
5c3484 |
ASSERT (MP_LIMB_T_MAX / (d) >= PERFSQR_MOD_MASK); \
|
|
Packit |
5c3484 |
\
|
|
Packit |
5c3484 |
q = ((r) * (inv)) & PERFSQR_MOD_MASK; \
|
|
Packit |
5c3484 |
ASSERT (r == ((q * (d)) & PERFSQR_MOD_MASK)); \
|
|
Packit |
5c3484 |
(idx) = (q * (d)) >> PERFSQR_MOD_BITS; \
|
|
Packit |
5c3484 |
} while (0)
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
#define PERFSQR_MOD_1(r, d, inv, mask) \
|
|
Packit |
5c3484 |
do { \
|
|
Packit |
5c3484 |
unsigned idx; \
|
|
Packit |
5c3484 |
ASSERT ((d) <= GMP_LIMB_BITS); \
|
|
Packit |
5c3484 |
PERFSQR_MOD_IDX(idx, r, d, inv); \
|
|
Packit |
5c3484 |
TRACE (printf (" PERFSQR_MOD_1 d=%u r=%lu idx=%u\n", \
|
|
Packit |
5c3484 |
d, r%d, idx)); \
|
|
Packit |
5c3484 |
if ((((mask) >> idx) & 1) == 0) \
|
|
Packit |
5c3484 |
{ \
|
|
Packit |
5c3484 |
TRACE (printf (" non-square\n")); \
|
|
Packit |
5c3484 |
return 0; \
|
|
Packit |
5c3484 |
} \
|
|
Packit |
5c3484 |
} while (0)
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
/* The expression "(int) idx - GMP_LIMB_BITS < 0" lets the compiler use the
|
|
Packit |
5c3484 |
sign bit from "idx-GMP_LIMB_BITS", which might help avoid a branch. */
|
|
Packit |
5c3484 |
#define PERFSQR_MOD_2(r, d, inv, mhi, mlo) \
|
|
Packit |
5c3484 |
do { \
|
|
Packit |
5c3484 |
mp_limb_t m; \
|
|
Packit |
5c3484 |
unsigned idx; \
|
|
Packit |
5c3484 |
ASSERT ((d) <= 2*GMP_LIMB_BITS); \
|
|
Packit |
5c3484 |
\
|
|
Packit |
5c3484 |
PERFSQR_MOD_IDX (idx, r, d, inv); \
|
|
Packit |
5c3484 |
TRACE (printf (" PERFSQR_MOD_2 d=%u r=%lu idx=%u\n", \
|
|
Packit |
5c3484 |
d, r%d, idx)); \
|
|
Packit |
5c3484 |
m = ((int) idx - GMP_LIMB_BITS < 0 ? (mlo) : (mhi)); \
|
|
Packit |
5c3484 |
idx %= GMP_LIMB_BITS; \
|
|
Packit |
5c3484 |
if (((m >> idx) & 1) == 0) \
|
|
Packit |
5c3484 |
{ \
|
|
Packit |
5c3484 |
TRACE (printf (" non-square\n")); \
|
|
Packit |
5c3484 |
return 0; \
|
|
Packit |
5c3484 |
} \
|
|
Packit |
5c3484 |
} while (0)
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
int
|
|
Packit |
5c3484 |
mpn_perfect_square_p (mp_srcptr up, mp_size_t usize)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
ASSERT (usize >= 1);
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
TRACE (gmp_printf ("mpn_perfect_square_p %Nd\n", up, usize));
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
/* The first test excludes 212/256 (82.8%) of the perfect square candidates
|
|
Packit |
5c3484 |
in O(1) time. */
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
unsigned idx = up[0] % 0x100;
|
|
Packit |
5c3484 |
if (((sq_res_0x100[idx / GMP_LIMB_BITS]
|
|
Packit |
5c3484 |
>> (idx % GMP_LIMB_BITS)) & 1) == 0)
|
|
Packit |
5c3484 |
return 0;
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
#if 0
|
|
Packit |
5c3484 |
/* Check that we have even multiplicity of 2, and then check that the rest is
|
|
Packit |
5c3484 |
a possible perfect square. Leave disabled until we can determine this
|
|
Packit |
5c3484 |
really is an improvement. It it is, it could completely replace the
|
|
Packit |
5c3484 |
simple probe above, since this should throw out more non-squares, but at
|
|
Packit |
5c3484 |
the expense of somewhat more cycles. */
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
mp_limb_t lo;
|
|
Packit |
5c3484 |
int cnt;
|
|
Packit |
5c3484 |
lo = up[0];
|
|
Packit |
5c3484 |
while (lo == 0)
|
|
Packit |
5c3484 |
up++, lo = up[0], usize--;
|
|
Packit |
5c3484 |
count_trailing_zeros (cnt, lo);
|
|
Packit |
5c3484 |
if ((cnt & 1) != 0)
|
|
Packit |
5c3484 |
return 0; /* return of not even multiplicity of 2 */
|
|
Packit |
5c3484 |
lo >>= cnt; /* shift down to align lowest non-zero bit */
|
|
Packit |
5c3484 |
lo >>= 1; /* shift away lowest non-zero bit */
|
|
Packit |
5c3484 |
if ((lo & 3) != 0)
|
|
Packit |
5c3484 |
return 0;
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
#endif
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
/* The second test uses mpn_mod_34lsub1 or mpn_mod_1 to detect non-squares
|
|
Packit |
5c3484 |
according to their residues modulo small primes (or powers of
|
|
Packit |
5c3484 |
primes). See perfsqr.h. */
|
|
Packit |
5c3484 |
PERFSQR_MOD_TEST (up, usize);
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
/* For the third and last test, we finally compute the square root,
|
|
Packit |
5c3484 |
to make sure we've really got a perfect square. */
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
mp_ptr root_ptr;
|
|
Packit |
5c3484 |
int res;
|
|
Packit |
5c3484 |
TMP_DECL;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
TMP_MARK;
|
|
Packit |
5c3484 |
root_ptr = TMP_ALLOC_LIMBS ((usize + 1) / 2);
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
/* Iff mpn_sqrtrem returns zero, the square is perfect. */
|
|
Packit |
5c3484 |
res = ! mpn_sqrtrem (root_ptr, NULL, up, usize);
|
|
Packit |
5c3484 |
TMP_FREE;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
return res;
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
}
|