Tree - source-git/gmp - CentOS Git server

source-git / gmp

Blame mpn/generic/perfsqr.c

Blob History Raw

Packit	5c3484	`/* mpn_perfect_square_p(u,usize) -- Return non-zero if U is a perfect square,`
Packit	5c3484	`zero otherwise.`
Packit	5c3484
Packit	5c3484	`Copyright 1991, 1993, 1994, 1996, 1997, 2000-2002, 2005, 2012 Free Software`
Packit	5c3484	`Foundation, Inc.`
Packit	5c3484
Packit	5c3484	`This file is part of the GNU MP Library.`
Packit	5c3484
Packit	5c3484	`The GNU MP Library is free software; you can redistribute it and/or modify`
Packit	5c3484	`it under the terms of either:`
Packit	5c3484
Packit	5c3484	`* the GNU Lesser General Public License as published by the Free`
Packit	5c3484	`Software Foundation; either version 3 of the License, or (at your`
Packit	5c3484	`option) any later version.`
Packit	5c3484
Packit	5c3484	`or`
Packit	5c3484
Packit	5c3484	`* the GNU General Public License as published by the Free Software`
Packit	5c3484	`Foundation; either version 2 of the License, or (at your option) any`
Packit	5c3484	`later version.`
Packit	5c3484
Packit	5c3484	`or both in parallel, as here.`
Packit	5c3484
Packit	5c3484	`The GNU MP Library is distributed in the hope that it will be useful, but`
Packit	5c3484	`WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY`
Packit	5c3484	`or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License`
Packit	5c3484	`for more details.`
Packit	5c3484
Packit	5c3484	`You should have received copies of the GNU General Public License and the`
Packit	5c3484	`GNU Lesser General Public License along with the GNU MP Library. If not,`
Packit	5c3484	`see https://www.gnu.org/licenses/. */`
Packit	5c3484
Packit	5c3484	`#include <stdio.h> /* for NULL */`
Packit	5c3484	`#include "gmp.h"`
Packit	5c3484	`#include "gmp-impl.h"`
Packit	5c3484	`#include "longlong.h"`
Packit	5c3484
Packit	5c3484	`#include "perfsqr.h"`
Packit	5c3484
Packit	5c3484
Packit	5c3484	`/* change this to "#define TRACE(x) x" for diagnostics */`
Packit	5c3484	`#define TRACE(x)`
Packit	5c3484
Packit	5c3484
Packit	5c3484
Packit	5c3484	`/* PERFSQR_MOD_* detects non-squares using residue tests.`
Packit	5c3484
Packit	5c3484	`A macro PERFSQR_MOD_TEST is set up by gen-psqr.c in perfsqr.h. It takes`
Packit	5c3484	`{up,usize} modulo a selected modulus to get a remainder r. For 32-bit or`
Packit	5c3484	`64-bit limbs this modulus will be 2^24-1 or 2^48-1 using PERFSQR_MOD_34,`
Packit	5c3484	`or for other limb or nail sizes a PERFSQR_PP is chosen and PERFSQR_MOD_PP`
Packit	5c3484	`used. PERFSQR_PP_NORM and PERFSQR_PP_INVERTED are pre-calculated in this`
Packit	5c3484	`case too.`
Packit	5c3484
Packit	5c3484	`PERFSQR_MOD_TEST then makes various calls to PERFSQR_MOD_1 or`
Packit	5c3484	`PERFSQR_MOD_2 with divisors d which are factors of the modulus, and table`
Packit	5c3484	`data indicating residues and non-residues modulo those divisors. The`
Packit	5c3484	`table data is in 1 or 2 limbs worth of bits respectively, per the size of`
Packit	5c3484	`each d.`
Packit	5c3484
Packit	5c3484	`A "modexact" style remainder is taken to reduce r modulo d.`
Packit	5c3484	`PERFSQR_MOD_IDX implements this, producing an index "idx" for use with`
Packit	5c3484	`the table data. Notice there's just one multiplication by a constant`
Packit	5c3484	`"inv", for each d.`
Packit	5c3484
Packit	5c3484	`The modexact doesn't produce a true r%d remainder, instead idx satisfies`
Packit	5c3484	`"-(idx<<PERFSQR_MOD_BITS) == r mod d". Because d is odd, this factor`
Packit	5c3484	`-2^PERFSQR_MOD_BITS is a one-to-one mapping between r and idx, and is`
Packit	5c3484	`accounted for by having the table data suitably permuted.`
Packit	5c3484
Packit	5c3484	`The remainder r fits within PERFSQR_MOD_BITS which is less than a limb.`
Packit	5c3484	`In fact the GMP_LIMB_BITS - PERFSQR_MOD_BITS spare bits are enough to fit`
Packit	5c3484	`each divisor d meaning the modexact multiply can take place entirely`
Packit	5c3484	`within one limb, giving the compiler the chance to optimize it, in a way`
Packit	5c3484	`that say umul_ppmm would not give.`
Packit	5c3484
Packit	5c3484	`There's no need for the divisors d to be prime, in fact gen-psqr.c makes`
Packit	5c3484	`a deliberate effort to combine factors so as to reduce the number of`
Packit	5c3484	`separate tests done on r. But such combining is limited to d <=`
Packit	5c3484	`2*GMP_LIMB_BITS so that the table data fits in at most 2 limbs.`
Packit	5c3484
Packit	5c3484	`Alternatives:`
Packit	5c3484
Packit	5c3484	`It'd be possible to use bigger divisors d, and more than 2 limbs of table`
Packit	5c3484	`data, but this doesn't look like it would be of much help to the prime`
Packit	5c3484	`factors in the usual moduli 2^24-1 or 2^48-1.`
Packit	5c3484
Packit	5c3484	`The moduli 2^24-1 or 2^48-1 are nothing particularly special, they're`
Packit	5c3484	`just easy to calculate (see mpn_mod_34lsub1) and have a nice set of prime`
Packit	5c3484	`factors. 2^32-1 and 2^64-1 would be equally easy to calculate, but have`
Packit	5c3484	`fewer prime factors.`
Packit	5c3484
Packit	5c3484	`The nails case usually ends up using mpn_mod_1, which is a lot slower`
Packit	5c3484	`than mpn_mod_34lsub1. Perhaps other such special moduli could be found`
Packit	5c3484	`for the nails case. Two-term things like 2^30-2^15-1 might be`
Packit	5c3484	`candidates. Or at worst some on-the-fly de-nailing would allow the plain`
Packit	5c3484	`2^24-1 to be used. Currently nails are too preliminary to be worried`
Packit	5c3484	`about.`
Packit	5c3484
Packit	5c3484	`*/`
Packit	5c3484
Packit	5c3484	`#define PERFSQR_MOD_MASK ((CNST_LIMB(1) << PERFSQR_MOD_BITS) - 1) /* mask with only the low PERFSQR_MOD_BITS bits set */`
Packit	5c3484
Packit	5c3484	`#define MOD34_BITS (GMP_NUMB_BITS / 4 * 3) /* three quarters of GMP_NUMB_BITS; the division by 4 is done first */`
Packit	5c3484	`#define MOD34_MASK ((CNST_LIMB(1) << MOD34_BITS) - 1) /* mask with only the low MOD34_BITS bits set */`
Packit	5c3484
Packit	5c3484	`#define PERFSQR_MOD_34(r, up, usize) /* set r from {up,usize} via mpn_mod_34lsub1, then fold it once at MOD34_BITS */ \`
Packit	5c3484	`do { \`
Packit	5c3484	`(r) = mpn_mod_34lsub1 (up, usize); /* raw result, folded on the next line */ \`
Packit	5c3484	`(r) = ((r) & MOD34_MASK) + ((r) >> MOD34_BITS); /* add the bits above MOD34_BITS onto the low MOD34_BITS bits */ \`
Packit	5c3484	`} while (0)`
Packit	5c3484
Packit	5c3484	`/* PERFSQR_MOD_PP sets r to the remainder of {up,usize} modulo PERFSQR_PP,`
Packit	5c3484	`choosing between mpn_preinv_mod_1 and mpn_mod_1 according to usize.`
Packit	5c3484
Packit	5c3484	`FIXME: The %= here isn't good, and might destroy any savings from keeping`
Packit	5c3484	`the PERFSQR_MOD_IDX stuff within a limb (rather than needing umul_ppmm).`
Packit	5c3484	`Maybe a new sort of mpn_preinv_mod_1 could accept an unnormalized divisor`
Packit	5c3484	`and a shift count, like mpn_preinv_divrem_1. But mod_34lsub1 is our`
Packit	5c3484	`normal case, so let's not worry too much about mod_1. */`
Packit	5c3484	`#define PERFSQR_MOD_PP(r, up, usize) \`
Packit	5c3484	`do { \`
Packit	5c3484	`if (BELOW_THRESHOLD (usize, PREINV_MOD_1_TO_MOD_1_THRESHOLD)) /* sizes below the threshold take the pre-inverted path */ \`
Packit	5c3484	`{ \`
Packit	5c3484	`(r) = mpn_preinv_mod_1 (up, usize, PERFSQR_PP_NORM, \`
Packit	5c3484	`PERFSQR_PP_INVERTED); \`
Packit	5c3484	`(r) %= PERFSQR_PP; /* the call above divided by PERFSQR_PP_NORM rather than PERFSQR_PP, hence this extra reduction */ \`
Packit	5c3484	`} \`
Packit	5c3484	`else \`
Packit	5c3484	`{ \`
Packit	5c3484	`(r) = mpn_mod_1 (up, usize, PERFSQR_PP); /* mpn_mod_1 is given PERFSQR_PP itself, so no further reduction */ \`
Packit	5c3484	`} \`
Packit	5c3484	`} while (0)`
Packit	5c3484
Packit	5c3484	`#define PERFSQR_MOD_IDX(idx, r, d, inv) /* idx = (q*d) >> PERFSQR_MOD_BITS, where q = r*inv mod 2^PERFSQR_MOD_BITS */ \`
Packit	5c3484	`do { \`
Packit	5c3484	`mp_limb_t q; \`
Packit	5c3484	`ASSERT ((r) <= PERFSQR_MOD_MASK); /* r must fit in PERFSQR_MOD_BITS bits */ \`
Packit	5c3484	`ASSERT ((((inv) * (d)) & PERFSQR_MOD_MASK) == 1); /* inv is the inverse of d modulo 2^PERFSQR_MOD_BITS */ \`
Packit	5c3484	`ASSERT (MP_LIMB_T_MAX / (d) >= PERFSQR_MOD_MASK); /* so q*d below cannot overflow a limb */ \`
Packit	5c3484	`\`
Packit	5c3484	`q = ((r) * (inv)) & PERFSQR_MOD_MASK; \`
Packit	5c3484	`ASSERT ((r) == ((q * (d)) & PERFSQR_MOD_MASK)); /* q*d agrees with r in the low PERFSQR_MOD_BITS bits; r is parenthesized like every other macro argument */ \`
Packit	5c3484	`(idx) = (q * (d)) >> PERFSQR_MOD_BITS; /* the bits above those form the table index */ \`
Packit	5c3484	`} while (0)`
Packit	5c3484
Packit	5c3484	`#define PERFSQR_MOD_1(r, d, inv, mask) /* test r against the one-limb table mask for divisor d; executes "return 0" in the enclosing function when the selected bit is clear */ \`
Packit	5c3484	`do { \`
Packit	5c3484	`unsigned idx; \`
Packit	5c3484	`ASSERT ((d) <= GMP_LIMB_BITS); /* idx comes out below d, so the shift of mask below stays within one limb */ \`
Packit	5c3484	`PERFSQR_MOD_IDX(idx, r, d, inv); \`
Packit	5c3484	`TRACE (printf (" PERFSQR_MOD_1 d=%u r=%lu idx=%u\n", \`
Packit	5c3484	`(unsigned) (d), (unsigned long) ((r) % (d)), idx)); /* casts make the arguments match %u and %lu whatever the types of d and r */ \`
Packit	5c3484	`if ((((mask) >> idx) & 1) == 0) /* bit idx of the table is clear: r is a non-residue for this d */ \`
Packit	5c3484	`{ \`
Packit	5c3484	`TRACE (printf (" non-square\n")); \`
Packit	5c3484	`return 0; /* leaves the function that used the macro */ \`
Packit	5c3484	`} \`
Packit	5c3484	`} while (0)`
Packit	5c3484
Packit	5c3484	`/* The expression "(int) idx - GMP_LIMB_BITS < 0" lets the compiler use the`
Packit	5c3484	`sign bit from "idx-GMP_LIMB_BITS", which might help avoid a branch. */`
Packit	5c3484	`#define PERFSQR_MOD_2(r, d, inv, mhi, mlo) \`
Packit	5c3484	`do { \`
Packit	5c3484	`mp_limb_t m; \`
Packit	5c3484	`unsigned idx; \`
Packit	5c3484	`ASSERT ((d) <= 2*GMP_LIMB_BITS); \`
Packit	5c3484	`\`
Packit	5c3484	`PERFSQR_MOD_IDX (idx, r, d, inv); \`
Packit	5c3484	`TRACE (printf (" PERFSQR_MOD_2 d=%u r=%lu idx=%u\n", \`
Packit	5c3484	`d, r%d, idx)); \`
Packit	5c3484	`m = ((int) idx - GMP_LIMB_BITS < 0 ? (mlo) : (mhi)); \`
Packit	5c3484	`idx %= GMP_LIMB_BITS; \`
Packit	5c3484	`if (((m >> idx) & 1) == 0) \`
Packit	5c3484	`{ \`
Packit	5c3484	`TRACE (printf (" non-square\n")); \`
Packit	5c3484	`return 0; \`
Packit	5c3484	`} \`
Packit	5c3484	`} while (0)`
Packit	5c3484
Packit	5c3484
Packit	5c3484	`int`
Packit	5c3484	`mpn_perfect_square_p (mp_srcptr up, mp_size_t usize)`
Packit	5c3484	`{ /* Returns non-zero if {up,usize} is a perfect square and zero otherwise, applying three tests in turn. */`
Packit	5c3484	`ASSERT (usize >= 1); /* up[0] is read unconditionally below */`
Packit	5c3484
Packit	5c3484	`TRACE (gmp_printf ("mpn_perfect_square_p %Nd\n", up, usize));`
Packit	5c3484
Packit	5c3484	`/* The first test excludes 212/256 (82.8%) of the perfect square candidates`
Packit	5c3484	`in O(1) time. */`
Packit	5c3484	`{`
Packit	5c3484	`unsigned idx = up[0] % 0x100; /* low 8 bits of up[0] */`
Packit	5c3484	`if (((sq_res_0x100[idx / GMP_LIMB_BITS]`
Packit	5c3484	`>> (idx % GMP_LIMB_BITS)) & 1) == 0) /* sq_res_0x100 is read as a bit array; a clear bit at position idx rejects the input */`
Packit	5c3484	`return 0;`
Packit	5c3484	`}`
Packit	5c3484
Packit	5c3484	`#if 0`
Packit	5c3484	`/* Check that we have even multiplicity of 2, and then check that the rest is`
Packit	5c3484	`a possible perfect square. Leave disabled until we can determine this`
Packit	5c3484	`really is an improvement. If it is, it could completely replace the`
Packit	5c3484	`simple probe above, since this should throw out more non-squares, but at`
Packit	5c3484	`the expense of somewhat more cycles. */`
Packit	5c3484	`{`
Packit	5c3484	`mp_limb_t lo;`
Packit	5c3484	`int cnt;`
Packit	5c3484	`lo = up[0];`
Packit	5c3484	`while (lo == 0)`
Packit	5c3484	`up++, lo = up[0], usize--;`
Packit	5c3484	`count_trailing_zeros (cnt, lo);`
Packit	5c3484	`if ((cnt & 1) != 0)`
Packit	5c3484	`return 0; /* return if not even multiplicity of 2 */`
Packit	5c3484	`lo >>= cnt; /* shift down to align lowest non-zero bit */`
Packit	5c3484	`lo >>= 1; /* shift away lowest non-zero bit */`
Packit	5c3484	`if ((lo & 3) != 0)`
Packit	5c3484	`return 0;`
Packit	5c3484	`}`
Packit	5c3484	`#endif`
Packit	5c3484
Packit	5c3484
Packit	5c3484	`/* The second test uses mpn_mod_34lsub1 or mpn_mod_1 to detect non-squares`
Packit	5c3484	`according to their residues modulo small primes (or powers of`
Packit	5c3484	`primes). See perfsqr.h. The PERFSQR_MOD_1 and PERFSQR_MOD_2 macros contain`
Packit	5c3484	`a "return 0", so a non-residue found by this test leaves the function`
Packit	5c3484	`right here. */`
Packit	5c3484	`PERFSQR_MOD_TEST (up, usize);`
Packit	5c3484
Packit	5c3484
Packit	5c3484	`/* For the third and last test, we finally compute the square root,`
Packit	5c3484	`to make sure we've really got a perfect square. */`
Packit	5c3484	`{`
Packit	5c3484	`mp_ptr root_ptr;`
Packit	5c3484	`int res;`
Packit	5c3484	`TMP_DECL;`
Packit	5c3484
Packit	5c3484	`TMP_MARK;`
Packit	5c3484	`root_ptr = TMP_ALLOC_LIMBS ((usize + 1) / 2); /* (usize + 1) / 2 limbs of temporary space for the root */`
Packit	5c3484
Packit	5c3484	`/* Iff mpn_sqrtrem returns zero, the square is perfect. */`
Packit	5c3484	`res = ! mpn_sqrtrem (root_ptr, NULL, up, usize); /* NULL is passed as the remainder pointer; only the return value is used */`
Packit	5c3484	`TMP_FREE;`
Packit	5c3484
Packit	5c3484	`return res;`
Packit	5c3484	`}`
Packit	5c3484	`}`

source-git / gmp

Source Code

Blame mpn/generic/perfsqr.c