|
Packit |
5c3484 |
/* mpn/gcd.c: mpn_gcd for gcd of two odd integers.
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
Copyright 1991, 1993-1998, 2000-2005, 2008, 2010, 2012 Free Software
|
|
Packit |
5c3484 |
Foundation, Inc.
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
This file is part of the GNU MP Library.
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
The GNU MP Library is free software; you can redistribute it and/or modify
|
|
Packit |
5c3484 |
it under the terms of either:
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
* the GNU Lesser General Public License as published by the Free
|
|
Packit |
5c3484 |
Software Foundation; either version 3 of the License, or (at your
|
|
Packit |
5c3484 |
option) any later version.
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
or
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
* the GNU General Public License as published by the Free Software
|
|
Packit |
5c3484 |
Foundation; either version 2 of the License, or (at your option) any
|
|
Packit |
5c3484 |
later version.
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
or both in parallel, as here.
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
The GNU MP Library is distributed in the hope that it will be useful, but
|
|
Packit |
5c3484 |
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
|
Packit |
5c3484 |
or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
Packit |
5c3484 |
for more details.
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
You should have received copies of the GNU General Public License and the
|
|
Packit |
5c3484 |
GNU Lesser General Public License along with the GNU MP Library. If not,
|
|
Packit |
5c3484 |
see https://www.gnu.org/licenses/. */
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
#include "gmp.h"
|
|
Packit |
5c3484 |
#include "gmp-impl.h"
|
|
Packit |
5c3484 |
#include "longlong.h"
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
/* Uses the HGCD operation described in
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
N. Möller, On Schönhage's algorithm and subquadratic integer gcd
|
|
Packit |
5c3484 |
computation, Math. Comp. 77 (2008), 589-607.
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
to reduce inputs until they are of size below GCD_DC_THRESHOLD, and
|
|
Packit |
5c3484 |
then uses Lehmer's algorithm.
|
|
Packit |
5c3484 |
*/
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
/* Some reasonable choices are n / 2 (same as in hgcd), and p = (n +
|
|
Packit |
5c3484 |
* 2)/3, which gives a balanced multiplication in
|
|
Packit |
5c3484 |
* mpn_hgcd_matrix_adjust. However, p = 2 n/3 gives slightly better
|
|
Packit |
5c3484 |
* performance. The matrix-vector multiplication is then
|
|
Packit |
5c3484 |
* 4:1-unbalanced, with matrix elements of size n/6, and vector
|
|
Packit |
5c3484 |
* elements of size p = 2n/3. */
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
/* From analysis of the theoretical running time, it appears that when
|
|
Packit |
5c3484 |
* multiplication takes time O(n^alpha), p should be chosen so that
|
|
Packit |
5c3484 |
* the ratio of the time for the mpn_hgcd call, and the time for the
|
|
Packit |
5c3484 |
* multiplication in mpn_hgcd_matrix_adjust, is roughly 1/(alpha -
|
|
Packit |
5c3484 |
* 1). */
|
|
Packit |
5c3484 |
#ifdef TUNE_GCD_P
|
|
Packit |
5c3484 |
#define P_TABLE_SIZE 10000
|
|
Packit |
5c3484 |
mp_size_t p_table[P_TABLE_SIZE];
|
|
Packit |
5c3484 |
#define CHOOSE_P(n) ( (n) < P_TABLE_SIZE ? p_table[n] : 2*(n)/3)
|
|
Packit |
5c3484 |
#else
|
|
Packit |
5c3484 |
#define CHOOSE_P(n) (2*(n) / 3)
|
|
Packit |
5c3484 |
#endif
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
struct gcd_ctx
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
mp_ptr gp;
|
|
Packit |
5c3484 |
mp_size_t gn;
|
|
Packit |
5c3484 |
};
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
static void
|
|
Packit |
5c3484 |
gcd_hook (void *p, mp_srcptr gp, mp_size_t gn,
|
|
Packit |
5c3484 |
mp_srcptr qp, mp_size_t qn, int d)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
struct gcd_ctx *ctx = (struct gcd_ctx *) p;
|
|
Packit |
5c3484 |
MPN_COPY (ctx->gp, gp, gn);
|
|
Packit |
5c3484 |
ctx->gn = gn;
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
#if GMP_NAIL_BITS > 0
|
|
Packit |
5c3484 |
/* Nail supports should be easy, replacing the sub_ddmmss with nails
|
|
Packit |
5c3484 |
* logic. */
|
|
Packit |
5c3484 |
#error Nails not supported.
|
|
Packit |
5c3484 |
#endif
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
/* Use binary algorithm to compute G <-- GCD (U, V) for usize, vsize == 2.
|
|
Packit |
5c3484 |
Both U and V must be odd. */
|
|
Packit |
5c3484 |
static inline mp_size_t
|
|
Packit |
5c3484 |
gcd_2 (mp_ptr gp, mp_srcptr up, mp_srcptr vp)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
mp_limb_t u0, u1, v0, v1;
|
|
Packit |
5c3484 |
mp_size_t gn;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
u0 = up[0];
|
|
Packit |
5c3484 |
u1 = up[1];
|
|
Packit |
5c3484 |
v0 = vp[0];
|
|
Packit |
5c3484 |
v1 = vp[1];
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
ASSERT (u0 & 1);
|
|
Packit |
5c3484 |
ASSERT (v0 & 1);
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
/* Check for u0 != v0 needed to ensure that argument to
|
|
Packit |
5c3484 |
* count_trailing_zeros is non-zero. */
|
|
Packit |
5c3484 |
while (u1 != v1 && u0 != v0)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
unsigned long int r;
|
|
Packit |
5c3484 |
if (u1 > v1)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
sub_ddmmss (u1, u0, u1, u0, v1, v0);
|
|
Packit |
5c3484 |
count_trailing_zeros (r, u0);
|
|
Packit |
5c3484 |
u0 = ((u1 << (GMP_NUMB_BITS - r)) & GMP_NUMB_MASK) | (u0 >> r);
|
|
Packit |
5c3484 |
u1 >>= r;
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
else /* u1 < v1. */
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
sub_ddmmss (v1, v0, v1, v0, u1, u0);
|
|
Packit |
5c3484 |
count_trailing_zeros (r, v0);
|
|
Packit |
5c3484 |
v0 = ((v1 << (GMP_NUMB_BITS - r)) & GMP_NUMB_MASK) | (v0 >> r);
|
|
Packit |
5c3484 |
v1 >>= r;
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
gp[0] = u0, gp[1] = u1, gn = 1 + (u1 != 0);
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
/* If U == V == GCD, done. Otherwise, compute GCD (V, |U - V|). */
|
|
Packit |
5c3484 |
if (u1 == v1 && u0 == v0)
|
|
Packit |
5c3484 |
return gn;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
v0 = (u0 == v0) ? ((u1 > v1) ? u1-v1 : v1-u1) : ((u0 > v0) ? u0-v0 : v0-u0);
|
|
Packit |
5c3484 |
gp[0] = mpn_gcd_1 (gp, gn, v0);
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
return 1;
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
mp_size_t
|
|
Packit |
5c3484 |
mpn_gcd (mp_ptr gp, mp_ptr up, mp_size_t usize, mp_ptr vp, mp_size_t n)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
mp_size_t talloc;
|
|
Packit |
5c3484 |
mp_size_t scratch;
|
|
Packit |
5c3484 |
mp_size_t matrix_scratch;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
struct gcd_ctx ctx;
|
|
Packit |
5c3484 |
mp_ptr tp;
|
|
Packit |
5c3484 |
TMP_DECL;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
ASSERT (usize >= n);
|
|
Packit |
5c3484 |
ASSERT (n > 0);
|
|
Packit |
5c3484 |
ASSERT (vp[n-1] > 0);
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
/* FIXME: Check for small sizes first, before setting up temporary
|
|
Packit |
5c3484 |
storage etc. */
|
|
Packit |
5c3484 |
talloc = MPN_GCD_SUBDIV_STEP_ITCH(n);
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
/* For initial division */
|
|
Packit |
5c3484 |
scratch = usize - n + 1;
|
|
Packit |
5c3484 |
if (scratch > talloc)
|
|
Packit |
5c3484 |
talloc = scratch;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
#if TUNE_GCD_P
|
|
Packit |
5c3484 |
if (CHOOSE_P (n) > 0)
|
|
Packit |
5c3484 |
#else
|
|
Packit |
5c3484 |
if (ABOVE_THRESHOLD (n, GCD_DC_THRESHOLD))
|
|
Packit |
5c3484 |
#endif
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
mp_size_t hgcd_scratch;
|
|
Packit |
5c3484 |
mp_size_t update_scratch;
|
|
Packit |
5c3484 |
mp_size_t p = CHOOSE_P (n);
|
|
Packit |
5c3484 |
mp_size_t scratch;
|
|
Packit |
5c3484 |
#if TUNE_GCD_P
|
|
Packit |
5c3484 |
/* Worst case, since we don't guarantee that n - CHOOSE_P(n)
|
|
Packit |
5c3484 |
is increasing */
|
|
Packit |
5c3484 |
matrix_scratch = MPN_HGCD_MATRIX_INIT_ITCH (n);
|
|
Packit |
5c3484 |
hgcd_scratch = mpn_hgcd_itch (n);
|
|
Packit |
5c3484 |
update_scratch = 2*(n - 1);
|
|
Packit |
5c3484 |
#else
|
|
Packit |
5c3484 |
matrix_scratch = MPN_HGCD_MATRIX_INIT_ITCH (n - p);
|
|
Packit |
5c3484 |
hgcd_scratch = mpn_hgcd_itch (n - p);
|
|
Packit |
5c3484 |
update_scratch = p + n - 1;
|
|
Packit |
5c3484 |
#endif
|
|
Packit |
5c3484 |
scratch = matrix_scratch + MAX(hgcd_scratch, update_scratch);
|
|
Packit |
5c3484 |
if (scratch > talloc)
|
|
Packit |
5c3484 |
talloc = scratch;
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
TMP_MARK;
|
|
Packit |
5c3484 |
tp = TMP_ALLOC_LIMBS(talloc);
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
if (usize > n)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
mpn_tdiv_qr (tp, up, 0, up, usize, vp, n);
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
if (mpn_zero_p (up, n))
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
MPN_COPY (gp, vp, n);
|
|
Packit |
5c3484 |
ctx.gn = n;
|
|
Packit |
5c3484 |
goto done;
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
ctx.gp = gp;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
#if TUNE_GCD_P
|
|
Packit |
5c3484 |
while (CHOOSE_P (n) > 0)
|
|
Packit |
5c3484 |
#else
|
|
Packit |
5c3484 |
while (ABOVE_THRESHOLD (n, GCD_DC_THRESHOLD))
|
|
Packit |
5c3484 |
#endif
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
struct hgcd_matrix M;
|
|
Packit |
5c3484 |
mp_size_t p = CHOOSE_P (n);
|
|
Packit |
5c3484 |
mp_size_t matrix_scratch = MPN_HGCD_MATRIX_INIT_ITCH (n - p);
|
|
Packit |
5c3484 |
mp_size_t nn;
|
|
Packit |
5c3484 |
mpn_hgcd_matrix_init (&M, n - p, tp);
|
|
Packit |
5c3484 |
nn = mpn_hgcd (up + p, vp + p, n - p, &M, tp + matrix_scratch);
|
|
Packit |
5c3484 |
if (nn > 0)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
ASSERT (M.n <= (n - p - 1)/2);
|
|
Packit |
5c3484 |
ASSERT (M.n + p <= (p + n - 1) / 2);
|
|
Packit |
5c3484 |
/* Temporary storage 2 (p + M->n) <= p + n - 1. */
|
|
Packit |
5c3484 |
n = mpn_hgcd_matrix_adjust (&M, p + nn, up, vp, p, tp + matrix_scratch);
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
else
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
/* Temporary storage n */
|
|
Packit |
5c3484 |
n = mpn_gcd_subdiv_step (up, vp, n, 0, gcd_hook, &ctx, tp);
|
|
Packit |
5c3484 |
if (n == 0)
|
|
Packit |
5c3484 |
goto done;
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
while (n > 2)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
struct hgcd_matrix1 M;
|
|
Packit |
5c3484 |
mp_limb_t uh, ul, vh, vl;
|
|
Packit |
5c3484 |
mp_limb_t mask;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
mask = up[n-1] | vp[n-1];
|
|
Packit |
5c3484 |
ASSERT (mask > 0);
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
if (mask & GMP_NUMB_HIGHBIT)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
uh = up[n-1]; ul = up[n-2];
|
|
Packit |
5c3484 |
vh = vp[n-1]; vl = vp[n-2];
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
else
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
int shift;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
count_leading_zeros (shift, mask);
|
|
Packit |
5c3484 |
uh = MPN_EXTRACT_NUMB (shift, up[n-1], up[n-2]);
|
|
Packit |
5c3484 |
ul = MPN_EXTRACT_NUMB (shift, up[n-2], up[n-3]);
|
|
Packit |
5c3484 |
vh = MPN_EXTRACT_NUMB (shift, vp[n-1], vp[n-2]);
|
|
Packit |
5c3484 |
vl = MPN_EXTRACT_NUMB (shift, vp[n-2], vp[n-3]);
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
/* Try an mpn_hgcd2 step */
|
|
Packit |
5c3484 |
if (mpn_hgcd2 (uh, ul, vh, vl, &M))
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
n = mpn_matrix22_mul1_inverse_vector (&M, tp, up, vp, n);
|
|
Packit |
5c3484 |
MP_PTR_SWAP (up, tp);
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
else
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
/* mpn_hgcd2 has failed. Then either one of a or b is very
|
|
Packit |
5c3484 |
small, or the difference is very small. Perform one
|
|
Packit |
5c3484 |
subtraction followed by one division. */
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
/* Temporary storage n */
|
|
Packit |
5c3484 |
n = mpn_gcd_subdiv_step (up, vp, n, 0, &gcd_hook, &ctx, tp);
|
|
Packit |
5c3484 |
if (n == 0)
|
|
Packit |
5c3484 |
goto done;
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
ASSERT(up[n-1] | vp[n-1]);
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
if (n == 1)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
*gp = mpn_gcd_1(up, 1, vp[0]);
|
|
Packit |
5c3484 |
ctx.gn = 1;
|
|
Packit |
5c3484 |
goto done;
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
/* Due to the calling convention for mpn_gcd, at most one can be
|
|
Packit |
5c3484 |
even. */
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
if (! (up[0] & 1))
|
|
Packit |
5c3484 |
MP_PTR_SWAP (up, vp);
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
ASSERT (up[0] & 1);
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
if (vp[0] == 0)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
*gp = mpn_gcd_1 (up, 2, vp[1]);
|
|
Packit |
5c3484 |
ctx.gn = 1;
|
|
Packit |
5c3484 |
goto done;
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
else if (! (vp[0] & 1))
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
int r;
|
|
Packit |
5c3484 |
count_trailing_zeros (r, vp[0]);
|
|
Packit |
5c3484 |
vp[0] = ((vp[1] << (GMP_NUMB_BITS - r)) & GMP_NUMB_MASK) | (vp[0] >> r);
|
|
Packit |
5c3484 |
vp[1] >>= r;
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
ctx.gn = gcd_2(gp, up, vp);
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
done:
|
|
Packit |
5c3484 |
TMP_FREE;
|
|
Packit |
5c3484 |
return ctx.gn;
|
|
Packit |
5c3484 |
}
|