|
Packit |
5c3484 |
/* mpn_gcdext -- Extended Greatest Common Divisor.
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
Copyright 1996, 1998, 2000-2005, 2008, 2009, 2012 Free Software Foundation,
|
|
Packit |
5c3484 |
Inc.
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
This file is part of the GNU MP Library.
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
The GNU MP Library is free software; you can redistribute it and/or modify
|
|
Packit |
5c3484 |
it under the terms of either:
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
* the GNU Lesser General Public License as published by the Free
|
|
Packit |
5c3484 |
Software Foundation; either version 3 of the License, or (at your
|
|
Packit |
5c3484 |
option) any later version.
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
or
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
* the GNU General Public License as published by the Free Software
|
|
Packit |
5c3484 |
Foundation; either version 2 of the License, or (at your option) any
|
|
Packit |
5c3484 |
later version.
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
or both in parallel, as here.
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
The GNU MP Library is distributed in the hope that it will be useful, but
|
|
Packit |
5c3484 |
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
|
Packit |
5c3484 |
or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
Packit |
5c3484 |
for more details.
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
You should have received copies of the GNU General Public License and the
|
|
Packit |
5c3484 |
GNU Lesser General Public License along with the GNU MP Library. If not,
|
|
Packit |
5c3484 |
see https://www.gnu.org/licenses/. */
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
#include "gmp.h"
|
|
Packit |
5c3484 |
#include "gmp-impl.h"
|
|
Packit |
5c3484 |
#include "longlong.h"
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
/* Computes (r;b) = (a; b) M. Result is of size n + M->n +/- 1, and
|
|
Packit |
5c3484 |
the size is returned (if inputs are non-normalized, result may be
|
|
Packit |
5c3484 |
non-normalized too). Temporary space needed is M->n + n.
|
|
Packit |
5c3484 |
*/
|
|
Packit |
5c3484 |
static size_t
|
|
Packit |
5c3484 |
hgcd_mul_matrix_vector (struct hgcd_matrix *M,
|
|
Packit |
5c3484 |
mp_ptr rp, mp_srcptr ap, mp_ptr bp, mp_size_t n, mp_ptr tp)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
mp_limb_t ah, bh;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
/* Compute (r,b) <-- (u00 a + u10 b, u01 a + u11 b) as
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
t = u00 * a
|
|
Packit |
5c3484 |
r = u10 * b
|
|
Packit |
5c3484 |
r += t;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
t = u11 * b
|
|
Packit |
5c3484 |
b = u01 * a
|
|
Packit |
5c3484 |
b += t;
|
|
Packit |
5c3484 |
*/
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
if (M->n >= n)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
mpn_mul (tp, M->p[0][0], M->n, ap, n);
|
|
Packit |
5c3484 |
mpn_mul (rp, M->p[1][0], M->n, bp, n);
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
else
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
mpn_mul (tp, ap, n, M->p[0][0], M->n);
|
|
Packit |
5c3484 |
mpn_mul (rp, bp, n, M->p[1][0], M->n);
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
ah = mpn_add_n (rp, rp, tp, n + M->n);
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
if (M->n >= n)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
mpn_mul (tp, M->p[1][1], M->n, bp, n);
|
|
Packit |
5c3484 |
mpn_mul (bp, M->p[0][1], M->n, ap, n);
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
else
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
mpn_mul (tp, bp, n, M->p[1][1], M->n);
|
|
Packit |
5c3484 |
mpn_mul (bp, ap, n, M->p[0][1], M->n);
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
bh = mpn_add_n (bp, bp, tp, n + M->n);
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
n += M->n;
|
|
Packit |
5c3484 |
if ( (ah | bh) > 0)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
rp[n] = ah;
|
|
Packit |
5c3484 |
bp[n] = bh;
|
|
Packit |
5c3484 |
n++;
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
else
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
/* Normalize */
|
|
Packit |
5c3484 |
while ( (rp[n-1] | bp[n-1]) == 0)
|
|
Packit |
5c3484 |
n--;
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
return n;
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
#define COMPUTE_V_ITCH(n) (2*(n))
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
/* Computes |v| = |(g - u a)| / b, where u may be positive or
|
|
Packit |
5c3484 |
negative, and v is of the opposite sign. max(a, b) is of size n, u and
|
|
Packit |
5c3484 |
v at most size n, and v must have space for n+1 limbs. */
|
|
Packit |
5c3484 |
static mp_size_t
|
|
Packit |
5c3484 |
compute_v (mp_ptr vp,
|
|
Packit |
5c3484 |
mp_srcptr ap, mp_srcptr bp, mp_size_t n,
|
|
Packit |
5c3484 |
mp_srcptr gp, mp_size_t gn,
|
|
Packit |
5c3484 |
mp_srcptr up, mp_size_t usize,
|
|
Packit |
5c3484 |
mp_ptr tp)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
mp_size_t size;
|
|
Packit |
5c3484 |
mp_size_t an;
|
|
Packit |
5c3484 |
mp_size_t bn;
|
|
Packit |
5c3484 |
mp_size_t vn;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
ASSERT (n > 0);
|
|
Packit |
5c3484 |
ASSERT (gn > 0);
|
|
Packit |
5c3484 |
ASSERT (usize != 0);
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
size = ABS (usize);
|
|
Packit |
5c3484 |
ASSERT (size <= n);
|
|
Packit |
5c3484 |
ASSERT (up[size-1] > 0);
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
an = n;
|
|
Packit |
5c3484 |
MPN_NORMALIZE (ap, an);
|
|
Packit |
5c3484 |
ASSERT (gn <= an);
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
if (an >= size)
|
|
Packit |
5c3484 |
mpn_mul (tp, ap, an, up, size);
|
|
Packit |
5c3484 |
else
|
|
Packit |
5c3484 |
mpn_mul (tp, up, size, ap, an);
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
size += an;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
if (usize > 0)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
/* |v| = -v = (u a - g) / b */
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
ASSERT_NOCARRY (mpn_sub (tp, tp, size, gp, gn));
|
|
Packit |
5c3484 |
MPN_NORMALIZE (tp, size);
|
|
Packit |
5c3484 |
if (size == 0)
|
|
Packit |
5c3484 |
return 0;
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
else
|
|
Packit |
5c3484 |
{ /* |v| = v = (g - u a) / b = (g + |u| a) / b. Since g <= a,
|
|
Packit |
5c3484 |
(g + |u| a) always fits in (|usize| + an) limbs. */
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
ASSERT_NOCARRY (mpn_add (tp, tp, size, gp, gn));
|
|
Packit |
5c3484 |
size -= (tp[size - 1] == 0);
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
/* Now divide t / b. There must be no remainder */
|
|
Packit |
5c3484 |
bn = n;
|
|
Packit |
5c3484 |
MPN_NORMALIZE (bp, bn);
|
|
Packit |
5c3484 |
ASSERT (size >= bn);
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
vn = size + 1 - bn;
|
|
Packit |
5c3484 |
ASSERT (vn <= n + 1);
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
mpn_divexact (vp, tp, size, bp, bn);
|
|
Packit |
5c3484 |
vn -= (vp[vn-1] == 0);
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
return vn;
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
/* Temporary storage:
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
Initial division: Quotient of at most an - n + 1 <= an limbs.
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
Storage for u0 and u1: 2(n+1).
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
Storage for hgcd matrix M, with input ceil(n/2): 5 * ceil(n/4)
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
Storage for hgcd, input (n + 1)/2: 9 n/4 plus some.
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
When hgcd succeeds: 1 + floor(3n/2) for adjusting a and b, and 2(n+1) for the cofactors.
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
When hgcd fails: 2n + 1 for mpn_gcdext_subdiv_step, which is less.
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
For the lehmer call after the loop, Let T denote
|
|
Packit |
5c3484 |
GCDEXT_DC_THRESHOLD. For the gcdext_lehmer call, we need T each for
|
|
Packit |
5c3484 |
u, a and b, and 4T+3 scratch space. Next, for compute_v, we need T
|
|
Packit |
5c3484 |
for u, T+1 for v and 2T scratch space. In all, 7T + 3 is
|
|
Packit |
5c3484 |
sufficient for both operations.
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
*/
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
/* Optimal choice of p seems difficult. In each iteration the division
|
|
Packit |
5c3484 |
* of work between hgcd and the updates of u0 and u1 depends on the
|
|
Packit |
5c3484 |
* current size of the u. It may be desirable to use a different
|
|
Packit |
5c3484 |
* choice of p in each iteration. Also the input size seems to matter;
|
|
Packit |
5c3484 |
* choosing p = n / 3 in the first iteration seems to improve
|
|
Packit |
5c3484 |
* performance slightly for input size just above the threshold, but
|
|
Packit |
5c3484 |
* degrade performance for larger inputs. */
|
|
Packit |
5c3484 |
#define CHOOSE_P_1(n) ((n) / 2)
|
|
Packit |
5c3484 |
#define CHOOSE_P_2(n) ((n) / 3)
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
mp_size_t
|
|
Packit |
5c3484 |
mpn_gcdext (mp_ptr gp, mp_ptr up, mp_size_t *usizep,
|
|
Packit |
5c3484 |
mp_ptr ap, mp_size_t an, mp_ptr bp, mp_size_t n)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
mp_size_t talloc;
|
|
Packit |
5c3484 |
mp_size_t scratch;
|
|
Packit |
5c3484 |
mp_size_t matrix_scratch;
|
|
Packit |
5c3484 |
mp_size_t ualloc = n + 1;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
struct gcdext_ctx ctx;
|
|
Packit |
5c3484 |
mp_size_t un;
|
|
Packit |
5c3484 |
mp_ptr u0;
|
|
Packit |
5c3484 |
mp_ptr u1;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
mp_ptr tp;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
TMP_DECL;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
ASSERT (an >= n);
|
|
Packit |
5c3484 |
ASSERT (n > 0);
|
|
Packit |
5c3484 |
ASSERT (bp[n-1] > 0);
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
TMP_MARK;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
/* FIXME: Check for small sizes first, before setting up temporary
|
|
Packit |
5c3484 |
storage etc. */
|
|
Packit |
5c3484 |
talloc = MPN_GCDEXT_LEHMER_N_ITCH(n);
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
/* For initial division */
|
|
Packit |
5c3484 |
scratch = an - n + 1;
|
|
Packit |
5c3484 |
if (scratch > talloc)
|
|
Packit |
5c3484 |
talloc = scratch;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
if (ABOVE_THRESHOLD (n, GCDEXT_DC_THRESHOLD))
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
/* For hgcd loop. */
|
|
Packit |
5c3484 |
mp_size_t hgcd_scratch;
|
|
Packit |
5c3484 |
mp_size_t update_scratch;
|
|
Packit |
5c3484 |
mp_size_t p1 = CHOOSE_P_1 (n);
|
|
Packit |
5c3484 |
mp_size_t p2 = CHOOSE_P_2 (n);
|
|
Packit |
5c3484 |
mp_size_t min_p = MIN(p1, p2);
|
|
Packit |
5c3484 |
mp_size_t max_p = MAX(p1, p2);
|
|
Packit |
5c3484 |
matrix_scratch = MPN_HGCD_MATRIX_INIT_ITCH (n - min_p);
|
|
Packit |
5c3484 |
hgcd_scratch = mpn_hgcd_itch (n - min_p);
|
|
Packit |
5c3484 |
update_scratch = max_p + n - 1;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
scratch = matrix_scratch + MAX(hgcd_scratch, update_scratch);
|
|
Packit |
5c3484 |
if (scratch > talloc)
|
|
Packit |
5c3484 |
talloc = scratch;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
/* Final mpn_gcdext_lehmer_n call. Need space for u and for
|
|
Packit |
5c3484 |
copies of a and b. */
|
|
Packit |
5c3484 |
scratch = MPN_GCDEXT_LEHMER_N_ITCH (GCDEXT_DC_THRESHOLD)
|
|
Packit |
5c3484 |
+ 3*GCDEXT_DC_THRESHOLD;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
if (scratch > talloc)
|
|
Packit |
5c3484 |
talloc = scratch;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
/* Cofactors u0 and u1 */
|
|
Packit |
5c3484 |
talloc += 2*(n+1);
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
tp = TMP_ALLOC_LIMBS(talloc);
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
if (an > n)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
mpn_tdiv_qr (tp, ap, 0, ap, an, bp, n);
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
if (mpn_zero_p (ap, n))
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
MPN_COPY (gp, bp, n);
|
|
Packit |
5c3484 |
*usizep = 0;
|
|
Packit |
5c3484 |
TMP_FREE;
|
|
Packit |
5c3484 |
return n;
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
if (BELOW_THRESHOLD (n, GCDEXT_DC_THRESHOLD))
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
mp_size_t gn = mpn_gcdext_lehmer_n(gp, up, usizep, ap, bp, n, tp);
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
TMP_FREE;
|
|
Packit |
5c3484 |
return gn;
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
MPN_ZERO (tp, 2*ualloc);
|
|
Packit |
5c3484 |
u0 = tp; tp += ualloc;
|
|
Packit |
5c3484 |
u1 = tp; tp += ualloc;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
ctx.gp = gp;
|
|
Packit |
5c3484 |
ctx.up = up;
|
|
Packit |
5c3484 |
ctx.usize = usizep;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
/* For the first hgcd call, there are no u updates, and it makes
|
|
Packit |
5c3484 |
some sense to use a different choice for p. */
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
/* FIXME: We could trim use of temporary storage, since u0 and u1
|
|
Packit |
5c3484 |
are not used yet. For the hgcd call, we could swap in the u0
|
|
Packit |
5c3484 |
and u1 pointers for the relevant matrix elements. */
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
struct hgcd_matrix M;
|
|
Packit |
5c3484 |
mp_size_t p = CHOOSE_P_1 (n);
|
|
Packit |
5c3484 |
mp_size_t nn;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
mpn_hgcd_matrix_init (&M, n - p, tp);
|
|
Packit |
5c3484 |
nn = mpn_hgcd (ap + p, bp + p, n - p, &M, tp + matrix_scratch);
|
|
Packit |
5c3484 |
if (nn > 0)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
ASSERT (M.n <= (n - p - 1)/2);
|
|
Packit |
5c3484 |
ASSERT (M.n + p <= (p + n - 1) / 2);
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
/* Temporary storage 2 (p + M->n) <= p + n - 1 */
|
|
Packit |
5c3484 |
n = mpn_hgcd_matrix_adjust (&M, p + nn, ap, bp, p, tp + matrix_scratch);
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
MPN_COPY (u0, M.p[1][0], M.n);
|
|
Packit |
5c3484 |
MPN_COPY (u1, M.p[1][1], M.n);
|
|
Packit |
5c3484 |
un = M.n;
|
|
Packit |
5c3484 |
while ( (u0[un-1] | u1[un-1] ) == 0)
|
|
Packit |
5c3484 |
un--;
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
else
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
/* mpn_hgcd has failed. Then either one of a or b is very
|
|
Packit |
5c3484 |
small, or the difference is very small. Perform one
|
|
Packit |
5c3484 |
subtraction followed by one division. */
|
|
Packit |
5c3484 |
u1[0] = 1;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
ctx.u0 = u0;
|
|
Packit |
5c3484 |
ctx.u1 = u1;
|
|
Packit |
5c3484 |
ctx.tp = tp + n; /* ualloc */
|
|
Packit |
5c3484 |
ctx.un = 1;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
/* Temporary storage n */
|
|
Packit |
5c3484 |
n = mpn_gcd_subdiv_step (ap, bp, n, 0, mpn_gcdext_hook, &ctx, tp);
|
|
Packit |
5c3484 |
if (n == 0)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
TMP_FREE;
|
|
Packit |
5c3484 |
return ctx.gn;
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
un = ctx.un;
|
|
Packit |
5c3484 |
ASSERT (un < ualloc);
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
while (ABOVE_THRESHOLD (n, GCDEXT_DC_THRESHOLD))
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
struct hgcd_matrix M;
|
|
Packit |
5c3484 |
mp_size_t p = CHOOSE_P_2 (n);
|
|
Packit |
5c3484 |
mp_size_t nn;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
mpn_hgcd_matrix_init (&M, n - p, tp);
|
|
Packit |
5c3484 |
nn = mpn_hgcd (ap + p, bp + p, n - p, &M, tp + matrix_scratch);
|
|
Packit |
5c3484 |
if (nn > 0)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
mp_ptr t0;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
t0 = tp + matrix_scratch;
|
|
Packit |
5c3484 |
ASSERT (M.n <= (n - p - 1)/2);
|
|
Packit |
5c3484 |
ASSERT (M.n + p <= (p + n - 1) / 2);
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
/* Temporary storage 2 (p + M->n) <= p + n - 1 */
|
|
Packit |
5c3484 |
n = mpn_hgcd_matrix_adjust (&M, p + nn, ap, bp, p, t0);
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
/* By the same analysis as for mpn_hgcd_matrix_mul */
|
|
Packit |
5c3484 |
ASSERT (M.n + un <= ualloc);
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
/* FIXME: This copying could be avoided by some swapping of
|
|
Packit |
5c3484 |
* pointers. May need more temporary storage, though. */
|
|
Packit |
5c3484 |
MPN_COPY (t0, u0, un);
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
/* Temporary storage ualloc */
|
|
Packit |
5c3484 |
un = hgcd_mul_matrix_vector (&M, u0, t0, u1, un, t0 + un);
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
ASSERT (un < ualloc);
|
|
Packit |
5c3484 |
ASSERT ( (u0[un-1] | u1[un-1]) > 0);
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
else
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
/* mpn_hgcd has failed. Then either one of a or b is very
|
|
Packit |
5c3484 |
small, or the difference is very small. Perform one
|
|
Packit |
5c3484 |
subtraction followed by one division. */
|
|
Packit |
5c3484 |
ctx.u0 = u0;
|
|
Packit |
5c3484 |
ctx.u1 = u1;
|
|
Packit |
5c3484 |
ctx.tp = tp + n; /* ualloc */
|
|
Packit |
5c3484 |
ctx.un = un;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
/* Temporary storage n */
|
|
Packit |
5c3484 |
n = mpn_gcd_subdiv_step (ap, bp, n, 0, mpn_gcdext_hook, &ctx, tp);
|
|
Packit |
5c3484 |
if (n == 0)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
TMP_FREE;
|
|
Packit |
5c3484 |
return ctx.gn;
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
un = ctx.un;
|
|
Packit |
5c3484 |
ASSERT (un < ualloc);
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
/* We have A = ... a + ... b
|
|
Packit |
5c3484 |
B = u0 a + u1 b
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
a = u1 A + ... B
|
|
Packit |
5c3484 |
b = -u0 A + ... B
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
with bounds
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
|u0|, |u1| <= B / min(a, b)
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
We always have u1 > 0, and u0 == 0 is possible only if u1 == 1,
|
|
Packit |
5c3484 |
in which case the only reduction done so far is a = A - k B for
|
|
Packit |
5c3484 |
some k.
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
Compute g = u a + v b = (u u1 - v u0) A + (...) B
|
|
Packit |
5c3484 |
Here, u, v are bounded by
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
|u| <= b,
|
|
Packit |
5c3484 |
|v| <= a
|
|
Packit |
5c3484 |
*/
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
ASSERT ( (ap[n-1] | bp[n-1]) > 0);
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
if (UNLIKELY (mpn_cmp (ap, bp, n) == 0))
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
/* Must return the smallest cofactor, +u1 or -u0 */
|
|
Packit |
5c3484 |
int c;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
MPN_COPY (gp, ap, n);
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
MPN_CMP (c, u0, u1, un);
|
|
Packit |
5c3484 |
/* c == 0 can happen only when A = (2k+1) G, B = 2 G. And in
|
|
Packit |
5c3484 |
this case we choose the cofactor + 1, corresponding to G = A
|
|
Packit |
5c3484 |
- k B, rather than -1, corresponding to G = - A + (k+1) B. */
|
|
Packit |
5c3484 |
ASSERT (c != 0 || (un == 1 && u0[0] == 1 && u1[0] == 1));
|
|
Packit |
5c3484 |
if (c < 0)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
MPN_NORMALIZE (u0, un);
|
|
Packit |
5c3484 |
MPN_COPY (up, u0, un);
|
|
Packit |
5c3484 |
*usizep = -un;
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
else
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
MPN_NORMALIZE_NOT_ZERO (u1, un);
|
|
Packit |
5c3484 |
MPN_COPY (up, u1, un);
|
|
Packit |
5c3484 |
*usizep = un;
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
TMP_FREE;
|
|
Packit |
5c3484 |
return n;
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
else if (UNLIKELY (u0[0] == 0) && un == 1)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
mp_size_t gn;
|
|
Packit |
5c3484 |
ASSERT (u1[0] == 1);
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
/* g = u a + v b = (u u1 - v u0) A + (...) B = u A + (...) B */
|
|
Packit |
5c3484 |
gn = mpn_gcdext_lehmer_n (gp, up, usizep, ap, bp, n, tp);
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
TMP_FREE;
|
|
Packit |
5c3484 |
return gn;
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
else
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
mp_size_t u0n;
|
|
Packit |
5c3484 |
mp_size_t u1n;
|
|
Packit |
5c3484 |
mp_size_t lehmer_un;
|
|
Packit |
5c3484 |
mp_size_t lehmer_vn;
|
|
Packit |
5c3484 |
mp_size_t gn;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
mp_ptr lehmer_up;
|
|
Packit |
5c3484 |
mp_ptr lehmer_vp;
|
|
Packit |
5c3484 |
int negate;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
lehmer_up = tp; tp += n;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
/* Call mpn_gcdext_lehmer_n with copies of a and b. */
|
|
Packit |
5c3484 |
MPN_COPY (tp, ap, n);
|
|
Packit |
5c3484 |
MPN_COPY (tp + n, bp, n);
|
|
Packit |
5c3484 |
gn = mpn_gcdext_lehmer_n (gp, lehmer_up, &lehmer_un, tp, tp + n, n, tp + 2*n);
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
u0n = un;
|
|
Packit |
5c3484 |
MPN_NORMALIZE (u0, u0n);
|
|
Packit |
5c3484 |
ASSERT (u0n > 0);
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
if (lehmer_un == 0)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
/* u == 0 ==> v = g / b == 1 ==> g = - u0 A + (...) B */
|
|
Packit |
5c3484 |
MPN_COPY (up, u0, u0n);
|
|
Packit |
5c3484 |
*usizep = -u0n;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
TMP_FREE;
|
|
Packit |
5c3484 |
return gn;
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
lehmer_vp = tp;
|
|
Packit |
5c3484 |
/* Compute v = (g - u a) / b */
|
|
Packit |
5c3484 |
lehmer_vn = compute_v (lehmer_vp,
|
|
Packit |
5c3484 |
ap, bp, n, gp, gn, lehmer_up, lehmer_un, tp + n + 1);
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
if (lehmer_un > 0)
|
|
Packit |
5c3484 |
negate = 0;
|
|
Packit |
5c3484 |
else
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
lehmer_un = -lehmer_un;
|
|
Packit |
5c3484 |
negate = 1;
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
u1n = un;
|
|
Packit |
5c3484 |
MPN_NORMALIZE (u1, u1n);
|
|
Packit |
5c3484 |
ASSERT (u1n > 0);
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
ASSERT (lehmer_un + u1n <= ualloc);
|
|
Packit |
5c3484 |
ASSERT (lehmer_vn + u0n <= ualloc);
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
/* We may still have v == 0 */
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
/* Compute u u0 */
|
|
Packit |
5c3484 |
if (lehmer_un <= u1n)
|
|
Packit |
5c3484 |
/* Should be the common case */
|
|
Packit |
5c3484 |
mpn_mul (up, u1, u1n, lehmer_up, lehmer_un);
|
|
Packit |
5c3484 |
else
|
|
Packit |
5c3484 |
mpn_mul (up, lehmer_up, lehmer_un, u1, u1n);
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
un = u1n + lehmer_un;
|
|
Packit |
5c3484 |
un -= (up[un - 1] == 0);
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
if (lehmer_vn > 0)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
mp_limb_t cy;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
/* Overwrites old u1 value */
|
|
Packit |
5c3484 |
if (lehmer_vn <= u0n)
|
|
Packit |
5c3484 |
/* Should be the common case */
|
|
Packit |
5c3484 |
mpn_mul (u1, u0, u0n, lehmer_vp, lehmer_vn);
|
|
Packit |
5c3484 |
else
|
|
Packit |
5c3484 |
mpn_mul (u1, lehmer_vp, lehmer_vn, u0, u0n);
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
u1n = u0n + lehmer_vn;
|
|
Packit |
5c3484 |
u1n -= (u1[u1n - 1] == 0);
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
if (u1n <= un)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
cy = mpn_add (up, up, un, u1, u1n);
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
else
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
cy = mpn_add (up, u1, u1n, up, un);
|
|
Packit |
5c3484 |
un = u1n;
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
up[un] = cy;
|
|
Packit |
5c3484 |
un += (cy != 0);
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
ASSERT (un < ualloc);
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
*usizep = negate ? -un : un;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
TMP_FREE;
|
|
Packit |
5c3484 |
return gn;
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
}
|