Tree - source-git/gmp - CentOS Git server

source-git / gmp

Blame mpn/generic/sqrtrem.c

Blob History Raw

Packit	5c3484	`/* mpn_sqrtrem -- square root and remainder`
Packit	5c3484
Packit	5c3484	`Contributed to the GNU project by Paul Zimmermann (most code),`
Packit	5c3484	`Torbjorn Granlund (mpn_sqrtrem1) and Marco Bodrato (mpn_dc_sqrt).`
Packit	5c3484
Packit	5c3484	`THE FUNCTIONS IN THIS FILE EXCEPT mpn_sqrtrem ARE INTERNAL WITH A`
Packit	5c3484	`MUTABLE INTERFACE. IT IS ONLY SAFE TO REACH THEM THROUGH DOCUMENTED`
Packit	5c3484	`INTERFACES. IN FACT, IT IS ALMOST GUARANTEED THAT THEY WILL CHANGE OR`
Packit	5c3484	`DISAPPEAR IN A FUTURE GMP RELEASE.`
Packit	5c3484
Packit	5c3484	`Copyright 1999-2002, 2004, 2005, 2008, 2010, 2012, 2015 Free Software`
Packit	5c3484	`Foundation, Inc.`
Packit	5c3484
Packit	5c3484	`This file is part of the GNU MP Library.`
Packit	5c3484
Packit	5c3484	`The GNU MP Library is free software; you can redistribute it and/or modify`
Packit	5c3484	`it under the terms of either:`
Packit	5c3484
Packit	5c3484	`* the GNU Lesser General Public License as published by the Free`
Packit	5c3484	`Software Foundation; either version 3 of the License, or (at your`
Packit	5c3484	`option) any later version.`
Packit	5c3484
Packit	5c3484	`or`
Packit	5c3484
Packit	5c3484	`* the GNU General Public License as published by the Free Software`
Packit	5c3484	`Foundation; either version 2 of the License, or (at your option) any`
Packit	5c3484	`later version.`
Packit	5c3484
Packit	5c3484	`or both in parallel, as here.`
Packit	5c3484
Packit	5c3484	`The GNU MP Library is distributed in the hope that it will be useful, but`
Packit	5c3484	`WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY`
Packit	5c3484	`or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License`
Packit	5c3484	`for more details.`
Packit	5c3484
Packit	5c3484	`You should have received copies of the GNU General Public License and the`
Packit	5c3484	`GNU Lesser General Public License along with the GNU MP Library. If not,`
Packit	5c3484	`see https://www.gnu.org/licenses/. */`
Packit	5c3484
Packit	5c3484
Packit	5c3484	`/* See "Karatsuba Square Root", reference in gmp.texi. */`
Packit	5c3484
Packit	5c3484
Packit	5c3484	`#include <stdio.h>`
Packit	5c3484	`#include <stdlib.h>`
Packit	5c3484
Packit	5c3484	`#include "gmp.h"`
Packit	5c3484	`#include "gmp-impl.h"`
Packit	5c3484	`#include "longlong.h"`
/* Build-time switch for mpn_dc_sqrt: when non-zero the helper
   mpn_divappr_q below is compiled and used for the division step,
   otherwise mpn_div_q is called instead.  The value also enters the
   scratch-size and bit-mask arithmetic in mpn_dc_sqrt.  */
#define USE_DIVAPPR_Q 1

/* Debug hook.  It expands to nothing here, so the printf calls wrapped
   in TRACE() throughout this file are compiled out.  */
#define TRACE(x)
Packit	5c3484
/* Seed table for the Newton iteration in mpn_sqrtrem1.

   The table is indexed with (abits - 0x80), where abits holds the nine
   most significant bits of the normalized operand (0x80 <= abits <= 0x1ff),
   hence 384 entries.  Every seed has the form 0x100 | entry; the common
   0x100 is not stored, which lets each entry fit in one byte.  The reader
   ORs the 0x100 back in.  The per-row comments give the hexadecimal range
   of abits covered by the row.  */
static const unsigned char invsqrttab[384] = /* The common 0x100 was removed */
{
  0xff,0xfd,0xfb,0xf9,0xf7,0xf5,0xf3,0xf2, /* sqrt(1/80)..sqrt(1/87) */
  0xf0,0xee,0xec,0xea,0xe9,0xe7,0xe5,0xe4, /* sqrt(1/88)..sqrt(1/8f) */
  0xe2,0xe0,0xdf,0xdd,0xdb,0xda,0xd8,0xd7, /* sqrt(1/90)..sqrt(1/97) */
  0xd5,0xd4,0xd2,0xd1,0xcf,0xce,0xcc,0xcb, /* sqrt(1/98)..sqrt(1/9f) */
  0xc9,0xc8,0xc6,0xc5,0xc4,0xc2,0xc1,0xc0, /* sqrt(1/a0)..sqrt(1/a7) */
  0xbe,0xbd,0xbc,0xba,0xb9,0xb8,0xb7,0xb5, /* sqrt(1/a8)..sqrt(1/af) */
  0xb4,0xb3,0xb2,0xb0,0xaf,0xae,0xad,0xac, /* sqrt(1/b0)..sqrt(1/b7) */
  0xaa,0xa9,0xa8,0xa7,0xa6,0xa5,0xa4,0xa3, /* sqrt(1/b8)..sqrt(1/bf) */
  0xa2,0xa0,0x9f,0x9e,0x9d,0x9c,0x9b,0x9a, /* sqrt(1/c0)..sqrt(1/c7) */
  0x99,0x98,0x97,0x96,0x95,0x94,0x93,0x92, /* sqrt(1/c8)..sqrt(1/cf) */
  0x91,0x90,0x8f,0x8e,0x8d,0x8c,0x8c,0x8b, /* sqrt(1/d0)..sqrt(1/d7) */
  0x8a,0x89,0x88,0x87,0x86,0x85,0x84,0x83, /* sqrt(1/d8)..sqrt(1/df) */
  0x83,0x82,0x81,0x80,0x7f,0x7e,0x7e,0x7d, /* sqrt(1/e0)..sqrt(1/e7) */
  0x7c,0x7b,0x7a,0x79,0x79,0x78,0x77,0x76, /* sqrt(1/e8)..sqrt(1/ef) */
  0x76,0x75,0x74,0x73,0x72,0x72,0x71,0x70, /* sqrt(1/f0)..sqrt(1/f7) */
  0x6f,0x6f,0x6e,0x6d,0x6d,0x6c,0x6b,0x6a, /* sqrt(1/f8)..sqrt(1/ff) */
  0x6a,0x69,0x68,0x68,0x67,0x66,0x66,0x65, /* sqrt(1/100)..sqrt(1/107) */
  0x64,0x64,0x63,0x62,0x62,0x61,0x60,0x60, /* sqrt(1/108)..sqrt(1/10f) */
  0x5f,0x5e,0x5e,0x5d,0x5c,0x5c,0x5b,0x5a, /* sqrt(1/110)..sqrt(1/117) */
  0x5a,0x59,0x59,0x58,0x57,0x57,0x56,0x56, /* sqrt(1/118)..sqrt(1/11f) */
  0x55,0x54,0x54,0x53,0x53,0x52,0x52,0x51, /* sqrt(1/120)..sqrt(1/127) */
  0x50,0x50,0x4f,0x4f,0x4e,0x4e,0x4d,0x4d, /* sqrt(1/128)..sqrt(1/12f) */
  0x4c,0x4b,0x4b,0x4a,0x4a,0x49,0x49,0x48, /* sqrt(1/130)..sqrt(1/137) */
  0x48,0x47,0x47,0x46,0x46,0x45,0x45,0x44, /* sqrt(1/138)..sqrt(1/13f) */
  0x44,0x43,0x43,0x42,0x42,0x41,0x41,0x40, /* sqrt(1/140)..sqrt(1/147) */
  0x40,0x3f,0x3f,0x3e,0x3e,0x3d,0x3d,0x3c, /* sqrt(1/148)..sqrt(1/14f) */
  0x3c,0x3b,0x3b,0x3a,0x3a,0x39,0x39,0x39, /* sqrt(1/150)..sqrt(1/157) */
  0x38,0x38,0x37,0x37,0x36,0x36,0x35,0x35, /* sqrt(1/158)..sqrt(1/15f) */
  0x35,0x34,0x34,0x33,0x33,0x32,0x32,0x32, /* sqrt(1/160)..sqrt(1/167) */
  0x31,0x31,0x30,0x30,0x2f,0x2f,0x2f,0x2e, /* sqrt(1/168)..sqrt(1/16f) */
  0x2e,0x2d,0x2d,0x2d,0x2c,0x2c,0x2b,0x2b, /* sqrt(1/170)..sqrt(1/177) */
  0x2b,0x2a,0x2a,0x29,0x29,0x29,0x28,0x28, /* sqrt(1/178)..sqrt(1/17f) */
  0x27,0x27,0x27,0x26,0x26,0x26,0x25,0x25, /* sqrt(1/180)..sqrt(1/187) */
  0x24,0x24,0x24,0x23,0x23,0x23,0x22,0x22, /* sqrt(1/188)..sqrt(1/18f) */
  0x21,0x21,0x21,0x20,0x20,0x20,0x1f,0x1f, /* sqrt(1/190)..sqrt(1/197) */
  0x1f,0x1e,0x1e,0x1e,0x1d,0x1d,0x1d,0x1c, /* sqrt(1/198)..sqrt(1/19f) */
  0x1c,0x1b,0x1b,0x1b,0x1a,0x1a,0x1a,0x19, /* sqrt(1/1a0)..sqrt(1/1a7) */
  0x19,0x19,0x18,0x18,0x18,0x18,0x17,0x17, /* sqrt(1/1a8)..sqrt(1/1af) */
  0x17,0x16,0x16,0x16,0x15,0x15,0x15,0x14, /* sqrt(1/1b0)..sqrt(1/1b7) */
  0x14,0x14,0x13,0x13,0x13,0x12,0x12,0x12, /* sqrt(1/1b8)..sqrt(1/1bf) */
  0x12,0x11,0x11,0x11,0x10,0x10,0x10,0x0f, /* sqrt(1/1c0)..sqrt(1/1c7) */
  0x0f,0x0f,0x0f,0x0e,0x0e,0x0e,0x0d,0x0d, /* sqrt(1/1c8)..sqrt(1/1cf) */
  0x0d,0x0c,0x0c,0x0c,0x0c,0x0b,0x0b,0x0b, /* sqrt(1/1d0)..sqrt(1/1d7) */
  0x0a,0x0a,0x0a,0x0a,0x09,0x09,0x09,0x09, /* sqrt(1/1d8)..sqrt(1/1df) */
  0x08,0x08,0x08,0x07,0x07,0x07,0x07,0x06, /* sqrt(1/1e0)..sqrt(1/1e7) */
  0x06,0x06,0x06,0x05,0x05,0x05,0x04,0x04, /* sqrt(1/1e8)..sqrt(1/1ef) */
  0x04,0x04,0x03,0x03,0x03,0x03,0x02,0x02, /* sqrt(1/1f0)..sqrt(1/1f7) */
  0x02,0x02,0x01,0x01,0x01,0x01,0x00,0x00  /* sqrt(1/1f8)..sqrt(1/1ff) */
};
Packit	5c3484
Packit	5c3484	`/* Compute s = floor(sqrt(a0)), and rp = a0 - s^2. /`
Packit	5c3484
Packit	5c3484	`#if GMP_NUMB_BITS > 32`
Packit	5c3484	`#define MAGIC CNST_LIMB(0x10000000000) /* 0xffe7debbfc < MAGIC < 0x232b1850f410 */`
Packit	5c3484	`#else`
Packit	5c3484	`#define MAGIC CNST_LIMB(0x100000) /* 0xfee6f < MAGIC < 0x29cbc8 */`
Packit	5c3484	`#endif`
Packit	5c3484
Packit	5c3484	`static mp_limb_t`
Packit	5c3484	`mpn_sqrtrem1 (mp_ptr rp, mp_limb_t a0)`
Packit	5c3484	`{`
Packit	5c3484	`#if GMP_NUMB_BITS > 32`
Packit	5c3484	`mp_limb_t a1;`
Packit	5c3484	`#endif`
Packit	5c3484	`mp_limb_t x0, t2, t, x2;`
Packit	5c3484	`unsigned abits;`
Packit	5c3484
Packit	5c3484	`ASSERT_ALWAYS (GMP_NAIL_BITS == 0);`
Packit	5c3484	`ASSERT_ALWAYS (GMP_LIMB_BITS == 32 \|\| GMP_LIMB_BITS == 64);`
Packit	5c3484	`ASSERT (a0 >= GMP_NUMB_HIGHBIT / 2);`
Packit	5c3484
Packit	5c3484	`/* Use Newton iterations for approximating 1/sqrt(a) instead of sqrt(a),`
Packit	5c3484	`since we can do the former without division. As part of the last`
Packit	5c3484	`iteration convert from 1/sqrt(a) to sqrt(a). */`
Packit	5c3484
Packit	5c3484	`abits = a0 >> (GMP_LIMB_BITS - 1 - 8); /* extract bits for table lookup */`
Packit	5c3484	`x0 = 0x100 \| invsqrttab[abits - 0x80]; /* initial 1/sqrt(a) */`
Packit	5c3484
Packit	5c3484	`/* x0 is now an 8 bits approximation of 1/sqrt(a0) */`
Packit	5c3484
Packit	5c3484	`#if GMP_NUMB_BITS > 32`
Packit	5c3484	`a1 = a0 >> (GMP_LIMB_BITS - 1 - 32);`
Packit	5c3484	`t = (mp_limb_signed_t) (CNST_LIMB(0x2000000000000) - 0x30000 - a1 * x0 * x0) >> 16;`
Packit	5c3484	`x0 = (x0 << 16) + ((mp_limb_signed_t) (x0 * t) >> (16+2));`
Packit	5c3484
Packit	5c3484	`/* x0 is now a 16 bits approximation of 1/sqrt(a0) */`
Packit	5c3484
Packit	5c3484	`t2 = x0 * (a0 >> (32-8));`
Packit	5c3484	`t = t2 >> 25;`
Packit	5c3484	`t = ((mp_limb_signed_t) ((a0 << 14) - t * t - MAGIC) >> (32-8));`
Packit	5c3484	`x0 = t2 + ((mp_limb_signed_t) (x0 * t) >> 15);`
Packit	5c3484	`x0 >>= 32;`
Packit	5c3484	`#else`
Packit	5c3484	`t2 = x0 * (a0 >> (16-8));`
Packit	5c3484	`t = t2 >> 13;`
Packit	5c3484	`t = ((mp_limb_signed_t) ((a0 << 6) - t * t - MAGIC) >> (16-8));`
Packit	5c3484	`x0 = t2 + ((mp_limb_signed_t) (x0 * t) >> 7);`
Packit	5c3484	`x0 >>= 16;`
Packit	5c3484	`#endif`
Packit	5c3484
Packit	5c3484	`/* x0 is now a full limb approximation of sqrt(a0) */`
Packit	5c3484
Packit	5c3484	`x2 = x0 * x0;`
Packit	5c3484	`if (x2 + 2*x0 <= a0 - 1)`
Packit	5c3484	`{`
Packit	5c3484	`x2 += 2*x0 + 1;`
Packit	5c3484	`x0++;`
Packit	5c3484	`}`
Packit	5c3484
Packit	5c3484	`*rp = a0 - x2;`
Packit	5c3484	`return x0;`
Packit	5c3484	`}`
Packit	5c3484
Packit	5c3484
Packit	5c3484	`#define Prec (GMP_NUMB_BITS >> 1)`
Packit	5c3484
Packit	5c3484	`/* same as mpn_sqrtrem, but for size=2 and {np, 2} normalized`
Packit	5c3484	`return cc such that {np, 2} = sp[0]^2 + cc2^GMP_NUMB_BITS + rp[0] /`
Packit	5c3484	`static mp_limb_t`
Packit	5c3484	`mpn_sqrtrem2 (mp_ptr sp, mp_ptr rp, mp_srcptr np)`
Packit	5c3484	`{`
Packit	5c3484	`mp_limb_t q, u, np0, sp0, rp0, q2;`
Packit	5c3484	`int cc;`
Packit	5c3484
Packit	5c3484	`ASSERT (np[1] >= GMP_NUMB_HIGHBIT / 2);`
Packit	5c3484
Packit	5c3484	`np0 = np[0];`
Packit	5c3484	`sp0 = mpn_sqrtrem1 (rp, np[1]);`
Packit	5c3484	`rp0 = rp[0];`
Packit	5c3484	`/* rp0 <= 2sp0 < 2^(Prec + 1) /`
Packit	5c3484	`rp0 = (rp0 << (Prec - 1)) + (np0 >> (Prec + 1));`
Packit	5c3484	`q = rp0 / sp0;`
Packit	5c3484	`/* q <= 2^Prec, if q = 2^Prec, reduce the overestimate. */`
Packit	5c3484	`q -= q >> Prec;`
Packit	5c3484	`/* now we have q < 2^Prec */`
Packit	5c3484	`u = rp0 - q * sp0;`
Packit	5c3484	`/* now we have (rp[0]<<Prec + np0>>Prec)/2 = q * sp0 + u */`
Packit	5c3484	`sp0 = (sp0 << Prec) \| q;`
Packit	5c3484	`cc = u >> (Prec - 1);`
Packit	5c3484	`rp0 = ((u << (Prec + 1)) & GMP_NUMB_MASK) + (np0 & ((CNST_LIMB (1) << (Prec + 1)) - 1));`
Packit	5c3484	`/* subtract q * q from rp */`
Packit	5c3484	`q2 = q * q;`
Packit	5c3484	`cc -= rp0 < q2;`
Packit	5c3484	`rp0 -= q2;`
Packit	5c3484	`if (cc < 0)`
Packit	5c3484	`{`
Packit	5c3484	`rp0 += sp0;`
Packit	5c3484	`cc += rp0 < sp0;`
Packit	5c3484	`--sp0;`
Packit	5c3484	`rp0 += sp0;`
Packit	5c3484	`cc += rp0 < sp0;`
Packit	5c3484	`}`
Packit	5c3484
Packit	5c3484	`rp[0] = rp0;`
Packit	5c3484	`sp[0] = sp0;`
Packit	5c3484	`return cc;`
Packit	5c3484	`}`
Packit	5c3484
Packit	5c3484	`/* writes in {sp, n} the square root (rounded towards zero) of {np, 2n},`
Packit	5c3484	`and in {np, n} the low n limbs of the remainder, returns the high`
Packit	5c3484	`limb of the remainder (which is 0 or 1).`
Packit	5c3484	`Assumes {np, 2n} is normalized, i.e. np[2n-1] >= B/4`
Packit	5c3484	`where B=2^GMP_NUMB_BITS.`
Packit	5c3484	`Needs a scratch of n/2+1 limbs. */`
Packit	5c3484	`static mp_limb_t`
Packit	5c3484	`mpn_dc_sqrtrem (mp_ptr sp, mp_ptr np, mp_size_t n, mp_limb_t approx, mp_ptr scratch)`
Packit	5c3484	`{`
Packit	5c3484	`mp_limb_t q; /* carry out of {sp, n} */`
Packit	5c3484	`int c, b; /* carry out of remainder */`
Packit	5c3484	`mp_size_t l, h;`
Packit	5c3484
Packit	5c3484	`ASSERT (np[2 * n - 1] >= GMP_NUMB_HIGHBIT / 2);`
Packit	5c3484
Packit	5c3484	`if (n == 1)`
Packit	5c3484	`c = mpn_sqrtrem2 (sp, np, np);`
Packit	5c3484	`else`
Packit	5c3484	`{`
Packit	5c3484	`l = n / 2;`
Packit	5c3484	`h = n - l;`
Packit	5c3484	`q = mpn_dc_sqrtrem (sp + l, np + 2 * l, h, 0, scratch);`
Packit	5c3484	`if (q != 0)`
Packit	5c3484	`ASSERT_CARRY (mpn_sub_n (np + 2 * l, np + 2 * l, sp + l, h));`
Packit	5c3484	`TRACE(printf("tdiv_qr(,,,,%u,,%u) -> %u\n", (unsigned) n, (unsigned) h, (unsigned) (n - h + 1)));`
Packit	5c3484	`mpn_tdiv_qr (scratch, np + l, 0, np + l, n, sp + l, h);`
Packit	5c3484	`q += scratch[l];`
Packit	5c3484	`c = scratch[0] & 1;`
Packit	5c3484	`mpn_rshift (sp, scratch, l, 1);`
Packit	5c3484	`sp[l - 1] \|= (q << (GMP_NUMB_BITS - 1)) & GMP_NUMB_MASK;`
Packit	5c3484	`if (UNLIKELY ((sp[0] & approx) != 0)) /* (sp[0] & mask) > 1 */`
Packit	5c3484	`return 1; /* Remainder is non-zero */`
Packit	5c3484	`q >>= 1;`
Packit	5c3484	`if (c != 0)`
Packit	5c3484	`c = mpn_add_n (np + l, np + l, sp + l, h);`
Packit	5c3484	`TRACE(printf("sqr(,,%u)\n", (unsigned) l));`
Packit	5c3484	`mpn_sqr (np + n, sp, l);`
Packit	5c3484	`b = q + mpn_sub_n (np, np, np + n, 2 * l);`
Packit	5c3484	`c -= (l == h) ? b : mpn_sub_1 (np + 2 * l, np + 2 * l, 1, (mp_limb_t) b);`
Packit	5c3484
Packit	5c3484	`if (c < 0)`
Packit	5c3484	`{`
Packit	5c3484	`q = mpn_add_1 (sp + l, sp + l, h, q);`
Packit	5c3484	`#if HAVE_NATIVE_mpn_addlsh1_n_ip1 \|\| HAVE_NATIVE_mpn_addlsh1_n`
Packit	5c3484	`c += mpn_addlsh1_n_ip1 (np, sp, n) + 2 * q;`
Packit	5c3484	`#else`
Packit	5c3484	`c += mpn_addmul_1 (np, sp, n, CNST_LIMB(2)) + 2 * q;`
Packit	5c3484	`#endif`
Packit	5c3484	`c -= mpn_sub_1 (np, np, n, CNST_LIMB(1));`
Packit	5c3484	`q -= mpn_sub_1 (sp, sp, n, CNST_LIMB(1));`
Packit	5c3484	`}`
Packit	5c3484	`}`
Packit	5c3484
Packit	5c3484	`return c;`
Packit	5c3484	`}`
Packit	5c3484
#if USE_DIVAPPR_Q
/* Store in {qp, nn - dn + 1} an approximate quotient of {np, nn} by
   {dp, dn}.  The low nn - dn limbs are written by the selected divappr
   routine and its return value qh is stored as the top limb.

   Requires dn > 2, nn >= dn and a divisor with its most significant bit
   set.  {np, nn} itself is left untouched: the first two variants work on
   a copy made in scratch (so scratch must hold at least nn limbs), and
   the third takes np directly with freshly allocated temporary space.
   The variant is chosen by comparing dn with DC_DIVAPPR_Q_THRESHOLD and
   MU_DIVAPPR_Q_THRESHOLD.  */
static void
mpn_divappr_q (mp_ptr qp, mp_srcptr np, mp_size_t nn, mp_srcptr dp, mp_size_t dn, mp_ptr scratch)
{
  gmp_pi1_t inv;
  mp_limb_t qh;
  ASSERT (dn > 2);
  ASSERT (nn >= dn);
  ASSERT ((dp[dn-1] & GMP_NUMB_HIGHBIT) != 0);

  MPN_COPY (scratch, np, nn);
  invert_pi1 (inv, dp[dn-1], dp[dn-2]);
  if (BELOW_THRESHOLD (dn, DC_DIVAPPR_Q_THRESHOLD))
    qh = mpn_sbpi1_divappr_q (qp, scratch, nn, dp, dn, inv.inv32);
  else if (BELOW_THRESHOLD (dn, MU_DIVAPPR_Q_THRESHOLD))
    qh = mpn_dcpi1_divappr_q (qp, scratch, nn, dp, dn, &inv);
  else
    {
      mp_size_t itch = mpn_mu_divappr_q_itch (nn, dn, 0);
      TMP_DECL;
      TMP_MARK;
      /* Sadly, scratch is too small. */
      qh = mpn_mu_divappr_q (qp, np, nn, dp, dn, TMP_ALLOC_LIMBS (itch));
      TMP_FREE;
    }
  qp [nn - dn] = qh;
}
#endif
Packit	5c3484
Packit	5c3484	`/* writes in {sp, n} the square root (rounded towards zero) of {np, 2n-odd},`
Packit	5c3484	`returns zero if the operand was a perfect square, one otherwise.`
Packit	5c3484	`Assumes {np, 2n-odd}4^nsh is normalized, i.e. B > np[2n-1-odd]4^nsh >= B/4`
Packit	5c3484	`where B=2^GMP_NUMB_BITS.`
Packit	5c3484	`THINK: In the odd case, three more (dummy) limbs are taken into account,`
Packit	5c3484	`when nsh is maximal, two limbs are discarded from the result of the`
Packit	5c3484	`division. Too much? Is a single dummy limb enough? */`
Packit	5c3484	`static int`
Packit	5c3484	`mpn_dc_sqrt (mp_ptr sp, mp_srcptr np, mp_size_t n, unsigned nsh, unsigned odd)`
Packit	5c3484	`{`
Packit	5c3484	`mp_limb_t q; /* carry out of {sp, n} */`
Packit	5c3484	`int c; /* carry out of remainder */`
Packit	5c3484	`mp_size_t l, h;`
Packit	5c3484	`mp_ptr qp, tp, scratch;`
Packit	5c3484	`TMP_DECL;`
Packit	5c3484	`TMP_MARK;`
Packit	5c3484
Packit	5c3484	`ASSERT (np[2 * n - 1 - odd] != 0);`
Packit	5c3484	`ASSERT (n > 4);`
Packit	5c3484	`ASSERT (nsh < GMP_NUMB_BITS / 2);`
Packit	5c3484
Packit	5c3484	`l = (n - 1) / 2;`
Packit	5c3484	`h = n - l;`
Packit	5c3484	`ASSERT (n >= l + 2 && l + 2 >= h && h > l && l >= 1 + odd);`
Packit	5c3484	`scratch = TMP_ALLOC_LIMBS (l + 2 * n + 5 - USE_DIVAPPR_Q); /* n + 2-USE_DIVAPPR_Q */`
Packit	5c3484	`tp = scratch + n + 2 - USE_DIVAPPR_Q; /* n + h + 1, but tp [-1] is writable */`
Packit	5c3484	`if (nsh != 0)`
Packit	5c3484	`{`
Packit	5c3484	`/* o is used to exactly set the lowest bits of the dividend, is it needed? */`
Packit	5c3484	`int o = l > (1 + odd);`
Packit	5c3484	`ASSERT_NOCARRY (mpn_lshift (tp - o, np + l - 1 - o - odd, n + h + 1 + o, 2 * nsh));`
Packit	5c3484	`}`
Packit	5c3484	`else`
Packit	5c3484	`MPN_COPY (tp, np + l - 1 - odd, n + h + 1);`
Packit	5c3484	`q = mpn_dc_sqrtrem (sp + l, tp + l + 1, h, 0, scratch);`
Packit	5c3484	`if (q != 0)`
Packit	5c3484	`ASSERT_CARRY (mpn_sub_n (tp + l + 1, tp + l + 1, sp + l, h));`
Packit	5c3484	`qp = tp + n + 1; /* l + 2 */`
Packit	5c3484	`TRACE(printf("div(appr)_q(,,%u,,%u) -> %u \n", (unsigned) n+1, (unsigned) h, (unsigned) (n + 1 - h + 1)));`
Packit	5c3484	`#if USE_DIVAPPR_Q`
Packit	5c3484	`mpn_divappr_q (qp, tp, n + 1, sp + l, h, scratch);`
Packit	5c3484	`#else`
Packit	5c3484	`mpn_div_q (qp, tp, n + 1, sp + l, h, scratch);`
Packit	5c3484	`#endif`
Packit	5c3484	`q += qp [l + 1];`
Packit	5c3484	`c = 1;`
Packit	5c3484	`if (q > 1)`
Packit	5c3484	`{`
Packit	5c3484	`/* FIXME: if s!=0 we will shift later, a noop on this area. */`
Packit	5c3484	`MPN_FILL (sp, l, GMP_NUMB_MAX);`
Packit	5c3484	`}`
Packit	5c3484	`else`
Packit	5c3484	`{`
Packit	5c3484	`/* FIXME: if s!=0 we will shift again later, shift just once. */`
Packit	5c3484	`mpn_rshift (sp, qp + 1, l, 1);`
Packit	5c3484	`sp[l - 1] \|= q << (GMP_NUMB_BITS - 1);`
Packit	5c3484	`if (((qp[0] >> (2 + USE_DIVAPPR_Q)) \| /* < 3 + 4USE_DIVAPPR_Q /`
Packit	5c3484	`(qp[1] & (GMP_NUMB_MASK >> ((GMP_NUMB_BITS >> odd)- nsh - 1)))) == 0)`
Packit	5c3484	`{`
Packit	5c3484	`mp_limb_t cy;`
Packit	5c3484	`/* Approximation is not good enough, the extra limb(+ nsh bits)`
Packit	5c3484	`is smaller than needed to absorb the possible error. */`
Packit	5c3484	`/* {qp + 1, l + 1} equals 2{sp, l} /`
Packit	5c3484	`/* FIXME: use mullo or wrap-around, or directly evaluate`
Packit	5c3484	`remainder with a single sqrmod_bnm1. */`
Packit	5c3484	`TRACE(printf("mul(,,%u,,%u)\n", (unsigned) h, (unsigned) (l+1)));`
Packit	5c3484	`ASSERT_NOCARRY (mpn_mul (scratch, sp + l, h, qp + 1, l + 1));`
Packit	5c3484	`/* Compute the remainder of the previous mpn_div(appr)_q. */`
Packit	5c3484	`cy = mpn_sub_n (tp + 1, tp + 1, scratch, h);`
Packit	5c3484	`#if USE_DIVAPPR_Q \|\| WANT_ASSERT`
Packit	5c3484	`MPN_DECR_U (tp + 1 + h, l, cy);`
Packit	5c3484	`#if USE_DIVAPPR_Q`
Packit	5c3484	`ASSERT (mpn_cmp (tp + 1 + h, scratch + h, l) <= 0);`
Packit	5c3484	`if (mpn_cmp (tp + 1 + h, scratch + h, l) < 0)`
Packit	5c3484	`{`
Packit	5c3484	`/* May happen only if div result was not exact. */`
Packit	5c3484	`#if HAVE_NATIVE_mpn_addlsh1_n_ip1 \|\| HAVE_NATIVE_mpn_addlsh1_n`
Packit	5c3484	`cy = mpn_addlsh1_n_ip1 (tp + 1, sp + l, h);`
Packit	5c3484	`#else`
Packit	5c3484	`cy = mpn_addmul_1 (tp + 1, sp + l, h, CNST_LIMB(2));`
Packit	5c3484	`#endif`
Packit	5c3484	`ASSERT_NOCARRY (mpn_add_1 (tp + 1 + h, tp + 1 + h, l, cy));`
Packit	5c3484	`MPN_DECR_U (sp, l, 1);`
Packit	5c3484	`}`
Packit	5c3484	`/* Can the root be exact when a correction was needed? We`
Packit	5c3484	`did not find an example, but it depends on divappr`
Packit	5c3484	`internals, and we can not assume it true in general...*/`
Packit	5c3484	`/* else */`
Packit	5c3484	`#else /* WANT_ASSERT */`
Packit	5c3484	`ASSERT (mpn_cmp (tp + 1 + h, scratch + h, l) == 0);`
Packit	5c3484	`#endif`
Packit	5c3484	`#endif`
Packit	5c3484	`if (mpn_zero_p (tp + l + 1, h - l))`
Packit	5c3484	`{`
Packit	5c3484	`TRACE(printf("sqr(,,%u)\n", (unsigned) l));`
Packit	5c3484	`mpn_sqr (scratch, sp, l);`
Packit	5c3484	`c = mpn_cmp (tp + 1, scratch + l, l);`
Packit	5c3484	`if (c == 0)`
Packit	5c3484	`{`
Packit	5c3484	`if (nsh != 0)`
Packit	5c3484	`{`
Packit	5c3484	`mpn_lshift (tp, np, l, 2 * nsh);`
Packit	5c3484	`np = tp;`
Packit	5c3484	`}`
Packit	5c3484	`c = mpn_cmp (np, scratch + odd, l - odd);`
Packit	5c3484	`}`
Packit	5c3484	`if (c < 0)`
Packit	5c3484	`{`
Packit	5c3484	`MPN_DECR_U (sp, l, 1);`
Packit	5c3484	`c = 1;`
Packit	5c3484	`}`
Packit	5c3484	`}`
Packit	5c3484	`}`
Packit	5c3484	`}`
Packit	5c3484	`TMP_FREE;`
Packit	5c3484
Packit	5c3484	`if ((odd \| nsh) != 0)`
Packit	5c3484	`mpn_rshift (sp, sp, n, nsh + (odd ? GMP_NUMB_BITS / 2 : 0));`
Packit	5c3484	`return c;`
Packit	5c3484	`}`
Packit	5c3484
Packit	5c3484
Packit	5c3484	`mp_size_t`
Packit	5c3484	`mpn_sqrtrem (mp_ptr sp, mp_ptr rp, mp_srcptr np, mp_size_t nn)`
Packit	5c3484	`{`
Packit	5c3484	`mp_limb_t *tp, s0[1], cc, high, rl;`
Packit	5c3484	`int c;`
Packit	5c3484	`mp_size_t rn, tn;`
Packit	5c3484	`TMP_DECL;`
Packit	5c3484
Packit	5c3484	`ASSERT (nn > 0);`
Packit	5c3484	`ASSERT_MPN (np, nn);`
Packit	5c3484
Packit	5c3484	`ASSERT (np[nn - 1] != 0);`
Packit	5c3484	`ASSERT (rp == NULL \|\| MPN_SAME_OR_SEPARATE_P (np, rp, nn));`
Packit	5c3484	`ASSERT (rp == NULL \|\| ! MPN_OVERLAP_P (sp, (nn + 1) / 2, rp, nn));`
Packit	5c3484	`ASSERT (! MPN_OVERLAP_P (sp, (nn + 1) / 2, np, nn));`
Packit	5c3484
Packit	5c3484	`high = np[nn - 1];`
Packit	5c3484	`if (high & (GMP_NUMB_HIGHBIT \| (GMP_NUMB_HIGHBIT / 2)))`
Packit	5c3484	`c = 0;`
Packit	5c3484	`else`
Packit	5c3484	`{`
Packit	5c3484	`count_leading_zeros (c, high);`
Packit	5c3484	`c -= GMP_NAIL_BITS;`
Packit	5c3484
Packit	5c3484	`c = c / 2; /* we have to shift left by 2c bits to normalize {np, nn} */`
Packit	5c3484	`}`
Packit	5c3484	`if (nn == 1) {`
Packit	5c3484	`if (c == 0)`
Packit	5c3484	`{`
Packit	5c3484	`sp[0] = mpn_sqrtrem1 (&rl, high);`
Packit	5c3484	`if (rp != NULL)`
Packit	5c3484	`rp[0] = rl;`
Packit	5c3484	`}`
Packit	5c3484	`else`
Packit	5c3484	`{`
Packit	5c3484	`cc = mpn_sqrtrem1 (&rl, high << (2*c)) >> c;`
Packit	5c3484	`sp[0] = cc;`
Packit	5c3484	`if (rp != NULL)`
Packit	5c3484	`rp[0] = rl = high - cc*cc;`
Packit	5c3484	`}`
Packit	5c3484	`return rl != 0;`
Packit	5c3484	`}`
Packit	5c3484	`tn = (nn + 1) / 2; /* 2tn is the smallest even integer >= nn /`
Packit	5c3484
Packit	5c3484	`if ((rp == NULL) && (nn > 8))`
Packit	5c3484	`return mpn_dc_sqrt (sp, np, tn, c, nn & 1);`
Packit	5c3484	`TMP_MARK;`
Packit	5c3484	`if (((nn & 1) \| c) != 0)`
Packit	5c3484	`{`
Packit	5c3484	`mp_limb_t mask;`
Packit	5c3484	`mp_ptr scratch;`
Packit	5c3484	`TMP_ALLOC_LIMBS_2 (tp, 2 * tn, scratch, tn / 2 + 1);`
Packit	5c3484	`tp[0] = 0; /* needed only when 2tn > nn, but saves a test /`
Packit	5c3484	`if (c != 0)`
Packit	5c3484	`mpn_lshift (tp + (nn & 1), np, nn, 2 * c);`
Packit	5c3484	`else`
Packit	5c3484	`MPN_COPY (tp + (nn & 1), np, nn);`
Packit	5c3484	`c += (nn & 1) ? GMP_NUMB_BITS / 2 : 0; /* c now represents k */`
Packit	5c3484	`mask = (CNST_LIMB (1) << c) - 1;`
Packit	5c3484	`rl = mpn_dc_sqrtrem (sp, tp, tn, (rp == NULL) ? mask - 1 : 0, scratch);`
Packit	5c3484	`/* We have 2^(2k)N = S^2 + R where k = c + (2tn-nn)GMP_NUMB_BITS/2,`
Packit	5c3484	`thus 2^(2k)N = (S-s0)^2 + 2Ss0 - s0^2 + R where s0=S mod 2^k /`
Packit	5c3484	`s0[0] = sp[0] & mask; /* S mod 2^k */`
Packit	5c3484	`rl += mpn_addmul_1 (tp, sp, tn, 2 * s0[0]); /* R = R + 2s0S */`
Packit	5c3484	`cc = mpn_submul_1 (tp, s0, 1, s0[0]);`
Packit	5c3484	`rl -= (tn > 1) ? mpn_sub_1 (tp + 1, tp + 1, tn - 1, cc) : cc;`
Packit	5c3484	`mpn_rshift (sp, sp, tn, c);`
Packit	5c3484	`tp[tn] = rl;`
Packit	5c3484	`if (rp == NULL)`
Packit	5c3484	`rp = tp;`
Packit	5c3484	`c = c << 1;`
Packit	5c3484	`if (c < GMP_NUMB_BITS)`
Packit	5c3484	`tn++;`
Packit	5c3484	`else`
Packit	5c3484	`{`
Packit	5c3484	`tp++;`
Packit	5c3484	`c -= GMP_NUMB_BITS;`
Packit	5c3484	`}`
Packit	5c3484	`if (c != 0)`
Packit	5c3484	`mpn_rshift (rp, tp, tn, c);`
Packit	5c3484	`else`
Packit	5c3484	`MPN_COPY_INCR (rp, tp, tn);`
Packit	5c3484	`rn = tn;`
Packit	5c3484	`}`
Packit	5c3484	`else`
Packit	5c3484	`{`
Packit	5c3484	`if (rp != np)`
Packit	5c3484	`{`
Packit	5c3484	`if (rp == NULL) /* nn <= 8 */`
Packit	5c3484	`rp = TMP_SALLOC_LIMBS (nn);`
Packit	5c3484	`MPN_COPY (rp, np, nn);`
Packit	5c3484	`}`
Packit	5c3484	`rn = tn + (rp[tn] = mpn_dc_sqrtrem (sp, rp, tn, 0, TMP_ALLOC_LIMBS(tn / 2 + 1)));`
Packit	5c3484	`}`
Packit	5c3484
Packit	5c3484	`MPN_NORMALIZE (rp, rn);`
Packit	5c3484
Packit	5c3484	`TMP_FREE;`
Packit	5c3484	`return rn;`
Packit	5c3484	`}`

source-git / gmp

Source Code

Blame mpn/generic/sqrtrem.c