Tree - source-git/gmp - CentOS Git server

source-git / gmp

Blame mpn/sparc64/mode1o.c

Blob History Raw

Packit	5c3484	`/* UltraSPARC 64 mpn_modexact_1c_odd -- mpn by limb exact style remainder.`
Packit	5c3484
Packit	5c3484	`THE FUNCTIONS IN THIS FILE ARE FOR INTERNAL USE ONLY. THEY'RE ALMOST`
Packit	5c3484	`CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR COMPLETELY IN`
Packit	5c3484	`FUTURE GNU MP RELEASES.`
Packit	5c3484
Packit	5c3484	`Copyright 2000-2003 Free Software Foundation, Inc.`
Packit	5c3484
Packit	5c3484	`This file is part of the GNU MP Library.`
Packit	5c3484
Packit	5c3484	`The GNU MP Library is free software; you can redistribute it and/or modify`
Packit	5c3484	`it under the terms of either:`
Packit	5c3484
Packit	5c3484	`* the GNU Lesser General Public License as published by the Free`
Packit	5c3484	`Software Foundation; either version 3 of the License, or (at your`
Packit	5c3484	`option) any later version.`
Packit	5c3484
Packit	5c3484	`or`
Packit	5c3484
Packit	5c3484	`* the GNU General Public License as published by the Free Software`
Packit	5c3484	`Foundation; either version 2 of the License, or (at your option) any`
Packit	5c3484	`later version.`
Packit	5c3484
Packit	5c3484	`or both in parallel, as here.`
Packit	5c3484
Packit	5c3484	`The GNU MP Library is distributed in the hope that it will be useful, but`
Packit	5c3484	`WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY`
Packit	5c3484	`or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License`
Packit	5c3484	`for more details.`
Packit	5c3484
Packit	5c3484	`You should have received copies of the GNU General Public License and the`
Packit	5c3484	`GNU Lesser General Public License along with the GNU MP Library. If not,`
Packit	5c3484	`see https://www.gnu.org/licenses/. */`
Packit	5c3484
Packit	5c3484	`#include "gmp.h"`
Packit	5c3484	`#include "gmp-impl.h"`
Packit	5c3484	`#include "longlong.h"`
Packit	5c3484
Packit	5c3484	`#include "mpn/sparc64/sparc64.h"`
Packit	5c3484
Packit	5c3484
Packit	5c3484	`/* 64-bit divisor 32-bit divisor`
Packit	5c3484	`cycles/limb cycles/limb`
Packit	5c3484	`(approx) (approx)`
Packit	5c3484	`Ultrasparc 2i: ? ?`
Packit	5c3484	`*/`
Packit	5c3484
Packit	5c3484
Packit	5c3484	`/* This implementation reduces the number of multiplies done, knowing that`
Packit	5c3484	`on ultrasparc 1 and 2 the mulx instruction stalls the whole chip.`
Packit	5c3484
Packit	5c3484	`The key idea is to use the fact that the low limb of q*d equals l, this`
Packit	5c3484	`being the whole purpose of the q calculated. It means there's no need to`
Packit	5c3484	`calculate the lowest 32x32->64 part of the q*d, instead it can be`
Packit	5c3484	`inferred from l and the other three 32x32->64 parts. See sparc64.h for`
Packit	5c3484	`details.`
Packit	5c3484
Packit	5c3484	`When d is 32-bits, the same applies, but in this case there's only one`
Packit	5c3484	`other 32x32->64 part (ie. HIGH(q)*d).`
Packit	5c3484
Packit	5c3484	`The net effect is that for 64-bit divisor each limb is 4 mulx, or for`
Packit	5c3484	`32-bit divisor each is 2 mulx.`
Packit	5c3484
Packit	5c3484	`Enhancements:`
Packit	5c3484
Packit	5c3484	`No doubt this could be done in assembler, if that helped the scheduling,`
Packit	5c3484	`or perhaps guaranteed good code irrespective of the compiler.`
Packit	5c3484
Packit	5c3484	`Alternatives:`
Packit	5c3484
Packit	5c3484	`It might be possibly to use floating point. The loop is dominated by`
Packit	5c3484	`multiply latency, so not sure if floats would improve that. One`
Packit	5c3484	`possibility would be to take two limbs at a time, with a 128 bit inverse,`
Packit	5c3484	`if there's enough registers, which could effectively use float throughput`
Packit	5c3484	`to reduce total latency across two limbs. */`
Packit	5c3484
Packit	5c3484	`#define ASSERT_RETVAL(r) \`
Packit	5c3484	`ASSERT (orig_c < d ? r < d : r <= d)`
Packit	5c3484
Packit	5c3484	`mp_limb_t`
Packit	5c3484	`mpn_modexact_1c_odd (mp_srcptr src, mp_size_t size, mp_limb_t d, mp_limb_t orig_c)`
Packit	5c3484	`{`
Packit	5c3484	`mp_limb_t c = orig_c;`
Packit	5c3484	`mp_limb_t s, l, q, h, inverse;`
Packit	5c3484
Packit	5c3484	`ASSERT (size >= 1);`
Packit	5c3484	`ASSERT (d & 1);`
Packit	5c3484	`ASSERT_MPN (src, size);`
Packit	5c3484	`ASSERT_LIMB (d);`
Packit	5c3484	`ASSERT_LIMB (c);`
Packit	5c3484
Packit	5c3484	`/* udivx is faster than 10 or 12 mulx's for one limb via an inverse */`
Packit	5c3484	`if (size == 1)`
Packit	5c3484	`{`
Packit	5c3484	`s = src[0];`
Packit	5c3484	`if (s > c)`
Packit	5c3484	`{`
Packit	5c3484	`l = s-c;`
Packit	5c3484	`h = l % d;`
Packit	5c3484	`if (h != 0)`
Packit	5c3484	`h = d - h;`
Packit	5c3484	`}`
Packit	5c3484	`else`
Packit	5c3484	`{`
Packit	5c3484	`l = c-s;`
Packit	5c3484	`h = l % d;`
Packit	5c3484	`}`
Packit	5c3484	`return h;`
Packit	5c3484	`}`
Packit	5c3484
Packit	5c3484	`binvert_limb (inverse, d);`
Packit	5c3484
Packit	5c3484	`if (d <= 0xFFFFFFFF)`
Packit	5c3484	`{`
Packit	5c3484	`s = *src++;`
Packit	5c3484	`size--;`
Packit	5c3484	`do`
Packit	5c3484	`{`
Packit	5c3484	`SUBC_LIMB (c, l, s, c);`
Packit	5c3484	`s = *src++;`
Packit	5c3484	`q = l * inverse;`
Packit	5c3484	`umul_ppmm_half_lowequal (h, q, d, l);`
Packit	5c3484	`c += h;`
Packit	5c3484	`size--;`
Packit	5c3484	`}`
Packit	5c3484	`while (size != 0);`
Packit	5c3484
Packit	5c3484	`if (s <= d)`
Packit	5c3484	`{`
Packit	5c3484	`/* With high s <= d the final step can be a subtract and addback.`
Packit	5c3484	`If c==0 then the addback will restore to l>=0. If c==d then`
Packit	5c3484	`will get l==d if s==0, but that's ok per the function`
Packit	5c3484	`definition. */`
Packit	5c3484
Packit	5c3484	`l = c - s;`
Packit	5c3484	`l += (l > c ? d : 0);`
Packit	5c3484
Packit	5c3484	`ASSERT_RETVAL (l);`
Packit	5c3484	`return l;`
Packit	5c3484	`}`
Packit	5c3484	`else`
Packit	5c3484	`{`
Packit	5c3484	`/* Can't skip a divide, just do the loop code once more. */`
Packit	5c3484	`SUBC_LIMB (c, l, s, c);`
Packit	5c3484	`q = l * inverse;`
Packit	5c3484	`umul_ppmm_half_lowequal (h, q, d, l);`
Packit	5c3484	`c += h;`
Packit	5c3484
Packit	5c3484	`ASSERT_RETVAL (c);`
Packit	5c3484	`return c;`
Packit	5c3484	`}`
Packit	5c3484	`}`
Packit	5c3484	`else`
Packit	5c3484	`{`
Packit	5c3484	`mp_limb_t dl = LOW32 (d);`
Packit	5c3484	`mp_limb_t dh = HIGH32 (d);`
Packit	5c3484	`long i;`
Packit	5c3484
Packit	5c3484	`s = *src++;`
Packit	5c3484	`size--;`
Packit	5c3484	`do`
Packit	5c3484	`{`
Packit	5c3484	`SUBC_LIMB (c, l, s, c);`
Packit	5c3484	`s = *src++;`
Packit	5c3484	`q = l * inverse;`
Packit	5c3484	`umul_ppmm_lowequal (h, q, d, dh, dl, l);`
Packit	5c3484	`c += h;`
Packit	5c3484	`size--;`
Packit	5c3484	`}`
Packit	5c3484	`while (size != 0);`
Packit	5c3484
Packit	5c3484	`if (s <= d)`
Packit	5c3484	`{`
Packit	5c3484	`/* With high s <= d the final step can be a subtract and addback.`
Packit	5c3484	`If c==0 then the addback will restore to l>=0. If c==d then`
Packit	5c3484	`will get l==d if s==0, but that's ok per the function`
Packit	5c3484	`definition. */`
Packit	5c3484
Packit	5c3484	`l = c - s;`
Packit	5c3484	`l += (l > c ? d : 0);`
Packit	5c3484
Packit	5c3484	`ASSERT_RETVAL (l);`
Packit	5c3484	`return l;`
Packit	5c3484	`}`
Packit	5c3484	`else`
Packit	5c3484	`{`
Packit	5c3484	`/* Can't skip a divide, just do the loop code once more. */`
Packit	5c3484	`SUBC_LIMB (c, l, s, c);`
Packit	5c3484	`q = l * inverse;`
Packit	5c3484	`umul_ppmm_lowequal (h, q, d, dh, dl, l);`
Packit	5c3484	`c += h;`
Packit	5c3484
Packit	5c3484	`ASSERT_RETVAL (c);`
Packit	5c3484	`return c;`
Packit	5c3484	`}`
Packit	5c3484	`}`
Packit	5c3484	`}`

source-git / gmp

Source Code

Blame mpn/sparc64/mode1o.c