Tree - source-git/gmp - CentOS Git server

source-git / gmp

Blame mpn/cray/ieee/submul_1.c

Blob History Raw

Packit	5c3484	`/* Cray PVP/IEEE mpn_submul_1 -- multiply a limb vector with a limb and`
Packit	5c3484	`subtract the result from a second limb vector.`
Packit	5c3484
Packit	5c3484	`Copyright 2000-2002 Free Software Foundation, Inc.`
Packit	5c3484
Packit	5c3484	`This file is part of the GNU MP Library.`
Packit	5c3484
Packit	5c3484	`The GNU MP Library is free software; you can redistribute it and/or modify`
Packit	5c3484	`it under the terms of either:`
Packit	5c3484
Packit	5c3484	`* the GNU Lesser General Public License as published by the Free`
Packit	5c3484	`Software Foundation; either version 3 of the License, or (at your`
Packit	5c3484	`option) any later version.`
Packit	5c3484
Packit	5c3484	`or`
Packit	5c3484
Packit	5c3484	`* the GNU General Public License as published by the Free Software`
Packit	5c3484	`Foundation; either version 2 of the License, or (at your option) any`
Packit	5c3484	`later version.`
Packit	5c3484
Packit	5c3484	`or both in parallel, as here.`
Packit	5c3484
Packit	5c3484	`The GNU MP Library is distributed in the hope that it will be useful, but`
Packit	5c3484	`WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY`
Packit	5c3484	`or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License`
Packit	5c3484	`for more details.`
Packit	5c3484
Packit	5c3484	`You should have received copies of the GNU General Public License and the`
Packit	5c3484	`GNU Lesser General Public License along with the GNU MP Library. If not,`
Packit	5c3484	`see https://www.gnu.org/licenses/. */`
Packit	5c3484
Packit	5c3484	`/* This code runs at just under 9 cycles/limb on a T90. That is not perfect,`
Packit	5c3484	`mainly due to vector register shortage in the main loop. Assembly code`
Packit	5c3484	`should bring it down to perhaps 7 cycles/limb. */`
Packit	5c3484
Packit	5c3484	`#include <intrinsics.h>`
Packit	5c3484	`#include "gmp.h"`
Packit	5c3484	`#include "gmp-impl.h"`
Packit	5c3484
Packit	5c3484	`mp_limb_t`
Packit	5c3484	`mpn_submul_1 (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_limb_t vl)`
Packit	5c3484	`{`
Packit	5c3484	`mp_limb_t cy[n];`
Packit	5c3484	`mp_limb_t a, b, r, s0, s1, c0, c1;`
Packit	5c3484	`mp_size_t i;`
Packit	5c3484	`int more_carries;`
Packit	5c3484
Packit	5c3484	`if (up == rp)`
Packit	5c3484	`{`
Packit	5c3484	`/* The algorithm used below cannot handle overlap. Handle it here by`
Packit	5c3484	`making a temporary copy of the source vector, then call ourselves. */`
Packit	5c3484	`mp_limb_t xp[n];`
Packit	5c3484	`MPN_COPY (xp, up, n);`
Packit	5c3484	`return mpn_submul_1 (rp, xp, n, vl);`
Packit	5c3484	`}`
Packit	5c3484
Packit	5c3484	`a = up[0] * vl;`
Packit	5c3484	`r = rp[0];`
Packit	5c3484	`s0 = r - a;`
Packit	5c3484	`rp[0] = s0;`
Packit	5c3484	`c1 = ((s0 & a) \| ((s0 \| a) & ~r)) >> 63;`
Packit	5c3484	`cy[0] = c1;`
Packit	5c3484
Packit	5c3484	`/* Main multiply loop. Generate a raw accumulated output product in rp[]`
Packit	5c3484	`and a carry vector in cy[]. */`
Packit	5c3484	`#pragma _CRI ivdep`
Packit	5c3484	`for (i = 1; i < n; i++)`
Packit	5c3484	`{`
Packit	5c3484	`a = up[i] * vl;`
Packit	5c3484	`b = _int_mult_upper (up[i - 1], vl);`
Packit	5c3484	`s0 = a + b;`
Packit	5c3484	`c0 = ((a & b) \| ((a \| b) & ~s0)) >> 63;`
Packit	5c3484	`r = rp[i];`
Packit	5c3484	`s1 = r - s0;`
Packit	5c3484	`rp[i] = s1;`
Packit	5c3484	`c1 = ((s1 & s0) \| ((s1 \| s0) & ~r)) >> 63;`
Packit	5c3484	`cy[i] = c0 + c1;`
Packit	5c3484	`}`
Packit	5c3484	`/* Carry subtract loop. Subtract the carry vector cy[] from the raw result`
Packit	5c3484	`rp[] and store the new result back to rp[]. */`
Packit	5c3484	`more_carries = 0;`
Packit	5c3484	`#pragma _CRI ivdep`
Packit	5c3484	`for (i = 1; i < n; i++)`
Packit	5c3484	`{`
Packit	5c3484	`r = rp[i];`
Packit	5c3484	`c0 = cy[i - 1];`
Packit	5c3484	`s0 = r - c0;`
Packit	5c3484	`rp[i] = s0;`
Packit	5c3484	`c0 = (s0 & ~r) >> 63;`
Packit	5c3484	`more_carries += c0;`
Packit	5c3484	`}`
Packit	5c3484	`/* If that second loop generated carry, handle that in scalar loop. */`
Packit	5c3484	`if (more_carries)`
Packit	5c3484	`{`
Packit	5c3484	`mp_limb_t cyrec = 0;`
Packit	5c3484	`/* Look for places where rp[k] == ~0 and cy[k-1] == 1 or`
Packit	5c3484	`rp[k] == ~1 and cy[k-1] == 2.`
Packit	5c3484	`These are where we got a recurrency carry. */`
Packit	5c3484	`for (i = 1; i < n; i++)`
Packit	5c3484	`{`
Packit	5c3484	`r = rp[i];`
Packit	5c3484	`c0 = ~r < cy[i - 1];`
Packit	5c3484	`s0 = r - cyrec;`
Packit	5c3484	`rp[i] = s0;`
Packit	5c3484	`c1 = (s0 & ~r) >> 63;`
Packit	5c3484	`cyrec = c0 \| c1;`
Packit	5c3484	`}`
Packit	5c3484	`return _int_mult_upper (up[n - 1], vl) + cyrec + cy[n - 1];`
Packit	5c3484	`}`
Packit	5c3484
Packit	5c3484	`return _int_mult_upper (up[n - 1], vl) + cy[n - 1];`
Packit	5c3484	`}`

source-git / gmp

Source Code

Blame mpn/cray/ieee/submul_1.c