|
Packit |
5c3484 |
All current (2001) S/390 and z/Architecture machines are single-issue,
|
|
Packit |
5c3484 |
but some newer machines have a deep pipeline. Software-pipelining is
|
|
Packit |
5c3484 |
therefore beneficial.
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
* mpn_add_n, mpn_sub_n: Use code along the lines below. Two-way unrolling
|
|
Packit |
5c3484 |
would be adequate.
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
mp_limb_t
|
|
Packit |
5c3484 |
mpn_add_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n)
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
mp_limb_t a, b, r, cy;
|
|
Packit |
5c3484 |
mp_size_t i;
|
|
Packit |
5c3484 |
mp_limb_t mm = -1;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
cy = 0;
|
|
Packit |
5c3484 |
up += n;
|
|
Packit |
5c3484 |
vp += n;
|
|
Packit |
5c3484 |
rp += n;
|
|
Packit |
5c3484 |
i = -n;
|
|
Packit |
5c3484 |
do
|
|
Packit |
5c3484 |
{
|
|
Packit |
5c3484 |
a = up[i];
|
|
Packit |
5c3484 |
b = vp[i];
|
|
Packit |
5c3484 |
r = a + b + cy;
|
|
Packit |
5c3484 |
rp[i] = r;
|
|
Packit |
5c3484 |
cy = (((a & b) | ((a | b) & (r ^ mm)))) >> 31;
|
|
Packit |
5c3484 |
i++;
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
while (i < 0);
|
|
Packit |
5c3484 |
return cy;
|
|
Packit |
5c3484 |
}
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
* mpn_lshift, mpn_rshift: Use SLDL/SRDL, and two-way unrolling.
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
* mpn_mul_1, mpn_addmul_1, mpn_submul_1: For machines with just signed
|
|
Packit |
5c3484 |
multiply (MR), use two loops, similar to the corresponding VAX or
|
|
Packit |
5c3484 |
POWER functions. Handle carry as in mpn_add_n.
|