|
Packit |
5c3484 |
dnl IA-64 mpn_add_n_sub_n -- mpn parallel addition and subtraction.
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
dnl Contributed to the GNU project by Torbjorn Granlund.
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
dnl Copyright 2010 Free Software Foundation, Inc.
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
dnl This file is part of the GNU MP Library.
|
|
Packit |
5c3484 |
dnl
|
|
Packit |
5c3484 |
dnl The GNU MP Library is free software; you can redistribute it and/or modify
|
|
Packit |
5c3484 |
dnl it under the terms of either:
|
|
Packit |
5c3484 |
dnl
|
|
Packit |
5c3484 |
dnl * the GNU Lesser General Public License as published by the Free
|
|
Packit |
5c3484 |
dnl Software Foundation; either version 3 of the License, or (at your
|
|
Packit |
5c3484 |
dnl option) any later version.
|
|
Packit |
5c3484 |
dnl
|
|
Packit |
5c3484 |
dnl or
|
|
Packit |
5c3484 |
dnl
|
|
Packit |
5c3484 |
dnl * the GNU General Public License as published by the Free Software
|
|
Packit |
5c3484 |
dnl Foundation; either version 2 of the License, or (at your option) any
|
|
Packit |
5c3484 |
dnl later version.
|
|
Packit |
5c3484 |
dnl
|
|
Packit |
5c3484 |
dnl or both in parallel, as here.
|
|
Packit |
5c3484 |
dnl
|
|
Packit |
5c3484 |
dnl The GNU MP Library is distributed in the hope that it will be useful, but
|
|
Packit |
5c3484 |
dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
|
Packit |
5c3484 |
dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
Packit |
5c3484 |
dnl for more details.
|
|
Packit |
5c3484 |
dnl
|
|
Packit |
5c3484 |
dnl You should have received copies of the GNU General Public License and the
|
|
Packit |
5c3484 |
dnl GNU Lesser General Public License along with the GNU MP Library. If not,
|
|
Packit |
5c3484 |
dnl see https://www.gnu.org/licenses/.
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
include(`../config.m4')
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
C cycles/limb
|
|
Packit |
5c3484 |
C Itanium: ?
|
|
Packit |
5c3484 |
C Itanium 2: 2.25
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
C INPUT PARAMETERS
|
|
Packit |
5c3484 |
C Roles of the five arguments as used by the code below:
C   sp      where the sum limbs (u + v) are stored
C   dp      where the difference limbs (u - v) are stored
C   up, vp  the two source operands
C   n       operand size in limbs; n mod 4 picks the entry path
define(`sp', `r32')
|
|
Packit |
5c3484 |
define(`dp', `r33')
|
|
Packit |
5c3484 |
define(`up', `r34')
|
|
Packit |
5c3484 |
define(`vp', `r35')
|
|
Packit |
5c3484 |
define(`n', `r36')
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
C Some useful aliases for registers we use
C u0-u3 and v0-v3 receive limbs loaded from up and vp; s0-s3 hold u + v and
C d0-d3 hold u - v for the same limb positions.
|
|
Packit |
5c3484 |
define(`u0',`r16') define(`u1',`r17') define(`u2',`r18') define(`u3',`r19')
|
|
Packit |
5c3484 |
define(`v0',`r20') define(`v1',`r21') define(`v2',`r22') define(`v3',`r23')
|
|
Packit |
5c3484 |
define(`s0',`r24') define(`s1',`r25') define(`s2',`r26') define(`s3',`r27')
|
|
Packit |
5c3484 |
define(`d0',`r28') define(`d1',`r29') define(`d2',`r30') define(`d3',`r31')
|
|
Packit |
5c3484 |
C up0 and vp0 are the incoming pointers themselves; up1 and vp1 are second
C pointers which the entry code sets to up0 + 8 and vp0 + 8, so that loads can
C alternate between the two pointers of each operand.
define(`up0',`up')
|
|
Packit |
5c3484 |
define(`up1',`r14')
|
|
Packit |
5c3484 |
define(`vp0',`vp')
|
|
Packit |
5c3484 |
define(`vp1',`r15')
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
C mpn_add_n_sub_n (sp, dp, up, vp, n)
C
C In one pass over the operands, store the n-limb sum up + vp at sp and the
C n-limb difference up - vp at dp.  At return r8 = 2*carry + borrow, where
C carry is the carry out of the last sum limb and borrow is the borrow out of
C the last difference limb.
C
C Layout: the entry code dispatches on n mod 4 to b1, b0, b2 or b3, which
C handle the odd limbs and prime the pipeline; top/mid form the 4-limb loop;
C end, cj2 and cj1 drain the pipeline and build the return value.
C
C NOTE(review): cmpltu and cmpeqor are not defined in the visible part of this
C file; presumably they come from the included m4 definitions and expand to
C cmp.ltu and cmp.eq.or - confirm.
C NOTE(review): there is no special handling of n = 0 here; callers presumably
C guarantee n >= 1 - confirm.
ASM_START()
|
|
Packit |
5c3484 |
PROLOGUE(mpn_add_n_sub_n)
|
|
Packit |
5c3484 |
.prologue
|
|
Packit |
5c3484 |
.save ar.lc, r2
|
|
Packit |
5c3484 |
.body
|
|
Packit |
5c3484 |
C For the 32-bit ABI: widen the four pointer arguments with addp4 and
C zero-extend n before use.
ifdef(`HAVE_ABI_32',`
|
|
Packit |
5c3484 |
addp4 sp = 0, sp C M I
|
|
Packit |
5c3484 |
addp4 dp = 0, dp C M I
|
|
Packit |
5c3484 |
nop.i 0
|
|
Packit |
5c3484 |
addp4 up = 0, up C M I
|
|
Packit |
5c3484 |
addp4 vp = 0, vp C M I
|
|
Packit |
5c3484 |
zxt4 n = n C I
|
|
Packit |
5c3484 |
;;
|
|
Packit |
5c3484 |
')
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
C r9 = n mod 4 selects the entry path.  r2 keeps the incoming ar.lc so that it
C can be restored before returning.  up1/vp1 start one limb (8 bytes) ahead of
C up0/vp0.  r8 = n - 2 is turned into the loop count below.  r10 (and r11
C below) start 256 bytes into each source operand and feed the lfetch
C instructions in the loop.
and r9 = 3, n C M I
|
|
Packit |
5c3484 |
mov.i r2 = ar.lc C I0
|
|
Packit |
5c3484 |
add up1 = 8, up0 C M I
|
|
Packit |
5c3484 |
add vp1 = 8, vp0 C M I
|
|
Packit |
5c3484 |
add r8 = -2, n C M I
|
|
Packit |
5c3484 |
add r10 = 256, up C M I
|
|
Packit |
5c3484 |
;;
|
|
Packit |
5c3484 |
C Loop count = (n - 2) >> 2, computed as an unsigned shift.
shr.u r8 = r8, 2 C I0
|
|
Packit |
5c3484 |
cmp.eq p10, p0 = 0, r9 C M I
|
|
Packit |
5c3484 |
cmp.eq p11, p0 = 2, r9 C M I
|
|
Packit |
5c3484 |
cmp.eq p12, p0 = 3, r9 C M I
|
|
Packit |
5c3484 |
add r11 = 256, vp C M I
|
|
Packit |
5c3484 |
;;
|
|
Packit |
5c3484 |
C Dispatch on n mod 4: 0 -> b0, 2 -> b2, 3 -> b3; the remaining case 1 falls
C through to b1.
mov.i ar.lc = r8 C I0
|
|
Packit |
5c3484 |
(p10) br L(b0) C B
|
|
Packit |
5c3484 |
(p11) br L(b2) C B
|
|
Packit |
5c3484 |
(p12) br L(b3) C B
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
C b1 (n mod 4 = 1): process one limb up front, using the limb-3 registers.
C p15 is set when n is not greater than 4, i.e. this limb is the whole
C operand; then jump to cj1, which stores s3/d3 and forms the return value
C from p9/p13 (r8 is preset to 0 for that).  Otherwise store the limb here and
C join c0 with the carry in p9 and the borrow in p13.
C up1/vp1 are bumped by 8 to stay one limb ahead of up0/vp0.
L(b1): ld8 u3 = [up0], 8 C M01
|
|
Packit |
5c3484 |
add up1 = 8, up1 C M I
|
|
Packit |
5c3484 |
cmpltu p14, p15 = 4, n C M I
|
|
Packit |
5c3484 |
ld8 v3 = [vp0], 8 C M01
|
|
Packit |
5c3484 |
add vp1 = 8, vp1 C M I
|
|
Packit |
5c3484 |
;;
|
|
Packit |
5c3484 |
add s3 = u3, v3 C M I
|
|
Packit |
5c3484 |
sub d3 = u3, v3 C M I
|
|
Packit |
5c3484 |
mov r8 = 0 C M I
|
|
Packit |
5c3484 |
;;
|
|
Packit |
5c3484 |
cmpltu p9, p0 = s3, v3 C carry from add3 M I
|
|
Packit |
5c3484 |
cmpltu p13, p0 = u3, v3 C borrow from sub3 M I
|
|
Packit |
5c3484 |
(p15) br L(cj1) C B
|
|
Packit |
5c3484 |
st8 [sp] = s3, 8 C M23
|
|
Packit |
5c3484 |
st8 [dp] = d3, 8 C M23
|
|
Packit |
5c3484 |
br L(c0) C B
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
C b0 (n mod 4 = 0): nothing precedes the first group of four limbs, so clear
C the incoming carry p9 and borrow p13 (r0 != r0 is never true).
L(b0): cmp.ne p9, p0 = r0, r0 C M I
|
|
Packit |
5c3484 |
cmp.ne p13, p0 = r0, r0 C M I
|
|
Packit |
5c3484 |
C c0: load four limbs from each operand, alternating between the two
C pointers of each operand (each pointer steps by 16 bytes).
L(c0): ld8 u0 = [up0], 16 C M01
|
|
Packit |
5c3484 |
ld8 u1 = [up1], 16 C M01
|
|
Packit |
5c3484 |
;;
|
|
Packit |
5c3484 |
ld8 v0 = [vp0], 16 C M01
|
|
Packit |
5c3484 |
ld8 v1 = [vp1], 16 C M01
|
|
Packit |
5c3484 |
;;
|
|
Packit |
5c3484 |
ld8 u2 = [up0], 16 C M01
|
|
Packit |
5c3484 |
ld8 u3 = [up1], 16 C M01
|
|
Packit |
5c3484 |
;;
|
|
Packit |
5c3484 |
ld8 v2 = [vp0], 16 C M01
|
|
Packit |
5c3484 |
ld8 v3 = [vp1], 16 C M01
|
|
Packit |
5c3484 |
;;
|
|
Packit |
5c3484 |
C Raw sums and differences for limbs 0 and 1; the incoming carry/borrow is
C applied later, at top or at end.
add s0 = u0, v0 C M I
|
|
Packit |
5c3484 |
add s1 = u1, v1 C M I
|
|
Packit |
5c3484 |
sub d0 = u0, v0 C M I
|
|
Packit |
5c3484 |
sub d1 = u1, v1 C M I
|
|
Packit |
5c3484 |
;;
|
|
Packit |
5c3484 |
cmpltu p6, p0 = s0, v0 C carry from add0 M I
|
|
Packit |
5c3484 |
cmpltu p7, p0 = s1, v1 C carry from add1 M I
|
|
Packit |
5c3484 |
cmpltu p10, p0 = u0, v0 C borrow from sub0 M I
|
|
Packit |
5c3484 |
cmpltu p11, p0 = u1, v1 C borrow from sub1 M I
|
|
Packit |
5c3484 |
;;
|
|
Packit |
5c3484 |
C Enter the loop while the loop count is nonzero; otherwise only the four
C limbs already loaded remain, and end finishes them.
nop 0 C
|
|
Packit |
5c3484 |
br.cloop.dptk L(top) C B
|
|
Packit |
5c3484 |
br L(end) C B
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
C b3 (n mod 4 = 3): process one limb up front, using the limb-1 registers,
C store it, and join c2 with its carry in p7 and its borrow in p11.
C up1/vp1 are bumped by 8 to stay one limb ahead of up0/vp0.
L(b3): ld8 u1 = [up0], 8 C M01
|
|
Packit |
5c3484 |
add up1 = 8, up1 C M I
|
|
Packit |
5c3484 |
ld8 v1 = [vp0], 8 C M01
|
|
Packit |
5c3484 |
;;
|
|
Packit |
5c3484 |
add vp1 = 8, vp1 C M I
|
|
Packit |
5c3484 |
add s1 = u1, v1 C M I
|
|
Packit |
5c3484 |
sub d1 = u1, v1 C M I
|
|
Packit |
5c3484 |
;;
|
|
Packit |
5c3484 |
cmpltu p7, p0 = s1, v1 C carry from add1 M I
|
|
Packit |
5c3484 |
cmpltu p11, p0 = u1, v1 C borrow from sub1 M I
|
|
Packit |
5c3484 |
;;
|
|
Packit |
5c3484 |
st8 [sp] = s1, 8 C M23
|
|
Packit |
5c3484 |
st8 [dp] = d1, 8 C M23
|
|
Packit |
5c3484 |
br L(c2) C B
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
ALIGN(32)
|
|
Packit |
5c3484 |
C b2 (n mod 4 = 2): no limb precedes the pair handled at c2, so clear the
C incoming carry p7 and borrow p11 (r0 != r0 is never true).
L(b2): cmp.ne p7, p0 = r0, r0 C M I
|
|
Packit |
5c3484 |
cmp.ne p11, p0 = r0, r0 C M I
|
|
Packit |
5c3484 |
nop 0
|
|
Packit |
5c3484 |
C c2: load the limb-2/limb-3 pair.  p14 is set when n > 4, meaning more limbs
C follow this pair.
L(c2): ld8 u2 = [up0], 16 C M01
|
|
Packit |
5c3484 |
ld8 u3 = [up1], 16 C M01
|
|
Packit |
5c3484 |
cmpltu p14, p0 = 4, n C M I
|
|
Packit |
5c3484 |
;;
|
|
Packit |
5c3484 |
ld8 v2 = [vp0], 16 C M01
|
|
Packit |
5c3484 |
ld8 v3 = [vp1], 16 C M01
|
|
Packit |
5c3484 |
(p14) br L(gt4) C B
|
|
Packit |
5c3484 |
;;
|
|
Packit |
5c3484 |
C n <= 4 on this path: this pair holds the last two limbs.  Form the raw
C results and let cj2 apply the pending carry/borrow and finish.
add s2 = u2, v2 C M I
|
|
Packit |
5c3484 |
add s3 = u3, v3 C M I
|
|
Packit |
5c3484 |
sub d2 = u2, v2 C M I
|
|
Packit |
5c3484 |
sub d3 = u3, v3 C M I
|
|
Packit |
5c3484 |
;;
|
|
Packit |
5c3484 |
cmpltu p8, p0 = s2, v2 C carry from add2 M I
|
|
Packit |
5c3484 |
cmpltu p9, p0 = s3, v3 C carry from add3 M I
|
|
Packit |
5c3484 |
cmpltu p12, p0 = u2, v2 C borrow from sub2 M I
|
|
Packit |
5c3484 |
cmpltu p13, p0 = u3, v3 C borrow from sub3 M I
|
|
Packit |
5c3484 |
br L(cj2) C B
|
|
Packit |
5c3484 |
;;
|
|
Packit |
5c3484 |
C gt4: more limbs follow.  Preload the next limb-0/limb-1 pair, form the raw
C results for the current pair, and enter the loop at mid.
L(gt4): ld8 u0 = [up0], 16 C M01
|
|
Packit |
5c3484 |
ld8 u1 = [up1], 16 C M01
|
|
Packit |
5c3484 |
;;
|
|
Packit |
5c3484 |
ld8 v0 = [vp0], 16 C M01
|
|
Packit |
5c3484 |
ld8 v1 = [vp1], 16 C M01
|
|
Packit |
5c3484 |
;;
|
|
Packit |
5c3484 |
add s2 = u2, v2 C M I
|
|
Packit |
5c3484 |
add s3 = u3, v3 C M I
|
|
Packit |
5c3484 |
sub d2 = u2, v2 C M I
|
|
Packit |
5c3484 |
sub d3 = u3, v3 C M I
|
|
Packit |
5c3484 |
;;
|
|
Packit |
5c3484 |
cmpltu p8, p0 = s2, v2 C carry from add2 M I
|
|
Packit |
5c3484 |
cmpltu p9, p0 = s3, v3 C carry from add3 M I
|
|
Packit |
5c3484 |
cmpltu p12, p0 = u2, v2 C borrow from sub2 M I
|
|
Packit |
5c3484 |
cmpltu p13, p0 = u3, v3 C borrow from sub3 M I
|
|
Packit |
5c3484 |
C The loop count is (n - 2) >> 2, which is at least 1 whenever n > 4 with
C n mod 4 equal to 2 or 3, so this branch is expected to be taken always;
C no fall-through path is provided after it.
br.cloop.dptk L(mid) C B
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
C Main loop, four limbs per pass.  Loads of upcoming limbs are overlapped with
C the carry/borrow fix-up and the stores of limbs whose raw sum and difference
C were formed earlier.
C
C Carry chain:  p9 -> p6 -> p7 -> p8 -> p9   (limbs 3, 0, 1, 2, 3)
C Borrow chain: p13 -> p10 -> p11 -> p12 -> p13
C
C Fix-up idiom (assuming cmpeqor is cmp.eq.or - see the macro definitions):
C when a carry comes in and the raw sum is all ones, adding 1 wraps, so that
C condition is ORed into the outgoing carry before the sum is incremented.
C The borrow side tests the raw difference against 0 before decrementing it.
ALIGN(32)
|
|
Packit |
5c3484 |
L(top):
|
|
Packit |
5c3484 |
C Load the next limb-0/limb-1 pair while applying carry p9 / borrow p13 to the
C pending s0 / d0.
ld8 u0 = [up0], 16 C M01
|
|
Packit |
5c3484 |
ld8 u1 = [up1], 16 C M01
|
|
Packit |
5c3484 |
(p9) cmpeqor p6, p0 = -1, s0 C M I
|
|
Packit |
5c3484 |
(p9) add s0 = 1, s0 C M I
|
|
Packit |
5c3484 |
(p13) cmpeqor p10, p0 = 0, d0 C M I
|
|
Packit |
5c3484 |
(p13) add d0 = -1, d0 C M I
|
|
Packit |
5c3484 |
;;
|
|
Packit |
5c3484 |
C Same for limb 1, driven by the now complete p6 / p10.
ld8 v0 = [vp0], 16 C M01
|
|
Packit |
5c3484 |
ld8 v1 = [vp1], 16 C M01
|
|
Packit |
5c3484 |
(p6) cmpeqor p7, p0 = -1, s1 C M I
|
|
Packit |
5c3484 |
(p6) add s1 = 1, s1 C M I
|
|
Packit |
5c3484 |
(p10) cmpeqor p11, p0 = 0, d1 C M I
|
|
Packit |
5c3484 |
(p10) add d1 = -1, d1 C M I
|
|
Packit |
5c3484 |
;;
|
|
Packit |
5c3484 |
C Store finished limb 0; form raw results for limbs 2 and 3 from the values
C loaded earlier.
st8 [sp] = s0, 8 C M23
|
|
Packit |
5c3484 |
st8 [dp] = d0, 8 C M23
|
|
Packit |
5c3484 |
add s2 = u2, v2 C M I
|
|
Packit |
5c3484 |
add s3 = u3, v3 C M I
|
|
Packit |
5c3484 |
sub d2 = u2, v2 C M I
|
|
Packit |
5c3484 |
sub d3 = u3, v3 C M I
|
|
Packit |
5c3484 |
;;
|
|
Packit |
5c3484 |
st8 [sp] = s1, 8 C M23
|
|
Packit |
5c3484 |
st8 [dp] = d1, 8 C M23
|
|
Packit |
5c3484 |
cmpltu p8, p0 = s2, v2 C carry from add2 M I
|
|
Packit |
5c3484 |
cmpltu p9, p0 = s3, v3 C carry from add3 M I
|
|
Packit |
5c3484 |
cmpltu p12, p0 = u2, v2 C borrow from sub2 M I
|
|
Packit |
5c3484 |
cmpltu p13, p0 = u3, v3 C borrow from sub3 M I
|
|
Packit |
5c3484 |
;;
|
|
Packit |
5c3484 |
C mid: second half of the pass, and the loop entry point used from gt4.
C Mirrors the first half with the roles of the limb pairs swapped.
L(mid):
|
|
Packit |
5c3484 |
ld8 u2 = [up0], 16 C M01
|
|
Packit |
5c3484 |
ld8 u3 = [up1], 16 C M01
|
|
Packit |
5c3484 |
(p7) cmpeqor p8, p0 = -1, s2 C M I
|
|
Packit |
5c3484 |
(p7) add s2 = 1, s2 C M I
|
|
Packit |
5c3484 |
(p11) cmpeqor p12, p0 = 0, d2 C M I
|
|
Packit |
5c3484 |
(p11) add d2 = -1, d2 C M I
|
|
Packit |
5c3484 |
;;
|
|
Packit |
5c3484 |
ld8 v2 = [vp0], 16 C M01
|
|
Packit |
5c3484 |
ld8 v3 = [vp1], 16 C M01
|
|
Packit |
5c3484 |
(p8) cmpeqor p9, p0 = -1, s3 C M I
|
|
Packit |
5c3484 |
(p8) add s3 = 1, s3 C M I
|
|
Packit |
5c3484 |
(p12) cmpeqor p13, p0 = 0, d3 C M I
|
|
Packit |
5c3484 |
(p12) add d3 = -1, d3 C M I
|
|
Packit |
5c3484 |
;;
|
|
Packit |
5c3484 |
st8 [sp] = s2, 8 C M23
|
|
Packit |
5c3484 |
st8 [dp] = d2, 8 C M23
|
|
Packit |
5c3484 |
add s0 = u0, v0 C M I
|
|
Packit |
5c3484 |
add s1 = u1, v1 C M I
|
|
Packit |
5c3484 |
sub d0 = u0, v0 C M I
|
|
Packit |
5c3484 |
sub d1 = u1, v1 C M I
|
|
Packit |
5c3484 |
;;
|
|
Packit |
5c3484 |
st8 [sp] = s3, 8 C M23
|
|
Packit |
5c3484 |
st8 [dp] = d3, 8 C M23
|
|
Packit |
5c3484 |
cmpltu p6, p0 = s0, v0 C carry from add0 M I
|
|
Packit |
5c3484 |
cmpltu p7, p0 = s1, v1 C carry from add1 M I
|
|
Packit |
5c3484 |
cmpltu p10, p0 = u0, v0 C borrow from sub0 M I
|
|
Packit |
5c3484 |
cmpltu p11, p0 = u1, v1 C borrow from sub1 M I
|
|
Packit |
5c3484 |
;;
|
|
Packit |
5c3484 |
C Advance both prefetch pointers by 32 bytes, matching the four 8-byte limbs
C consumed from each operand per pass, then loop while ar.lc is nonzero.
lfetch [r10], 32 C M?
|
|
Packit |
5c3484 |
lfetch [r11], 32 C M?
|
|
Packit |
5c3484 |
br.cloop.dptk L(top) C B
|
|
Packit |
5c3484 |
;;
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
C end: wind-down for the last four limbs, all of which are already loaded.
C Same steps as the first half of the loop body, with nops where the loop
C issues loads.  Falls through into cj2 and then cj1.
L(end):
|
|
Packit |
5c3484 |
nop 0
|
|
Packit |
5c3484 |
nop 0
|
|
Packit |
5c3484 |
(p9) cmpeqor p6, p0 = -1, s0 C M I
|
|
Packit |
5c3484 |
(p9) add s0 = 1, s0 C M I
|
|
Packit |
5c3484 |
(p13) cmpeqor p10, p0 = 0, d0 C M I
|
|
Packit |
5c3484 |
(p13) add d0 = -1, d0 C M I
|
|
Packit |
5c3484 |
;;
|
|
Packit |
5c3484 |
nop 0
|
|
Packit |
5c3484 |
nop 0
|
|
Packit |
5c3484 |
(p6) cmpeqor p7, p0 = -1, s1 C M I
|
|
Packit |
5c3484 |
(p6) add s1 = 1, s1 C M I
|
|
Packit |
5c3484 |
(p10) cmpeqor p11, p0 = 0, d1 C M I
|
|
Packit |
5c3484 |
(p10) add d1 = -1, d1 C M I
|
|
Packit |
5c3484 |
;;
|
|
Packit |
5c3484 |
st8 [sp] = s0, 8 C M23
|
|
Packit |
5c3484 |
st8 [dp] = d0, 8 C M23
|
|
Packit |
5c3484 |
add s2 = u2, v2 C M I
|
|
Packit |
5c3484 |
add s3 = u3, v3 C M I
|
|
Packit |
5c3484 |
sub d2 = u2, v2 C M I
|
|
Packit |
5c3484 |
sub d3 = u3, v3 C M I
|
|
Packit |
5c3484 |
;;
|
|
Packit |
5c3484 |
st8 [sp] = s1, 8 C M23
|
|
Packit |
5c3484 |
st8 [dp] = d1, 8 C M23
|
|
Packit |
5c3484 |
cmpltu p8, p0 = s2, v2 C carry from add2 M I
|
|
Packit |
5c3484 |
cmpltu p9, p0 = s3, v3 C carry from add3 M I
|
|
Packit |
5c3484 |
cmpltu p12, p0 = u2, v2 C borrow from sub2 M I
|
|
Packit |
5c3484 |
cmpltu p13, p0 = u3, v3 C borrow from sub3 M I
|
|
Packit |
5c3484 |
;;
|
|
Packit |
5c3484 |
C cj2: also entered from c2 when the limb-2/limb-3 pair is the last one.
C Apply the pending carry p7 / borrow p11 to limb 2, clear r8 for the return
C value, then apply p8 / p12 to limb 3.  Limb 3 itself is stored at cj1.
L(cj2):
|
|
Packit |
5c3484 |
(p7) cmpeqor p8, p0 = -1, s2 C M I
|
|
Packit |
5c3484 |
(p7) add s2 = 1, s2 C M I
|
|
Packit |
5c3484 |
(p11) cmpeqor p12, p0 = 0, d2 C M I
|
|
Packit |
5c3484 |
(p11) add d2 = -1, d2 C M I
|
|
Packit |
5c3484 |
mov r8 = 0 C M I
|
|
Packit |
5c3484 |
nop 0
|
|
Packit |
5c3484 |
;;
|
|
Packit |
5c3484 |
st8 [sp] = s2, 8 C M23
|
|
Packit |
5c3484 |
st8 [dp] = d2, 8 C M23
|
|
Packit |
5c3484 |
(p8) cmpeqor p9, p0 = -1, s3 C M I
|
|
Packit |
5c3484 |
(p8) add s3 = 1, s3 C M I
|
|
Packit |
5c3484 |
(p12) cmpeqor p13, p0 = 0, d3 C M I
|
|
Packit |
5c3484 |
(p12) add d3 = -1, d3 C M I
|
|
Packit |
5c3484 |
;;
|
|
Packit |
5c3484 |
C cj1: common exit, also entered directly from b1 when that limb is the whole
C operand.  r8 is 0 on arrival; it becomes 2 if the final carry p9 is set and
C is then incremented if the final borrow p13 is set, giving
C r8 = 2*carry + borrow.  ar.lc is restored from r2 and the last sum and
C difference limbs are stored (no pointer post-increment is needed here).
L(cj1):
|
|
Packit |
5c3484 |
(p9) mov r8 = 2 C M I
|
|
Packit |
5c3484 |
;;
|
|
Packit |
5c3484 |
mov.i ar.lc = r2 C I0
|
|
Packit |
5c3484 |
(p13) add r8 = 1, r8 C M I
|
|
Packit |
5c3484 |
st8 [sp] = s3 C M23
|
|
Packit |
5c3484 |
st8 [dp] = d3 C M23
|
|
Packit |
5c3484 |
br.ret.sptk.many b0 C B
|
|
Packit |
5c3484 |
EPILOGUE()
|
|
Packit |
5c3484 |
ASM_END()
|