|
Packit |
5c3484 |
dnl Alpha ev6 nails mpn_addmul_2.
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
dnl Copyright 2002, 2005, 2006 Free Software Foundation, Inc.
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
dnl This file is part of the GNU MP Library.
|
|
Packit |
5c3484 |
dnl
|
|
Packit |
5c3484 |
dnl The GNU MP Library is free software; you can redistribute it and/or modify
|
|
Packit |
5c3484 |
dnl it under the terms of either:
|
|
Packit |
5c3484 |
dnl
|
|
Packit |
5c3484 |
dnl * the GNU Lesser General Public License as published by the Free
|
|
Packit |
5c3484 |
dnl Software Foundation; either version 3 of the License, or (at your
|
|
Packit |
5c3484 |
dnl option) any later version.
|
|
Packit |
5c3484 |
dnl
|
|
Packit |
5c3484 |
dnl or
|
|
Packit |
5c3484 |
dnl
|
|
Packit |
5c3484 |
dnl * the GNU General Public License as published by the Free Software
|
|
Packit |
5c3484 |
dnl Foundation; either version 2 of the License, or (at your option) any
|
|
Packit |
5c3484 |
dnl later version.
|
|
Packit |
5c3484 |
dnl
|
|
Packit |
5c3484 |
dnl or both in parallel, as here.
|
|
Packit |
5c3484 |
dnl
|
|
Packit |
5c3484 |
dnl The GNU MP Library is distributed in the hope that it will be useful, but
|
|
Packit |
5c3484 |
dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
|
Packit |
5c3484 |
dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
Packit |
5c3484 |
dnl for more details.
|
|
Packit |
5c3484 |
dnl
|
|
Packit |
5c3484 |
dnl You should have received copies of the GNU General Public License and the
|
|
Packit |
5c3484 |
dnl GNU Lesser General Public License along with the GNU MP Library. If not,
|
|
Packit |
5c3484 |
dnl see https://www.gnu.org/licenses/.
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
include(`../config.m4')
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
C Runs at 4.0 cycles/limb.
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
C We could either go for 2-way unrolling over 11 cycles, or 2.75 c/l,
|
|
Packit |
5c3484 |
C or 4-way unrolling over 20 cycles, for 2.5 c/l.
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
C INPUT PARAMETERS
|
|
Packit |
5c3484 |
define(`rp',`r16') C limbs are loaded from and stored back through this pointer
|
|
Packit |
5c3484 |
define(`up',`r17') C source vector, one limb read per multiply step
|
|
Packit |
5c3484 |
define(`n',`r18') C limb count, counted down to zero
|
|
Packit |
5c3484 |
define(`vp',`r19') C two multiplier limbs are read from 0(vp) and 8(vp)
|
|
Packit |
5c3484 |

|
Packit |
5c3484 |
C Useful register aliases
|
|
Packit |
5c3484 |
define(`numb_mask',`r24') C -1 shifted right by NAIL_BITS, used to mask stored limbs
|
|
Packit |
5c3484 |
define(`ulimb',`r25') C limb most recently loaded from up
|
|
Packit |
5c3484 |
define(`rlimb',`r27') C limb most recently loaded from rp
|
|
Packit |
5c3484 |

|
Packit |
5c3484 |
define(`m0a',`r0') C mulq result of v0 and ulimb; r0 also receives the final result
|
|
Packit |
5c3484 |
define(`m0b',`r1') C umulh result of v0 and ulimb
|
|
Packit |
5c3484 |
define(`m1a',`r2') C mulq result of v1 and ulimb
|
|
Packit |
5c3484 |
define(`m1b',`r3') C umulh result of v1 and ulimb
|
|
Packit |
5c3484 |

|
Packit |
5c3484 |
define(`acc0',`r4') C running sum carried between steps, lower position
|
|
Packit |
5c3484 |
define(`acc1',`r5') C running sum carried between steps, upper position
|
|
Packit |
5c3484 |

|
Packit |
5c3484 |
define(`v0',`r6') C limb from 0(vp), shifted left by NAIL_BITS before use
|
|
Packit |
5c3484 |
define(`v1',`r7') C limb from 8(vp), shifted left by NAIL_BITS before use
|
|
Packit |
5c3484 |

|
Packit |
5c3484 |
C Used for temps: r8 r19 r28 (r8 = shifted mulq result, r19 = partial sum then carry, reusing the vp register once v0 and v1 are loaded, r28 = masked limb about to be stored)
|
|
Packit |
5c3484 |

|
Packit |
5c3484 |
define(`NAIL_BITS',`GMP_NAIL_BITS') C short name; GMP_NAIL_BITS is not defined in this file, presumably it comes from config.m4
|
|
Packit |
5c3484 |
define(`NUMB_BITS',`GMP_NUMB_BITS') C short name; GMP_NUMB_BITS is not defined in this file, presumably it comes from config.m4
|
|
Packit |
5c3484 |

|
Packit |
5c3484 |
C This declaration is munged by configure
|
|
Packit |
5c3484 |
NAILS_SUPPORT(3-63) C NOTE(review): presumably the range of nail sizes this code supports -- confirm against configure
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
ASM_START()
|
|
Packit |
5c3484 |
C mpn_addmul_2 -- multiply the n limbs at up by the two limbs at vp and add
C the products into the limbs at rp, for limbs that carry nail bits.
C
C In:   rp (r16), up (r17), n (r18), vp (r19).
C Out:  n limbs at rp are replaced by old limb + product terms, masked with
C       numb_mask.  One further limb is stored just above them without its
C       old value being loaded.  r0 holds what is left over above that.
C Writes r0-r8, r16-r19, r24, r25, r27, r28.
C
C Method: v0 and v1 are shifted left by NAIL_BITS up front.  The 128-bit
C product of a shifted v and a u limb is then (v*u) << NAIL_BITS, so
C mulq >> NAIL_BITS gives the low 64-NAIL_BITS bits of v*u and umulh gives
C the bits above those.  Sums are split again with numb_mask and a right
C shift by NUMB_BITS.
C NOTE(review): this assumes NAIL_BITS + NUMB_BITS = 64 and that the nail
C bits of the vp limbs are zero -- neither is shown in this file.
C
C The loop is pipelined: the four multiplies for a u limb are issued one
C step before their results are consumed, which is why the first limb is
C multiplied ahead of L(top) and L(end) repeats the consume half only.
C NOTE(review): n is decremented before it is first tested, so n >= 1
C appears to be required -- confirm against callers.
C NOTE(review): the U0/U1/L0/L1 tags presumably name the ev6 issue slot
C each instruction is scheduled for -- confirm before reordering anything.
PROLOGUE(mpn_addmul_2)
|
|
Packit |
5c3484 |
lda numb_mask,-1(r31) C numb_mask = -1, all ones (r31 is the zero register)
|
|
Packit |
5c3484 |
srl numb_mask,NAIL_BITS,numb_mask C clear the top NAIL_BITS bits of the mask
|
|
Packit |
5c3484 |

|
Packit |
5c3484 |
ldq v0, 0(vp) C first multiplier limb
|
|
Packit |
5c3484 |
ldq v1, 8(vp) C second multiplier limb
|
|
Packit |
5c3484 |

|
Packit |
5c3484 |
bis r31, r31, acc0 C zero acc0
|
|
Packit |
5c3484 |
sll v0,NAIL_BITS, v0 C v0 <<= NAIL_BITS, see method note at top
|
|
Packit |
5c3484 |
bis r31, r31, acc1 C zero acc1
|
|
Packit |
5c3484 |
sll v1,NAIL_BITS, v1 C v1 <<= NAIL_BITS
|
|
Packit |
5c3484 |
bis r31, r31, r19 C zero r19: vp is no longer needed, r19 now carries the overflow between limbs
|
|
Packit |
5c3484 |

|
Packit |
5c3484 |
ldq ulimb, 0(up) C first u limb
|
|
Packit |
5c3484 |
lda up, 8(up) C advance up
|
|
Packit |
5c3484 |
mulq v0, ulimb, m0a C U1 low 64 bits of shifted v0 times ulimb
|
|
Packit |
5c3484 |
umulh v0, ulimb, m0b C U1 high 64 bits of shifted v0 times ulimb
|
|
Packit |
5c3484 |
mulq v1, ulimb, m1a C U1 low 64 bits of shifted v1 times ulimb
|
|
Packit |
5c3484 |
umulh v1, ulimb, m1b C U1 high 64 bits of shifted v1 times ulimb
|
|
Packit |
5c3484 |
lda n, -1(n) C the first limb has been multiplied already
|
|
Packit |
5c3484 |
beq n, L(end) C U0 nothing more to multiply: go straight to the wind-down
|
|
Packit |
5c3484 |

|
Packit |
5c3484 |
ALIGN(16)
|
|
Packit |
5c3484 |
L(top): bis r31, r31, r31 C U1 nop
|
|
Packit |
5c3484 |
addq r19, acc0, acc0 C U0 propagate nail
|
|
Packit |
5c3484 |
ldq rlimb, 0(rp) C L0 rp limb that this step adds into
|
|
Packit |
5c3484 |
ldq ulimb, 0(up) C L1 next u limb, used by the multiplies issued in this step
|
|
Packit |
5c3484 |

|
Packit |
5c3484 |
lda rp, 8(rp) C L1 advance rp now; the store at the bottom uses -8(rp)
|
|
Packit |
5c3484 |
srl m0a,NAIL_BITS, r8 C U0 r8 = low part of the pending v0 product
|
|
Packit |
5c3484 |
lda up, 8(up) C L0 advance up
|
|
Packit |
5c3484 |
mulq v0, ulimb, m0a C U1 m0a has been read above, start the next v0 product
|
|
Packit |
5c3484 |

|
Packit |
5c3484 |
addq r8, acc0, r19 C U0 r19 = low(v0 product) + acc0
|
|
Packit |
5c3484 |
addq m0b, acc1, acc0 C L1 acc0 = high(v0 product) + acc1
|
|
Packit |
5c3484 |
umulh v0, ulimb, m0b C U1 m0b has been read above, start the next high half
|
|
Packit |
5c3484 |
bis r31, r31, r31 C L0 nop
|
|
Packit |
5c3484 |

|
Packit |
5c3484 |
addq rlimb, r19, r19 C L1 FINAL PROD-SUM (adds in the rp limb)
|
|
Packit |
5c3484 |
srl m1a,NAIL_BITS, r8 C U0 r8 = low part of the pending v1 product
|
|
Packit |
5c3484 |
lda n, -1(n) C L0 one fewer limb remaining
|
|
Packit |
5c3484 |
mulq v1, ulimb, m1a C U1 m1a has been read above, start the next v1 product
|
|
Packit |
5c3484 |

|
Packit |
5c3484 |
addq r8, acc0, acc0 C U0 acc0 += low(v1 product)
|
|
Packit |
5c3484 |
bis r31, m1b, acc1 C L1 acc1 = high(v1 product)
|
|
Packit |
5c3484 |
umulh v1, ulimb, m1b C U1 m1b has been copied above, start the next high half
|
|
Packit |
5c3484 |
and r19,numb_mask, r28 C L0 extract numb part
|
|
Packit |
5c3484 |

|
Packit |
5c3484 |
unop C no-op
|
|
Packit |
5c3484 |
srl r19,NUMB_BITS, r19 C U1 extract nail part
|
|
Packit |
5c3484 |
stq r28, -8(rp) C L1 store the finished limb (rp was advanced earlier)
|
|
Packit |
5c3484 |
bne n, L(top) C U0 loop while limbs remain
|
|
Packit |
5c3484 |

|
Packit |
5c3484 |
L(end): ldq rlimb, 0(rp) C wind-down: consume the last products, no new loads from up or multiplies
|
|
Packit |
5c3484 |
addq r19, acc0, acc0 C propagate nail
|
|
Packit |
5c3484 |
lda rp, 8(rp) C advance rp; stores below use -8(rp) and 0(rp)
|
|
Packit |
5c3484 |
srl m0a,NAIL_BITS, r8 C U0 r8 = low part of the last v0 product
|
|
Packit |
5c3484 |
addq r8, acc0, r19 C r19 = low(v0 product) + acc0
|
|
Packit |
5c3484 |
addq m0b, acc1, acc0 C acc0 = high(v0 product) + acc1
|
|
Packit |
5c3484 |
addq rlimb, r19, r19 C add in the rp limb
|
|
Packit |
5c3484 |
srl m1a,NAIL_BITS, r8 C U0 r8 = low part of the last v1 product
|
|
Packit |
5c3484 |
addq r8, acc0, acc0 C acc0 += low(v1 product)
|
|
Packit |
5c3484 |
bis r31, m1b, acc1 C acc1 = high(v1 product)
|
|
Packit |
5c3484 |
and r19,numb_mask, r28 C extract limb
|
|
Packit |
5c3484 |

|
Packit |
5c3484 |
srl r19,NUMB_BITS, r19 C extract nail
|
|
Packit |
5c3484 |
stq r28, -8(rp) C store the last limb that had an rp addend
|
|
Packit |
5c3484 |

|
Packit |
5c3484 |
addq r19, acc0, acc0 C propagate nail
|
|
Packit |
5c3484 |
and acc0,numb_mask, r28 C masked part of acc0 becomes one more output limb
|
|
Packit |
5c3484 |
stq r28, 0(rp) C written one limb above the n updated limbs; old contents are not read
|
|
Packit |
5c3484 |
srl acc0,NUMB_BITS, r19 C bits of acc0 above the stored part
|
|
Packit |
5c3484 |
addq r19, acc1, r0 C r0 = those bits + acc1, the value left for the caller
|
|
Packit |
5c3484 |

|
Packit |
5c3484 |
ret r31, (r26), 1 C return through the address in r26
|
|
Packit |
5c3484 |
EPILOGUE()
|
|
Packit |
5c3484 |
ASM_END()
|