dnl  SPARC v9 mpn_mul_1 for T3/T4/T5.

dnl  Contributed to the GNU project by David Miller and Torbjörn Granlund.

dnl  Copyright 2013 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of either:
dnl
dnl    * the GNU Lesser General Public License as published by the Free
dnl      Software Foundation; either version 3 of the License, or (at your
dnl      option) any later version.
dnl
dnl  or
dnl
dnl    * the GNU General Public License as published by the Free Software
dnl      Foundation; either version 2 of the License, or (at your option) any
dnl      later version.
dnl
dnl  or both in parallel, as here.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
dnl  for more details.
dnl
dnl  You should have received copies of the GNU General Public License and the
dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
dnl  see https://www.gnu.org/licenses/.
dnl  The macros used in this file (PROLOGUE, EPILOGUE, L, ALIGN, REGISTER,
dnl  umulxhi, addxccc, addxc, C) are not defined here; presumably they are
dnl  all provided through this include.
include(`../config.m4')

C		   cycles/limb
C UltraSPARC T3:	23
C UltraSPARC T4:	 3

C INPUT PARAMETERS
define(`rp', `%i0')	C destination pointer, base of every stx below
define(`up', `%i1')	C source pointer, base of every ldx below
define(`n',  `%i2')	C limb count, reused as the biased loop counter
define(`v0', `%i3')	C the single multiplier limb

C mpn_mul_1: multiply the n limbs at up by the limb v0, store the n low
C result limbs at rp, and return the carry-out limb.  The return value is
C left in %i0 just before the final restore.
C
C Register use:
C   %l0-%l3	source limbs loaded from up
C   %i4/%i5, %o0/%o1, %o2/%o3, %o4/%o5
C		low/high halves of the four products in flight
C   %g5		n mod 4, used only to choose the entry point
C
C The loop at L(top) is unrolled four ways and software pipelined.  Each
C stored limb is the low half of one product plus the high half of the
C previous product plus the carry.  The carry is kept in the condition codes
C from one addxccc to the next, so every instruction between them (ldx, stx,
C mulx, umulxhi, plain add, brgz) leaves the condition codes alone.
C
C The feed-in blocks L(b0)..L(b3) handle the four values of n mod 4.  Each
C one clears the carry and zeroes the register that acts as the previous
C high half, preloads limbs, biases rp and up, and enters the loop at
C L(lo0)..L(lo3).  Operands too short for the loop go straight to the
C wind-down code at L(wd3) or L(wd2), or return directly when n = 1.
C
C umulxhi, addxccc and addxc are invoked as m4 macros; their definitions are
C outside this file.
C
C NOTE(review): no path tests for n = 0 (it would take the L(b0) path and
C process four limbs); callers presumably guarantee n >= 1.
C NOTE(review): %g2 and %g3 are declared scratch but are not referenced
C anywhere in this function.

ASM_START()
	REGISTER(%g2,#scratch)
	REGISTER(%g3,#scratch)
PROLOGUE(mpn_mul_1)
	save	%sp, -176, %sp

	and	n, 3, %g5		C %g5 = n mod 4
	add	n, -4, n		C the counter runs biased by -4
	brz	%g5, L(b0)		C n mod 4 = 0
	 cmp	%g5, 2			C delay slot, executed on both paths
	bcs	%xcc, L(b1)		C %g5 < 2, so n mod 4 = 1
	 nop
	be	%xcc, L(b2)		C n mod 4 = 2
	 nop

C n mod 4 = 3
L(b3):	addcc	%g0, %g0, %i5		C carry = 0, previous high half = 0
	ldx	[up+0], %l0
	ldx	[up+8], %l1
	ldx	[up+16], %l2
	mulx	%l0, v0, %o0
	umulxhi(%l0, v0, %o1)
	brgz	n, L(gt3)		C original n > 4: go through the loop
	 add	rp, -8, rp		C delay slot: first store is [rp+8]
	mulx	%l1, v0, %o2
	umulxhi(%l1, v0, %o3)
	b	L(wd3)			C n = 3: wind-down only
	 nop
L(gt3):	ldx	[up+24], %l3
	mulx	%l1, v0, %o2
	umulxhi(%l1, v0, %o3)
	add	up, 24, up
	b	L(lo3)
	 add	n, -3, n

C n mod 4 = 2
L(b2):	addcc	%g0, %g0, %o1		C carry = 0, previous high half = 0
	ldx	[up+0], %l1
	ldx	[up+8], %l2
	brgz	n, L(gt2)		C original n > 4: go through the loop
	 add	rp, -16, rp		C delay slot: first store is [rp+16]
	mulx	%l1, v0, %o2
	umulxhi(%l1, v0, %o3)
	mulx	%l2, v0, %o4
	umulxhi(%l2, v0, %o5)
	b	L(wd2)			C n = 2: wind-down only
	 nop
L(gt2):	ldx	[up+16], %l3
	mulx	%l1, v0, %o2
	umulxhi(%l1, v0, %o3)
	ldx	[up+24], %l0
	mulx	%l2, v0, %o4
	umulxhi(%l2, v0, %o5)
	add	up, 16, up
	b	L(lo2)
	 add	n, -2, n

C n mod 4 = 1
L(b1):	addcc	%g0, %g0, %o3		C carry = 0, previous high half = 0
	ldx	[up+0], %l2
	brgz	n, L(gt1)		C original n > 4: go through the loop
	 nop
	mulx	%l2, v0, %o4		C n = 1: a single product, no loop
	stx	%o4, [rp+0]		C must precede the write to %i0 (= rp)
	umulxhi(%l2, v0, %i0)		C high half is the return value
	ret
	 restore
L(gt1):	ldx	[up+8], %l3
	ldx	[up+16], %l0
	mulx	%l2, v0, %o4
	umulxhi(%l2, v0, %o5)
	ldx	[up+24], %l1
	mulx	%l3, v0, %i4
	umulxhi(%l3, v0, %i5)
	add	rp, -24, rp		C first store is [rp+24]
	add	up, 8, up
	b	L(lo1)
	 add	n, -1, n

C n mod 4 = 0
L(b0):	addcc	%g0, %g0, %o5		C carry = 0, previous high half = 0
	ldx	[up+0], %l3
	ldx	[up+8], %l0
	ldx	[up+16], %l1
	mulx	%l3, v0, %i4
	umulxhi(%l3, v0, %i5)
	ldx	[up+24], %l2
	mulx	%l0, v0, %o0
	umulxhi(%l0, v0, %o1)
	b	L(lo0)
	 nop

C Main loop, four limbs per pass.  The trailing numbers are the original
C per-instruction annotations.
	ALIGN(16)
L(top):	ldx	[up+0], %l3		C 0
	addxccc(%i4, %o5, %i4)		C 0
	mulx	%l1, v0, %o2		C 1
	stx	%i4, [rp+0]		C 1
	umulxhi(%l1, v0, %o3)		C 2
L(lo3):	ldx	[up+8], %l0		C 2
	addxccc(%o0, %i5, %o0)		C 3
	mulx	%l2, v0, %o4		C 3
	stx	%o0, [rp+8]		C 4
	umulxhi(%l2, v0, %o5)		C 4
L(lo2):	ldx	[up+16], %l1		C 5
	addxccc(%o2, %o1, %o2)		C 5
	mulx	%l3, v0, %i4		C 6
	stx	%o2, [rp+16]		C 6
	umulxhi(%l3, v0, %i5)		C 7
L(lo1):	ldx	[up+24], %l2		C 7
	addxccc(%o4, %o3, %o4)		C 8
	mulx	%l0, v0, %o0		C 8
	stx	%o4, [rp+24]		C 9
	umulxhi(%l0, v0, %o1)		C 9
	add	rp, 32, rp		C 10
L(lo0):	add	up, 32, up		C 10
	brgz	n, L(top)		C 11
	 add	n, -4, n		C 11

C Wind-down: finish the last four limbs without loading any more.
L(end):	addxccc(%i4, %o5, %i4)
	mulx	%l1, v0, %o2
	stx	%i4, [rp+0]
	umulxhi(%l1, v0, %o3)
	addxccc(%o0, %i5, %o0)
L(wd3):	mulx	%l2, v0, %o4
	stx	%o0, [rp+8]
	umulxhi(%l2, v0, %o5)
	addxccc(%o2, %o1, %o2)
L(wd2):	stx	%o2, [rp+16]
	addxccc(%o4, %o3, %o4)
	stx	%o4, [rp+24]
	addxc(	%g0, %o5, %i0)		C return last high half plus carry
	ret
	 restore
EPILOGUE()