|
Packit |
5c3484 |
dnl PowerPC-64 mpn_lshiftc -- rp[] = ~up[] << cnt
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
dnl Copyright 2003, 2005, 2010, 2013 Free Software Foundation, Inc.
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
dnl This file is part of the GNU MP Library.
|
|
Packit |
5c3484 |
dnl
|
|
Packit |
5c3484 |
dnl The GNU MP Library is free software; you can redistribute it and/or modify
|
|
Packit |
5c3484 |
dnl it under the terms of either:
|
|
Packit |
5c3484 |
dnl
|
|
Packit |
5c3484 |
dnl * the GNU Lesser General Public License as published by the Free
|
|
Packit |
5c3484 |
dnl Software Foundation; either version 3 of the License, or (at your
|
|
Packit |
5c3484 |
dnl option) any later version.
|
|
Packit |
5c3484 |
dnl
|
|
Packit |
5c3484 |
dnl or
|
|
Packit |
5c3484 |
dnl
|
|
Packit |
5c3484 |
dnl * the GNU General Public License as published by the Free Software
|
|
Packit |
5c3484 |
dnl Foundation; either version 2 of the License, or (at your option) any
|
|
Packit |
5c3484 |
dnl later version.
|
|
Packit |
5c3484 |
dnl
|
|
Packit |
5c3484 |
dnl or both in parallel, as here.
|
|
Packit |
5c3484 |
dnl
|
|
Packit |
5c3484 |
dnl The GNU MP Library is distributed in the hope that it will be useful, but
|
|
Packit |
5c3484 |
dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
|
Packit |
5c3484 |
dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
Packit |
5c3484 |
dnl for more details.
|
|
Packit |
5c3484 |
dnl
|
|
Packit |
5c3484 |
dnl You should have received copies of the GNU General Public License and the
|
|
Packit |
5c3484 |
dnl GNU Lesser General Public License along with the GNU MP Library. If not,
|
|
Packit |
5c3484 |
dnl see https://www.gnu.org/licenses/.
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
include(`../config.m4')
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
C cycles/limb
|
|
Packit |
5c3484 |
C POWER3/PPC630 ?
|
|
Packit |
5c3484 |
C POWER4/PPC970 ?
|
|
Packit |
5c3484 |
C POWER5 2.25
|
|
Packit |
5c3484 |
C POWER6 4
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
C TODO
|
|
Packit |
5c3484 |
C * Micro-optimise header code
|
|
Packit |
5c3484 |
C * Perhaps do 4-way unrolling, for 2.5 c/l on POWER6. The code is 4236
|
|
Packit |
5c3484 |
C bytes, 4-way code would become about 50% larger.
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
C INPUT PARAMETERS
|
|
Packit |
5c3484 |
C Register aliases used by the mpn_lshiftc code in this file.
C   rp_param (r3)  destination pointer as received; the entry code computes rp
C                  from it
C   up       (r4)  source pointer
C   n        (r5)  number of 8-byte limbs (the entry code scales it by 8)
C   cnt      (r6)  shift count in bits
C   tnc      (r0)  set to 64 - cnt by the entry code
C   retval   (r3)  return register; shares r3 with rp_param.
C                  NOTE(review): the code visible in this file writes r3
C                  directly and never expands retval.
C   rp       (r7)  working destination pointer
define(`rp_param', `r3')
|
|
Packit |
5c3484 |
define(`up', `r4')
|
|
Packit |
5c3484 |
define(`n', `r5')
|
|
Packit |
5c3484 |
define(`cnt', `r6')
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
define(`tnc',`r0')
|
|
Packit |
5c3484 |
define(`retval',`r3')
|
|
Packit |
5c3484 |
define(`rp', `r7')
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
C mpn_lshiftc -- rp[] = ~up[] << cnt (see the title line of this file).
C
C In:    r3 = rp_param (destination), r4 = up (source),
C        r5 = n (number of 8-byte limbs), r6 = cnt (shift count in bits)
C Out:   r3 = most significant source limb >> (64 - cnt), that is the bits
C        shifted out at the top.  This value is NOT complemented.
C        With HAVE_ABI_mode32 the result is additionally copied to r4 and r3 is
C        shifted right by 32 before returning.
C Uses:  r0 (tnc), r4 (up is advanced), r7 (rp), r8-r11 scratch, r12 (saved
C        LR), ctr, lr, cr0.  LEAL is defined outside this file and may touch
C        more -- not visible here.
C
C Method: both pointers are moved to the END of their operands and the limbs
C are processed from the most significant one downwards, two per loop pass.
C There is one specialised copy of the loop per shift count (SHIFT(1) ..
C SHIFT(63)); the entry code computes the address of the right copy and jumps
C to it through LR.
C
C NOTE(review): the code has no special handling for n = 0 and generates no
C SHIFT block for cnt = 0 or cnt >= 64, so it relies on n >= 1 and
C 1 <= cnt <= 63 -- confirm against the documented mpn interface.
ASM_START()
|
|
Packit |
5c3484 |
PROLOGUE(mpn_lshiftc,toc)
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
C mode32 ABI only: clear the upper 32 bits of n before it is used as a 64-bit
C quantity.
ifdef(`HAVE_ABI_mode32',`
|
|
Packit |
5c3484 |
rldicl n, n, 0,32 C FIXME: avoid this zero extend
|
|
Packit |
5c3484 |
')
|
|
Packit |
5c3484 |
mflr r12 C save return address; LR is reused for the computed jump
|
|
Packit |
5c3484 |
sldi r8, n, 3 C r8 = n * 8 = operand size in bytes
|
|
Packit |
5c3484 |
sldi r10, cnt, 6 C multiply cnt by size of a SHIFT block
|
|
Packit |
5c3484 |
LEAL( r11, L(e1)) C address of L(e1) label in SHIFT(1)
|
|
Packit |
5c3484 |
add up, up, r8 C make up point at end of up[]
|
|
Packit |
5c3484 |
add r11, r11, r10 C r11 = L(e1) + 64*cnt; rebased to L(oN) or L(eN), N = cnt, below
|
|
Packit |
5c3484 |
srdi r10, n, 1 C r10 = n / 2, the loop counter value
|
|
Packit |
5c3484 |
add rp, rp_param, r8 C make rp point at end of rp[]
|
|
Packit |
5c3484 |
subfic tnc, cnt, 64 C tnc = 64 - cnt
|
|
Packit |
5c3484 |
rlwinm. r8, n, 0,31,31 C extract bit 0
|
|
Packit |
5c3484 |
mtctr r10 C ctr = n / 2
|
|
Packit |
5c3484 |
beq L(evn) C bit 0 clear: n is even
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
C Odd n.  n = 1 is split off to L(1).  Otherwise r9 = top limb, r8 = next limb,
C and the selected SHIFT block is entered at L(oN), which produces one result
C limb before reaching L(eN).  up and rp are moved by 8 so that the fixed
C displacements used inside the SHIFT block address the right limbs.
L(odd): ld r9, -8(up)
|
|
Packit |
5c3484 |
cmpdi cr0, n, 1 C n = 1?
|
|
Packit |
5c3484 |
beq L(1)
|
|
Packit |
5c3484 |
ld r8, -16(up)
|
|
Packit |
5c3484 |
addi r11, r11, -88 C L(o1) - L(e1) - 64
|
|
Packit |
5c3484 |
mtlr r11
|
|
Packit |
5c3484 |
srd r3, r9, tnc C retval
|
|
Packit |
5c3484 |
addi up, up, 8
|
|
Packit |
5c3484 |
addi rp, rp, -8
|
|
Packit |
5c3484 |
blr C branch to L(oN)
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
C Even n.  r8 = top limb, r9 = next limb; enter the selected SHIFT block at
C L(eN).  The -64 undoes the one-block bias of r11 computed above.
L(evn): ld r8, -8(up)
|
|
Packit |
5c3484 |
ld r9, -16(up)
|
|
Packit |
5c3484 |
addi r11, r11, -64
|
|
Packit |
5c3484 |
mtlr r11
|
|
Packit |
5c3484 |
srd r3, r8, tnc C retval
|
|
Packit |
5c3484 |
blr C branch to L(eN)
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
C n = 1: shift and complement the single limb in r9, store it, and return
C without entering any SHIFT block.
L(1): srd r3, r9, tnc C retval
|
|
Packit |
5c3484 |
sld r8, r9, cnt
|
|
Packit |
5c3484 |
nor r8, r8, r8 C r8 = ~r8
|
|
Packit |
5c3484 |
std r8, -8(rp)
|
|
Packit |
5c3484 |
mtlr r12 C restore the return address saved at entry
|
|
Packit |
5c3484 |
C mode32 ABI only: r4 = copy of the 64-bit result, r3 = its upper 32 bits
C (presumably the register pair used to return a 64-bit value -- confirm
C against the ABI).
ifdef(`HAVE_ABI_mode32',
|
|
Packit |
5c3484 |
` mr r4, r3
|
|
Packit |
5c3484 |
srdi r3, r3, 32
|
|
Packit |
5c3484 |
')
|
|
Packit |
5c3484 |
blr
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
C SHIFT(N) expands to one copy of the two-limbs-per-pass loop with the shift
C count N hard-coded as an immediate operand.
C
C   L(loN)  loop top: load the next source limb into r8, complement and store
C           the result limb held in r11, step rp down by 16
C   L(oN)   entry point for odd n: build a result limb in r10 from r9 and r8,
C           load the next source limb into r9, step up down by 16, complement
C           and store r10
C   L(eN)   entry point for even n: build a result limb in r11 from r8 and r9,
C           then loop on ctr
C
C When ctr runs out, r10 receives the least significant limb shifted left by N
C and control continues at L(com) with r11 and r10 still to be stored.
C
C Each expansion is exactly 16 instructions (the trailing nop is padding),
C which is what the entry code relies on when it indexes the blocks with
C cnt * 64 and applies the fixed offsets -64 and -88.  Do not add, remove or
C reorder instructions here without updating those offsets.
define(SHIFT,`
|
|
Packit |
5c3484 |
L(lo$1):ld r8, -24(up)
|
|
Packit |
5c3484 |
nor r11, r11, r11
|
|
Packit |
5c3484 |
std r11, -8(rp)
|
|
Packit |
5c3484 |
addi rp, rp, -16
|
|
Packit |
5c3484 |
L(o$1): srdi r10, r8, eval(64-$1)
|
|
Packit |
5c3484 |
rldimi r10, r9, $1, 0
|
|
Packit |
5c3484 |
ld r9, -32(up)
|
|
Packit |
5c3484 |
addi up, up, -16
|
|
Packit |
5c3484 |
nor r10, r10, r10
|
|
Packit |
5c3484 |
std r10, 0(rp)
|
|
Packit |
5c3484 |
L(e$1): srdi r11, r9, eval(64-$1)
|
|
Packit |
5c3484 |
rldimi r11, r8, $1, 0
|
|
Packit |
5c3484 |
bdnz L(lo$1)
|
|
Packit |
5c3484 |
sldi r10, r9, $1
|
|
Packit |
5c3484 |
b L(com)
|
|
Packit |
5c3484 |
nop
|
|
Packit |
5c3484 |
')
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
ALIGN(64)
|
|
Packit |
5c3484 |
C Emit SHIFT(1) .. SHIFT(63) back to back, one block per supported shift count.
forloop(`i',1,63,`SHIFT(i)')
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
C Common tail for every SHIFT block: complement and store the two least
C significant result limbs (r11, then r10 below it), restore LR and return.
L(com): nor r11, r11, r11
|
|
Packit |
5c3484 |
nor r10, r10, r10
|
|
Packit |
5c3484 |
std r11, -8(rp)
|
|
Packit |
5c3484 |
std r10, -16(rp)
|
|
Packit |
5c3484 |
mtlr r12 C restore the return address saved at entry
|
|
Packit |
5c3484 |
C mode32 ABI only: r4 = copy of the 64-bit result, r3 = its upper 32 bits
C (presumably the register pair used to return a 64-bit value -- confirm
C against the ABI).
ifdef(`HAVE_ABI_mode32',
|
|
Packit |
5c3484 |
` mr r4, r3
|
|
Packit |
5c3484 |
srdi r3, r3, 32
|
|
Packit |
5c3484 |
')
|
|
Packit |
5c3484 |
blr
|
|
Packit |
5c3484 |
EPILOGUE()
|
|
Packit |
5c3484 |
ASM_END()
|