|
Packit |
5c3484 |
dnl PowerPC-32 mpn_rshift -- Shift a number right.
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
dnl Copyright 1995, 1998, 2000, 2002-2005 Free Software Foundation, Inc.
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
dnl This file is part of the GNU MP Library.
|
|
Packit |
5c3484 |
dnl
|
|
Packit |
5c3484 |
dnl The GNU MP Library is free software; you can redistribute it and/or modify
|
|
Packit |
5c3484 |
dnl it under the terms of either:
|
|
Packit |
5c3484 |
dnl
|
|
Packit |
5c3484 |
dnl * the GNU Lesser General Public License as published by the Free
|
|
Packit |
5c3484 |
dnl Software Foundation; either version 3 of the License, or (at your
|
|
Packit |
5c3484 |
dnl option) any later version.
|
|
Packit |
5c3484 |
dnl
|
|
Packit |
5c3484 |
dnl or
|
|
Packit |
5c3484 |
dnl
|
|
Packit |
5c3484 |
dnl * the GNU General Public License as published by the Free Software
|
|
Packit |
5c3484 |
dnl Foundation; either version 2 of the License, or (at your option) any
|
|
Packit |
5c3484 |
dnl later version.
|
|
Packit |
5c3484 |
dnl
|
|
Packit |
5c3484 |
dnl or both in parallel, as here.
|
|
Packit |
5c3484 |
dnl
|
|
Packit |
5c3484 |
dnl The GNU MP Library is distributed in the hope that it will be useful, but
|
|
Packit |
5c3484 |
dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
|
Packit |
5c3484 |
dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
Packit |
5c3484 |
dnl for more details.
|
|
Packit |
5c3484 |
dnl
|
|
Packit |
5c3484 |
dnl You should have received copies of the GNU General Public License and the
|
|
Packit |
5c3484 |
dnl GNU Lesser General Public License along with the GNU MP Library. If not,
|
|
Packit |
5c3484 |
dnl see https://www.gnu.org/licenses/.
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
include(`../config.m4')
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
C cycles/limb
|
|
Packit |
5c3484 |
C 603e: ?
|
|
Packit |
5c3484 |
C 604e: 3.0
|
|
Packit |
5c3484 |
C 75x (G3): 3.0
|
|
Packit |
5c3484 |
C 7400,7410 (G4): 3.0
|
|
Packit |
5c3484 |
C 7445,7455 (G4+): 2.5
|
|
Packit |
5c3484 |
C 7447,7457 (G4+): 2.25
|
|
Packit |
5c3484 |
C power4/ppc970: 2.5
|
|
Packit |
5c3484 |
C power5: 2.5
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
C INPUT PARAMETERS
|
|
Packit |
5c3484 |
C rp r3
|
|
Packit |
5c3484 |
C up r4
|
|
Packit |
5c3484 |
C n r5
|
|
Packit |
5c3484 |
C cnt r6
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
ASM_START()
|
|
Packit |
5c3484 |
C mpn_rshift: shift the n-limb operand at up right by cnt bits and store the
C n result limbs at rp. Limbs are 32-bit words, walked from the lowest address
C upwards. Each result limb is (limb >> cnt) OR (next limb << (32 - cnt)); the
C top result limb is just the last source limb >> cnt. The value returned in
C r3 is the first source limb << (32 - cnt), i.e. the bits shifted out.
C NOTE(review): the code assumes n >= 1, and presumably 1 <= cnt <= 31;
C confirm against the mpn_rshift contract.
C Operands of at most 30 limbs use a loop unrolled twice. Larger operands go
C to the BIG code, which is unrolled four times and saves/restores r24-r31.
PROLOGUE(mpn_rshift)
|
|
Packit |
5c3484 |
cmpwi cr0, r5, 30 C more than 30 limbs?
|
|
Packit |
5c3484 |
addi r7, r3, -4 C r7 = rp - 4, so a store to 4(r7) hits the next result limb
|
|
Packit |
5c3484 |
bgt L(BIG) C branch if more than 30 limbs
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
mtctr r5 C copy size into CTR
|
|
Packit |
5c3484 |
subfic r8, r6, 32 C r8 = 32 - cnt, the complementary left-shift count
|
|
Packit |
5c3484 |
lwz r11, 0(r4) C load first source limb
|
|
Packit |
5c3484 |
slw r3, r11, r8 C compute function return value (first limb << (32 - cnt))
|
|
Packit |
5c3484 |
bdz L(end1) C single limb: skip the loop
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
L(oop): lwzu r10, 4(r4) C r10 = next source limb, r4 advanced
|
|
Packit |
5c3484 |
srw r9, r11, r6 C low part: current limb >> cnt
|
|
Packit |
5c3484 |
slw r12, r10, r8 C high part: next limb << (32 - cnt)
|
|
Packit |
5c3484 |
or r9, r9, r12 C combine into one result limb
|
|
Packit |
5c3484 |
stwu r9, 4(r7) C store it, r7 advanced
|
|
Packit |
5c3484 |
bdz L(end2) C no more source limbs; the last one is in r10
|
|
Packit |
5c3484 |
lwzu r11, 4(r4) C second copy of the body with r10 and r11 swapped
|
|
Packit |
5c3484 |
srw r9, r10, r6
|
|
Packit |
5c3484 |
slw r12, r11, r8
|
|
Packit |
5c3484 |
or r9, r9, r12
|
|
Packit |
5c3484 |
stwu r9, 4(r7)
|
|
Packit |
5c3484 |
bdnz L(oop) C falls through with the last source limb in r11
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
L(end1): C last source limb is in r11
|
|
Packit |
5c3484 |
srw r0, r11, r6 C top result limb, zeros shifted in
|
|
Packit |
5c3484 |
stw r0, 4(r7)
|
|
Packit |
5c3484 |
blr
|
|
Packit |
5c3484 |
L(end2): C last source limb is in r10
|
|
Packit |
5c3484 |
srw r0, r10, r6 C top result limb, zeros shifted in
|
|
Packit |
5c3484 |
stw r0, 4(r7)
|
|
Packit |
5c3484 |
blr
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
L(BIG): C n > 30: 4-way unrolled code
|
|
Packit |
5c3484 |
stwu r1, -48(r1) C allocate a 48-byte stack frame
|
|
Packit |
5c3484 |
stmw r24, 8(r1) C save r24-r31, which we are supposed to preserve
|
|
Packit |
5c3484 |
lwz r9, 0(r4) C first source limb
|
|
Packit |
5c3484 |
subfic r8, r6, 32 C r8 = 32 - cnt
|
|
Packit |
5c3484 |
slw r3, r9, r8 C compute function return value
|
|
Packit |
5c3484 |
srw r0, r9, r6 C r0 carries the low part of the next result limb
|
|
Packit |
5c3484 |
addi r5, r5, -1 C r5 = n - 1 source limbs still to load
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
andi. r10, r5, 3 C count for spill loop
|
|
Packit |
5c3484 |
beq L(e) C nothing to spill, go straight to the unrolled code
|
|
Packit |
5c3484 |
mtctr r10
|
|
Packit |
5c3484 |
lwzu r28, 4(r4) C first spill limb
|
|
Packit |
5c3484 |
bdz L(xe0) C just one spill limb
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
L(loop0):
|
|
Packit |
5c3484 |
srw r12, r28, r6 C low part for the following result limb
|
|
Packit |
5c3484 |
slw r24, r28, r8 C high part for this result limb
|
|
Packit |
5c3484 |
lwzu r28, 4(r4)
|
|
Packit |
5c3484 |
or r24, r0, r24
|
|
Packit |
5c3484 |
stwu r24, 4(r7)
|
|
Packit |
5c3484 |
mr r0, r12 C carry the low part forward
|
|
Packit |
5c3484 |
bdnz L(loop0) C taken at most once!
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
L(xe0): srw r12, r28, r6 C last spill limb, nothing more to load here
|
|
Packit |
5c3484 |
slw r24, r28, r8
|
|
Packit |
5c3484 |
or r24, r0, r24
|
|
Packit |
5c3484 |
stwu r24, 4(r7)
|
|
Packit |
5c3484 |
mr r0, r12
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
L(e): srwi r5, r5, 2 C count for unrolled loop
|
|
Packit |
5c3484 |
addi r5, r5, -1 C the final group of 4 is handled after the loop
|
|
Packit |
5c3484 |
mtctr r5
|
|
Packit |
5c3484 |
lwz r28, 4(r4) C preload 4 source limbs into r28-r31
|
|
Packit |
5c3484 |
lwz r29, 8(r4)
|
|
Packit |
5c3484 |
lwz r30, 12(r4)
|
|
Packit |
5c3484 |
lwzu r31, 16(r4) C r4 advanced by 4 limbs
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
L(loopU): C 4 limbs per pass; loads for the next group are interleaved
|
|
Packit |
5c3484 |
srw r9, r28, r6
|
|
Packit |
5c3484 |
slw r24, r28, r8
|
|
Packit |
5c3484 |
lwz r28, 4(r4)
|
|
Packit |
5c3484 |
srw r10, r29, r6
|
|
Packit |
5c3484 |
slw r25, r29, r8
|
|
Packit |
5c3484 |
lwz r29, 8(r4)
|
|
Packit |
5c3484 |
srw r11, r30, r6
|
|
Packit |
5c3484 |
slw r26, r30, r8
|
|
Packit |
5c3484 |
lwz r30, 12(r4)
|
|
Packit |
5c3484 |
srw r12, r31, r6
|
|
Packit |
5c3484 |
slw r27, r31, r8
|
|
Packit |
5c3484 |
lwzu r31, 16(r4)
|
|
Packit |
5c3484 |
or r24, r0, r24 C r0 = low part carried over from the previous limb
|
|
Packit |
5c3484 |
stw r24, 4(r7)
|
|
Packit |
5c3484 |
or r25, r9, r25
|
|
Packit |
5c3484 |
stw r25, 8(r7)
|
|
Packit |
5c3484 |
or r26, r10, r26
|
|
Packit |
5c3484 |
stw r26, 12(r7)
|
|
Packit |
5c3484 |
or r27, r11, r27
|
|
Packit |
5c3484 |
stwu r27, 16(r7) C r7 advanced by 4 limbs
|
|
Packit |
5c3484 |
mr r0, r12 C carry for the next group
|
|
Packit |
5c3484 |
bdnz L(loopU)
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
srw r9, r28, r6 C wind-down: same work as the loop body, without loads
|
|
Packit |
5c3484 |
slw r24, r28, r8
|
|
Packit |
5c3484 |
srw r10, r29, r6
|
|
Packit |
5c3484 |
slw r25, r29, r8
|
|
Packit |
5c3484 |
srw r11, r30, r6
|
|
Packit |
5c3484 |
slw r26, r30, r8
|
|
Packit |
5c3484 |
srw r12, r31, r6
|
|
Packit |
5c3484 |
slw r27, r31, r8
|
|
Packit |
5c3484 |
or r24, r0, r24
|
|
Packit |
5c3484 |
stw r24, 4(r7)
|
|
Packit |
5c3484 |
or r25, r9, r25
|
|
Packit |
5c3484 |
stw r25, 8(r7)
|
|
Packit |
5c3484 |
or r26, r10, r26
|
|
Packit |
5c3484 |
stw r26, 12(r7)
|
|
Packit |
5c3484 |
or r27, r11, r27
|
|
Packit |
5c3484 |
stw r27, 16(r7)
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
stw r12, 20(r7) C top result limb: last source limb >> cnt
|
|
Packit |
5c3484 |
lmw r24, 8(r1) C restore registers
|
|
Packit |
5c3484 |
addi r1, r1, 48 C release the stack frame
|
|
Packit |
5c3484 |
blr
|
|
Packit |
5c3484 |
EPILOGUE()
|