dnl PowerPC-64 mpn_lshiftc -- rp[] = ~up[] << cnt dnl Copyright 2003, 2005, 2010 Free Software Foundation, Inc. dnl This file is part of the GNU MP Library. dnl dnl The GNU MP Library is free software; you can redistribute it and/or modify dnl it under the terms of either: dnl dnl * the GNU Lesser General Public License as published by the Free dnl Software Foundation; either version 3 of the License, or (at your dnl option) any later version. dnl dnl or dnl dnl * the GNU General Public License as published by the Free Software dnl Foundation; either version 2 of the License, or (at your option) any dnl later version. dnl dnl or both in parallel, as here. dnl dnl The GNU MP Library is distributed in the hope that it will be useful, but dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License dnl for more details. dnl dnl You should have received copies of the GNU General Public License and the dnl GNU Lesser General Public License along with the GNU MP Library. If not, dnl see https://www.gnu.org/licenses/. include(`../config.m4') C cycles/limb C POWER3/PPC630 ? C POWER4/PPC970 ? C POWER5 2.25 C POWER6 9.5 C POWER7 2.15 C TODO C * Try to reduce the number of needed live registers C * Micro-optimise header code C * Keep in synch with lshift.asm and rshift.asm C * Could the long-scheduled std insns be less scheduled? C INPUT PARAMETERS define(`rp', `r3') define(`up', `r4') define(`n', `r5') define(`cnt', `r6') define(`tnc',`r0') define(`u0',`r30') define(`u1',`r31') define(`retval',`r5') ASM_START() PROLOGUE(mpn_lshiftc) std r31, -8(r1) std r30, -16(r1) subfic tnc, cnt, 64 sldi r7, n, 3 C byte count corresponding to n add up, up, r7 C up = up + n add rp, rp, r7 C rp = rp + n rldicl. r30, n, 0,62 C r30 = n & 3, set cr0 cmpdi cr6, r30, 2 addi r31, n, 3 C compute count... ld r10, -8(up) C load 1st limb for b00...b11 srd retval, r10, tnc srdi r31, r31, 2 C ...for ctr mtctr r31 C copy count into ctr beq cr0, L(b00) blt cr6, L(b01) ld r11, -16(up) C load 2nd limb for b10 and b11 beq cr6, L(b10) ALIGN(16) L(b11): sld r8, r10, cnt srd r9, r11, tnc ld u1, -24(up) addi up, up, -24 sld r12, r11, cnt srd r7, u1, tnc addi rp, rp, 16 bdnz L(gt3) nor r11, r8, r9 sld r8, u1, cnt nor r8, r8, r8 b L(cj3) ALIGN(16) L(gt3): ld u0, -8(up) nor r11, r8, r9 sld r8, u1, cnt srd r9, u0, tnc ld u1, -16(up) nor r10, r12, r7 b L(L11) ALIGN(32) L(b10): sld r12, r10, cnt addi rp, rp, 24 srd r7, r11, tnc bdnz L(gt2) sld r8, r11, cnt nor r10, r12, r7 nor r8, r8, r8 b L(cj2) L(gt2): ld u0, -24(up) sld r8, r11, cnt srd r9, u0, tnc ld u1, -32(up) nor r10, r12, r7 sld r12, u0, cnt srd r7, u1, tnc ld u0, -40(up) nor r11, r8, r9 addi up, up, -16 b L(L10) ALIGN(16) L(b00): ld u1, -16(up) sld r12, r10, cnt srd r7, u1, tnc ld u0, -24(up) sld r8, u1, cnt srd r9, u0, tnc ld u1, -32(up) nor r10, r12, r7 sld r12, u0, cnt srd r7, u1, tnc addi rp, rp, 8 bdz L(cj4) L(gt4): addi up, up, -32 ld u0, -8(up) nor r11, r8, r9 b L(L00) ALIGN(16) L(b01): bdnz L(gt1) sld r8, r10, cnt nor r8, r8, r8 std r8, -8(rp) b L(ret) L(gt1): ld u0, -16(up) sld r8, r10, cnt srd r9, u0, tnc ld u1, -24(up) sld r12, u0, cnt srd r7, u1, tnc ld u0, -32(up) nor r11, r8, r9 sld r8, u1, cnt srd r9, u0, tnc ld u1, -40(up) addi up, up, -40 nor r10, r12, r7 bdz L(end) ALIGN(32) L(top): sld r12, u0, cnt srd r7, u1, tnc ld u0, -8(up) std r11, -8(rp) nor r11, r8, r9 L(L00): sld r8, u1, cnt srd r9, u0, tnc ld u1, -16(up) std r10, -16(rp) nor r10, r12, r7 L(L11): sld r12, u0, cnt srd r7, u1, tnc ld u0, -24(up) std r11, -24(rp) nor r11, r8, r9 L(L10): sld r8, u1, cnt srd r9, u0, tnc ld u1, -32(up) addi up, up, -32 std r10, -32(rp) addi rp, rp, -32 nor r10, r12, r7 bdnz L(top) ALIGN(32) L(end): sld r12, u0, cnt srd r7, u1, tnc std r11, -8(rp) L(cj4): nor r11, r8, r9 sld r8, u1, cnt std r10, -16(rp) nor r8, r8, r8 L(cj3): nor r10, r12, r7 std r11, -24(rp) L(cj2): std r10, -32(rp) std r8, -40(rp) L(ret): ld r31, -8(r1) ld r30, -16(r1) ifdef(`HAVE_ABI_mode32', ` srdi r3, retval, 32 mr r4, retval ',` mr r3, retval') blr EPILOGUE()