dnl ARM64 mpn_lshift.

dnl Copyright 2013, 2014 Free Software Foundation, Inc.

dnl This file is part of the GNU MP Library.

dnl The GNU MP Library is free software; you can redistribute it and/or modify
dnl it under the terms of the GNU Lesser General Public License as published
dnl by the Free Software Foundation; either version 3 of the License, or (at
dnl your option) any later version.

dnl The GNU MP Library is distributed in the hope that it will be useful, but
dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
dnl License for more details.

dnl You should have received a copy of the GNU Lesser General Public License
dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.

include(`../config.m4')

C		cycles/limb
C Cortex-A53	    ?
C Cortex-A57	    ?

C Replace the m4 comment delimiter with a string that never occurs in this
C file, so that no part of an instruction line is treated as an m4 comment
C and the register names and L() labels below are always expanded.
changecom(@&*$)

C Incoming arguments, in the first four argument registers.
define(`rp_arg', `x0')		C result pointer as passed in; x0 is also
				C reused for the return value
define(`up',     `x1')		C source pointer
define(`n',      `x2')		C number of limbs
define(`cnt',    `x3')		C shift count in bits

C Working copy of the result pointer, kept out of x0 because x0 is
C overwritten with the return value early in the routine.
define(`rp',     `x16')

C Negated shift count (set up as 0 - cnt by the routine), used as the
C amount for the complementary right shifts.
define(`tnc',    `x8')

ASM_START()
C mpn_lshift -- shift the n-limb number at up left by cnt bits, store the n
C result limbs at rp_arg, and return the bits shifted out of the top limb.
C
C In:	x0 = rp_arg	result pointer
C	x1 = up		source pointer
C	x2 = n		number of 64-bit limbs
C	x3 = cnt	shift count in bits
C Out:	x0 = most significant source limb >> (64 - cnt)
C
C Nothing is validated here.  The code relies on n >= 1 and 1 <= cnt <= 63
C (the range the GMP manual documents for mpn_lshift): with n = 0 the loop
C counter would start out wrapped around, and with cnt = 0 the right shifts
C by tnc would shift by 0 instead of by 64.
C
C Method: limbs are handled from the most significant end downwards, four
C per loop iteration.  rp and up are first advanced to one past the end of
C their operands.  The two low bits of n then select one of four entry
C sequences (b01, b11, b10, b00); each deals with the top limb(s), biases
C rp, up and n so that the fixed offsets in the loop line up, and jumps into
C the loop at lo2 or lo0 (or finishes at once for n = 1 and n = 2).
C
C Register use:
C	tnc (x8)	-cnt; register-specified shifts use the count modulo
C			64, so "lsr x, y, tnc" shifts right by 64 - cnt
C	x4,x5 / x6,x7	source limb pairs (lower limb, higher limb)
C	x10, x11	result limbs under construction
C	x13		(higher limb of the current pair) << cnt
C	x9		(lower limb of an x4,x5 pair) << cnt, carried forward
C	x12		(lower limb of an x6,x7 pair) << cnt, carried forward
C
C x12 takes the role x18 had in earlier revisions of this code.  x18 is the
C AAPCS64 platform register and is reserved on some platforms, so this leaf
C routine must not write to it; x12 is an ordinary caller-saved temporary.
C No stack is used and no callee-saved register is touched.
PROLOGUE(mpn_lshift)
	add	rp, rp_arg, n, lsl #3	C rp = rp_arg + 8*n, one past the result
	add	up, up, n, lsl #3	C up = up + 8*n, one past the source
	sub	tnc, xzr, cnt		C tnc = -cnt
	tbz	n, #0, L(bx0)		C n even?

C n odd: fetch the top limb, then split on bit 1 of n.
L(bx1):	ldr	x4, [up,#-8]
	tbnz	n, #1, L(b11)

C n = 1 (mod 4)
L(b01):	lsr	x0, x4, tnc		C return value: bits shifted out
	lsl	x12, x4, cnt		C high part of the top result limb
	sub	n, n, #1
	cbnz	n, L(gt1)
	str	x12, [rp,#-8]		C n was 1: single result limb
	ret
L(gt1):	ldp	x4, x5, [up,#-24]	C next two limbs below the top one
	sub	up, up, #8
	add	rp, rp, #16
	b	L(lo2)

C n = 3 (mod 4)
L(b11):	lsr	x0, x4, tnc		C return value: bits shifted out
	lsl	x9, x4, cnt		C high part of the top result limb
	ldp	x6, x7, [up,#-24]	C next two limbs below the top one
	add	n, n, #1		C round n up to a multiple of 4
	add	up, up, #8
	add	rp, rp, #32
	b	L(lo0)

C n even: fetch the top two limbs, then split on bit 1 of n.
L(bx0):	ldp	x4, x5, [up,#-16]
	tbz	n, #1, L(b00)

C n = 2 (mod 4)
L(b10):	lsr	x0, x5, tnc		C return value: bits shifted out
	lsl	x13, x5, cnt
	lsr	x10, x4, tnc
	lsl	x12, x4, cnt
	sub	n, n, #2
	cbnz	n, L(gt2)
	orr	x10, x10, x13		C n was 2: finish both result limbs
	stp	x12, x10, [rp,#-16]
	ret
L(gt2):	ldp	x4, x5, [up,#-32]	C next two limbs
	orr	x10, x10, x13
	str	x10, [rp,#-8]		C top result limb
	sub	up, up, #16
	add	rp, rp, #8
	b	L(lo2)

C n = 0 (mod 4)
L(b00):	lsr	x0, x5, tnc		C return value: bits shifted out
	lsl	x13, x5, cnt
	lsr	x10, x4, tnc
	lsl	x9, x4, cnt
	ldp	x6, x7, [up,#-32]	C next two limbs
	orr	x10, x10, x13
	str	x10, [rp,#-8]		C top result limb
	add	rp, rp, #24
	b	L(lo0)

C Main loop, four limbs per iteration.  The pair combined and stored at the
C top of the loop is the one whose shifts were issued at lo0 in the previous
C iteration (or by the code that entered at lo0).
	ALIGN(16)
L(top):	ldp	x4, x5, [up,#-48]
	sub	rp, rp, #32		C integrate with stp?
	sub	up, up, #32		C integrate with ldp?
	orr	x11, x11, x9		C merge with the part carried in x9
	orr	x10, x10, x13
	stp	x10, x11, [rp,#-16]
L(lo2):	lsr	x11, x5, tnc
	lsl	x13, x5, cnt
	lsr	x10, x4, tnc
	lsl	x9, x4, cnt		C carried to the next pair
	ldp	x6, x7, [up,#-32]
	orr	x11, x11, x12		C merge with the part carried in x12
	orr	x10, x10, x13
	stp	x10, x11, [rp,#-32]
L(lo0):	sub	n, n, #4
	lsr	x11, x7, tnc
	lsl	x13, x7, cnt
	lsr	x10, x6, tnc
	lsl	x12, x6, cnt		C carried to the next pair, or limb 0
	cbnz	n, L(top)

C Wind down: store the last combined pair, then the least significant
C result limb, which has nothing shifted in from below.
L(end):	orr	x11, x11, x9
	orr	x10, x10, x13
	stp	x10, x11, [rp,#-48]
	str	x12, [rp,#-56]
	ret
EPILOGUE()