dnl  ARM mpn_mod_1_1p

dnl  Contributed to the GNU project by Torbjörn Granlund.

dnl  Copyright 2012 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of either:
dnl
dnl    * the GNU Lesser General Public License as published by the Free
dnl      Software Foundation; either version 3 of the License, or (at your
dnl      option) any later version.
dnl
dnl  or
dnl
dnl    * the GNU General Public License as published by the Free Software
dnl      Foundation; either version 2 of the License, or (at your option) any
dnl      later version.
dnl
dnl  or both in parallel, as here.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
dnl  for more details.
dnl
dnl  You should have received copies of the GNU General Public License and the
dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
dnl  see https://www.gnu.org/licenses/.

include(`../config.m4')

C	     cycles/limb
C StrongARM	 -
C XScale	 ?
C Cortex-A7	 ?
C Cortex-A8	 ?
C Cortex-A9	 7
C Cortex-A15	 6

C Argument registers of mpn_mod_1_1p.  The function bodies in this file
C spell out the raw register names (and reuse r0 and r1 for other values
C later on), so these macros only record the incoming argument mapping.
define(`ap', `r0')
define(`n',  `r1')
define(`d',  `r2')
define(`cps',`r3')

ASM_START()
C -----------------------------------------------------------------------------
C mpn_mod_1_1p -- remainder of the n-limb number at ap by a one-limb divisor,
C using the four precomputed limbs written by mpn_mod_1_1p_cps.
C
C In:	r0 = ap   pointer to the source limbs (32 bits each); they are
C		  consumed from ap[n-1] down to ap[0]
C	r1 = n    limb count.  ap[n-1] and ap[n-2] are loaded before n is
C		  tested, so n must be at least 2.
C	r2 = d    divisor.  NOTE(review): the result is shifted right by the
C		  count in cps[1] on exit, which suggests d is expected to be
C		  already shifted left by that count -- confirm with callers.
C	r3 = cps  table, byte offsets: 0 = inverse used by the final division,
C		  4 = shift count, 8 = multiplier for the shifting step (read
C		  only when the count is nonzero), 12 = loop multiplier
C Out:	r0 = remainder
C
C r4-r10 are saved and restored (r9 is saved but never used).  r12 and the
C flags are clobbered.  Leaf routine: no calls are made, so the 28-byte push
C never has to satisfy a call-site stack alignment.
C -----------------------------------------------------------------------------
PROLOGUE(mpn_mod_1_1p)
	push	{r4-r10}
	add	r0, r0, r1, asl #2	C r0 = ap + n (4 bytes per limb)
	ldr	r5, [r0, #-4]!		C r5 = ap[n-1], high limb of running value
	ldr	r12, [r0, #-4]!		C r12 = ap[n-2], low limb of running value
	subs	r1, r1, #2
	ble	L(4)			C nothing beyond two limbs, skip the loop
	ldr	r8, [r3, #12]		C r8 = cps[3], loop multiplier
	mov	r4, r12			C r4 = low limb
	mov	r10, r5			C r10 = high limb
	umull	r7, r5, r10, r8		C r5:r7 = high limb * cps[3]
	sub	r1, r1, #1		C r1 = limbs left after the load at L(mid)
	b	L(mid)

C At L(top): r6 = source limb just loaded, r5:r7 = product of the previous
C high limb and cps[3], r4 = previous low limb.  Each pass forms a new
C two-limb value (high in r10, low in r4) that absorbs one more source limb.
C The sub, mov and umull below do not set flags, so the carry produced by
C adcs stays live until movcs consumes it.
L(top):	adds	r12, r6, r7		C low sum = source limb + product low
	adcs	r10, r4, r5		C new high = old low + product high + carry
	sub	r1, r1, #1
	mov	r6, #0
	movcs	r6, r8			C r6 = cps[3] if adcs carried out, else 0
	umull	r7, r5, r10, r8		C next product: new high * cps[3]
	adds	r4, r12, r6		C new low = low sum + r6
	subcs	r4, r4, r2		C if that add carried out, subtract d
L(mid):	ldr	r6, [r0, #-4]!		C fetch next lower limb, r0 steps down
	teq	r1, #0
	bne	L(top)

	adds	r12, r6, r7		C final low = last limb + product low
	adcs	r5, r4, r5		C final high = low + product high + carry
	subcs	r5, r5, r2		C on carry out, subtract d from the high limb
L(4):	ldr	r1, [r3, #4]		C r1 = cps[1], shift count
	cmp	r1, #0
	beq	L(7)			C count is zero, no shifting needed
	ldr	r4, [r3, #8]		C r4 = cps[2]
	umull	r0, r6, r5, r4		C r6:r0 = high limb * cps[2]
	adds	r12, r0, r12		C low += product low
	addcs	r6, r6, #1		C carry into product high
	rsb	r0, r1, #32		C r0 = 32 - count
	mov	r0, r12, lsr r0		C bits of low that move up into high
	orr	r5, r0, r6, asl r1	C r5 = (high << count) | (low >> (32 - count))
	mov	r12, r12, asl r1	C r12 = low << count
	b	L(8)
L(7):	cmp	r5, r2
	subcs	r5, r5, r2		C if high >= d (unsigned), high -= d

C Here r5:r12 is the two-limb value still to be reduced.  A quotient estimate
C is formed from the inverse in cps[0], and the candidate remainder is then
C adjusted by at most one addition and one subtraction of d.
L(8):	ldr	r0, [r3, #0]		C r0 = cps[0]
	umull	r4, r3, r5, r0		C r3:r4 = high * cps[0], cps pointer is dead now
	add	r5, r5, #1
	adds	r0, r4, r12		C r0 = product low + low limb
	adc	r5, r3, r5		C r5 = product high + high + 1 + carry
	mul	r5, r2, r5		C r5 = estimate * d, low 32 bits
	sub	r12, r12, r5		C candidate remainder = low - estimate * d
	cmp	r12, r0
	addhi	r12, r12, r2		C candidate > r0 (unsigned): add d
	cmp	r2, r12
	subls	r12, r12, r2		C d <= candidate (unsigned): subtract d
	mov	r0, r12, lsr r1		C result = remainder >> count
	pop	{r4-r10}
	bx	r14
EPILOGUE()

C -----------------------------------------------------------------------------
C mpn_mod_1_1p_cps -- fill in the four-limb table read by mpn_mod_1_1p.
C
C In:	r0 = pointer to the four-limb output table
C	r1 = divisor b.  NOTE(review): for b = 0 the clz result is 32 and the
C	     shifted divisor handed to mpn_invert_limb is 0 -- presumably b
C	     must be nonzero; confirm with callers.
C Out, with cnt = clz (b), bn = b << cnt, and all products taken modulo 2^32:
C	table[0] = mpn_invert_limb (bn)
C	table[1] = cnt
C	table[2] = ((-bn) * ((1 << cnt) | (table[0] >> (32 - cnt)))) >> cnt
C		   stored only when cnt is nonzero, otherwise left untouched
C	table[3] = (-bn) * table[0]
C
C r4-r6 carry values across the call to mpn_invert_limb and are saved together
C with the return address (16 bytes pushed in total).  r1 and r3 are only
C written after the call, so nothing depends on the callee preserving them.
C -----------------------------------------------------------------------------
PROLOGUE(mpn_mod_1_1p_cps)
	stmfd	sp!, {r4, r5, r6, r14}
	mov	r5, r0			C r5 = table pointer, kept across the call
	clz	r4, r1			C r4 = cnt
	mov	r0, r1, asl r4		C r0 = bn = b << cnt
	rsb	r6, r0, #0		C r6 = -bn
	bl	mpn_invert_limb		C r0 = mpn_invert_limb (bn)
	str	r0, [r5, #0]		C table[0]
	str	r4, [r5, #4]		C table[1] = cnt
	cmp	r4, #0
	beq	L(2)			C cnt is zero, table[2] is not written
	rsb	r1, r4, #32		C r1 = 32 - cnt
	mov	r3, #1
	mov	r3, r3, asl r4		C r3 = 1 << cnt
	orr	r3, r3, r0, lsr r1	C r3 |= table[0] >> (32 - cnt)
	mul	r3, r6, r3		C r3 = -bn * r3, low 32 bits
	mov	r4, r3, lsr r4		C cnt in r4 is overwritten, already stored
	str	r4, [r5, #8]		C table[2]
L(2):	mul	r0, r6, r0		C r0 = -bn * table[0], low 32 bits
	str	r0, [r5, #12]		C table[3]
	ldmfd	sp!, {r4, r5, r6, pc}	C restore registers and return
EPILOGUE()