|
Packit |
5c3484 |
dnl ARM mpn_addlsh1_n and mpn_sublsh1_n
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
dnl Contributed to the GNU project by Torbjörn Granlund.
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
dnl Copyright 2012 Free Software Foundation, Inc.
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
dnl This file is part of the GNU MP Library.
|
|
Packit |
5c3484 |
dnl
|
|
Packit |
5c3484 |
dnl The GNU MP Library is free software; you can redistribute it and/or modify
|
|
Packit |
5c3484 |
dnl it under the terms of either:
|
|
Packit |
5c3484 |
dnl
|
|
Packit |
5c3484 |
dnl * the GNU Lesser General Public License as published by the Free
|
|
Packit |
5c3484 |
dnl Software Foundation; either version 3 of the License, or (at your
|
|
Packit |
5c3484 |
dnl option) any later version.
|
|
Packit |
5c3484 |
dnl
|
|
Packit |
5c3484 |
dnl or
|
|
Packit |
5c3484 |
dnl
|
|
Packit |
5c3484 |
dnl * the GNU General Public License as published by the Free Software
|
|
Packit |
5c3484 |
dnl Foundation; either version 2 of the License, or (at your option) any
|
|
Packit |
5c3484 |
dnl later version.
|
|
Packit |
5c3484 |
dnl
|
|
Packit |
5c3484 |
dnl or both in parallel, as here.
|
|
Packit |
5c3484 |
dnl
|
|
Packit |
5c3484 |
dnl The GNU MP Library is distributed in the hope that it will be useful, but
|
|
Packit |
5c3484 |
dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
|
Packit |
5c3484 |
dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
Packit |
5c3484 |
dnl for more details.
|
|
Packit |
5c3484 |
dnl
|
|
Packit |
5c3484 |
dnl You should have received copies of the GNU General Public License and the
|
|
Packit |
5c3484 |
dnl GNU Lesser General Public License along with the GNU MP Library. If not,
|
|
Packit |
5c3484 |
dnl see https://www.gnu.org/licenses/.
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
include(`../config.m4')
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
C addlsh1_n sublsh1_n
|
|
Packit |
5c3484 |
C cycles/limb cycles/limb
|
|
Packit |
5c3484 |
C StrongARM ? ?
|
|
Packit |
5c3484 |
C XScale ? ?
|
|
Packit |
5c3484 |
C Cortex-A7 ? ?
|
|
Packit |
5c3484 |
C Cortex-A8 ? ?
|
|
Packit |
5c3484 |
C Cortex-A9 3.12 3.7
|
|
Packit |
5c3484 |
C Cortex-A15 ? ?
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
C TODO
|
|
Packit |
5c3484 |
C * The addlsh1_n code runs well, but is only barely faster than mpn_addmul_1.
|
|
Packit |
5c3484 |
C The sublsh1_n code could surely be tweaked; its REVCY slows down things
|
|
Packit |
5c3484 |
C very much. If two insns are really needed, it might help to separate them
|
|
Packit |
5c3484 |
C for better micro-parallelism.
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
C Symbolic names for the four argument registers, by the role each plays in
C the code below:
C   rp = r0   destination pointer; results are written with stmia/stm/str
C   up = r1   source whose limbs are loaded into r4-r6 and combined by ADDSUBC
C   vp = r2   source whose limbs are loaded into r8-r10 and doubled first
C   n  = r3   limb count; decremented by 3 per group of three limbs
define(`rp', `r0')
|
|
Packit |
5c3484 |
define(`up', `r1')
|
|
Packit |
5c3484 |
define(`vp', `r2')
|
|
Packit |
5c3484 |
define(`n', `r3')
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
C Macro set used when OPERATION_addlsh1_n is defined; the shared code below
C is then assembled under the name mpn_addlsh1_n.
C   ADDSUB    adds.  Not referenced in the visible code.
C   ADDSUBC   adcs, the carry-chained combining instruction.
C   SETCY     cmp reg, #1.  Not referenced in the visible code.
C   RETVAL    adc r0, reg, #2: r0 = reg + 2 + C flag.  With reg holding a
C             SAVECY result (-1 or -2) this is the parked carry plus C.
C   SAVECY    sbc dst, src, #0: dst = src - 1 + C flag.  The code passes
C             r11 = -1 as src, so dst is -1 if C was set and -2 if not.
C   RESTCY    cmn reg, #1: sets the C flag exactly when reg is -1.
C   REVCY     empty; no carry inversion is needed between adcs chains.
C   INICYR    mov reg, #0: a parked value that RESTCY reads as carry clear.
C   r10r11    r11: upper end of the pushed register range, because this
C             variant uses r11 as the constant -1.
C   func      mpn_addlsh1_n, the entry point name.
C   func_nc   mpn_addlsh1_nc.  Not referenced in the visible code.
ifdef(`OPERATION_addlsh1_n', `
|
|
Packit |
5c3484 |
define(`ADDSUB', adds)
|
|
Packit |
5c3484 |
define(`ADDSUBC', adcs)
|
|
Packit |
5c3484 |
define(`SETCY', `cmp $1, #1')
|
|
Packit |
5c3484 |
define(`RETVAL', `adc r0, $1, #2')
|
|
Packit |
5c3484 |
define(`SAVECY', `sbc $1, $2, #0')
|
|
Packit |
5c3484 |
define(`RESTCY', `cmn $1, #1')
|
|
Packit |
5c3484 |
define(`REVCY', `')
|
|
Packit |
5c3484 |
define(`INICYR', `mov $1, #0')
|
|
Packit |
5c3484 |
define(`r10r11', `r11')
|
|
Packit |
5c3484 |
define(`func', mpn_addlsh1_n)
|
|
Packit |
5c3484 |
define(`func_nc', mpn_addlsh1_nc)')
|
|
Packit |
5c3484 |
C Macro set used when OPERATION_sublsh1_n is defined; the shared code below
C is then assembled under the name mpn_sublsh1_n.
C   ADDSUB    subs.  Not referenced in the visible code.
C   ADDSUBC   sbcs, the borrow-chained combining instruction (C set means
C             no borrow).
C   SETCY     rsbs reg, reg, #0.  Not referenced in the visible code.
C   RETVAL    adc r0, reg, #1: r0 = reg + 1 + C flag.
C   SAVECY    sbc dst, dst, dst: dst = C flag - 1, that is 0 if C was set
C             and -1 if it was clear.  The second argument is ignored.
C   RESTCY    cmn reg, #1: sets the C flag exactly when reg is -1.  After
C             SAVECY this yields the inverse of the flag that was saved, so
C             a carry out of the doubling arrives at sbcs as a borrow.
C   REVCY     the SAVECY and RESTCY instructions back to back on one
C             register: inverts the C flag and clobbers that register.
C   INICYR    mvn reg, #0: reg = -1, which RESTCY reads as carry set, so the
C             first sbcs sees no borrow.
C   r10r11    r10: upper end of the pushed register range; r11 does not
C             appear in this variant once the macros are expanded.
C   func      mpn_sublsh1_n, the entry point name.
C   func_nc   mpn_sublsh1_nc.  Not referenced in the visible code.
ifdef(`OPERATION_sublsh1_n', `
|
|
Packit |
5c3484 |
define(`ADDSUB', subs)
|
|
Packit |
5c3484 |
define(`ADDSUBC', sbcs)
|
|
Packit |
5c3484 |
define(`SETCY', `rsbs $1, $1, #0')
|
|
Packit |
5c3484 |
define(`RETVAL', `adc r0, $1, #1')
|
|
Packit |
5c3484 |
define(`SAVECY', `sbc $1, $1, $1')
|
|
Packit |
5c3484 |
define(`RESTCY', `cmn $1, #1')
|
|
Packit |
5c3484 |
define(`REVCY', `sbc $1, $1, $1
|
|
Packit |
5c3484 |
cmn $1, #1')
|
|
Packit |
5c3484 |
define(`INICYR', `mvn $1, #0')
|
|
Packit |
5c3484 |
define(`r10r11', `r10')
|
|
Packit |
5c3484 |
define(`func', mpn_sublsh1_n)
|
|
Packit |
5c3484 |
define(`func_nc', mpn_sublsh1_nc)')
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
C The two entry points this file can be built as; which one is chosen
C depends on the OPERATION_* symbol tested by the ifdefs above.
C NOTE(review): the exact effect of MULFUNC_PROLOGUE is defined outside this
C file - confirm against the build machinery.
MULFUNC_PROLOGUE(mpn_addlsh1_n mpn_sublsh1_n)
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
ASM_START()
|
|
Packit |
5c3484 |
C func expands to mpn_addlsh1_n or mpn_sublsh1_n.
C
C Inputs: rp (r0) destination, up (r1) and vp (r2) sources, n (r3) limb
C count.  For each limb the code doubles the vp limb through an adcs chain
C and then combines it with the up limb using ADDSUBC (adcs or sbcs), storing
C the result through rp.  Limbs are handled three at a time; one or two
C leftover limbs are handled at the e1 and e02 labels.
C
C Two carry chains share the single C flag:
C   - the doubling chain (adcs x, x, x); its carry out is parked in r12 or
C     r14 by SAVECY and fed into a later ADDSUBC chain by RESTCY;
C   - the ADDSUBC chain; its carry out (inverted by REVCY when subtracting)
C     is the carry in of the next doubling chain.
C r12 and r14 alternate as the parking register because a new doubling carry
C is saved before the previous one has been consumed.
C
C Output: r0, built by RETVAL from the last parked doubling carry and the
C final C flag.  r4 up to r10 or r11, and r14, are pushed and popped; r12 is
C overwritten.
C NOTE(review): with n = 0 control reaches rt0 with r14 still holding the
C INICYR value, which RETVAL was not designed for in the add variant;
C presumably callers guarantee n >= 1 - confirm.
PROLOGUE(func)
|
|
Packit |
5c3484 |
C Save the work registers and r14 (the register ret returns through).
push {r4-r10r11, r14}
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
C Add variant only: r11 = -1 is the src operand that SAVECY subtracts from.
ifdef(`OPERATION_addlsh1_n', `
|
|
Packit |
5c3484 |
mvn r11, #0
|
|
Packit |
5c3484 |
')
|
|
Packit |
5c3484 |
C r14 starts as a parked carry meaning no carry or borrow into the first
C ADDSUBC chain.
INICYR( r14)
|
|
Packit |
5c3484 |
C n -= 3; negative means fewer than three limbs, so skip the main loop.
subs n, n, #3
|
|
Packit |
5c3484 |
blt L(le2) C carry clear on branch path
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
cmn r0, #0 C clear carry
|
|
Packit |
5c3484 |
C Load the first three vp limbs (vp advances) and enter the loop at its
C doubling step.
ldmia vp!, {r8, r9, r10}
|
|
Packit |
5c3484 |
b L(mid)
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
C First half of the loop: restore the doubling carry parked in r14 and
C combine the three up limbs in r4-r6 with the doubled limbs in r8-r10.
L(top): RESTCY( r14)
|
|
Packit |
5c3484 |
ADDSUBC r4, r4, r8
|
|
Packit |
5c3484 |
ADDSUBC r5, r5, r9
|
|
Packit |
5c3484 |
ADDSUBC r6, r6, r10
|
|
Packit |
5c3484 |
C Fetch the next three vp limbs and store the three finished result limbs.
ldmia vp!, {r8, r9, r10}
|
|
Packit |
5c3484 |
stmia rp!, {r4, r5, r6}
|
|
Packit |
5c3484 |
C Subtract variant: invert C before the doubling.  r14 may be clobbered
C because RESTCY has already consumed it.  Empty in the add variant.
REVCY(r14)
|
|
Packit |
5c3484 |
C Double the new vp limbs; carry in comes from the ADDSUBC chain above.
adcs r8, r8, r8
|
|
Packit |
5c3484 |
adcs r9, r9, r9
|
|
Packit |
5c3484 |
adcs r10, r10, r10
|
|
Packit |
5c3484 |
C Load the matching three up limbs.
ldmia up!, {r4, r5, r6}
|
|
Packit |
5c3484 |
C Park the carry out of the doubling in r14; r12 still holds the older one.
SAVECY( r14, r11)
|
|
Packit |
5c3484 |
C No further full group of three: finish the pending group at exi.
subs n, n, #3
|
|
Packit |
5c3484 |
blt L(exi)
|
|
Packit |
5c3484 |
C Second half of the loop: the same steps with r12 as the parked carry.
RESTCY( r12)
|
|
Packit |
5c3484 |
ADDSUBC r4, r4, r8
|
|
Packit |
5c3484 |
ADDSUBC r5, r5, r9
|
|
Packit |
5c3484 |
ADDSUBC r6, r6, r10
|
|
Packit |
5c3484 |
ldmia vp!, {r8, r9, r10}
|
|
Packit |
5c3484 |
stmia rp!, {r4, r5, r6}
|
|
Packit |
5c3484 |
REVCY(r12)
|
|
Packit |
5c3484 |
C Loop entry point: double the three vp limbs in r8-r10.
L(mid): adcs r8, r8, r8
|
|
Packit |
5c3484 |
adcs r9, r9, r9
|
|
Packit |
5c3484 |
adcs r10, r10, r10
|
|
Packit |
5c3484 |
ldmia up!, {r4, r5, r6}
|
|
Packit |
5c3484 |
C Park the carry out of the doubling in r12; r14 still holds the older one.
SAVECY( r12, r11)
|
|
Packit |
5c3484 |
subs n, n, #3
|
|
Packit |
5c3484 |
bge L(top)
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
C Leaving from the second half: exchange r12 and r14 (r7 is scratch) so
C that, as on the blt path, r12 holds the carry to consume next and r14 the
C newest one.
mov r7, r12 C swap alternating...
|
|
Packit |
5c3484 |
mov r12, r14 C ...carry-save...
|
|
Packit |
5c3484 |
mov r14, r7 C ...registers
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
C Combine and store the last full group of three limbs.
L(exi): RESTCY( r12)
|
|
Packit |
5c3484 |
ADDSUBC r4, r4, r8
|
|
Packit |
5c3484 |
ADDSUBC r5, r5, r9
|
|
Packit |
5c3484 |
ADDSUBC r6, r6, r10
|
|
Packit |
5c3484 |
stmia rp!, {r4, r5, r6}
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
C Subtract variant: invert C so it is a carry for the tail doubling or for
C RETVAL.
REVCY(r12)
|
|
Packit |
5c3484 |
C Tail dispatch on the low two bits of the negative n.  The code relies on
C the C flag surviving both tst instructions.  Bit 0 clear (n = -2) means
C exactly one limb is left.
L(le2): tst n, #1 C n = {-1,-2,-3} map to [2], [1], [0]
|
|
Packit |
5c3484 |
beq L(e1)
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
C Bit 1 clear (n = -3) means nothing is left; otherwise two limbs remain.
L(e02): tst n, #2
|
|
Packit |
5c3484 |
beq L(rt0)
|
|
Packit |
5c3484 |
C Two-limb tail: double two vp limbs, park that carry in r12, restore the
C previous doubling carry from r14, combine with two up limbs and store.
ldm vp, {r8, r9}
|
|
Packit |
5c3484 |
adcs r8, r8, r8
|
|
Packit |
5c3484 |
adcs r9, r9, r9
|
|
Packit |
5c3484 |
ldm up, {r4, r5}
|
|
Packit |
5c3484 |
SAVECY( r12, r11)
|
|
Packit |
5c3484 |
RESTCY( r14)
|
|
Packit |
5c3484 |
ADDSUBC r4, r4, r8
|
|
Packit |
5c3484 |
ADDSUBC r5, r5, r9
|
|
Packit |
5c3484 |
stm rp, {r4, r5}
|
|
Packit |
5c3484 |
b L(rt1)
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
C One-limb tail: the same sequence for a single limb.
L(e1): ldr r8, [vp]
|
|
Packit |
5c3484 |
adcs r8, r8, r8
|
|
Packit |
5c3484 |
ldr r4, [up]
|
|
Packit |
5c3484 |
SAVECY( r12, r11)
|
|
Packit |
5c3484 |
RESTCY( r14)
|
|
Packit |
5c3484 |
ADDSUBC r4, r4, r8
|
|
Packit |
5c3484 |
str r4, [rp]
|
|
Packit |
5c3484 |
|
|
Packit |
5c3484 |
C Move the final doubling carry into r14 for RETVAL before REVCY clobbers
C r12; REVCY then inverts C in the subtract variant.
L(rt1): mov r14, r12
|
|
Packit |
5c3484 |
REVCY(r12)
|
|
Packit |
5c3484 |
C r0 = parked doubling carry (decoded by the constant in RETVAL) + C flag.
L(rt0): RETVAL( r14)
|
|
Packit |
5c3484 |
C Restore the saved registers, including the return address in r14.
pop {r4-r10r11, r14}
|
|
Packit |
5c3484 |
ret r14
|
|
Packit |
5c3484 |
EPILOGUE()
|