dnl HP-PA 7100/7200 mpn_submul_1 -- Multiply a limb vector with a limb and
dnl subtract the result from a second limb vector.

dnl Copyright 1995, 2000-2003 Free Software Foundation, Inc.

dnl This file is part of the GNU MP Library.
dnl
dnl The GNU MP Library is free software; you can redistribute it and/or modify
dnl it under the terms of either:
dnl
dnl * the GNU Lesser General Public License as published by the Free
dnl Software Foundation; either version 3 of the License, or (at your
dnl option) any later version.
dnl
dnl or
dnl
dnl * the GNU General Public License as published by the Free Software
dnl Foundation; either version 2 of the License, or (at your option) any
dnl later version.
dnl
dnl or both in parallel, as here.
dnl
dnl The GNU MP Library is distributed in the hope that it will be useful, but
dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
dnl for more details.
dnl
dnl You should have received copies of the GNU General Public License and the
dnl GNU Lesser General Public License along with the GNU MP Library. If not,
dnl see https://www.gnu.org/licenses/.

include(`../config.m4')

C INPUT PARAMETERS
C The four arguments arrive in %r26, %r25, %r24 and %r23.
define(`res_ptr',`%r26')		C limb vector that is read, reduced and written back
define(`s1_ptr',`%r25')			C limb vector that is multiplied by s2_limb
define(`size_param',`%r24')		C number of limbs still to process
define(`s2_limb',`%r23')		C single-limb multiplier

C WORKING REGISTERS
C cylimb carries the high product word from one limb (or group of four
C limbs) to the next and holds the final carry-out when the function returns.
C s0-s3 hold limbs loaded from res_ptr, lo0-lo3/hi0-hi3 hold the low/high
C words of the four products handled per pipelined iteration.
C s2, s3, lo1, lo2 and lo3 live in %r3-%r7; the function saves those
C registers on its stack frame before using them and reloads them afterwards.
define(`cylimb',`%r28')
define(`s0',`%r19')
define(`s1',`%r20')
define(`s2',`%r3')
define(`s3',`%r4')
define(`lo0',`%r21')
define(`lo1',`%r5')
define(`lo2',`%r6')
define(`lo3',`%r7')
define(`hi0',`%r22')
C hi1 shares %r23 with s2_limb.  s2_limb is copied to the stack and into
C %fr31R at function entry, before hi1 is first written.
define(`hi1',`%r23')			C safe to reuse
define(`hi2',`%r29')
define(`hi3',`%r1')

ASM_START()
C ----------------------------------------------------------------------------
C mpn_submul_1 -- multiply the size_param limbs at s1_ptr by s2_limb and
C subtract the products from the limbs at res_ptr, storing the differences
C back through res_ptr.  Limbs are 32-bit words.
C
C In:	res_ptr (%r26), s1_ptr (%r25), size_param (%r24), s2_limb (%r23)
C Out:	cylimb (%r28) holds the final carry limb when control returns via %r2
C Frame:	128 bytes are allocated on %r30.  -16(%r30) is the scratch slot
C	used to move words between the integer and FP register files;
C	-96(%r30)..-80(%r30) hold the saved %r3-%r7; %r31 points at a 32-byte
C	area (four doubleword products) at the bottom of the frame.
C
C Carry convention: sub/subb leave cy clear when a borrow occurred, while the
C addc chains need cy set in that case.  After every subtract chain the code
C therefore inverts cy ("invert cy" below) so that the borrow is added into
C the low word of the next product.
C
C Structure:
C   1. fewer than 4 limbs          -> one-limb-at-a-time loop (loop2)
C   2. s1_ptr not 8-byte aligned   -> handle one limb so fldds can be used
C   3. software-pipelined loop, 4 limbs per iteration
C   4. 0..3 leftover limbs         -> loop2
C
C NOTE(review): the cy bit is carried across several addib instructions; the
C code relies on addib leaving cy untouched.
C ----------------------------------------------------------------------------
PROLOGUE(mpn_submul_1)
C	.callinfo	frame=128,no_calls

	ldo		128(%r30),%r30		C allocate the 128-byte frame
	stws		s2_limb,-16(%r30)	C spill multiplier so the FPU can load it
	add		%r0,%r0,cylimb		C clear cy and cylimb
	addib,<		-4,size_param,L(few_limbs)	C fewer than 4 limbs: simple loop
	fldws		-16(%r30),%fr31R	C (delay slot) multiplier into %fr31R

	ldo		-112(%r30),%r31		C %r31 = product buffer pointer
	stw		%r3,-96(%r30)		C save %r3-%r7, used as s2,s3,lo1-lo3
	stw		%r4,-92(%r30)
	stw		%r5,-88(%r30)
	stw		%r6,-84(%r30)
	stw		%r7,-80(%r30)

C Bit 29 of s1_ptr has weight 4.  If it is clear, s1_ptr is already suitable
C for the doubleword loads at L(0) and the single-limb step is skipped (the
C ,n completer nullifies the following fldws when the branch is taken).
	bb,>=,n		s1_ptr,29,L(0)

C One limb by itself, leaving s1_ptr advanced by 4 for the fldds loads.
	fldws,ma	4(s1_ptr),%fr4
	ldws		0(res_ptr),s0
	xmpyu		%fr4,%fr31R,%fr5
	fstds		%fr5,-16(%r31)
	ldws		-16(%r31),cylimb	C high word of the product
	ldws		-12(%r31),lo0		C low word of the product
	sub		s0,lo0,s0
	add		s0,lo0,%r0		C invert cy
C %r3-%r7 have not been modified yet, so branching to few_limbs (which skips
C the register restore) is fine here.
	addib,<		-1,size_param,L(few_limbs)
	stws,ma		s0,4(res_ptr)		C (delay slot) store the difference

C start software pipeline ----------------------------------------------------
C Compute the first four products and fold them into lo0-lo3/cylimb; they are
C subtracted from the res_ptr limbs in the main loop or at L(end).
LDEF(0)
	fldds,ma	8(s1_ptr),%fr4
	fldds,ma	8(s1_ptr),%fr8

	xmpyu		%fr4L,%fr31R,%fr5
	xmpyu		%fr4R,%fr31R,%fr6
	xmpyu		%fr8L,%fr31R,%fr9
	xmpyu		%fr8R,%fr31R,%fr10

	fstds		%fr5,-16(%r31)
	fstds		%fr6,-8(%r31)
	fstds		%fr9,0(%r31)
	fstds		%fr10,8(%r31)

	ldws		-16(%r31),hi0
	ldws		-12(%r31),lo0
	ldws		-8(%r31),hi1
	ldws		-4(%r31),lo1
	ldws		0(%r31),hi2
	ldws		4(%r31),lo2
	ldws		8(%r31),hi3
	ldws		12(%r31),lo3

C Add each product's high word into the next product's low word; the incoming
C cylimb and cy go into lo0.
	addc		lo0,cylimb,lo0
	addc		lo1,hi0,lo1
	addc		lo2,hi1,lo2
	addc		lo3,hi2,lo3

	addib,<		-4,size_param,L(end)	C fewer than 4 limbs left: drain
	addc		%r0,hi3,cylimb		C propagate carry into cylimb
C main loop ------------------------------------------------------------------
C Each iteration starts the multiplies for the next four limbs while
C subtracting the lo0-lo3 values produced by the previous iteration.
LDEF(loop)
	fldds,ma	8(s1_ptr),%fr4
	fldds,ma	8(s1_ptr),%fr8

	ldws		0(res_ptr),s0
	xmpyu		%fr4L,%fr31R,%fr5
	ldws		4(res_ptr),s1
	xmpyu		%fr4R,%fr31R,%fr6
	ldws		8(res_ptr),s2
	xmpyu		%fr8L,%fr31R,%fr9
	ldws		12(res_ptr),s3
	xmpyu		%fr8R,%fr31R,%fr10

	fstds		%fr5,-16(%r31)
	sub		s0,lo0,s0
	fstds		%fr6,-8(%r31)
	subb		s1,lo1,s1
	fstds		%fr9,0(%r31)
	subb		s2,lo2,s2
	fstds		%fr10,8(%r31)
	subb		s3,lo3,s3
	subb		%r0,%r0,lo0		C these two insns ...
	add		lo0,lo0,%r0		C ... just invert cy

	ldws		-16(%r31),hi0
	ldws		-12(%r31),lo0
	ldws		-8(%r31),hi1
	ldws		-4(%r31),lo1
	ldws		0(%r31),hi2
	ldws		4(%r31),lo2
	ldws		8(%r31),hi3
	ldws		12(%r31),lo3

	addc		lo0,cylimb,lo0
	stws,ma		s0,4(res_ptr)
	addc		lo1,hi0,lo1
	stws,ma		s1,4(res_ptr)
	addc		lo2,hi1,lo2
	stws,ma		s2,4(res_ptr)
	addc		lo3,hi2,lo3
	stws,ma		s3,4(res_ptr)

	addib,>=	-4,size_param,L(loop)
	addc		%r0,hi3,cylimb		C propagate carry into cylimb
C finish software pipeline ---------------------------------------------------
C Subtract the last group of four products that is still held in lo0-lo3.
LDEF(end)
	ldws		0(res_ptr),s0
	ldws		4(res_ptr),s1
	ldws		8(res_ptr),s2
	ldws		12(res_ptr),s3

	sub		s0,lo0,s0
	stws,ma		s0,4(res_ptr)
	subb		s1,lo1,s1
	stws,ma		s1,4(res_ptr)
	subb		s2,lo2,s2
	stws,ma		s2,4(res_ptr)
	subb		s3,lo3,s3
	stws,ma		s3,4(res_ptr)
	subb		%r0,%r0,lo0		C these two insns ...
	add		lo0,lo0,%r0		C ... invert cy

C restore callee-saves registers ---------------------------------------------
	ldw		-96(%r30),%r3
	ldw		-92(%r30),%r4
	ldw		-88(%r30),%r5
	ldw		-84(%r30),%r6
	ldw		-80(%r30),%r7

C size_param was biased by -4 on the way here; adding 4 back yields the
C number of limbs still to do.  If that is zero, go straight to L(ret) (the
C ,n completer nullifies the fldws at loop2 when the branch is taken).
LDEF(few_limbs)
	addib,=,n	4,size_param,L(ret)

C One limb per iteration: uses only s0, lo0, hi0 and cylimb, with the scratch
C slot at -16(%r30) for the FP-to-integer transfer.
LDEF(loop2)
	fldws,ma	4(s1_ptr),%fr4
	ldws		0(res_ptr),s0
	xmpyu		%fr4,%fr31R,%fr5
	fstds		%fr5,-16(%r30)
	ldws		-16(%r30),hi0
	ldws		-12(%r30),lo0
	addc		lo0,cylimb,lo0		C add previous high word and pending cy
	addc		%r0,hi0,cylimb		C new high word plus carry from above
	sub		s0,lo0,s0
	add		s0,lo0,%r0		C invert cy
	stws,ma		s0,4(res_ptr)
	addib,<>	-1,size_param,L(loop2)
	nop

LDEF(ret)
	addc		%r0,cylimb,cylimb	C fold the pending cy into the result
	bv		0(%r2)			C return
	ldo		-128(%r30),%r30		C (delay slot) release the frame
EPILOGUE(mpn_submul_1)