Blame mpn/pa32/hppa1_1/pa7100/submul_1.asm

Packit 5c3484
dnl  HP-PA 7100/7200 mpn_submul_1 -- Multiply a limb vector with a limb and
Packit 5c3484
dnl  subtract the result from a second limb vector.
Packit 5c3484
Packit 5c3484
dnl  Copyright 1995, 2000-2003 Free Software Foundation, Inc.
Packit 5c3484
Packit 5c3484
dnl  This file is part of the GNU MP Library.
Packit 5c3484
dnl
Packit 5c3484
dnl  The GNU MP Library is free software; you can redistribute it and/or modify
Packit 5c3484
dnl  it under the terms of either:
Packit 5c3484
dnl
Packit 5c3484
dnl    * the GNU Lesser General Public License as published by the Free
Packit 5c3484
dnl      Software Foundation; either version 3 of the License, or (at your
Packit 5c3484
dnl      option) any later version.
Packit 5c3484
dnl
Packit 5c3484
dnl  or
Packit 5c3484
dnl
Packit 5c3484
dnl    * the GNU General Public License as published by the Free Software
Packit 5c3484
dnl      Foundation; either version 2 of the License, or (at your option) any
Packit 5c3484
dnl      later version.
Packit 5c3484
dnl
Packit 5c3484
dnl  or both in parallel, as here.
Packit 5c3484
dnl
Packit 5c3484
dnl  The GNU MP Library is distributed in the hope that it will be useful, but
Packit 5c3484
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
Packit 5c3484
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
Packit 5c3484
dnl  for more details.
Packit 5c3484
dnl
Packit 5c3484
dnl  You should have received copies of the GNU General Public License and the
Packit 5c3484
dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
Packit 5c3484
dnl  see https://www.gnu.org/licenses/.
Packit 5c3484
Packit 5c3484
include(`../config.m4')
Packit 5c3484
Packit 5c3484
C INPUT PARAMETERS
C res_ptr    limb vector that is read, reduced by the products, and rewritten
C s1_ptr     limb vector that is multiplied
C size_param limb count; it is decremented in place as the loop counter
C s2_limb    the single multiplier limb
Packit 5c3484
define(`res_ptr',`%r26')
Packit 5c3484
define(`s1_ptr',`%r25')
Packit 5c3484
define(`size_param',`%r24')
Packit 5c3484
define(`s2_limb',`%r23')
Packit 5c3484
Packit 5c3484
C cylimb is the running high limb carried from one product to the next; it is
C also the value left behind when the routine returns.
define(`cylimb',`%r28')
C s0-s3 hold limbs loaded from res_ptr and the differences written back.
C s2 and s3 live in %r3 and %r4, which the routine saves and restores.
Packit 5c3484
define(`s0',`%r19')
Packit 5c3484
define(`s1',`%r20')
Packit 5c3484
define(`s2',`%r3')
Packit 5c3484
define(`s3',`%r4')
C lo0-lo3 receive the low words of the 64-bit products read back from the
C stack.  lo1-lo3 live in %r5-%r7, which the routine saves and restores.
Packit 5c3484
define(`lo0',`%r21')
Packit 5c3484
define(`lo1',`%r5')
Packit 5c3484
define(`lo2',`%r6')
Packit 5c3484
define(`lo3',`%r7')
C hi0-hi3 receive the high words of the same products.  hi1 aliases the
C s2_limb register; that is safe because s2_limb is copied to the stack and
C into %fr31R at function entry, before hi1 is first written.
Packit 5c3484
define(`hi0',`%r22')
Packit 5c3484
define(`hi1',`%r23')				C safe to reuse
Packit 5c3484
define(`hi2',`%r29')
Packit 5c3484
define(`hi3',`%r1')
Packit 5c3484
Packit 5c3484
ASM_START()
Packit 5c3484
PROLOGUE(mpn_submul_1)
C Multiply the size_param limbs at s1_ptr by s2_limb and subtract the products
C from the limbs at res_ptr, writing the differences back through res_ptr.
C The limb carried out of the most significant position is left in cylimb.
C
C Frame layout once %r30 has been advanced by 128 (offsets from the new %r30):
C   -128..-97   four 64-bit product slots, addressed as -16..15 off %r31
C    -96..-77   save area for %r3-%r7
C    -16..-9    s2_limb transfer word, reused as the product slot of loop2
C
C Carry handling: the subtract groups are followed by an "invert cy" step so
C that the borrow can be consumed as a carry by the addc chain that folds it,
C together with the previous high word, into the next product.
C NOTE(review): this relies on sub/subb leaving the carry bit set when there is
C no borrow, and on addib, ldo, loads and stores leaving the carry bit alone --
C confirm against the PA-RISC architecture manual.
Packit 5c3484
C	.callinfo	frame=128,no_calls
Packit 5c3484
Packit 5c3484
	ldo	128(%r30),%r30
Packit 5c3484
	stws	s2_limb,-16(%r30)
Packit 5c3484
	add	 %r0,%r0,cylimb			C clear cy and cylimb
C Fewer than 4 limbs: go straight to the one-limb-at-a-time loop.  The fldws
C that follows sits in the branch delay slot and moves s2_limb from the stack
C into %fr31R for xmpyu on both paths.
Packit 5c3484
	addib,<	-4,size_param,L(few_limbs)
Packit 5c3484
	fldws	-16(%r30),%fr31R
Packit 5c3484
Packit 5c3484
C %r31 points into the product scratch area; %r3-%r7 are saved because the
C unrolled code uses them as s2, s3 and lo1-lo3.
	ldo	-112(%r30),%r31
Packit 5c3484
	stw	%r3,-96(%r30)
Packit 5c3484
	stw	%r4,-92(%r30)
Packit 5c3484
	stw	%r5,-88(%r30)
Packit 5c3484
	stw	%r6,-84(%r30)
Packit 5c3484
	stw	%r7,-80(%r30)
Packit 5c3484
Packit 5c3484
C Test bit 29 of s1_ptr.  When it is clear the single-limb step below is
C skipped.  Presumably this makes s1_ptr doubleword aligned for the fldds
C loads in the pipeline (bit 29 being the 4s bit) -- TODO confirm.
	bb,>=,n	 s1_ptr,29,L(0)
Packit 5c3484
Packit 5c3484
C Single leading limb: cylimb and cy are both zero here, so the low product
C word is subtracted directly and the high word becomes the new cylimb.
	fldws,ma 4(s1_ptr),%fr4
Packit 5c3484
	ldws	 0(res_ptr),s0
Packit 5c3484
	xmpyu	 %fr4,%fr31R,%fr5
Packit 5c3484
	fstds	 %fr5,-16(%r31)
Packit 5c3484
	ldws	-16(%r31),cylimb
Packit 5c3484
	ldws	-12(%r31),lo0
Packit 5c3484
	sub	 s0,lo0,s0
Packit 5c3484
	add	 s0,lo0,%r0			C invert cy
C One limb consumed.  If fewer than 4 remain, finish in loop2; %r3-%r7 have
C not been modified on this path, so skipping the restore code is harmless.
C The store of s0 is in the delay slot of the branch.
Packit 5c3484
	addib,< -1,size_param,L(few_limbs)
Packit 5c3484
	stws,ma	 s0,4(res_ptr)
Packit 5c3484
Packit 5c3484
C start software pipeline ----------------------------------------------------
C Compute the first four products and their carry-adjusted low words ahead of
C the loop; the matching subtraction happens in the loop body or at L(end).
Packit 5c3484
LDEF(0)
Packit 5c3484
	fldds,ma 8(s1_ptr),%fr4
Packit 5c3484
	fldds,ma 8(s1_ptr),%fr8
Packit 5c3484
Packit 5c3484
	xmpyu	 %fr4L,%fr31R,%fr5
Packit 5c3484
	xmpyu	 %fr4R,%fr31R,%fr6
Packit 5c3484
	xmpyu	 %fr8L,%fr31R,%fr9
Packit 5c3484
	xmpyu	 %fr8R,%fr31R,%fr10
Packit 5c3484
Packit 5c3484
C Products travel from the FP registers to the integer registers through the
C stack slots: high word at the lower address, low word 4 bytes above it.
	fstds	 %fr5,-16(%r31)
Packit 5c3484
	fstds	 %fr6,-8(%r31)
Packit 5c3484
	fstds	 %fr9,0(%r31)
Packit 5c3484
	fstds	 %fr10,8(%r31)
Packit 5c3484
Packit 5c3484
	ldws   -16(%r31),hi0
Packit 5c3484
	ldws   -12(%r31),lo0
Packit 5c3484
	ldws	-8(%r31),hi1
Packit 5c3484
	ldws	-4(%r31),lo1
Packit 5c3484
	ldws	 0(%r31),hi2
Packit 5c3484
	ldws	 4(%r31),lo2
Packit 5c3484
	ldws	 8(%r31),hi3
Packit 5c3484
	ldws	12(%r31),lo3
Packit 5c3484
Packit 5c3484
C Each low word absorbs the high word of the product below it plus the
C incoming carry; the last high word plus carry becomes the next cylimb.
	addc	 lo0,cylimb,lo0
Packit 5c3484
	addc	 lo1,hi0,lo1
Packit 5c3484
	addc	 lo2,hi1,lo2
Packit 5c3484
	addc	 lo3,hi2,lo3
Packit 5c3484
Packit 5c3484
C If there is no further full group of 4 limbs, skip the loop and drain.
	addib,<	 -4,size_param,L(end)
Packit 5c3484
	addc	 %r0,hi3,cylimb			C propagate carry into cylimb
Packit 5c3484
C main loop ------------------------------------------------------------------
C Each iteration starts the multiplies for the next four limbs while it
C subtracts the four adjusted low words produced by the previous iteration.
Packit 5c3484
LDEF(loop)
Packit 5c3484
	fldds,ma 8(s1_ptr),%fr4
Packit 5c3484
	fldds,ma 8(s1_ptr),%fr8
Packit 5c3484
Packit 5c3484
	ldws	 0(res_ptr),s0
Packit 5c3484
	xmpyu	 %fr4L,%fr31R,%fr5
Packit 5c3484
	ldws	 4(res_ptr),s1
Packit 5c3484
	xmpyu	 %fr4R,%fr31R,%fr6
Packit 5c3484
	ldws	 8(res_ptr),s2
Packit 5c3484
	xmpyu	 %fr8L,%fr31R,%fr9
Packit 5c3484
	ldws	12(res_ptr),s3
Packit 5c3484
	xmpyu	 %fr8R,%fr31R,%fr10
Packit 5c3484
Packit 5c3484
	fstds	 %fr5,-16(%r31)
Packit 5c3484
	sub	 s0,lo0,s0
Packit 5c3484
	fstds	 %fr6,-8(%r31)
Packit 5c3484
	subb	 s1,lo1,s1
Packit 5c3484
	fstds	 %fr9,0(%r31)
Packit 5c3484
	subb	 s2,lo2,s2
Packit 5c3484
	fstds	 %fr10,8(%r31)
Packit 5c3484
	subb	 s3,lo3,s3
C lo0 is free at this point (already subtracted, reloaded just below), so it
C serves as the scratch register for the carry inversion.
Packit 5c3484
	subb	 %r0,%r0,lo0			C these two insns ...
Packit 5c3484
	add	 lo0,lo0,%r0			C ... just invert cy
Packit 5c3484
Packit 5c3484
	ldws   -16(%r31),hi0
Packit 5c3484
	ldws   -12(%r31),lo0
Packit 5c3484
	ldws	-8(%r31),hi1
Packit 5c3484
	ldws	-4(%r31),lo1
Packit 5c3484
	ldws	 0(%r31),hi2
Packit 5c3484
	ldws	 4(%r31),lo2
Packit 5c3484
	ldws	 8(%r31),hi3
Packit 5c3484
	ldws	12(%r31),lo3
Packit 5c3484
Packit 5c3484
C Same carry chain as in the pipeline start, interleaved with the stores of
C the four differences computed above.
	addc	 lo0,cylimb,lo0
Packit 5c3484
	stws,ma	 s0,4(res_ptr)
Packit 5c3484
	addc	 lo1,hi0,lo1
Packit 5c3484
	stws,ma	 s1,4(res_ptr)
Packit 5c3484
	addc	 lo2,hi1,lo2
Packit 5c3484
	stws,ma	 s2,4(res_ptr)
Packit 5c3484
	addc	 lo3,hi2,lo3
Packit 5c3484
	stws,ma	 s3,4(res_ptr)
Packit 5c3484
Packit 5c3484
	addib,>= -4,size_param,L(loop)
Packit 5c3484
	addc	 %r0,hi3,cylimb			C propagate carry into cylimb
Packit 5c3484
C finish software pipeline ---------------------------------------------------
C Drain: subtract the last four adjusted low words that are still pending.
Packit 5c3484
LDEF(end)
Packit 5c3484
	ldws	 0(res_ptr),s0
Packit 5c3484
	ldws	 4(res_ptr),s1
Packit 5c3484
	ldws	 8(res_ptr),s2
Packit 5c3484
	ldws	12(res_ptr),s3
Packit 5c3484
Packit 5c3484
	sub	 s0,lo0,s0
Packit 5c3484
	stws,ma	 s0,4(res_ptr)
Packit 5c3484
	subb	 s1,lo1,s1
Packit 5c3484
	stws,ma	 s1,4(res_ptr)
Packit 5c3484
	subb	 s2,lo2,s2
Packit 5c3484
	stws,ma	 s2,4(res_ptr)
Packit 5c3484
	subb	 s3,lo3,s3
Packit 5c3484
	stws,ma	 s3,4(res_ptr)
Packit 5c3484
	subb	 %r0,%r0,lo0			C these two insns ...
Packit 5c3484
	add	 lo0,lo0,%r0			C ... invert cy
Packit 5c3484
Packit 5c3484
C restore callee-saves registers ---------------------------------------------
Packit 5c3484
	ldw	-96(%r30),%r3
Packit 5c3484
	ldw	-92(%r30),%r4
Packit 5c3484
	ldw	-88(%r30),%r5
Packit 5c3484
	ldw	-84(%r30),%r6
Packit 5c3484
	ldw	-80(%r30),%r7
Packit 5c3484
Packit 5c3484
C size_param is negative on every path that reaches this label; adding 4 back
C gives the number of limbs still to process.  When that is zero the branch
C to L(ret) is taken and the ,n completer nullifies the first loop2 insn.
LDEF(few_limbs)
Packit 5c3484
	addib,=,n 4,size_param,L(ret)
Packit 5c3484
Packit 5c3484
C One limb per iteration, using only s0, lo0, hi0 and cylimb, with the
C product passed through the 8 bytes at -16(%r30).
LDEF(loop2)
Packit 5c3484
	fldws,ma 4(s1_ptr),%fr4
Packit 5c3484
	ldws	 0(res_ptr),s0
Packit 5c3484
	xmpyu	 %fr4,%fr31R,%fr5
Packit 5c3484
	fstds	 %fr5,-16(%r30)
Packit 5c3484
	ldws	-16(%r30),hi0
Packit 5c3484
	ldws	-12(%r30),lo0
Packit 5c3484
	addc	 lo0,cylimb,lo0
Packit 5c3484
	addc	 %r0,hi0,cylimb
Packit 5c3484
	sub	 s0,lo0,s0
Packit 5c3484
	add	 s0,lo0,%r0			C invert cy
Packit 5c3484
	stws,ma	 s0,4(res_ptr)
Packit 5c3484
	addib,<> -1,size_param,L(loop2)
Packit 5c3484
	nop
Packit 5c3484
Packit 5c3484
C Fold the last pending carry into cylimb, then return through %r2; the ldo
C after bv releases the 128-byte frame.
LDEF(ret)
Packit 5c3484
	addc	 %r0,cylimb,cylimb
Packit 5c3484
	bv	 0(%r2)
Packit 5c3484
	ldo	 -128(%r30),%r30
Packit 5c3484
EPILOGUE(mpn_submul_1)