 # sysdeps/alpha/rshift.S

 # Alpha 21064 __mpn_rshift --

 # Copyright (C) 1994-2018 Free Software Foundation, Inc.

 # This file is part of the GNU MP Library.

 # The GNU MP Library is free software; you can redistribute it and/or modify
 # it under the terms of the GNU Lesser General Public License as published by
 # the Free Software Foundation; either version 2.1 of the License, or (at your
 # option) any later version.

 # The GNU MP Library is distributed in the hope that it will be useful, but
 # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
 # or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
 # License for more details.

 # You should have received a copy of the GNU Lesser General Public License
 # along with the GNU MP Library.  If not, see <http://www.gnu.org/licenses/>.


 # INPUT PARAMETERS
 # res_ptr	r16
 # s1_ptr	r17
 # size		r18
 # cnt		r19

 # This code runs at 4.8 cycles/limb on the 21064.  With infinite unrolling,
 # it would take 4 cycles/limb.  It should be possible to get down to 3
 # cycles/limb since both ldq and stq can be paired with the other used
 # instructions.  But there are many restrictions in the 21064 pipeline that
 # make it hard, if not impossible, to get down to 3 cycles/limb:

 # 1. ldq has a 3 cycle delay, srl and sll have a 2 cycle delay.
 # 2. Only aligned instruction pairs can be paired.
 # 3. The store buffer or silo might not be able to deal with the bandwidth.
	.set	noreorder		# keep the hand-scheduled instruction order
	.set	noat			# the assembler may not use $at as a temporary

 # ---------------------------------------------------------------------
 # __mpn_rshift (res_ptr, s1_ptr, size, cnt)

 # Shift the SIZE-limb number at S1_PTR right by CNT bits, store the
 # SIZE result limbs at RES_PTR, and return in $0 the bits shifted out
 # of the lowest limb, moved to the top of the 64-bit word.

 # Arguments:  $16 = res_ptr   $17 = s1_ptr   $18 = size   $19 = cnt
 # Result:     $0  = first source limb << (64 - cnt)
 # Registers written: $0-$8 and $16-$20.  No stack is used and the
 # routine makes no calls.

 # Preconditions, not checked here (they mirror the documented GMP
 # mpn_rshift contract; TODO confirm against the callers):
 #   size >= 1      -- the first limb is loaded unconditionally
 #   cnt in 1..63   -- cnt = 0 would OR each limb with its neighbour

 # The left-shift count is formed as 0 - cnt.  Alpha shift instructions
 # use only the low six bits of the count, so this acts as 64 - cnt.

 # Layout: a one-limb-per-pass loop handles (size - 1) mod 4 limbs, a
 # four-way unrolled loop handles the remaining multiple of four, and
 # the top result limb is produced by the final srl.

 # The instruction order is scheduled by hand for the 21064 pipeline;
 # reordering it changes the cycle count even where results stay equal.
 # ---------------------------------------------------------------------
.text
	.align	3
	.globl	__mpn_rshift
	.ent	__mpn_rshift
__mpn_rshift:
	.frame	$30,0,$26,0	# frame size 0; return address is in $26

	ldq	$4,0($17)	# load first limb
	addq	$17,8,$17	# s1_ptr now points at the second limb
	subq	$31,$19,$7	# $7 = -cnt, used as the left-shift count
	subq	$18,1,$18	# $18 = size - 1 = limbs that need a neighbour
	and	$18,4-1,$20	# number of limbs in first loop
	sll	$4,$7,$0	# compute function result

	beq	$20,.L0		# size - 1 is a multiple of 4: skip first loop
	subq	$18,$20,$18	# $18 = limbs left for the unrolled loop

	.align	3
.Loop0:
	ldq	$3,0($17)	# $3 = next (higher) source limb
	addq	$16,8,$16	# advance res_ptr early; the store uses -8
	addq	$17,8,$17	# advance s1_ptr
	subq	$20,1,$20	# one limb fewer in this loop
	srl	$4,$19,$5	# low part:  current limb >> cnt
	sll	$3,$7,$6	# high part: next limb << (64 - cnt)
	bis	$3,$3,$4	# next limb becomes the current limb
	bis	$5,$6,$8	# combine the two parts
	stq	$8,-8($16)	# store result limb
	bne	$20,.Loop0

.L0:	beq	$18,.Lend	# nothing left for the unrolled loop

 # Four limbs per pass.  $4 holds the current limb on entry and again on
 # exit; $3 and $4 alternate as current and next limb, so no register
 # copy is needed inside the loop.
	.align	3
.Loop:	ldq	$3,0($17)	# next limb 0
	addq	$16,32,$16	# advance res_ptr early; stores use -32..-8
	subq	$18,4,$18	# four limbs fewer remain
	srl	$4,$19,$5
	sll	$3,$7,$6

	ldq	$4,8($17)	# next limb 1
	srl	$3,$19,$1
	bis	$5,$6,$8
	stq	$8,-32($16)	# result limb 0
	sll	$4,$7,$2

	ldq	$3,16($17)	# next limb 2
	srl	$4,$19,$5
	bis	$1,$2,$8
	stq	$8,-24($16)	# result limb 1
	sll	$3,$7,$6

	ldq	$4,24($17)	# next limb 3; stays in $4 for the next pass
	srl	$3,$19,$1
	bis	$5,$6,$8
	stq	$8,-16($16)	# result limb 2
	sll	$4,$7,$2

	addq	$17,32,$17	# advance s1_ptr by four limbs
	bis	$1,$2,$8
	stq	$8,-8($16)	# result limb 3

	bgt	$18,.Loop

.Lend:	srl	$4,$19,$8	# top limb has no neighbour: zero-filled shift
	stq	$8,0($16)
	ret	$31,($26),1
	.end	__mpn_rshift