Blame mpn/ia64/divrem_1.asm

Packit 5c3484
dnl  IA-64 mpn_divrem_1 and mpn_preinv_divrem_1 -- Divide an mpn number by an
Packit 5c3484
dnl  unnormalized limb.
Packit 5c3484
Packit 5c3484
dnl  Contributed to the GNU project by Torbjorn Granlund.
Packit 5c3484
Packit 5c3484
dnl  Copyright 2002, 2004, 2005 Free Software Foundation, Inc.
Packit 5c3484
Packit 5c3484
dnl  This file is part of the GNU MP Library.
Packit 5c3484
dnl
Packit 5c3484
dnl  The GNU MP Library is free software; you can redistribute it and/or modify
Packit 5c3484
dnl  it under the terms of either:
Packit 5c3484
dnl
Packit 5c3484
dnl    * the GNU Lesser General Public License as published by the Free
Packit 5c3484
dnl      Software Foundation; either version 3 of the License, or (at your
Packit 5c3484
dnl      option) any later version.
Packit 5c3484
dnl
Packit 5c3484
dnl  or
Packit 5c3484
dnl
Packit 5c3484
dnl    * the GNU General Public License as published by the Free Software
Packit 5c3484
dnl      Foundation; either version 2 of the License, or (at your option) any
Packit 5c3484
dnl      later version.
Packit 5c3484
dnl
Packit 5c3484
dnl  or both in parallel, as here.
Packit 5c3484
dnl
Packit 5c3484
dnl  The GNU MP Library is distributed in the hope that it will be useful, but
Packit 5c3484
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
Packit 5c3484
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
Packit 5c3484
dnl  for more details.
Packit 5c3484
dnl
Packit 5c3484
dnl  You should have received copies of the GNU General Public License and the
Packit 5c3484
dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
Packit 5c3484
dnl  see https://www.gnu.org/licenses/.
Packit 5c3484
Packit 5c3484
include(`../config.m4')
Packit 5c3484
Packit 5c3484
Packit 5c3484
C         cycles/limb
Packit 5c3484
C Itanium:    40-42
Packit 5c3484
C Itanium 2:  29-30
Packit 5c3484
Packit 5c3484
C This was generated by gcc, then the loops were optimized.  The preinv entry
Packit 5c3484
C point was shoehorned into the file.  Lots of things outside the loops could
Packit 5c3484
C be streamlined.  It would probably be a good idea to merge the loops for
Packit 5c3484
C normalized and unnormalized divisor, since the shifting stuff is done for
Packit 5c3484
C free in parallel with other operations.  It would even be possible to merge
Packit 5c3484
C all loops, if the ld8 were made conditional.
Packit 5c3484
Packit 5c3484
C TODO
Packit 5c3484
C  * Consider delaying inversion for normalized mpn_divrem_1 entry till after
Packit 5c3484
C    computing leading limb.
Packit 5c3484
C  * Inline and interleave limb inversion code with loop setup code.
Packit 5c3484
Packit 5c3484
ASM_START()
Packit 5c3484
Packit 5c3484
C HP's assembler requires these declarations for importing mpn_invert_limb
Packit 5c3484
	.global	mpn_invert_limb
Packit 5c3484
	.type	mpn_invert_limb,@function
Packit 5c3484
Packit 5c3484
C INPUT PARAMETERS
Packit 5c3484
C rp    = r32
Packit 5c3484
C qxn   = r33
Packit 5c3484
C up    = r34
Packit 5c3484
C n     = r35
Packit 5c3484
C vl    = r36
Packit 5c3484
C vlinv = r37  (preinv only)
Packit 5c3484
C cnt = r38    (preinv only)
Packit 5c3484
Packit 5c3484
C mpn_preinv_divrem_1 entry point.  Takes the seven arguments listed under
C INPUT PARAMETERS, including a caller-supplied inverse (vlinv, r37) and
C count (cnt, r38), so mpn_invert_limb is not called on this path.  It sets
C up the registers consumed by the code after the mpn_divrem_1 entry point
C and branches there: .Lpn (top bit of vl set), .Lpu (top bit of vl clear)
C or .L435 (only fraction limbs remain).  ar.pfs, ar.lc and b0 are kept in
C r42, r44 and r41, the registers .Lret restores them from.
C NOTE(review): there is no n == 0 test on this path; up[n-1] is loaded
C unconditionally -- presumably callers guarantee n >= 1, confirm.
PROLOGUE(mpn_preinv_divrem_1)
Packit 5c3484
	.prologue
Packit 5c3484
	.save	ar.pfs, r42
Packit 5c3484
	alloc		r42 = ar.pfs, 7, 8, 1, 0
Packit 5c3484
	.save	ar.lc, r44
Packit 5c3484
	mov		r44 = ar.lc
Packit 5c3484
	.save	rp, r41
Packit 5c3484
	mov		r41 = b0
Packit 5c3484
	.body
Packit 5c3484
ifdef(`HAVE_ABI_32',
Packit 5c3484
`	addp4		r32 = 0, r32
Packit 5c3484
	sxt4		r33 = r33
Packit 5c3484
	addp4		r34 = 0, r34
Packit 5c3484
	sxt4		r35 = r35
Packit 5c3484
	;;
Packit 5c3484
')
Packit 5c3484
C r40 = cnt (the shared code below reads the count from r40).
C r34 = up + 8*n, then stepped back by 8 to address up[n-1].
	mov		r40 = r38
Packit 5c3484
	shladd		r34 = r35, 3, r34
Packit 5c3484
	;;
Packit 5c3484
	adds		r34 = -8, r34
Packit 5c3484
	;;
Packit 5c3484
C r39 = up[n-1]; the post-decrement leaves r34 addressing up[n-2].
	ld8		r39 = [r34], -8
Packit 5c3484
	;;
Packit 5c3484
Packit 5c3484
	add		r15 = r35, r33
Packit 5c3484
	;;
Packit 5c3484
C r8 = vlinv (the shared code copies the inverse from r8 into f6).
	mov		r8 = r37
Packit 5c3484
	shladd		r32 = r15, 3, r32	C r32 = rp + n + qxn
Packit 5c3484
C p8 = (0 <= vl) as a signed compare, i.e. the top bit of vl is clear.
	cmp.le		p8, p0 = 0, r36
Packit 5c3484
	;;
Packit 5c3484
	adds		r32 = -8, r32		C r32 = rp + n + qxn - 1
Packit 5c3484
C p6 = (vl <= up[n-1]) unsigned, p7 = the opposite.
	cmp.leu		p6, p7 = r36, r39
Packit 5c3484
   (p8)	br.cond.dpnt	.Lpunnorm
Packit 5c3484
	;;
Packit 5c3484
Packit 5c3484
C Top bit of vl set: the top quotient limb r15 is 1 when vl <= up[n-1],
C else 0; r38 (remainder) is up[n-1] reduced by vl in the former case.
   (p6)	addl		r15 = 1, r0
Packit 5c3484
   (p7)	mov		r15 = r0
Packit 5c3484
	;;
Packit 5c3484
   (p6)	sub		r38 = r39, r36
Packit 5c3484
   (p7)	mov		r38 = r39
Packit 5c3484
	st8		[r32] = r15, -8
Packit 5c3484
	adds		r35 = -2, r35		C un -= 2
Packit 5c3484
	br	.Lpn
Packit 5c3484
Packit 5c3484
C Top bit of vl clear.  p6/p7 still hold the vl <= up[n-1] result.
C If p6: undo the post-decrement of r34, start with r = 0 and let .Lpu
C process all n limbs.  Otherwise up[n-1] << cnt becomes the initial
C remainder, a zero top quotient limb is stored and n is decremented.
.Lpunnorm:
Packit 5c3484
   (p6)	add		r34 = 8, r34
Packit 5c3484
	mov		r38 = 0			C r = 0
Packit 5c3484
	shl		r36 = r36, r40
Packit 5c3484
   (p6)	br.cond.dptk	.Lpu
Packit 5c3484
	;;
Packit 5c3484
	shl		r38 = r39, r40		C r = ahigh << cnt
Packit 5c3484
C p8 = (n != 1), tested on n before the decrement below.
	cmp.ne		p8, p0 = 1, r35
Packit 5c3484
	st8		[r32] = r0, -8
Packit 5c3484
	adds		r35 = -1, r35		C un--
Packit 5c3484
   (p8)	br.cond.dpnt	.Lpu
Packit 5c3484
Packit 5c3484
C n was 1: no integer limbs remain.  Load f6 = inverse and f12 = 1 as the
C shared code would, then continue with the fraction limbs at .L435.
	mov		r23 = 1
Packit 5c3484
	;;
Packit 5c3484
	setf.sig	f6 = r8
Packit 5c3484
	setf.sig	f12 = r23
Packit 5c3484
	br		.L435
Packit 5c3484
EPILOGUE()
Packit 5c3484
Packit 5c3484
Packit 5c3484
C mpn_divrem_1 entry point.  Takes the first five arguments listed under
C INPUT PARAMETERS (rp, qxn, up, n, vl).  Returns 0 at once when
C n + qxn == 0.  Otherwise r32 is set to address the top quotient limb and
C the code splits on the top bit of vl: clear goes to .Lunnorm, set
C continues here.  For n != 0 this block loads up[n-1], stores the top
C quotient limb (1 if vl <= up[n-1], else 0), leaves the reduced limb in
C r38 as the running remainder, and falls through to .L179.
C ar.pfs, ar.lc and b0 are kept in r42, r44 and r41 for .Lret.
PROLOGUE(mpn_divrem_1)
Packit 5c3484
	.prologue
Packit 5c3484
	.save	ar.pfs, r42
Packit 5c3484
	alloc		r42 = ar.pfs, 5, 8, 1, 0
Packit 5c3484
	.save	ar.lc, r44
Packit 5c3484
	mov		r44 = ar.lc
Packit 5c3484
	.save	rp, r41
Packit 5c3484
	mov		r41 = b0
Packit 5c3484
	.body
Packit 5c3484
ifdef(`HAVE_ABI_32',
Packit 5c3484
`	addp4		r32 = 0, r32
Packit 5c3484
	sxt4		r33 = r33
Packit 5c3484
	addp4		r34 = 0, r34
Packit 5c3484
	sxt4		r35 = r35
Packit 5c3484
	;;
Packit 5c3484
')
Packit 5c3484
C r38 = 0 (initial remainder); r15 = n + qxn (total quotient limbs).
	mov		r38 = r0
Packit 5c3484
	add		r15 = r35, r33
Packit 5c3484
	;;
Packit 5c3484
	cmp.ne		p6, p7 = 0, r15
Packit 5c3484
	;;
Packit 5c3484
C Nothing to produce when n + qxn == 0: return 0.
   (p7)	mov		r8 = r0
Packit 5c3484
   (p7)	br.cond.dpnt	.Lret
Packit 5c3484
	shladd		r14 = r15, 3, r32	C r14 = rp + n + qxn
Packit 5c3484
C p6 = (0 <= vl) as a signed compare, i.e. the top bit of vl is clear.
	cmp.le		p6, p7 = 0, r36
Packit 5c3484
	;;
Packit 5c3484
	adds		r32 = -8, r14		C r32 = rp + n + qxn - 1
Packit 5c3484
   (p6)	br.cond.dpnt	.Lunnorm
Packit 5c3484
C n == 0: only fraction limbs; skip the top-limb step.
	cmp.eq		p6, p7 = 0, r35
Packit 5c3484
   (p6)	br.cond.dpnt	.L179
Packit 5c3484
C r14 = address of up[n-1]; r35 = n - 1.
	shladd		r14 = r35, 3, r34
Packit 5c3484
	;;
Packit 5c3484
	adds		r14 = -8, r14
Packit 5c3484
	adds		r35 = -1, r35
Packit 5c3484
	;;
Packit 5c3484
	ld8		r38 = [r14]
Packit 5c3484
	;;
Packit 5c3484
C p6 = (vl <= up[n-1]) unsigned: top quotient limb is 1, else 0.
	cmp.leu		p6, p7 = r36, r38
Packit 5c3484
	;;
Packit 5c3484
   (p6)	addl		r15 = 1, r0
Packit 5c3484
   (p7)	mov		r15 = r0
Packit 5c3484
	;;
Packit 5c3484
	st8		[r32] = r15, -8
Packit 5c3484
  (p6)	sub		r38 = r38, r36
Packit 5c3484
Packit 5c3484
C .L179: obtain the inverse for the divisor (top bit set on this path) by
C calling mpn_invert_limb with vl in r45, the single output register of the
C frame allocated at the mpn_divrem_1 entry.  The result is read from r8.
C r35 is decremented once more and r34 is advanced to up + 8*r35, the first
C limb the loop will load.
.L179:
Packit 5c3484
	mov		r45 = r36
Packit 5c3484
	adds		r35 = -1, r35
Packit 5c3484
	br.call.sptk.many b0 = mpn_invert_limb
Packit 5c3484
	;;
Packit 5c3484
	shladd		r34 = r35, 3, r34
Packit 5c3484
C .Lpn: also entered from mpn_preinv_divrem_1.  Expects r8 = inverse,
C r36 = divisor, r38 = remainder, r34 = next limb address and r35 = loop
C count for ar.lc (br.cloop runs the body r35 + 1 times).  Sets f6 =
C inverse, f12 = 1, f10 = divisor, f7 = remainder and r40 = 0 (no shift
C to undo at .Lend4).  A negative r35 skips straight to .L435.
.Lpn:
Packit 5c3484
	mov		r23 = 1
Packit 5c3484
	;;
Packit 5c3484
	setf.sig	f6 = r8
Packit 5c3484
	setf.sig	f12 = r23
Packit 5c3484
	cmp.le		p6, p7 = 0, r35
Packit 5c3484
	mov		r40 = 0
Packit 5c3484
   (p7)	br.cond.dpnt	.L435
Packit 5c3484
	setf.sig	f10 = r36
Packit 5c3484
	mov		ar.lc = r35
Packit 5c3484
	setf.sig	f7 = r38
Packit 5c3484
	;;
Packit 5c3484
C NOTE(review): r28 is written again inside .Loop1 before any read that is
C visible in this file, so this value looks unused -- confirm.
	sub		r28 = -1, r36
Packit 5c3484
C Develop quotient limbs for normalized divisor
C Each pass loads one dividend limb n0 into r20 (r34 steps downward) and
C stores one quotient limb through r32 (also stepping downward).
C On entry: f6 = inverse, f10/r36 = divisor d, f12 = 1, f7/r38 = nh (the
C running remainder), ar.lc = pass count - 1.
C The estimate is q = hi(nh * inverse) + nh; the two-limb product q * d is
C subtracted from nh:n0 (r16 = high part, r15 = low part), then up to three
C predicated corrections (q += 1 together with r15 -= d) are applied.
C The remainder is written back to both r38 and f7 for the next pass.
C The trailing "C nn" tags are presumably cycle numbers -- confirm.
Packit 5c3484
.Loop1:		C 00				C q=r18 nh=r38/f7
Packit 5c3484
	ld8		r20 = [r34], -8
Packit 5c3484
	xma.hu		f11 = f7, f6, f0
Packit 5c3484
	;;	C 04
Packit 5c3484
	xma.l		f8 = f11, f12, f7	C q = q + nh
Packit 5c3484
	;;	C 08
Packit 5c3484
	getf.sig	r18 = f8
Packit 5c3484
C f9 = high half and f8 = low half of q * d.
	xma.hu		f9 = f8, f10, f0
Packit 5c3484
	xma.l		f8 = f8, f10, f0
Packit 5c3484
	;;	C 12
Packit 5c3484
	getf.sig	r16 = f9
Packit 5c3484
		C 13
Packit 5c3484
	getf.sig	r15 = f8
Packit 5c3484
	;;	C 18
Packit 5c3484
C p6 = borrow out of the low-limb subtraction n0 - lo(q * d).
	cmp.ltu		p6, p7 = r20, r15
Packit 5c3484
	sub		r15 = r20, r15
Packit 5c3484
	sub		r16 = r38, r16
Packit 5c3484
	;;	C 19
Packit 5c3484
C p8 = high remainder limb is nonzero once the borrow is accounted for.
C The last compare of the group then forces p6 = 0 and p7 = 1.
   (p6)	cmp.ne		p8, p9 = 1, r16		C is rH != 0?
Packit 5c3484
   (p7)	cmp.ne		p8, p9 = 0, r16		C is rH != 0?
Packit 5c3484
   (p6)	add		r16 = -1, r16
Packit 5c3484
   (p0)	cmp.ne.unc	p6, p7 = r0, r0
Packit 5c3484
	;;	C 20
Packit 5c3484
   (p8)	cmp.ltu		p6, p7 = r15, r36
Packit 5c3484
   (p8)	sub		r15 = r15, r36
Packit 5c3484
   (p8)	add		r18 = 1, r18		C q = q + 1;	done if: rH > 0
Packit 5c3484
	;;	C 21
Packit 5c3484
	.pred.rel "mutex",p6,p7
Packit 5c3484
   (p6)	cmp.ne		p8, p9 = 1, r16		C is rH != 0 still?
Packit 5c3484
   (p7)	cmp.ne		p8, p9 = 0, r16		C is rH != 0 still?
Packit 5c3484
	cmp.ltu		p6, p7 = r15, r36	C speculative
Packit 5c3484
	sub		r28 = r15, r36		C speculative, just for cmp
Packit 5c3484
	;;	C 22
Packit 5c3484
   (p8)	cmp.ltu		p6, p7 = r28, r36	C redo last cmp if needed
Packit 5c3484
   (p8)	mov		r15 = r28
Packit 5c3484
   (p8)	add		r18 = 1, r18		C q = q + 1;	done if: rH > 0
Packit 5c3484
	;;	C 23
Packit 5c3484
C p6: r15 < d already, so it is the remainder; p7: one more subtraction.
   (p6)	setf.sig	f7 = r15
Packit 5c3484
   (p7)	sub		r15 = r15, r36
Packit 5c3484
   (p7)	add		r18 = 1, r18		C q = q + 1;	done if: rH > 0
Packit 5c3484
	;;	C 24
Packit 5c3484
   (p7)	setf.sig	f7 = r15
Packit 5c3484
	st8		[r32] = r18, -8
Packit 5c3484
	mov		r38 = r15
Packit 5c3484
	br.cloop.dptk	.Loop1
Packit 5c3484
		C 29/30
Packit 5c3484
C Integer limbs done; continue with the fraction limbs.
	br.sptk		.L435
Packit 5c3484
	;;
Packit 5c3484
C .Lunnorm: mpn_divrem_1 path for a divisor whose top bit is clear.
C On entry r15 = n + qxn, r32 addresses the top quotient limb, r38 = 0.
C  - n == 0, or vl <= up[n-1]: go to .L322 with r38 unchanged and r35 = n.
C  - up[n-1] < vl: the top quotient limb is 0 and up[n-1] becomes the
C    remainder; if that was the only quotient limb (n + qxn == 1) return
C    it at once, otherwise decrement n and fall into .L322.
C .L322 computes the left-shift count r40, shifts divisor and remainder by
C it and calls mpn_invert_limb on the shifted divisor (passed in r45).
.Lunnorm:
Packit 5c3484
C r16 = vl with its bytes reversed (consumed at .L322).
	mux1		r16 = r36, @rev
Packit 5c3484
	cmp.eq		p6, p7 = 0, r35
Packit 5c3484
   (p6)	br.cond.dpnt	.L322
Packit 5c3484
C r34 = address of up[n-1]; r39 = up[n-1].
	shladd		r34 = r35, 3, r34
Packit 5c3484
	;;
Packit 5c3484
	adds		r34 = -8, r34
Packit 5c3484
	;;
Packit 5c3484
	ld8		r39 = [r34]
Packit 5c3484
	;;
Packit 5c3484
	cmp.leu		p6, p7 = r36, r39
Packit 5c3484
   (p6)	br.cond.dptk	.L322
Packit 5c3484
	adds		r34 = -8, r34
Packit 5c3484
	;;
Packit 5c3484
	mov		r38 = r39
Packit 5c3484
	;;
Packit 5c3484
C p7 = (n + qxn == 1): the zero limb stored here is the whole quotient.
	cmp.ne		p6, p7 = 1, r15
Packit 5c3484
	st8		[r32] = r0, -8
Packit 5c3484
	;;
Packit 5c3484
   (p7)	mov		r8 = r38
Packit 5c3484
   (p7)	br.cond.dpnt	.Lret
Packit 5c3484
	adds		r35 = -1, r35
Packit 5c3484
C Shift count computation.  With x = byte-reversed vl, x | -x is nonzero
C in every byte from the lowest set bit upward; czx1.l then yields a byte
C index from which r16 = 8*index - 8 is formed.  Presumably r16 is the bit
C offset of the highest nonzero byte of vl -- this relies on the czx1.l
C index convention, confirm against the instruction reference.
C NOTE(review): assumes vl != 0.
.L322:
Packit 5c3484
	sub		r14 = r0, r16
Packit 5c3484
	;;
Packit 5c3484
	or		r14 = r16, r14
Packit 5c3484
	;;
Packit 5c3484
	mov		r16 = -8
Packit 5c3484
	czx1.l		r14 = r14
Packit 5c3484
	;;
Packit 5c3484
	shladd		r16 = r14, 3, r16
Packit 5c3484
	;;
Packit 5c3484
C r14 = vl >> r16; the steps below narrow it by 4 bits, then 2 bits,
C adding the same amounts to r16, and finally test bit 1.
	shr.u		r14 = r36, r16
Packit 5c3484
	;;
Packit 5c3484
	cmp.geu		p6, p7 = 15, r14
Packit 5c3484
	;;
Packit 5c3484
   (p7)	shr.u		r14 = r14, 4
Packit 5c3484
   (p7)	adds		r16 = 4, r16
Packit 5c3484
	;;
Packit 5c3484
	cmp.geu		p6, p7 = 3, r14
Packit 5c3484
	;;
Packit 5c3484
   (p7)	shr.u		r14 = r14, 2
Packit 5c3484
   (p7)	adds		r16 = 2, r16
Packit 5c3484
	;;
Packit 5c3484
	tbit.nz		p6, p7 = r14, 1
Packit 5c3484
	;;
Packit 5c3484
C r40 = 62 - r16 when bit 1 of r14 is set, else 63 - r16.
	.pred.rel "mutex",p6,p7
Packit 5c3484
  (p6)	sub		r40 = 62, r16
Packit 5c3484
  (p7)	sub		r40 = 63, r16
Packit 5c3484
	;;
Packit 5c3484
C r45 = shifted divisor (call argument); r36 and r38 are shifted in place.
	shl		r45 = r36, r40
Packit 5c3484
	shl		r36 = r36, r40
Packit 5c3484
	shl		r38 = r38, r40
Packit 5c3484
	br.call.sptk.many b0 = mpn_invert_limb
Packit 5c3484
	;;
Packit 5c3484
C .Lpu: setup for the loop used when the divisor had to be shifted.
C Reached by fall-through after the mpn_invert_limb call above and by
C branches from mpn_preinv_divrem_1.  Expects r8 = inverse, r36 = shifted
C divisor, r38 = shifted partial remainder, r40 = shift count, r34 = address
C of the next dividend limb, r35 = number of dividend limbs left.
C With r35 == 0 only fraction limbs remain (.L435).  Otherwise the first
C limb is loaded into r39 and its top r40 bits are merged into r38.  With
C one limb left, control goes to .Lend3; else .Loop3 runs r35 - 1 passes.
.Lpu:
Packit 5c3484
	mov		r23 = 1
Packit 5c3484
	;;
Packit 5c3484
C f6 = inverse, f12 = 1 (same roles as in the other loops).
	setf.sig	f6 = r8
Packit 5c3484
	setf.sig	f12 = r23
Packit 5c3484
	cmp.eq		p6, p7 = 0, r35
Packit 5c3484
   (p6)	br.cond.dpnt	.L435
Packit 5c3484
C r16 = 64 - r40 (right-shift count); r35 = limbs left - 2 for ar.lc.
	sub		r16 = 64, r40
Packit 5c3484
	adds		r35 = -2, r35
Packit 5c3484
	;;
Packit 5c3484
	ld8		r39 = [r34], -8
Packit 5c3484
	cmp.le		p6, p7 = 0, r35
Packit 5c3484
	;;
Packit 5c3484
	shr.u		r14 = r39, r16
Packit 5c3484
	;;
Packit 5c3484
	or		r38 = r14, r38
Packit 5c3484
   (p7)	br.cond.dpnt	.Lend3
Packit 5c3484
	;;
Packit 5c3484
C r22 keeps the right-shift count; r16 is reused inside the loop.
	mov		r22 = r16
Packit 5c3484
	setf.sig	f10 = r36
Packit 5c3484
	setf.sig	f7 = r38
Packit 5c3484
	mov		ar.lc = r35
Packit 5c3484
	;;
Packit 5c3484
C Develop quotient limbs for unnormalized divisor
C Same division step as .Loop1, except that the low dividend limb is
C assembled on the fly: r20 = (r39 << r40) | (r14 >> r22), where r14 is the
C limb just loaded, r39 the limb loaded on the previous pass and
C r22 = 64 - r40.  r39 is updated to r14 at the end of each pass.
C On entry: f6 = inverse, f10/r36 = shifted divisor, f12 = 1, f7/r38 = nh.
Packit 5c3484
.Loop3:
Packit 5c3484
	ld8		r14 = [r34], -8
Packit 5c3484
	xma.hu		f11 = f7, f6, f0
Packit 5c3484
	;;
Packit 5c3484
	xma.l		f8 = f11, f12, f7	C q = q + nh
Packit 5c3484
	;;
Packit 5c3484
	getf.sig	r18 = f8
Packit 5c3484
	xma.hu		f9 = f8, f10, f0
Packit 5c3484
	shl		r20 = r39, r40
Packit 5c3484
	xma.l		f8 = f8, f10, f0
Packit 5c3484
	shr.u		r24 = r14, r22
Packit 5c3484
	;;
Packit 5c3484
	getf.sig	r16 = f9
Packit 5c3484
	getf.sig	r15 = f8
Packit 5c3484
	or		r20 = r24, r20
Packit 5c3484
	;;
Packit 5c3484
C p6 = borrow out of the low-limb subtraction n0 - lo(q * d).
	cmp.ltu		p6, p7 = r20, r15
Packit 5c3484
	sub		r15 = r20, r15
Packit 5c3484
	sub		r16 = r38, r16
Packit 5c3484
	;;
Packit 5c3484
C p8 = high remainder limb is nonzero once the borrow is accounted for.
C The last compare of the group then forces p6 = 0 and p7 = 1.
   (p6)	cmp.ne		p8, p9 = 1, r16		C is rH != 0?
Packit 5c3484
   (p7)	cmp.ne		p8, p9 = 0, r16		C is rH != 0?
Packit 5c3484
   (p6)	add		r16 = -1, r16
Packit 5c3484
   (p0)	cmp.ne.unc	p6, p7 = r0, r0
Packit 5c3484
	;;
Packit 5c3484
   (p8)	cmp.ltu		p6, p7 = r15, r36
Packit 5c3484
   (p8)	sub		r15 = r15, r36
Packit 5c3484
   (p8)	add		r18 = 1, r18		C q = q + 1;	done if: rH > 0
Packit 5c3484
	;;
Packit 5c3484
	.pred.rel "mutex",p6,p7
Packit 5c3484
   (p6)	cmp.ne		p8, p9 = 1, r16		C is rH != 0 still?
Packit 5c3484
   (p7)	cmp.ne		p8, p9 = 0, r16		C is rH != 0 still?
Packit 5c3484
	cmp.ltu		p6, p7 = r15, r36	C speculative
Packit 5c3484
	sub		r28 = r15, r36		C speculative, just for cmp
Packit 5c3484
	;;
Packit 5c3484
   (p8)	cmp.ltu		p6, p7 = r28, r36	C redo last cmp if needed
Packit 5c3484
   (p8)	mov		r15 = r28
Packit 5c3484
   (p8)	add		r18 = 1, r18		C q = q + 1;	done if: rH > 0
Packit 5c3484
	;;
Packit 5c3484
C p6: r15 < d already, so it is the remainder; p7: one more subtraction.
   (p6)	setf.sig	f7 = r15
Packit 5c3484
   (p7)	sub		r15 = r15, r36
Packit 5c3484
   (p7)	add		r18 = 1, r18		C q = q + 1;	done if: rH > 0
Packit 5c3484
	;;
Packit 5c3484
   (p7)	setf.sig	f7 = r15
Packit 5c3484
	st8		[r32] = r18, -8
Packit 5c3484
	mov		r39 = r14
Packit 5c3484
	mov		r38 = r15
Packit 5c3484
	br.cloop.dptk	.Loop3
Packit 5c3484
	;;
Packit 5c3484
C .Lend3: one more division step for the last integer limb when the
C divisor was shifted.  No further limb is loaded, so the low dividend
C limb is just r39 << r40.  Reached by fall-through from .Loop3 and
C directly from .Lpu when a single limb is left.  Expects f6 = inverse,
C f12 = 1, r36 = shifted divisor, r38 = nh, r39 = last limb loaded.
C Stores one quotient limb, leaves the remainder in r38 and falls through
C to .L435.
.Lend3:
Packit 5c3484
	setf.sig	f10 = r36
Packit 5c3484
	setf.sig	f7 = r38
Packit 5c3484
	;;
Packit 5c3484
	xma.hu		f11 = f7, f6, f0
Packit 5c3484
	;;
Packit 5c3484
	xma.l		f8 = f11, f12, f7	C q = q + nh
Packit 5c3484
	;;
Packit 5c3484
	getf.sig	r18 = f8
Packit 5c3484
	xma.hu		f9 = f8, f10, f0
Packit 5c3484
	shl		r20 = r39, r40
Packit 5c3484
	xma.l		f8 = f8, f10, f0
Packit 5c3484
	;;
Packit 5c3484
	getf.sig	r16 = f9
Packit 5c3484
	getf.sig	r15 = f8
Packit 5c3484
	;;
Packit 5c3484
C p6 = borrow out of the low-limb subtraction n0 - lo(q * d).
	cmp.ltu		p6, p7 = r20, r15
Packit 5c3484
	sub		r15 = r20, r15
Packit 5c3484
	sub		r16 = r38, r16
Packit 5c3484
	;;
Packit 5c3484
C p8 = high remainder limb is nonzero once the borrow is accounted for.
C The last compare of the group then forces p6 = 0 and p7 = 1.
   (p6)	cmp.ne		p8, p9 = 1, r16		C is rH != 0?
Packit 5c3484
   (p7)	cmp.ne		p8, p9 = 0, r16		C is rH != 0?
Packit 5c3484
   (p6)	add		r16 = -1, r16
Packit 5c3484
   (p0)	cmp.ne.unc	p6, p7 = r0, r0
Packit 5c3484
	;;
Packit 5c3484
   (p8)	cmp.ltu		p6, p7 = r15, r36
Packit 5c3484
   (p8)	sub		r15 = r15, r36
Packit 5c3484
   (p8)	add		r18 = 1, r18		C q = q + 1;	done if: rH > 0
Packit 5c3484
	;;
Packit 5c3484
	.pred.rel "mutex",p6,p7
Packit 5c3484
   (p6)	cmp.ne		p8, p9 = 1, r16		C is rH != 0 still?
Packit 5c3484
   (p7)	cmp.ne		p8, p9 = 0, r16		C is rH != 0 still?
Packit 5c3484
	;;
Packit 5c3484
C Unlike the loops, the last two corrections are done without speculation.
   (p8)	sub		r15 = r15, r36
Packit 5c3484
   (p8)	add		r18 = 1, r18		C q = q + 1;	done if: rH > 0
Packit 5c3484
	;;
Packit 5c3484
	cmp.ltu		p6, p7 = r15, r36
Packit 5c3484
	;;
Packit 5c3484
   (p7)	sub		r15 = r15, r36
Packit 5c3484
   (p7)	add		r18 = 1, r18		C q = q + 1;	done if: rH > 0
Packit 5c3484
	;;
Packit 5c3484
	st8		[r32] = r18, -8
Packit 5c3484
	mov		r38 = r15
Packit 5c3484
C .L435: develop the qxn fraction quotient limbs.  Shared by every path of
C both entry points.  Expects f6 = inverse, f12 = 1, r36 = divisor (shifted
C when r40 != 0), r38 = remainder so far, r32 = address of the next
C quotient limb to store.  When qxn < 1 (signed compare) the loop is
C skipped.  .Loop4 is the same division step as the other loops with the
C low dividend limb fixed at 0.
.L435:
Packit 5c3484
C r35 = qxn - 1 for ar.lc, giving qxn passes.
	adds		r35 = -1, r33
Packit 5c3484
	cmp.le		p6, p7 = 1, r33
Packit 5c3484
   (p7)	br.cond.dpnt	.Lend4
Packit 5c3484
	;;
Packit 5c3484
	setf.sig	f7 = r38
Packit 5c3484
	setf.sig	f10 = r36
Packit 5c3484
	mov		ar.lc = r35
Packit 5c3484
	;;
Packit 5c3484
.Loop4:
Packit 5c3484
	xma.hu		f11 = f7, f6, f0
Packit 5c3484
	;;
Packit 5c3484
	xma.l		f8 = f11, f12, f7	C q = q + nh
Packit 5c3484
	;;
Packit 5c3484
	getf.sig	r18 = f8
Packit 5c3484
	xma.hu		f9 = f8, f10, f0
Packit 5c3484
	xma.l		f8 = f8, f10, f0
Packit 5c3484
	;;
Packit 5c3484
	getf.sig	r16 = f9
Packit 5c3484
	getf.sig	r15 = f8
Packit 5c3484
	;;
Packit 5c3484
C Low dividend limb is 0: p6 = borrow (lo(q * d) != 0), r15 = 0 - lo(q * d).
	cmp.ltu		p6, p7 = 0, r15
Packit 5c3484
	sub		r15 = 0, r15
Packit 5c3484
	sub		r16 = r38, r16
Packit 5c3484
	;;
Packit 5c3484
C p8 = high remainder limb is nonzero once the borrow is accounted for.
C The last compare of the group then forces p6 = 0 and p7 = 1.
   (p6)	cmp.ne		p8, p9 = 1, r16		C is rH != 0?
Packit 5c3484
   (p7)	cmp.ne		p8, p9 = 0, r16		C is rH != 0?
Packit 5c3484
   (p6)	add		r16 = -1, r16
Packit 5c3484
   (p0)	cmp.ne.unc	p6, p7 = r0, r0
Packit 5c3484
	;;
Packit 5c3484
   (p8)	cmp.ltu		p6, p7 = r15, r36
Packit 5c3484
   (p8)	sub		r15 = r15, r36
Packit 5c3484
   (p8)	add		r18 = 1, r18		C q = q + 1;	done if: rH > 0
Packit 5c3484
	;;
Packit 5c3484
	.pred.rel "mutex",p6,p7
Packit 5c3484
   (p6)	cmp.ne		p8, p9 = 1, r16		C is rH != 0 still?
Packit 5c3484
   (p7)	cmp.ne		p8, p9 = 0, r16		C is rH != 0 still?
Packit 5c3484
	cmp.ltu		p6, p7 = r15, r36	C speculative
Packit 5c3484
	sub		r28 = r15, r36		C speculative, just for cmp
Packit 5c3484
	;;
Packit 5c3484
   (p8)	cmp.ltu		p6, p7 = r28, r36	C redo last cmp if needed
Packit 5c3484
   (p8)	mov		r15 = r28
Packit 5c3484
   (p8)	add		r18 = 1, r18		C q = q + 1;	done if: rH > 0
Packit 5c3484
	;;
Packit 5c3484
C p6: r15 < d already, so it is the remainder; p7: one more subtraction.
   (p6)	setf.sig	f7 = r15
Packit 5c3484
   (p7)	sub		r15 = r15, r36
Packit 5c3484
   (p7)	add		r18 = 1, r18		C q = q + 1;	done if: rH > 0
Packit 5c3484
	;;
Packit 5c3484
   (p7)	setf.sig	f7 = r15
Packit 5c3484
	st8		[r32] = r18, -8
Packit 5c3484
	mov		r38 = r15
Packit 5c3484
	br.cloop.dptk	.Loop4
Packit 5c3484
	;;
Packit 5c3484
C .Lend4: the return value r8 is the remainder r38 shifted right by r40,
C which undoes the left shift applied to a divisor whose top bit was clear
C (r40 is set to 0 at .Lpn on the other paths).
.Lend4:
Packit 5c3484
	shr.u		r8 = r38, r40
Packit 5c3484
C .Lret: common exit, also reached directly by the early returns that have
C already set r8.  Restores ar.pfs, ar.lc and b0 from r42, r44 and r41,
C where both entry points saved them.
.Lret:
Packit 5c3484
	mov		ar.pfs = r42
Packit 5c3484
	mov		ar.lc = r44
Packit 5c3484
	mov		b0 = r41
Packit 5c3484
	br.ret.sptk.many b0
Packit 5c3484
EPILOGUE()
Packit 5c3484
ASM_END()