Blame mpn/ia64/submul_1.asm

Packit 5c3484
dnl  IA-64 mpn_submul_1 -- Multiply a limb vector with a limb and subtract the
Packit 5c3484
dnl  result from a second limb vector.
Packit 5c3484
Packit 5c3484
dnl  Contributed to the GNU project by Torbjorn Granlund.
Packit 5c3484
Packit 5c3484
dnl  Copyright 2000-2004 Free Software Foundation, Inc.
Packit 5c3484
Packit 5c3484
dnl  This file is part of the GNU MP Library.
Packit 5c3484
dnl
Packit 5c3484
dnl  The GNU MP Library is free software; you can redistribute it and/or modify
Packit 5c3484
dnl  it under the terms of either:
Packit 5c3484
dnl
Packit 5c3484
dnl    * the GNU Lesser General Public License as published by the Free
Packit 5c3484
dnl      Software Foundation; either version 3 of the License, or (at your
Packit 5c3484
dnl      option) any later version.
Packit 5c3484
dnl
Packit 5c3484
dnl  or
Packit 5c3484
dnl
Packit 5c3484
dnl    * the GNU General Public License as published by the Free Software
Packit 5c3484
dnl      Foundation; either version 2 of the License, or (at your option) any
Packit 5c3484
dnl      later version.
Packit 5c3484
dnl
Packit 5c3484
dnl  or both in parallel, as here.
Packit 5c3484
dnl
Packit 5c3484
dnl  The GNU MP Library is distributed in the hope that it will be useful, but
Packit 5c3484
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
Packit 5c3484
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
Packit 5c3484
dnl  for more details.
Packit 5c3484
dnl
Packit 5c3484
dnl  You should have received copies of the GNU General Public License and the
Packit 5c3484
dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
Packit 5c3484
dnl  see https://www.gnu.org/licenses/.
Packit 5c3484
Packit 5c3484
include(`../config.m4')
Packit 5c3484
Packit 5c3484
C         cycles/limb
Packit 5c3484
C Itanium:    4.0
Packit 5c3484
C Itanium 2:  2.25 (alignment dependent, sometimes it seems to need 3 c/l)
Packit 5c3484
Packit 5c3484
C TODO
Packit 5c3484
C  * Optimize feed-in and wind-down code, both for speed and code size.
Packit 5c3484
C  * Handle low limb input and results specially, using a common stf8 in the
Packit 5c3484
C    epilogue.
Packit 5c3484
C  * Delay r8, r10 initialization, put cmp-p6 in 1st bundle and br .Ldone in
Packit 5c3484
C    2nd bundle.  This will allow the bbb bundle to be one cycle earlier and
Packit 5c3484
C    save a cycle.
Packit 5c3484
Packit 5c3484
C INPUT PARAMETERS
C   rp  (r32)  limb vector the product is subtracted from; the result limbs
C              are stored back through a copy of this pointer
C   up  (r33)  limb vector that is multiplied by vl
C   n   (r34)  limb count; determines the loop count and the n mod 4 dispatch
C   vl  (r35)  the single limb multiplier
Packit 5c3484
define(`rp', `r32')
Packit 5c3484
define(`up', `r33')
Packit 5c3484
define(`n',  `r34')
Packit 5c3484
define(`vl', `r35')
Packit 5c3484
Packit 5c3484
C mpn_submul_1 entry: set up pointers and the negated multiplier, start the
C first loads, then dispatch on n mod 4 to one of four feed-in sequences.
C
C Method, as read from the code below: vl is negated modulo 2^64, so every
C xma.l/xma.hu pair forms up[i] * (2^64 - vl) + rp[i] = 2^64*hi + lo.
C Rearranged, rp[i] - up[i]*vl = lo - 2^64 * (up[i] - hi), i.e. lo is the
C result limb before the incoming borrow is applied and up[i] - hi is the
C borrow limb handed to the next position.  The identity needs vl != 0
C (otherwise the negated value is 0 rather than 2^64 - vl), which is why
C vl == 0 leaves early through .Ldone.
C
C NOTE(review): n = 0 is not special-cased; r19 would become (0 - 1) >> 2
C taken unsigned.  Presumably callers guarantee n >= 1 -- confirm.
ASM_START()
Packit 5c3484
PROLOGUE(mpn_submul_1)
Packit 5c3484
	.prologue
Packit 5c3484
C Unwind annotation: ar.lc is preserved in r2 (see the mov r2 = ar.lc below).
	.save	ar.lc, r2
Packit 5c3484
	.body
Packit 5c3484
Packit 5c3484
C 32-bit ABI only: addp4 the two pointers and zero-extend n with zxt4.
ifdef(`HAVE_ABI_32',
Packit 5c3484
`	addp4		rp = 0, rp		C M I
Packit 5c3484
	addp4		up = 0, up		C M I
Packit 5c3484
	zxt4		n = n			C I
Packit 5c3484
	;;
Packit 5c3484
')
Packit 5c3484
C r10 = store pointer (copy of rp), r9 = integer-load pointer (copy of up).
C rp and up themselves are advanced by the ldf8 loads feeding the multiplier.
{.mmi
Packit 5c3484
	mov		r10 = rp		C M I
Packit 5c3484
	mov		r9 = up			C M I
Packit 5c3484
	sub		vl = r0, vl		C M I	negate vl
Packit 5c3484
}
Packit 5c3484
C Load the first limb of each vector (post-incrementing the pointers by 8)
C and start deriving the loop count from n - 1.
{.mmi
Packit 5c3484
	ldf8		f8 = [rp], 8		C M
Packit 5c3484
	ldf8		f7 = [up], 8		C M
Packit 5c3484
	add		r19 = -1, n		C M I	n - 1
Packit 5c3484
	;;
Packit 5c3484
}
Packit 5c3484
C p6 = (negated vl is zero); r8 = running borrow limb, initially 0;
C the incoming ar.lc is saved in r2.
{.mmi
Packit 5c3484
	cmp.eq		p6, p0 = 0, vl		C M I
Packit 5c3484
	mov		r8 = 0			C M I	zero cylimb
Packit 5c3484
	mov		r2 = ar.lc		C I0
Packit 5c3484
}
Packit 5c3484
C f6 = negated vl for the xma instructions; r14 = n mod 4;
C r19 = (n - 1) / 4, which becomes the loop count.
{.mmi
Packit 5c3484
	setf.sig	f6 = vl			C M2 M3
Packit 5c3484
	and		r14 = 3, n		C M I
Packit 5c3484
	shr.u		r19 = r19, 2		C I0
Packit 5c3484
	;;
Packit 5c3484
}
Packit 5c3484
C p10 = (n mod 4 == 0); leave early when vl == 0.
{.mmb
Packit 5c3484
	nop		0
Packit 5c3484
	cmp.eq		p10, p0 = 0, r14	C M I
Packit 5c3484
   (p6)	br.spnt		.Ldone			C B	vl == 0
Packit 5c3484
}
Packit 5c3484
C p11 = (n mod 4 == 2), p12 = (n mod 4 == 3); install the loop count.
{.mmi
Packit 5c3484
	cmp.eq		p11, p0 = 2, r14	C M I
Packit 5c3484
	cmp.eq		p12, p0 = 3, r14	C M I
Packit 5c3484
	mov		ar.lc = r19		C I0
Packit 5c3484
}
Packit 5c3484
C Dispatch.  When none of p10/p11/p12 is set (n mod 4 == 1) execution falls
C through to .Lb01.
{.bbb
Packit 5c3484
  (p10)	br.dptk		.Lb00			C B
Packit 5c3484
  (p11)	br.dptk		.Lb10			C B
Packit 5c3484
  (p12)	br.dptk		.Lb11			C B
Packit 5c3484
	;;
Packit 5c3484
}
Packit 5c3484
Packit 5c3484
C Feed-in for n mod 4 == 1 (reached by fall-through from the dispatch).
C f7/f8 hold the first up/rp limbs loaded at entry.  Each br.cloop both tests
C and decrements ar.lc = (n - 1) / 4:
C   n = 1    one product, then join the wind-down at .Lcj1
C   n = 5    via .grt1, then join the wind-down at .Lcj5
C   n = 9    via .grt1 and .grt5, then .Lend
C   n >= 13  via .grt1 and .grt5, then the top of .Loop
.Lb01:	br.cloop.dptk	.grt1
Packit 5c3484
Packit 5c3484
	xma.l		f39 = f7, f6, f8
Packit 5c3484
	xma.hu		f43 = f7, f6, f8
Packit 5c3484
	;;
Packit 5c3484
	getf.sig	r27 = f39			C lo
Packit 5c3484
	getf.sig	r31 = f43			C hi
Packit 5c3484
	ld8		r20 = [r9], 8
Packit 5c3484
	br		.Lcj1
Packit 5c3484
Packit 5c3484
C More than one limb: preload four more limb pairs while the first products
C are started.
.grt1:	ldf8		f44 = [rp], 8
Packit 5c3484
	ldf8		f32 = [up], 8
Packit 5c3484
	;;
Packit 5c3484
	ldf8		f45 = [rp], 8
Packit 5c3484
	ldf8		f33 = [up], 8
Packit 5c3484
	;;
Packit 5c3484
	ldf8		f46 = [rp], 8
Packit 5c3484
	xma.l		f39 = f7, f6, f8
Packit 5c3484
	ldf8		f34 = [up], 8
Packit 5c3484
	xma.hu		f43 = f7, f6, f8
Packit 5c3484
	;;
Packit 5c3484
	ldf8		f47 = [rp], 8
Packit 5c3484
	xma.l		f36 = f32, f6, f44
Packit 5c3484
	ldf8		f35 = [up], 8
Packit 5c3484
	xma.hu		f40 = f32, f6, f44
Packit 5c3484
	br.cloop.dptk	.grt5
Packit 5c3484
	;;
Packit 5c3484
Packit 5c3484
	getf.sig	r27 = f39			C lo
Packit 5c3484
	xma.l		f37 = f33, f6, f45
Packit 5c3484
	ld8		r20 = [r9], 8
Packit 5c3484
	xma.hu		f41 = f33, f6, f45
Packit 5c3484
	;;
Packit 5c3484
	getf.sig	r31 = f43			C hi
Packit 5c3484
	getf.sig	r24 = f36			C lo
Packit 5c3484
	xma.l		f38 = f34, f6, f46
Packit 5c3484
	ld8		r21 = [r9], 8
Packit 5c3484
	xma.hu		f42 = f34, f6, f46
Packit 5c3484
	;;
Packit 5c3484
	getf.sig	r28 = f40			C hi
Packit 5c3484
	getf.sig	r25 = f37			C lo
Packit 5c3484
	xma.l		f39 = f35, f6, f47
Packit 5c3484
	ld8		r22 = [r9], 8
Packit 5c3484
	xma.hu		f43 = f35, f6, f47
Packit 5c3484
	;;
Packit 5c3484
	getf.sig	r29 = f41			C hi
Packit 5c3484
	getf.sig	r26 = f38			C lo
Packit 5c3484
	ld8		r23 = [r9], 8
Packit 5c3484
	br		.Lcj5
Packit 5c3484
Packit 5c3484
C More than five limbs: keep loading ahead and fill the pipeline to the state
C expected at the top of .Loop (or at .Lend when ar.lc has reached zero).
.grt5:	ldf8		f44 = [rp], 8
Packit 5c3484
	ldf8		f32 = [up], 8
Packit 5c3484
	;;
Packit 5c3484
	getf.sig	r27 = f39			C lo
Packit 5c3484
	xma.l		f37 = f33, f6, f45
Packit 5c3484
	ld8		r20 = [r9], 8
Packit 5c3484
	xma.hu		f41 = f33, f6, f45
Packit 5c3484
	;;
Packit 5c3484
	ldf8		f45 = [rp], 8
Packit 5c3484
	getf.sig	r31 = f43			C hi
Packit 5c3484
	ldf8		f33 = [up], 8
Packit 5c3484
	;;
Packit 5c3484
	getf.sig	r24 = f36			C lo
Packit 5c3484
	xma.l		f38 = f34, f6, f46
Packit 5c3484
	ld8		r21 = [r9], 8
Packit 5c3484
	xma.hu		f42 = f34, f6, f46
Packit 5c3484
	;;
Packit 5c3484
	ldf8		f46 = [rp], 8
Packit 5c3484
	getf.sig	r28 = f40			C hi
Packit 5c3484
	ldf8		f34 = [up], 8
Packit 5c3484
	;;
Packit 5c3484
	getf.sig	r25 = f37			C lo
Packit 5c3484
	xma.l		f39 = f35, f6, f47
Packit 5c3484
	ld8		r22 = [r9], 8
Packit 5c3484
	xma.hu		f43 = f35, f6, f47
Packit 5c3484
	;;
Packit 5c3484
	ldf8		f47 = [rp], 8
Packit 5c3484
	getf.sig	r29 = f41			C hi
Packit 5c3484
	ldf8		f35 = [up], 8
Packit 5c3484
	;;
Packit 5c3484
	getf.sig	r26 = f38			C lo
Packit 5c3484
	xma.l		f36 = f32, f6, f44
Packit 5c3484
	ld8		r23 = [r9], 8
Packit 5c3484
	xma.hu		f40 = f32, f6, f44
Packit 5c3484
	br.cloop.dptk	.Loop
Packit 5c3484
	br		.Lend
Packit 5c3484
Packit 5c3484
Packit 5c3484
C Feed-in for n mod 4 == 2 (taken on p11).  f7/f8 hold the first up/rp limbs
C loaded at entry; the second pair is loaded here.  Each br.cloop both tests
C and decrements ar.lc = (n - 1) / 4:
C   n = 2    two products, then join the wind-down at .Lcj2
C   n = 6    via .grt2, then join the wind-down at .Lcj6
C   n >= 10  via .grt2 and .grt6, then enter the loop body at .LL10
.Lb10:	ldf8		f47 = [rp], 8
Packit 5c3484
	ldf8		f35 = [up], 8
Packit 5c3484
	br.cloop.dptk	.grt2
Packit 5c3484
Packit 5c3484
	xma.l		f38 = f7, f6, f8
Packit 5c3484
	xma.hu		f42 = f7, f6, f8
Packit 5c3484
	;;
Packit 5c3484
	xma.l		f39 = f35, f6, f47
Packit 5c3484
	xma.hu		f43 = f35, f6, f47
Packit 5c3484
	;;
Packit 5c3484
	getf.sig	r26 = f38			C lo
Packit 5c3484
	getf.sig	r30 = f42			C hi
Packit 5c3484
	ld8		r23 = [r9], 8
Packit 5c3484
	;;
Packit 5c3484
	getf.sig	r27 = f39			C lo
Packit 5c3484
	getf.sig	r31 = f43			C hi
Packit 5c3484
	ld8		r20 = [r9], 8
Packit 5c3484
	br		.Lcj2
Packit 5c3484
Packit 5c3484
C More than two limbs: preload four more limb pairs while the first two
C products are started.
.grt2:	ldf8		f44 = [rp], 8
Packit 5c3484
	ldf8		f32 = [up], 8
Packit 5c3484
	;;
Packit 5c3484
	ldf8		f45 = [rp], 8
Packit 5c3484
	ldf8		f33 = [up], 8
Packit 5c3484
	xma.l		f38 = f7, f6, f8
Packit 5c3484
	xma.hu		f42 = f7, f6, f8
Packit 5c3484
	;;
Packit 5c3484
	ldf8		f46 = [rp], 8
Packit 5c3484
	ldf8		f34 = [up], 8
Packit 5c3484
	xma.l		f39 = f35, f6, f47
Packit 5c3484
	xma.hu		f43 = f35, f6, f47
Packit 5c3484
	;;
Packit 5c3484
	ldf8		f47 = [rp], 8
Packit 5c3484
	ldf8		f35 = [up], 8
Packit 5c3484
	;;
Packit 5c3484
	getf.sig	r26 = f38			C lo
Packit 5c3484
	xma.l		f36 = f32, f6, f44
Packit 5c3484
	ld8		r23 = [r9], 8
Packit 5c3484
	xma.hu		f40 = f32, f6, f44
Packit 5c3484
	br.cloop.dptk	.grt6
Packit 5c3484
Packit 5c3484
	getf.sig	r30 = f42			C hi
Packit 5c3484
	;;
Packit 5c3484
	getf.sig	r27 = f39			C lo
Packit 5c3484
	xma.l		f37 = f33, f6, f45
Packit 5c3484
	ld8		r20 = [r9], 8
Packit 5c3484
	xma.hu		f41 = f33, f6, f45
Packit 5c3484
	;;
Packit 5c3484
	getf.sig	r31 = f43			C hi
Packit 5c3484
	getf.sig	r24 = f36			C lo
Packit 5c3484
	xma.l		f38 = f34, f6, f46
Packit 5c3484
	ld8		r21 = [r9], 8
Packit 5c3484
	xma.hu		f42 = f34, f6, f46
Packit 5c3484
	;;
Packit 5c3484
	getf.sig	r28 = f40			C hi
Packit 5c3484
	getf.sig	r25 = f37			C lo
Packit 5c3484
	xma.l		f39 = f35, f6, f47
Packit 5c3484
	ld8		r22 = [r9], 8
Packit 5c3484
	xma.hu		f43 = f35, f6, f47
Packit 5c3484
	br		.Lcj6
Packit 5c3484
Packit 5c3484
C More than six limbs: keep loading ahead and fill the pipeline to the state
C expected at the .LL10 entry of the loop.
.grt6:	ldf8		f44 = [rp], 8
Packit 5c3484
	getf.sig	r30 = f42			C hi
Packit 5c3484
	ldf8		f32 = [up], 8
Packit 5c3484
	;;
Packit 5c3484
	getf.sig	r27 = f39			C lo
Packit 5c3484
	xma.l		f37 = f33, f6, f45
Packit 5c3484
	ld8		r20 = [r9], 8
Packit 5c3484
	xma.hu		f41 = f33, f6, f45
Packit 5c3484
	;;
Packit 5c3484
	ldf8		f45 = [rp], 8
Packit 5c3484
	getf.sig	r31 = f43			C hi
Packit 5c3484
	ldf8		f33 = [up], 8
Packit 5c3484
	;;
Packit 5c3484
	getf.sig	r24 = f36			C lo
Packit 5c3484
	xma.l		f38 = f34, f6, f46
Packit 5c3484
	ld8		r21 = [r9], 8
Packit 5c3484
	xma.hu		f42 = f34, f6, f46
Packit 5c3484
	;;
Packit 5c3484
	ldf8		f46 = [rp], 8
Packit 5c3484
	getf.sig	r28 = f40			C hi
Packit 5c3484
	ldf8		f34 = [up], 8
Packit 5c3484
	;;
Packit 5c3484
	getf.sig	r25 = f37			C lo
Packit 5c3484
	xma.l		f39 = f35, f6, f47
Packit 5c3484
	ld8		r22 = [r9], 8
Packit 5c3484
	xma.hu		f43 = f35, f6, f47
Packit 5c3484
	br		.LL10
Packit 5c3484
Packit 5c3484
Packit 5c3484
C Feed-in for n mod 4 == 3 (taken on p12).  f7/f8 hold the first up/rp limbs
C loaded at entry; two more pairs are loaded here.  Each br.cloop both tests
C and decrements ar.lc = (n - 1) / 4:
C   n = 3    three products, then join the wind-down at .Lcj3
C   n = 7    via .grt3, then join the wind-down at .Lcj7
C   n >= 11  via .grt3 and .grt7, then enter the loop body at .LL11
.Lb11:	ldf8		f46 = [rp], 8
Packit 5c3484
	ldf8		f34 = [up], 8
Packit 5c3484
	;;
Packit 5c3484
	ldf8		f47 = [rp], 8
Packit 5c3484
	ldf8		f35 = [up], 8
Packit 5c3484
	br.cloop.dptk	.grt3
Packit 5c3484
Packit 5c3484
	xma.l		f37 = f7, f6, f8
Packit 5c3484
	xma.hu		f41 = f7, f6, f8
Packit 5c3484
	;;
Packit 5c3484
	xma.l		f38 = f34, f6, f46
Packit 5c3484
	xma.hu		f42 = f34, f6, f46
Packit 5c3484
	;;
Packit 5c3484
	getf.sig	r25 = f37			C lo
Packit 5c3484
	xma.l		f39 = f35, f6, f47
Packit 5c3484
	xma.hu		f43 = f35, f6, f47
Packit 5c3484
	;;
Packit 5c3484
	getf.sig	r29 = f41			C hi
Packit 5c3484
	ld8		r22 = [r9], 8
Packit 5c3484
	;;
Packit 5c3484
	getf.sig	r26 = f38			C lo
Packit 5c3484
	getf.sig	r30 = f42			C hi
Packit 5c3484
	ld8		r23 = [r9], 8
Packit 5c3484
	;;
Packit 5c3484
	getf.sig	r27 = f39			C lo
Packit 5c3484
	getf.sig	r31 = f43			C hi
Packit 5c3484
	ld8		r20 = [r9], 8
Packit 5c3484
	br		.Lcj3
Packit 5c3484
Packit 5c3484
C More than three limbs: preload four more limb pairs while the first three
C products are started.
.grt3:	ldf8		f44 = [rp], 8
Packit 5c3484
	xma.l		f37 = f7, f6, f8
Packit 5c3484
	ldf8		f32 = [up], 8
Packit 5c3484
	xma.hu		f41 = f7, f6, f8
Packit 5c3484
	;;
Packit 5c3484
	ldf8		f45 = [rp], 8
Packit 5c3484
	xma.l		f38 = f34, f6, f46
Packit 5c3484
	ldf8		f33 = [up], 8
Packit 5c3484
	xma.hu		f42 = f34, f6, f46
Packit 5c3484
	;;
Packit 5c3484
	ldf8		f46 = [rp], 8
Packit 5c3484
	ldf8		f34 = [up], 8
Packit 5c3484
	;;
Packit 5c3484
	getf.sig	r25 = f37			C lo
Packit 5c3484
	xma.l		f39 = f35, f6, f47
Packit 5c3484
	ld8		r22 = [r9], 8
Packit 5c3484
	xma.hu		f43 = f35, f6, f47
Packit 5c3484
	;;
Packit 5c3484
	ldf8		f47 = [rp], 8
Packit 5c3484
	getf.sig	r29 = f41			C hi
Packit 5c3484
	ldf8		f35 = [up], 8
Packit 5c3484
	;;
Packit 5c3484
	getf.sig	r26 = f38			C lo
Packit 5c3484
	xma.l		f36 = f32, f6, f44
Packit 5c3484
	ld8		r23 = [r9], 8
Packit 5c3484
	xma.hu		f40 = f32, f6, f44
Packit 5c3484
	br.cloop.dptk	.grt7
Packit 5c3484
	;;
Packit 5c3484
Packit 5c3484
	getf.sig	r30 = f42			C hi
Packit 5c3484
	getf.sig	r27 = f39			C lo
Packit 5c3484
	xma.l		f37 = f33, f6, f45
Packit 5c3484
	ld8		r20 = [r9], 8
Packit 5c3484
	xma.hu		f41 = f33, f6, f45
Packit 5c3484
	;;
Packit 5c3484
	getf.sig	r31 = f43			C hi
Packit 5c3484
	getf.sig	r24 = f36			C lo
Packit 5c3484
	xma.l		f38 = f34, f6, f46
Packit 5c3484
	ld8		r21 = [r9], 8
Packit 5c3484
	xma.hu		f42 = f34, f6, f46
Packit 5c3484
	br		.Lcj7
Packit 5c3484
Packit 5c3484
C More than seven limbs: keep loading ahead and fill the pipeline to the
C state expected at the .LL11 entry of the loop.
.grt7:	ldf8		f44 = [rp], 8
Packit 5c3484
	getf.sig	r30 = f42			C hi
Packit 5c3484
	ldf8		f32 = [up], 8
Packit 5c3484
	;;
Packit 5c3484
	getf.sig	r27 = f39			C lo
Packit 5c3484
	xma.l		f37 = f33, f6, f45
Packit 5c3484
	ld8		r20 = [r9], 8
Packit 5c3484
	xma.hu		f41 = f33, f6, f45
Packit 5c3484
	;;
Packit 5c3484
	ldf8		f45 = [rp], 8
Packit 5c3484
	getf.sig	r31 = f43			C hi
Packit 5c3484
	ldf8		f33 = [up], 8
Packit 5c3484
	;;
Packit 5c3484
	getf.sig	r24 = f36			C lo
Packit 5c3484
	xma.l		f38 = f34, f6, f46
Packit 5c3484
	ld8		r21 = [r9], 8
Packit 5c3484
	xma.hu		f42 = f34, f6, f46
Packit 5c3484
	br		.LL11
Packit 5c3484
Packit 5c3484
Packit 5c3484
C Feed-in for n mod 4 == 0 (taken on p10).  f7/f8 hold the first up/rp limbs
C loaded at entry; three more pairs are loaded here.  Each br.cloop both
C tests and decrements ar.lc = (n - 1) / 4:
C   n = 4    four products, then join the wind-down at .Lcj4
C   n = 8    via .grt4, then join the wind-down at .Lcj8
C   n >= 12  via .grt4 and .grt8, then enter the loop body at .LL00
.Lb00:	ldf8		f45 = [rp], 8
Packit 5c3484
	ldf8		f33 = [up], 8
Packit 5c3484
	;;
Packit 5c3484
	ldf8		f46 = [rp], 8
Packit 5c3484
	ldf8		f34 = [up], 8
Packit 5c3484
	;;
Packit 5c3484
	ldf8		f47 = [rp], 8
Packit 5c3484
	xma.l		f36 = f7, f6, f8
Packit 5c3484
	ldf8		f35 = [up], 8
Packit 5c3484
	xma.hu		f40 = f7, f6, f8
Packit 5c3484
	br.cloop.dptk	.grt4
Packit 5c3484
Packit 5c3484
	xma.l		f37 = f33, f6, f45
Packit 5c3484
	xma.hu		f41 = f33, f6, f45
Packit 5c3484
	;;
Packit 5c3484
	getf.sig	r24 = f36			C lo
Packit 5c3484
	xma.l		f38 = f34, f6, f46
Packit 5c3484
	ld8		r21 = [r9], 8
Packit 5c3484
	xma.hu		f42 = f34, f6, f46
Packit 5c3484
	;;
Packit 5c3484
	getf.sig	r28 = f40			C hi
Packit 5c3484
	xma.l		f39 = f35, f6, f47
Packit 5c3484
	getf.sig	r25 = f37			C lo
Packit 5c3484
	ld8		r22 = [r9], 8
Packit 5c3484
	xma.hu		f43 = f35, f6, f47
Packit 5c3484
	;;
Packit 5c3484
	getf.sig	r29 = f41			C hi
Packit 5c3484
	getf.sig	r26 = f38			C lo
Packit 5c3484
	ld8		r23 = [r9], 8
Packit 5c3484
	;;
Packit 5c3484
	getf.sig	r30 = f42			C hi
Packit 5c3484
	getf.sig	r27 = f39			C lo
Packit 5c3484
	ld8		r20 = [r9], 8
Packit 5c3484
	br		.Lcj4
Packit 5c3484
Packit 5c3484
C More than four limbs: preload four more limb pairs while the first four
C products are started.
.grt4:	ldf8		f44 = [rp], 8
Packit 5c3484
	xma.l		f37 = f33, f6, f45
Packit 5c3484
	ldf8		f32 = [up], 8
Packit 5c3484
	xma.hu		f41 = f33, f6, f45
Packit 5c3484
	;;
Packit 5c3484
	ldf8		f45 = [rp], 8
Packit 5c3484
	ldf8		f33 = [up], 8
Packit 5c3484
	xma.l		f38 = f34, f6, f46
Packit 5c3484
	getf.sig	r24 = f36			C lo
Packit 5c3484
	ld8		r21 = [r9], 8
Packit 5c3484
	xma.hu		f42 = f34, f6, f46
Packit 5c3484
	;;
Packit 5c3484
	ldf8		f46 = [rp], 8
Packit 5c3484
	getf.sig	r28 = f40			C hi
Packit 5c3484
	ldf8		f34 = [up], 8
Packit 5c3484
	xma.l		f39 = f35, f6, f47
Packit 5c3484
	getf.sig	r25 = f37			C lo
Packit 5c3484
	ld8		r22 = [r9], 8
Packit 5c3484
	xma.hu		f43 = f35, f6, f47
Packit 5c3484
	;;
Packit 5c3484
	ldf8		f47 = [rp], 8
Packit 5c3484
	getf.sig	r29 = f41			C hi
Packit 5c3484
	ldf8		f35 = [up], 8
Packit 5c3484
	;;
Packit 5c3484
	getf.sig	r26 = f38			C lo
Packit 5c3484
	xma.l		f36 = f32, f6, f44
Packit 5c3484
	ld8		r23 = [r9], 8
Packit 5c3484
	xma.hu		f40 = f32, f6, f44
Packit 5c3484
	br.cloop.dptk	.grt8
Packit 5c3484
	;;
Packit 5c3484
Packit 5c3484
	getf.sig	r30 = f42			C hi
Packit 5c3484
	getf.sig	r27 = f39			C lo
Packit 5c3484
	xma.l		f37 = f33, f6, f45
Packit 5c3484
	ld8		r20 = [r9], 8
Packit 5c3484
	xma.hu		f41 = f33, f6, f45
Packit 5c3484
	br		.Lcj8
Packit 5c3484
Packit 5c3484
C More than eight limbs: keep loading ahead and fill the pipeline to the
C state expected at the .LL00 entry of the loop.
.grt8:	ldf8		f44 = [rp], 8
Packit 5c3484
	getf.sig	r30 = f42			C hi
Packit 5c3484
	ldf8		f32 = [up], 8
Packit 5c3484
	;;
Packit 5c3484
	getf.sig	r27 = f39			C lo
Packit 5c3484
	xma.l		f37 = f33, f6, f45
Packit 5c3484
	ld8		r20 = [r9], 8
Packit 5c3484
	xma.hu		f41 = f33, f6, f45
Packit 5c3484
	br		.LL00
Packit 5c3484
Packit 5c3484
	ALIGN(32)
Packit 5c3484
C Main loop: four limbs per iteration, software pipelined.  Each quarter of
C the loop body
C   1. compares a low product word (r24-r27) with the borrow limb r8 to set
C      the borrow predicate p6, and forms the result limb r14 = lo - r8;
C   2. forms the next borrow limb r8 = up limb (r20-r23) minus high product
C      word (r28-r31), incremented by one under p6;
C   3. stores r14 through r10, and meanwhile moves later products out of the
C      FP registers with getf.sig, issues the xma pair for a later limb, and
C      loads further limbs (ldf8 through rp and up, ld8 through r9).
C FP register use: f32-f35 up limbs, f44-f47 rp limbs, f36-f39 low product
C words, f40-f43 high product words, f6 the negated multiplier.
C .LL00, .LL11 and .LL10 are entry points used by the feed-in code for
C n mod 4 = 0, 3 and 2; the n mod 4 = 1 feed-in enters at .Loop itself.
.Loop:
Packit 5c3484
{.mmi
Packit 5c3484
	ldf8		f44 = [rp], 8
Packit 5c3484
	cmp.ltu		p6, p0 = r27, r8	C lo cmp
Packit 5c3484
	sub		r14 = r27, r8		C lo sub
Packit 5c3484
}
Packit 5c3484
{.mmi
Packit 5c3484
	getf.sig	r30 = f42			C hi
Packit 5c3484
	ldf8		f32 = [up], 8
Packit 5c3484
	sub		r8 = r20, r31		C hi sub
Packit 5c3484
	;;				C 01
Packit 5c3484
}
Packit 5c3484
{.mmf
Packit 5c3484
	getf.sig	r27 = f39			C lo
Packit 5c3484
	st8		[r10] = r14, 8
Packit 5c3484
	xma.l		f37 = f33, f6, f45
Packit 5c3484
}
Packit 5c3484
{.mfi
Packit 5c3484
	ld8		r20 = [r9], 8
Packit 5c3484
	xma.hu		f41 = f33, f6, f45
Packit 5c3484
   (p6)	add		r8 = 1, r8
Packit 5c3484
	;;				C 02
Packit 5c3484
}
Packit 5c3484
{.mmi
Packit 5c3484
.LL00:	ldf8		f45 = [rp], 8
Packit 5c3484
	cmp.ltu		p6, p0 = r24, r8
Packit 5c3484
	sub		r14 = r24, r8
Packit 5c3484
}
Packit 5c3484
{.mmi
Packit 5c3484
	getf.sig	r31 = f43			C hi
Packit 5c3484
	ldf8		f33 = [up], 8
Packit 5c3484
	sub		r8 = r21, r28
Packit 5c3484
	;;				C 03
Packit 5c3484
}
Packit 5c3484
{.mmf
Packit 5c3484
	getf.sig	r24 = f36			C lo
Packit 5c3484
	st8		[r10] = r14, 8
Packit 5c3484
	xma.l		f38 = f34, f6, f46
Packit 5c3484
}
Packit 5c3484
{.mfi
Packit 5c3484
	ld8		r21 = [r9], 8
Packit 5c3484
	xma.hu		f42 = f34, f6, f46
Packit 5c3484
   (p6)	add		r8 = 1, r8
Packit 5c3484
	;;				C 04
Packit 5c3484
}
Packit 5c3484
{.mmi
Packit 5c3484
.LL11:	ldf8		f46 = [rp], 8
Packit 5c3484
	cmp.ltu		p6, p0 = r25, r8
Packit 5c3484
	sub		r14 = r25, r8
Packit 5c3484
}
Packit 5c3484
{.mmi
Packit 5c3484
	getf.sig	r28 = f40			C hi
Packit 5c3484
	ldf8		f34 = [up], 8
Packit 5c3484
	sub		r8 = r22, r29
Packit 5c3484
	;;				C 05
Packit 5c3484
}
Packit 5c3484
{.mmf
Packit 5c3484
	getf.sig	r25 = f37			C lo
Packit 5c3484
	st8		[r10] = r14, 8
Packit 5c3484
	xma.l		f39 = f35, f6, f47
Packit 5c3484
}
Packit 5c3484
{.mfi
Packit 5c3484
	ld8		r22 = [r9], 8
Packit 5c3484
	xma.hu		f43 = f35, f6, f47
Packit 5c3484
   (p6)	add		r8 = 1, r8
Packit 5c3484
	;;				C 06
Packit 5c3484
}
Packit 5c3484
{.mmi
Packit 5c3484
.LL10:	ldf8		f47 = [rp], 8
Packit 5c3484
	cmp.ltu		p6, p0 = r26, r8
Packit 5c3484
	sub		r14 = r26, r8
Packit 5c3484
}
Packit 5c3484
{.mmi
Packit 5c3484
	getf.sig	r29 = f41			C hi
Packit 5c3484
	ldf8		f35 = [up], 8
Packit 5c3484
	sub		r8 = r23, r30
Packit 5c3484
	;;				C 07
Packit 5c3484
}
Packit 5c3484
{.mmf
Packit 5c3484
	getf.sig	r26 = f38			C lo
Packit 5c3484
	st8		[r10] = r14, 8
Packit 5c3484
	xma.l		f36 = f32, f6, f44
Packit 5c3484
}
Packit 5c3484
{.mfi
Packit 5c3484
	ld8		r23 = [r9], 8
Packit 5c3484
	xma.hu		f40 = f32, f6, f44
Packit 5c3484
   (p6)	add		r8 = 1, r8
Packit 5c3484
}
Packit 5c3484
C Repeat while ar.lc is non-zero; otherwise fall through into the wind-down.
	br.cloop.dptk	.Loop
Packit 5c3484
	;;
Packit 5c3484
Packit 5c3484
C Wind-down: the same per-limb steps as in the loop (compare low word with
C the borrow limb r8, subtract, form the next borrow limb, store through r10,
C bump r8 under p6), with the multiplies and loads dropping out as the
C pipeline drains.  No further ldf8 loads are issued here.  Entering at .LcjK
C leaves exactly K stores to be done (K = 1..8); entering at .Lend leaves
C nine.
.Lend:
Packit 5c3484
	cmp.ltu		p6, p0 = r27, r8
Packit 5c3484
	sub		r14 = r27, r8
Packit 5c3484
	getf.sig	r30 = f42
Packit 5c3484
	sub		r8 = r20, r31
Packit 5c3484
	;;
Packit 5c3484
	getf.sig	r27 = f39
Packit 5c3484
	st8		[r10] = r14, 8
Packit 5c3484
	xma.l		f37 = f33, f6, f45
Packit 5c3484
	ld8		r20 = [r9], 8
Packit 5c3484
	xma.hu		f41 = f33, f6, f45
Packit 5c3484
   (p6)	add		r8 = 1, r8
Packit 5c3484
	;;
Packit 5c3484
.Lcj8:
Packit 5c3484
	cmp.ltu		p6, p0 = r24, r8
Packit 5c3484
	sub		r14 = r24, r8
Packit 5c3484
	getf.sig	r31 = f43
Packit 5c3484
	sub		r8 = r21, r28
Packit 5c3484
	;;
Packit 5c3484
	getf.sig	r24 = f36
Packit 5c3484
	st8		[r10] = r14, 8
Packit 5c3484
	xma.l		f38 = f34, f6, f46
Packit 5c3484
	ld8		r21 = [r9], 8
Packit 5c3484
	xma.hu		f42 = f34, f6, f46
Packit 5c3484
   (p6)	add		r8 = 1, r8
Packit 5c3484
	;;
Packit 5c3484
.Lcj7:
Packit 5c3484
	cmp.ltu		p6, p0 = r25, r8
Packit 5c3484
	sub		r14 = r25, r8
Packit 5c3484
	getf.sig	r28 = f40
Packit 5c3484
	sub		r8 = r22, r29
Packit 5c3484
	;;
Packit 5c3484
	getf.sig	r25 = f37
Packit 5c3484
	st8		[r10] = r14, 8
Packit 5c3484
	xma.l		f39 = f35, f6, f47
Packit 5c3484
	ld8		r22 = [r9], 8
Packit 5c3484
	xma.hu		f43 = f35, f6, f47
Packit 5c3484
   (p6)	add		r8 = 1, r8
Packit 5c3484
	;;
Packit 5c3484
C From here on all products have been issued; only getf.sig transfers and
C the remaining integer loads of up limbs are left.
.Lcj6:
Packit 5c3484
	cmp.ltu		p6, p0 = r26, r8
Packit 5c3484
	sub		r14 = r26, r8
Packit 5c3484
	getf.sig	r29 = f41
Packit 5c3484
	sub		r8 = r23, r30
Packit 5c3484
	;;
Packit 5c3484
	getf.sig	r26 = f38
Packit 5c3484
	st8		[r10] = r14, 8
Packit 5c3484
	ld8		r23 = [r9], 8
Packit 5c3484
   (p6)	add		r8 = 1, r8
Packit 5c3484
	;;
Packit 5c3484
.Lcj5:
Packit 5c3484
	cmp.ltu		p6, p0 = r27, r8
Packit 5c3484
	sub		r14 = r27, r8
Packit 5c3484
	getf.sig	r30 = f42
Packit 5c3484
	sub		r8 = r20, r31
Packit 5c3484
	;;
Packit 5c3484
	getf.sig	r27 = f39
Packit 5c3484
	st8		[r10] = r14, 8
Packit 5c3484
	ld8		r20 = [r9], 8
Packit 5c3484
   (p6)	add		r8 = 1, r8
Packit 5c3484
	;;
Packit 5c3484
.Lcj4:
Packit 5c3484
	cmp.ltu		p6, p0 = r24, r8
Packit 5c3484
	sub		r14 = r24, r8
Packit 5c3484
	getf.sig	r31 = f43
Packit 5c3484
	sub		r8 = r21, r28
Packit 5c3484
	;;
Packit 5c3484
	st8		[r10] = r14, 8
Packit 5c3484
   (p6)	add		r8 = 1, r8
Packit 5c3484
	;;
Packit 5c3484
.Lcj3:
Packit 5c3484
	cmp.ltu		p6, p0 = r25, r8
Packit 5c3484
	sub		r14 = r25, r8
Packit 5c3484
	sub		r8 = r22, r29
Packit 5c3484
	;;
Packit 5c3484
	st8		[r10] = r14, 8
Packit 5c3484
   (p6)	add		r8 = 1, r8
Packit 5c3484
	;;
Packit 5c3484
.Lcj2:
Packit 5c3484
	cmp.ltu		p6, p0 = r26, r8
Packit 5c3484
	sub		r14 = r26, r8
Packit 5c3484
	sub		r8 = r23, r30
Packit 5c3484
	;;
Packit 5c3484
	st8		[r10] = r14, 8
Packit 5c3484
   (p6)	add		r8 = 1, r8
Packit 5c3484
	;;
Packit 5c3484
C Last limb: store it, restore ar.lc from r2, fold the final borrow into r8
C and return.  NOTE(review): r8 is presumably the function result (the
C borrow limb out of the most significant position) -- confirm against the
C calling convention.
.Lcj1:
Packit 5c3484
	cmp.ltu		p6, p0 = r27, r8
Packit 5c3484
	sub		r14 = r27, r8
Packit 5c3484
	sub		r8 = r20, r31
Packit 5c3484
	;;
Packit 5c3484
	st8		[r10] = r14, 8
Packit 5c3484
	mov		ar.lc = r2
Packit 5c3484
   (p6)	add		r8 = 1, r8
Packit 5c3484
	br.ret.sptk.many b0
Packit 5c3484
C vl == 0 exit: nothing is stored; restore ar.lc and return with r8 still
C holding the 0 written during setup.
.Ldone:	mov		ar.lc = r2
Packit 5c3484
	br.ret.sptk.many b0
Packit 5c3484
EPILOGUE()
Packit 5c3484
ASM_END()