Blame mpn/x86_64/div_qr_2u_pi1.asm

Packit 5c3484
dnl  x86-64 mpn_div_qr_2u_pi1
Packit 5c3484
dnl  -- Divide an mpn number by an unnormalized 2-limb number,
Packit 5c3484
dnl     using a single-limb inverse and shifting the dividend on the fly.
Packit 5c3484
Packit 5c3484
dnl  Copyright 2007, 2008, 2010, 2011 Free Software Foundation, Inc.
Packit 5c3484
Packit 5c3484
dnl  This file is part of the GNU MP Library.
Packit 5c3484
dnl
Packit 5c3484
dnl  The GNU MP Library is free software; you can redistribute it and/or modify
Packit 5c3484
dnl  it under the terms of either:
Packit 5c3484
dnl
Packit 5c3484
dnl    * the GNU Lesser General Public License as published by the Free
Packit 5c3484
dnl      Software Foundation; either version 3 of the License, or (at your
Packit 5c3484
dnl      option) any later version.
Packit 5c3484
dnl
Packit 5c3484
dnl  or
Packit 5c3484
dnl
Packit 5c3484
dnl    * the GNU General Public License as published by the Free Software
Packit 5c3484
dnl      Foundation; either version 2 of the License, or (at your option) any
Packit 5c3484
dnl      later version.
Packit 5c3484
dnl
Packit 5c3484
dnl  or both in parallel, as here.
Packit 5c3484
dnl
Packit 5c3484
dnl  The GNU MP Library is distributed in the hope that it will be useful, but
Packit 5c3484
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
Packit 5c3484
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
Packit 5c3484
dnl  for more details.
Packit 5c3484
dnl
Packit 5c3484
dnl  You should have received copies of the GNU General Public License and the
Packit 5c3484
dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
Packit 5c3484
dnl  see https://www.gnu.org/licenses/.
Packit 5c3484
Packit 5c3484
include(`../config.m4')
Packit 5c3484
Packit 5c3484
Packit 5c3484
C		c/l
Packit 5c3484
C INPUT PARAMETERS
C qp, rp, up_param, un_param, d1 and d0 are register arguments.  shift_param
C and di_param are stack slots addressed relative to FRAME, which the function
C sets to 0 on entry and to 56 after its seven register pushes.
Packit 5c3484
define(`qp',		`%rdi')
Packit 5c3484
define(`rp',		`%rsi')
Packit 5c3484
define(`up_param',	`%rdx')
Packit 5c3484
define(`un_param',	`%rcx') dnl %rcx needed for shift count
Packit 5c3484
define(`d1',		`%r8')
Packit 5c3484
define(`d0',		`%r9')
Packit 5c3484
define(`shift_param',	`FRAME+8(%rsp)')
Packit 5c3484
define(`di_param',	`FRAME+16(%rsp)')
Packit 5c3484
Packit 5c3484
C WORKING REGISTERS
C   di   copy of di_param, multiplied by u2 to estimate each quotient limb
C   up   copy of up_param (the mul instructions overwrite %rdx)
C   un   limb index and loop counter, initialised to un_param - 2
C   u2   high limb of the running two-limb remainder
C   u1   low limb of the running two-limb remainder
C   u0   dividend limb most recently loaded from (up, un, 8)
C   t1   quotient limb candidate
C   t0   low word of the quotient estimate, compared against u2
C   md1  negated d1
define(`di',		`%r10')
Packit 5c3484
define(`up',		`%r11')
Packit 5c3484
define(`un',		`%rbp')
Packit 5c3484
define(`u2',		`%rbx')
Packit 5c3484
define(`u1',		`%r12')
Packit 5c3484
define(`u0',		`%rsi') dnl Same as rp, which is saved and restored.
Packit 5c3484
define(`t1',		`%r13')
Packit 5c3484
define(`t0',		`%r14')
Packit 5c3484
define(`md1',		`%r15')
Packit 5c3484
Packit 5c3484
ASM_START()
Packit 5c3484
	TEXT
Packit 5c3484
	ALIGN(16)
Packit 5c3484
C FRAME records how many bytes have been pushed, so that shift_param and
C di_param resolve to the correct stack slots.
deflit(`FRAME', 0)
Packit 5c3484
C mpn_div_qr_2u_pi1
C
C Divides the limbs at up by the two-limb divisor d1:d0, shifting the dividend
C left by the shift argument as it is read, and shifting the two-limb
C remainder right by the same count before storing it.
C
C Arguments (names as defined earlier in this file):
C   qp     %rdi   quotient limbs are stored to (qp, un, 8) for un = un_param-3
C                 down to 0
C   rp     %rsi   the remainder is stored to (rp) and 8(rp)
C   up     %rdx   dividend limbs, read from index un_param-1 downwards
C   un     %rcx   dividend limb count.  NOTE(review): 8(up, un_param-2, 8) and
C                 (up, un_param-2, 8) are read unconditionally, so un_param >= 2
C                 is presumably required -- confirm against callers.
C   d1,d0  %r8, %r9   divisor limbs, used as given and never shifted here.
C                 NOTE(review): presumably already shifted left by the caller
C                 -- confirm.
C   shift  stack  32-bit shift count, loaded into %ecx
C   di     stack  single-limb inverse
C Returns the most significant quotient limb (qh) in %rax.
C Saves and restores %r15, %r14, %r13, %r12, %rbx and %rbp.
PROLOGUE(mpn_div_qr_2u_pi1)
Packit 5c3484
	C Read di while FRAME is still 0, and copy up out of %rdx because the
	C mul instructions below write %rdx.
	mov	di_param, di
Packit 5c3484
	mov	up_param, up
Packit 5c3484
	push	%r15
Packit 5c3484
	push	%r14
Packit 5c3484
	push	%r13
Packit 5c3484
	push	%r12
Packit 5c3484
	push	%rbx
Packit 5c3484
	push	%rbp
Packit 5c3484
	C rp shares %rsi with u0, so it is kept on the stack until the end.
	push	rp
Packit 5c3484
C Seven 8-byte pushes so far.
deflit(`FRAME', 56)
Packit 5c3484
	C un = un_param - 2, the index of the second highest dividend limb.
	lea	-2(un_param), un
Packit 5c3484
	C md1 = -d1, so that imul by md1 followed by an add subtracts q * d1.
	mov	d1, md1
Packit 5c3484
	neg	md1
Packit 5c3484
Packit 5c3484
	C int parameter, 32 bits only
Packit 5c3484
	movl	shift_param, R32(%rcx)
Packit 5c3484
Packit 5c3484
	C FIXME: Different code for SHLD_SLOW
Packit 5c3484
Packit 5c3484
	C u2 = 0 and u1 = top dividend limb.  The shld moves the top %cl bits of
	C u1 into u2 and leaves u1 itself unchanged.
	xor	R32(u2), R32(u2)
Packit 5c3484
	mov	8(up, un, 8), u1
Packit 5c3484
	shld	%cl, u1, u2
Packit 5c3484
	C Remains to read (up, un, 8) and shift u1, u0
Packit 5c3484
	C udiv_qr_3by2 (qh,u2,u1,u2,u1,n0, d1,d0,di)
Packit 5c3484
	C %rdx:%rax = di * u2
	mov	di, %rax
Packit 5c3484
	mul	u2
Packit 5c3484
	C Load the next limb and complete the shift of u1, filling its low bits
	C from the top of u0.
	mov	(up, un, 8), u0
Packit 5c3484
	shld	%cl, u0, u1
Packit 5c3484
	C t1:t0 = di * u2 + u2:u1
	mov	u1, t0
Packit 5c3484
	add	%rax, t0	C q0 in t0
Packit 5c3484
	adc	u2, %rdx
Packit 5c3484
	mov	%rdx, t1	C q in t1
Packit 5c3484
	C u2 = u1 - q * d1 (low 64 bits), and %rdx:%rax = q * d0.
	imul	md1, %rdx
Packit 5c3484
	mov	d0, %rax
Packit 5c3484
	lea	(%rdx, u1), u2
Packit 5c3484
	mul	t1
Packit 5c3484
	C u1 = u0 shifted left by %cl, the new low remainder limb.
	mov	u0, u1
Packit 5c3484
	shl	%cl, u1
Packit 5c3484
	C Subtract d1:d0 once and then q * d0 from u2:u1.
	sub	d0, u1
Packit 5c3484
	sbb	d1, u2
Packit 5c3484
	sub	%rax, u1
Packit 5c3484
	sbb	%rdx, u2
Packit 5c3484
	C Branch-free adjustment.  The cmp sets carry when u2 < t0; cmovnc does
	C not change flags, so the adc sees that same carry.  With carry set, t1
	C is incremented and zero is added to u2:u1.  With carry clear, t1 is
	C unchanged and d1:d0 is added back to u2:u1.
	xor	R32(%rax), R32(%rax)
Packit 5c3484
	xor	R32(%rdx), R32(%rdx)
Packit 5c3484
	cmp	t0, u2
Packit 5c3484
	cmovnc	d0, %rax
Packit 5c3484
	cmovnc	d1, %rdx
Packit 5c3484
	adc	$0, t1
Packit 5c3484
	C NOTE(review): the purpose of this nop is not evident from the code;
	C presumably alignment or scheduling padding -- confirm.
	nop
Packit 5c3484
	add	%rax, u1
Packit 5c3484
	adc	%rdx, u2
Packit 5c3484
	C If u2 >= d1 the remainder may not yet be below d1:d0; the out-of-line
	C fix-up performs the full two-limb comparison.
	cmp	d1, u2
Packit 5c3484
	jae	L(fix_qh)
Packit 5c3484
L(bck_qh):
Packit 5c3484
	push	t1	C push qh on stack
Packit 5c3484
Packit 5c3484
	jmp	L(next)
Packit 5c3484
Packit 5c3484
	ALIGN(16)
Packit 5c3484
L(loop):
Packit 5c3484
	C udiv_qr_3by2 (q,u2,u1,u2,u1,n0, d1,d0,di)
Packit 5c3484
	C Based on the optimized divrem_2.asm code.
Packit 5c3484
Packit 5c3484
	C On entry to each iteration u2:u1 holds the remainder of the previous
	C step and un indexes the dividend limb to bring in.
	C %rdx:%rax = di * u2
	mov	di, %rax
Packit 5c3484
	mul	u2
Packit 5c3484
	C t1 = top %cl bits of the new limb u0, ORed into u1.  t1 is used as a
	C temporary here and is overwritten with q a few instructions later.
	mov	(up, un, 8), u0
Packit 5c3484
	xor	R32(t1), R32(t1)
Packit 5c3484
	shld	%cl, u0, t1
Packit 5c3484
	or	t1, u1
Packit 5c3484
	C t1:t0 = di * u2 + u2:u1
	mov	u1, t0
Packit 5c3484
	add	%rax, t0	C q0 in t0
Packit 5c3484
	adc	u2, %rdx
Packit 5c3484
	mov	%rdx, t1	C q in t1
Packit 5c3484
	C u2 = u1 - q * d1 (low 64 bits), and %rdx:%rax = q * d0.
	imul	md1, %rdx
Packit 5c3484
	mov	d0, %rax
Packit 5c3484
	lea	(%rdx, u1), u2
Packit 5c3484
	mul	t1
Packit 5c3484
	C u1 = u0 shifted left by %cl, the new low remainder limb.
	mov	u0, u1
Packit 5c3484
	shl	%cl, u1
Packit 5c3484
	C Subtract d1:d0 once and then q * d0 from u2:u1.
	sub	d0, u1
Packit 5c3484
	sbb	d1, u2
Packit 5c3484
	sub	%rax, u1
Packit 5c3484
	sbb	%rdx, u2
Packit 5c3484
	C Same branch-free adjustment as in the qh step: carry from the cmp
	C (u2 < t0) increments t1; no carry adds d1:d0 back to u2:u1.
	xor	R32(%rax), R32(%rax)
Packit 5c3484
	xor	R32(%rdx), R32(%rdx)
Packit 5c3484
	cmp	t0, u2
Packit 5c3484
	cmovnc	d0, %rax
Packit 5c3484
	cmovnc	d1, %rdx
Packit 5c3484
	adc	$0, t1
Packit 5c3484
	nop
Packit 5c3484
	add	%rax, u1
Packit 5c3484
	adc	%rdx, u2
Packit 5c3484
	C Unlikely case u2 >= d1 is handled out of line at L(fix).
	cmp	d1, u2
Packit 5c3484
	jae	L(fix)
Packit 5c3484
L(bck):
Packit 5c3484
	C Store the finished quotient limb.
	mov	t1, (qp, un, 8)
Packit 5c3484
L(next):
Packit 5c3484
	C The sub sets carry once un was already 0, which ends the loop.
	sub	$1, un
Packit 5c3484
	jnc	L(loop)
Packit 5c3484
L(end):
Packit 5c3484
	C qh on stack
Packit 5c3484
	C %rax = qh is the return value.  The second pop restores rp, whose
	C register was in use as u0.
	pop	%rax
Packit 5c3484
	pop	rp
Packit 5c3484
	C Shift the two-limb remainder right by %cl and store it.
	shrd	%cl, u2, u1
Packit 5c3484
	shr	%cl, u2
Packit 5c3484
	mov	u2, 8(rp)
Packit 5c3484
	mov	u1, (rp)
Packit 5c3484
Packit 5c3484
	C Restore the saved registers in reverse push order.
	pop	%rbp
Packit 5c3484
	pop	%rbx
Packit 5c3484
	pop	%r12
Packit 5c3484
	pop	%r13
Packit 5c3484
	pop	%r14
Packit 5c3484
	pop	%r15
Packit 5c3484
	ret
Packit 5c3484
Packit 5c3484
L(fix):	C Unlikely update. u2 >= d1
Packit 5c3484
	C The flags still come from cmp d1, u2 (jae does not change them), so
	C %dl = (u2 > d1).  Then %al = (u1 >= d0).  If both are zero the
	C remainder is already below d1:d0 and nothing needs correcting.
	C Otherwise increment the quotient limb and subtract d1:d0 once more.
	seta	%dl
Packit 5c3484
	cmp	d0, u1
Packit 5c3484
	setae	%al
Packit 5c3484
	orb	%dl, %al		C "orb" form to placate Sun tools
Packit 5c3484
	je	L(bck)
Packit 5c3484
	inc	t1
Packit 5c3484
	sub	d0, u1
Packit 5c3484
	sbb	d1, u2
Packit 5c3484
	jmp	L(bck)
Packit 5c3484
Packit 5c3484
C Duplicated, just jumping back to a different address.
Packit 5c3484
L(fix_qh):	C Unlikely update. u2 >= d1
Packit 5c3484
	C Same test and correction as L(fix), applied to qh before it is pushed.
	seta	%dl
Packit 5c3484
	cmp	d0, u1
Packit 5c3484
	setae	%al
Packit 5c3484
	orb	%dl, %al		C "orb" form to placate Sun tools
Packit 5c3484
	je	L(bck_qh)
Packit 5c3484
	inc	t1
Packit 5c3484
	sub	d0, u1
Packit 5c3484
	sbb	d1, u2
Packit 5c3484
	jmp	L(bck_qh)
Packit 5c3484
EPILOGUE()
rpm-build 01f633
C NOTE(review): CF_PROT is not defined in this file; presumably it is provided
C through config.m4 -- confirm what it emits.
CF_PROT