Blame mpn/x86_64/div_qr_2n_pi1.asm

dnl  x86-64 mpn_div_qr_2n_pi1
dnl  -- Divide an mpn number by a normalized 2-limb number,
dnl     using a single-limb inverse.

dnl  Copyright 2007, 2008, 2010-2012 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of either:
dnl
dnl    * the GNU Lesser General Public License as published by the Free
dnl      Software Foundation; either version 3 of the License, or (at your
dnl      option) any later version.
dnl
dnl  or
dnl
dnl    * the GNU General Public License as published by the Free Software
dnl      Foundation; either version 2 of the License, or (at your option) any
dnl      later version.
dnl
dnl  or both in parallel, as here.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
dnl  for more details.
dnl
dnl  You should have received copies of the GNU General Public License and the
dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
dnl  see https://www.gnu.org/licenses/.
Packit 5c3484
Packit 5c3484
include(`../config.m4')
Packit 5c3484
Packit 5c3484
Packit 5c3484
C		c/l
C INPUT PARAMETERS
C qp        destination for the quotient limbs (written as qp[un] in the loop)
C rp        destination for the two remainder limbs (rp[0] and rp[1])
C up_param  dividend pointer as passed in; copied to up at function entry
C un        dividend limb count on entry, then the loop index
C d1, d0    high and low limbs of the divisor
C di_param  stack slot of the inverse at function entry (STD64 layout; the
C           DOS64 path redefines it inside the function)
define(`qp',		`%rdi')
define(`rp',		`%rsi')
define(`up_param',	`%rdx')
define(`un',		`%rcx')
define(`d1',		`%r8')
define(`d0',		`%r9')
define(`di_param',	`8(%rsp)')

C WORKING REGISTERS
C di      inverse, loaded from di_param
C up      dividend pointer, kept out of %rdx because mul writes %rdx
C u2, u1  high and low limbs of the running remainder
C t1, t0  quotient limb q and the low product limb q0 inside the loop
C         (also scratch for the initial conditional subtraction)
C md1     negated d1, the imul operand in the loop
C u2, u1, t1, t0 and md1 live in callee-saved registers, which the function
C pushes on entry and pops before returning.
define(`di',		`%r10')
define(`up',		`%r11')
define(`u2',		`%rbx')
define(`u1',		`%r12')
define(`t1',		`%r13')
define(`t0',		`%r14')
define(`md1',		`%r15')
Packit 5c3484
Packit 5c3484
C TODO
C * Store qh in the same stack slot as di_param, instead of pushing
C   it. (We could put it in register %rbp, but then we would need to
C   save and restore that instead, which doesn't seem like a win.)
Packit 5c3484
Packit 5c3484
C Both calling conventions are declared here; the DOS64 differences are
C handled by the IFDOS lines at the start of the function.
ABI_SUPPORT(DOS64)
ABI_SUPPORT(STD64)

ASM_START()
	TEXT
	C 16-byte alignment for the function entry that follows.
	ALIGN(16)
Packit 5c3484
C mpn_div_qr_2n_pi1 (qp, rp, up, un, d1, d0, di)
C
C Divide the un-limb number at up by the normalized two-limb divisor d1:d0,
C using the single-limb inverse di.
C
C In:   qp, rp, up, un, d1, d0 in registers and di on the stack, as named by
C       the defines earlier in this file.
C Out:  %rax = qh, the most significant quotient limb (0 or 1).
C       qp[un-3] ... qp[0] receive the remaining quotient limbs.
C       rp[1], rp[0] receive the two remainder limbs.
C Uses: %rax, %rdx, %r10, %r11 as scratch; %rbx and %r12-%r15 are pushed on
C       entry and popped before returning; one further stack slot holds qh.
C
C NOTE(review): the two top limbs are read unconditionally, so un >= 2 looks
C like a precondition -- confirm against the callers.
C NOTE(review): FUNC_ENTRY, FUNC_EXIT, IFDOS and R32 are macros defined
C outside this file; presumably FUNC_ENTRY(4) maps the four DOS64 register
C arguments onto the STD64 registers -- verify in the shared m4 definitions.
PROLOGUE(mpn_div_qr_2n_pi1)
	FUNC_ENTRY(4)
	C DOS64 only: d1 and d0 arrive on the stack, and the inverse sits in a
	C different stack slot than under STD64.
IFDOS(`	mov	56(%rsp), %r8	')
IFDOS(`	mov	64(%rsp), %r9	')
IFDOS(`define(`di_param', `72(%rsp)')')
	mov	di_param, di		C read before the pushes below move %rsp
	mov	up_param, up		C mul overwrites %rdx, so keep up elsewhere
	push	%r15
	push	%r14
	push	%r13
	push	%r12
	push	%rbx

	mov	-16(up, un, 8), u1	C u1 = up[un-2]
	mov	-8(up, un, 8), u2	C u2 = up[un-1]

	C Conditionally subtract the divisor from the two top limbs:
	C t1:t0 = u2:u1 - d1:d0, kept only when the subtraction did not borrow.
	mov	u1, t0
	mov	u2, t1
	sub	d0, t0
	sbb	d1, t1
	cmovnc  t0, u1
	cmovnc	t1, u2
	C push qh which is !carry
	C (the cmovs leave the flags alone, so carry is still the borrow)
	sbb	%rax, %rax		C rax = -carry
	inc	%rax			C rax = 1 - carry
	push	%rax
	lea	-2(un), un		C the two top limbs have been consumed
	mov	d1, md1
	neg	md1			C md1 = -d1

	jmp	L(next)

	ALIGN(16)
L(loop):
	C udiv_qr_3by2 (q,u2,u1,u2,u1,n0, d1,d0,di)
	C Based on the optimized divrem_2.asm code.
	C
	C Here u2:u1 is the running remainder and n0 = up[un] is the next
	C dividend limb.  The step leaves the quotient limb in t1 and the new
	C remainder in u2:u1.

	mov	di, %rax
	mul	u2			C rdx:rax = di * u2
	mov	u1, t0
	add	%rax, t0	C q0 in t0
	adc	u2, %rdx
	mov	%rdx, t1	C q in t1
	imul	md1, %rdx		C rdx = low limb of -d1 * q
	mov	d0, %rax
	lea	(%rdx, u1), u2		C u2 = u1 - d1 * q
	mul	t1			C rdx:rax = d0 * q
	mov	(up, un, 8), u1		C u1 = n0
	sub	d0, u1
	sbb	d1, u2			C u2:u1 -= d1:d0
	sub	%rax, u1
	sbb	%rdx, u2		C u2:u1 -= d0 * q
	xor	R32(%rax), R32(%rax)
	xor	R32(%rdx), R32(%rdx)	C addend defaults to zero
	cmp	t0, u2			C carry set exactly when u2 < q0
	cmovnc	d0, %rax		C u2 >= q0: add the divisor back
	cmovnc	d1, %rdx
	adc	$0, t1			C u2 < q0: increment q instead
	C NOTE(review): purpose of the nop is not evident from the code;
	C presumably padding for alignment or scheduling.
	nop
	add	%rax, u1
	adc	%rdx, u2
	cmp	d1, u2
	jae	L(fix)
L(bck):
	mov	t1, (qp, un, 8)		C qp[un] = q
L(next):
	sub	$1, un
	jnc	L(loop)			C loop until un borrows below zero
L(end):
	mov	u2, 8(rp)		C rp[1] = high remainder limb
	mov	u1, (rp)		C rp[0] = low remainder limb

	C qh on stack
	pop	%rax			C return value

	pop	%rbx
	pop	%r12
	pop	%r13
	pop	%r14
	pop	%r15
	FUNC_EXIT()
	ret

L(fix):	C Unlikely update. u2 >= d1
	C With u2 >= d1 known, u2:u1 >= d1:d0 exactly when u2 > d1 or u1 >= d0.
	seta	%dl			C flags still come from the cmp d1, u2 above
	cmp	d0, u1
	setae	%al
	orb	%dl, %al		C "orb" form to placate Sun tools
	je	L(bck)			C neither holds: the remainder is already reduced
	inc	t1
	sub	d0, u1
	sbb	d1, u2
	jmp	L(bck)
EPILOGUE()