dnl  x86-64 mpn_divrem_1 -- mpn by limb division.

dnl  Copyright 2004, 2005, 2007-2012, 2014 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of either:
dnl
dnl    * the GNU Lesser General Public License as published by the Free
dnl      Software Foundation; either version 3 of the License, or (at your
dnl      option) any later version.
dnl
dnl  or
dnl
dnl    * the GNU General Public License as published by the Free Software
dnl      Foundation; either version 2 of the License, or (at your option) any
dnl      later version.
dnl
dnl  or both in parallel, as here.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
dnl  for more details.
dnl
dnl  You should have received copies of the GNU General Public License and the
dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
dnl  see https://www.gnu.org/licenses/.

include(`../config.m4')


C Timing table.  NOTE(review): the figures are presumably cycles per limb for
C the normalized, unnormalized and fraction loops -- confirm against upstream.
C
C		norm	unorm	frac
C AMD K8,K9	13	13	12
C AMD K10	13	13	12
C Intel P4	43	44	43
C Intel core2	24.5	24.5	19.5
C Intel corei	20.5	19.5	18
C Intel atom	43	46	36
C VIA nano	25.5	25.5	24

C mp_limb_t
C mpn_divrem_1 (mp_ptr qp, mp_size_t fn,
C               mp_srcptr np, mp_size_t nn, mp_limb_t d)

C mp_limb_t
C mpn_preinv_divrem_1 (mp_ptr qp, mp_size_t fn,
C                      mp_srcptr np, mp_size_t nn, mp_limb_t d,
C                      mp_limb_t dinv, int cnt)

C INPUT PARAMETERS
C These are the registers in which the arguments are found after FUNC_ENTRY
C and the IFDOS stack loads at the top of each function.
define(`qp',		`%rdi')
define(`fn_param',	`%rsi')
define(`up_param',	`%rdx')
define(`un_param',	`%rcx')
define(`d',		`%r8')
define(`dinv',		`%r9')		C only for mpn_preinv_divrem_1
C       shift passed on stack		C only for mpn_preinv_divrem_1

C WORKING REGISTERS
C cnt shares %rcx with un_param and up shares %rsi with fn_param, so each
C prologue copies fn_param to fn and un_param to un before those two registers
C are overwritten.
define(`cnt',		`%rcx')
define(`up',		`%rsi')
define(`fn',		`%r12')
define(`un',		`%rbx')


C rax rbx rcx rdx rsi rdi rbp r8  r9  r10 r11 r12 r13 r14 r15
C     un  cnt     up  qp      d  dinv         fn

ABI_SUPPORT(DOS64)
ABI_SUPPORT(STD64)

C CNTOFF(reg) addresses the stack argument cnt of mpn_preinv_divrem_1 at the
C point where it is read, which is after the four register pushes of the
C prologue.  STD64: 8 bytes of return address plus 4*8 bytes of pushes = 40.
C DOS64: 104, which is 64 bytes more.  NOTE(review): that difference is
C consistent with 32 bytes of shadow space, two preceding stack arguments and
C two registers pushed by FUNC_ENTRY, but FUNC_ENTRY is defined elsewhere --
C confirm.
IFSTD(`define(`CNTOFF',		`40($1)')')
IFDOS(`define(`CNTOFF',		`104($1)')')

ASM_START()
	TEXT
	ALIGN(16)
C -----------------------------------------------------------------------------
C mp_limb_t mpn_preinv_divrem_1 (mp_ptr qp, mp_size_t fn,
C                                mp_srcptr np, mp_size_t nn, mp_limb_t d,
C                                mp_limb_t dinv, int cnt)
C
C Entry point for callers that supply the inverse dinv and the shift count cnt
C themselves.  It performs the same register setup as mpn_divrem_1 and then
C jumps into labels that live inside mpn_divrem_1, either L(nent) or L(uent),
C so the two functions have to stay in the same file.  No call to
C mpn_invert_limb is made on this path.
C
C In:   qp, fn_param, up_param, un_param, d, dinv in the registers listed
C       under INPUT PARAMETERS, cnt on the stack at CNTOFF(%rsp).
C Out:  %rax, produced by the shared exit code at L(ret) in mpn_divrem_1.
C
C d is shifted left by cnt here, so it is expected unshifted on entry.
C
C NOTE(review): unlike mpn_divrem_1 this entry has no test for nn + fn == 0,
C and the L(uent) path reads a source limb unconditionally.  Presumably
C callers guarantee nn >= 1 -- confirm against the callers.
C -----------------------------------------------------------------------------
PROLOGUE(mpn_preinv_divrem_1)
	FUNC_ENTRY(4)
C On DOS64 the fifth and sixth arguments are fetched from the stack.
IFDOS(`	mov	56(%rsp), %r8	')
IFDOS(`	mov	64(%rsp), %r9	')
	xor	R32(%rax), R32(%rax)	C initial remainder is 0
	push	%r13
	push	%r12
	push	%rbp
	push	%rbx

	mov	fn_param, fn		C copy fn out of %rsi before up reuses it
	mov	un_param, un		C copy nn out of %rcx before it is modified
	add	fn_param, un_param	C un_param = nn + fn
	mov	up_param, up

	lea	-8(qp,un_param,8), qp	C qp -> highest quotient limb to be stored

	test	d, d
	js	L(nent)			C high bit of d set: normalized loop

	mov	CNTOFF(%rsp), R8(cnt)	C only the low byte of cnt is loaded
	shl	R8(cnt), d		C normalize the divisor
	jmp	L(uent)
EPILOGUE()
C -----------------------------------------------------------------------------
C mp_limb_t mpn_divrem_1 (mp_ptr qp, mp_size_t fn,
C                         mp_srcptr np, mp_size_t nn, mp_limb_t d)
C
C Divide the nn limbs at np by the single limb d, then continue for fn further
C steps in which no source limb is read (the fraction part).  One quotient
C limb is stored per step, starting at qp + 8*(nn+fn-1) and moving towards
C lower addresses.  The value returned in %rax is the last remainder, shifted
C right by the normalization count.
C
C Returns 0 immediately when nn + fn == 0.
C NOTE(review): d is assumed to be nonzero, bsr is applied to it below.
C
C Register roles inside the function:
C   %rdi  qp    address of the next quotient limb to store
C   %rsi  up    source pointer
C   %rbx  un    number or index of source limbs still to process
C   %r12  fn    fraction steps still to perform
C   %r8   d     divisor, shifted left by cnt on the unnormalized path
C   %r9   dinv  value returned by mpn_invert_limb for the shifted d
C   %rcx  cnt   shift count, 0 on the normalized path
C   %rax        running remainder between steps
C   %r13        quotient limb being formed
C   %rbp, %r10, %r11   per-loop temporaries, described at each loop
C
C %rbx, %rbp, %r12 and %r13 are pushed in the prologue and popped at L(ret).
C -----------------------------------------------------------------------------
	ALIGN(16)
PROLOGUE(mpn_divrem_1)
	FUNC_ENTRY(4)
C On DOS64 the fifth argument is fetched from the stack.
IFDOS(`	mov	56(%rsp), %r8	')
	xor	R32(%rax), R32(%rax)	C return value for the early exit below
	push	%r13
	push	%r12
	push	%rbp
	push	%rbx

	mov	fn_param, fn		C copy fn out of %rsi before up reuses it
	mov	un_param, un		C copy nn out of %rcx before it is modified
	add	fn_param, un_param	C un_param = nn + fn, ZF set when that is 0
	mov	up_param, up		C mov leaves the flags of the add intact
	je	L(ret)			C nn + fn == 0: nothing to do

	lea	-8(qp,un_param,8), qp	C qp -> highest quotient limb to be stored
	xor	R32(%rbp), R32(%rbp)	C remainder starts at 0

	test	d, d
	jns	L(unnormalized)		C high bit of d clear

C Divisor with the high bit set.  The top source limb is handled first: its
C quotient limb is 1 when the limb is >= d and 0 otherwise, and %rbp is left
C holding the limb reduced below d.
L(normalized):
	test	un, un
	je	L(8)			C un == 0
	mov	-8(up,un,8), %rbp	C top source limb
	dec	un
	mov	%rbp, %rax
	sub	d, %rbp			C CF set when the top limb is below d
	cmovc	%rax, %rbp		C on borrow restore the unreduced limb
	sbb	R32(%rax), R32(%rax)	C eax = 0 without borrow, -1 with borrow
	inc	R32(%rax)		C rax = 1 without borrow, 0 with borrow
	mov	%rax, (qp)
	lea	-8(qp), qp
C dinv = mpn_invert_limb (d).  %r8 is saved around the call in both ABIs and
C %rdi, %rsi additionally for STD64.  Counting the return address, the four
C prologue pushes and these three pushes gives 64 bytes, so the STD64 stack is
C 16-byte aligned at the call.  The DOS64 variant reserves 32 bytes below the
C saved %r8 for the callee.  The ASSERT is a check of %rsp alignment, its
C expansion is defined elsewhere.
L(8):
IFSTD(`	push	%rdi		')
IFSTD(`	push	%rsi		')
	push	%r8
IFSTD(`	mov	d, %rdi		')
IFDOS(`	sub	$32, %rsp	')
IFDOS(`	mov	d, %rcx		')
	ASSERT(nz, `test $15, %rsp')
	CALL(	mpn_invert_limb)
IFDOS(`	add	$32, %rsp	')
	pop	%r8
IFSTD(`	pop	%rsi		')
IFSTD(`	pop	%rdi		')

	mov	%rax, dinv
	mov	%rbp, %rax		C rax = running remainder
	jmp	L(nent)

C Loop for a normalized divisor, one source limb and one quotient limb per
C iteration.  L(nent) is the entry point and sets rbp = rax + 1.
C
C   rdx:rax = rax * dinv + rbp:r10     r10 = source limb, 128-bit sum
C   rbp = low half, r13 = high half
C   r10 = r10 - r13 * d                modulo 2^64
C   if r10 < rbp (unsigned):  rax = r10,     r13 unchanged
C   otherwise:                rax = r10 + d, r13 = r13 - 1
C   if rax >= d:              rax = rax - d, r13 = r13 + 1   at L(nfx)
C   store r13, rax is the new remainder
C
C The numeric columns on the right are the original timing annotations.
	ALIGN(16)
L(ntop):mov	(up,un,8), %r10		C	    K8-K10  P6-CNR P6-NHM  P4
	mul	dinv			C	      0,13   0,20   0,18   0,45
	add	%r10, %rax		C	      4      8      3     12
	adc	%rbp, %rdx		C	      5      9     10     13
	mov	%rax, %rbp		C	      5      9      4     13
	mov	%rdx, %r13		C	      6     11     12     23
	imul	d, %rdx			C	      6     11     11     23
	sub	%rdx, %r10		C	     10     16     14     33
	mov	d, %rax			C
	add	%r10, %rax		C	     11     17     15     34
	cmp	%rbp, %r10		C	     11     17     15     34
	cmovc	%r10, %rax		C	     12     18     16     35
	adc	$-1, %r13		C
	cmp	d, %rax			C
	jae	L(nfx)			C
L(nok):	mov	%r13, (qp)		C
	sub	$8, qp			C
L(nent):lea	1(%rax), %rbp		C
	dec	un			C
	jns	L(ntop)			C

	xor	R32(%rcx), R32(%rcx)	C cnt = 0, the final shr leaves rax as is
	jmp	L(frac)

C Out of line fix-up for the normalized loop.
L(nfx):	sub	d, %rax
	inc	%r13
	jmp	L(nok)

C Divisor with the high bit clear.  %rbp is 0 on arrival.  When the top
C source limb is below d, its quotient limb is 0: that zero is stored, the
C limb becomes the initial remainder in %rbp and un is decremented.
L(unnormalized):
	test	un, un
	je	L(44)
	mov	-8(up,un,8), %rax
	cmp	d, %rax
	jae	L(44)			C top limb >= d: go through the full loop
	mov	%rbp, (qp)		C store 0 as the top quotient limb
	mov	%rax, %rbp
	lea	-8(qp), qp
C NOTE(review): the flags here are still those of the cmp above, since mov and
C lea do not modify them, and the jae was not taken, so this je looks
C unreachable.  Left in place unchanged.
	je	L(ret)
	dec	un
C cnt = number of leading zero bits of d.  bsr gives the index of the highest
C set bit, and the low six bits of its complement equal 63 minus that index.
C The shifts below take their count from %cl.  d and the remainder in %rbp
C are both shifted left by cnt.
L(44):
	bsr	d, %rcx
	not	R32(%rcx)
	shl	R8(%rcx), d
	shl	R8(%rcx), %rbp

C dinv = mpn_invert_limb (shifted d).  %rcx and %r8 are saved around the call
C in both ABIs and %rdi, %rsi additionally for STD64.  STD64: return address,
C four prologue pushes, four pushes here and 8 bytes of padding make 80 bytes,
C so the stack is 16-byte aligned at the call.  DOS64 reserves 40 bytes.
	push	%rcx
IFSTD(`	push	%rdi		')
IFSTD(`	push	%rsi		')
	push	%r8
IFSTD(`	sub	$8, %rsp	')
IFSTD(`	mov	d, %rdi		')
IFDOS(`	sub	$40, %rsp	')
IFDOS(`	mov	d, %rcx		')
	ASSERT(nz, `test $15, %rsp')
	CALL(	mpn_invert_limb)
IFSTD(`	add	$8, %rsp	')
IFDOS(`	add	$40, %rsp	')
	pop	%r8
IFSTD(`	pop	%rsi		')
IFSTD(`	pop	%rdi		')
	pop	%rcx

	mov	%rax, dinv
	mov	%rbp, %rax		C rax = shifted running remainder
	test	un, un
	je	L(frac)			C no source limbs left

C Entry to the unnormalized loop, also reached from mpn_preinv_divrem_1 with
C rax = 0.  The bits of the top remaining limb that the left shift by cnt
C would push out are moved into the remainder: the limb is shifted right by
C the negated count and ORed into rax.
L(uent):dec	un
	mov	(up,un,8), %rbp
	neg	R32(%rcx)
	shr	R8(%rcx), %rbp
	neg	R32(%rcx)
	or	%rbp, %rax
	jmp	L(ent)

C Loop for an unnormalized divisor.  L(ent) loads rbp = up[un], decrements un
C and sets r11 = rax + 1.  Each iteration first forms the shifted limb
C
C   rbp = (rbp << cnt) | (up[un] >> negated cnt)
C
C and then performs the same step as the normalized loop, with rbp in the
C role of the source limb and r11 holding remainder + 1 and then the low half
C of the 128-bit sum:
C
C   rdx:rax = rax * dinv + r11:rbp     r11 = low half, r13 = high half
C   rbp = rbp - r13 * d                modulo 2^64
C   if rbp < r11 (unsigned):  rax = rbp,     r13 unchanged
C   otherwise:                rax = rbp + d, r13 = r13 - 1
C   if rax >= d:              rax = rax - d, r13 = r13 + 1   at L(ufx)
	ALIGN(16)
L(utop):mov	(up,un,8), %r10
	shl	R8(%rcx), %rbp
	neg	R32(%rcx)
	shr	R8(%rcx), %r10
	neg	R32(%rcx)
	or	%r10, %rbp
	mul	dinv
	add	%rbp, %rax
	adc	%r11, %rdx
	mov	%rax, %r11
	mov	%rdx, %r13
	imul	d, %rdx
	sub	%rdx, %rbp
	mov	d, %rax
	add	%rbp, %rax
	cmp	%r11, %rbp
	cmovc	%rbp, %rax
	adc	$-1, %r13
	cmp	d, %rax
	jae	L(ufx)
L(uok):	mov	%r13, (qp)
	sub	$8, qp
L(ent):	mov	(up,un,8), %rbp
	dec	un
	lea	1(%rax), %r11		C lea keeps the flags of the dec for jns
	jns	L(utop)

C Last source limb: there is no lower limb to take bits from, so rbp is only
C shifted left.  The step is otherwise the same as in L(utop).
L(uend):shl	R8(%rcx), %rbp
	mul	dinv
	add	%rbp, %rax
	adc	%r11, %rdx
	mov	%rax, %r11
	mov	%rdx, %r13
	imul	d, %rdx
	sub	%rdx, %rbp
	mov	d, %rax
	add	%rbp, %rax
	cmp	%r11, %rbp
	cmovc	%rbp, %rax
	adc	$-1, %r13
	cmp	d, %rax
	jae	L(efx)
L(eok):	mov	%r13, (qp)
	sub	$8, qp
	jmp	L(frac)

C Out of line fix-ups for L(utop) and L(uend).
L(ufx):	sub	d, %rax
	inc	%r13
	jmp	L(uok)
L(efx):	sub	d, %rax
	inc	%r13
	jmp	L(eok)

C Fraction part: fn further steps with no source limb.  rbp = -d, so the imul
C yields minus the product directly.  L(fent) sets r11 = rax + 1.
C
C   rdx:rax = rax * dinv, rdx = rdx + r11
C   r11 = low half, r13 = rdx
C   rdx = r13 * rbp                    that is -(r13 * d) modulo 2^64
C   if rdx < r11 (unsigned):  rax = rdx,     r13 unchanged
C   otherwise:                rax = rdx + d, r13 = r13 - 1
C   store r13
C
C This loop has no rax >= d fix-up branch.
L(frac):mov	d, %rbp
	neg	%rbp
	jmp	L(fent)

	ALIGN(16)			C	    K8-K10  P6-CNR P6-NHM  P4
L(ftop):mul	dinv			C	      0,12   0,17   0,17
	add	%r11, %rdx		C	      5      8     10
	mov	%rax, %r11		C	      4      8      3
	mov	%rdx, %r13		C	      6      9     11
	imul	%rbp, %rdx		C	      6      9     11
	mov	d, %rax			C
	add	%rdx, %rax		C	     10     14     14
	cmp	%r11, %rdx		C	     10     14     14
	cmovc	%rdx, %rax		C	     11     15     15
	adc	$-1, %r13		C
	mov	%r13, (qp)		C
	sub	$8, qp			C
L(fent):lea	1(%rax), %r11		C
	dec	fn			C
	jns	L(ftop)			C

	shr	R8(%rcx), %rax		C undo the normalization shift
C Shared exit: restore the registers pushed in the prologue, in reverse order.
L(ret):	pop	%rbx
	pop	%rbp
	pop	%r12
	pop	%r13
	FUNC_EXIT()
	ret
EPILOGUE()
C NOTE(review): CF_PROT is not defined in this file.  It is presumably an m4
C macro supplied through config.m4 by the distribution packaging -- confirm
C that it is defined before building, otherwise the bare word reaches the
C assembler.
CF_PROT