dnl  x86 mpn_add_n/mpn_sub_n -- mpn addition and subtraction.

dnl  Copyright 1992, 1994-1996, 1999-2002 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of either:
dnl
dnl    * the GNU Lesser General Public License as published by the Free
dnl      Software Foundation; either version 3 of the License, or (at your
dnl      option) any later version.
dnl
dnl  or
dnl
dnl    * the GNU General Public License as published by the Free Software
dnl      Foundation; either version 2 of the License, or (at your option) any
dnl      later version.
dnl
dnl  or both in parallel, as here.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
dnl  for more details.
dnl
dnl  You should have received copies of the GNU General Public License and the
dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
dnl  see https://www.gnu.org/licenses/.

include(`../config.m4')


C     cycles/limb
C P5	3.375
C P6	3.125
C K6	3.5
C K7	2.25
C P4	8.75


C Select the carry-propagating instruction and the two function names
C according to which OPERATION_ symbol is defined: adcl with the add names,
C or sbbl with the sub names.  If neither symbol is defined the build is
C stopped through m4_error.
ifdef(`OPERATION_add_n',`
	define(M4_inst,        adcl)
	define(M4_function_n,  mpn_add_n)
	define(M4_function_nc, mpn_add_nc)

',`ifdef(`OPERATION_sub_n',`
	define(M4_inst,        sbbl)
	define(M4_function_n,  mpn_sub_n)
	define(M4_function_nc, mpn_sub_nc)

',`m4_error(`Need OPERATION_add_n or OPERATION_sub_n
')')')

C The four function names this source can produce, two per OPERATION_ choice.
MULFUNC_PROLOGUE(mpn_add_n mpn_add_nc mpn_sub_n mpn_sub_nc)


C mp_limb_t M4_function_n (mp_ptr dst, mp_srcptr src1, mp_srcptr src2,
C                          mp_size_t size);
C mp_limb_t M4_function_nc (mp_ptr dst, mp_srcptr src1, mp_srcptr src2,
C	                    mp_size_t size, mp_limb_t carry);

C Stack offsets of the five arguments, 4 bytes apart in declaration order.
C NOTE(review): presumably these are relative to the stack pointer at function
C entry, with the return address at offset 0 and later pushes accounted for
C through FRAME - confirm against the defframe definition.
defframe(PARAM_CARRY,20)
defframe(PARAM_SIZE, 16)
defframe(PARAM_SRC2, 12)
defframe(PARAM_SRC1, 8)
defframe(PARAM_DST,  4)

	TEXT
	ALIGN(8)

C M4_function_nc -- same operation as M4_function_n, but bit 0 of the fifth
C argument is used as the initial carry or borrow.
C
C Register use:
C   edi  dst pointer
C   esi  src1 pointer
C   edx  src2 pointer
C   ecx  passes to make through the 8-limb unrolled loop
C   eax  entry index, then the computed entry address
C
C This entry point has no loop or return of its own.  It loads the registers,
C sets the carry flag from the carry argument and then jumps into the unrolled
C loop of M4_function_n, either directly at a computed address inside L(oop)
C or, when size is a multiple of 8, through L(oopgo).  The result is produced
C and returned by that shared code.
PROLOGUE(M4_function_nc)
deflit(`FRAME',0)

	C edi and esi are saved here and restored by the shared exit code
	C after the loop.
	pushl	%edi		FRAME_pushl()
	pushl	%esi		FRAME_pushl()

	movl	PARAM_DST,%edi
	movl	PARAM_SRC1,%esi
	movl	PARAM_SRC2,%edx
	movl	PARAM_SIZE,%ecx

	C ecx = size / 8 whole passes.  eax = (-size) mod 8 is the number of
	C limb slots to skip at the start of the first pass.  When it is zero
	C the carry is loaded at L(oopgo) instead, which then falls into the
	C top of the loop.
	movl	%ecx,%eax
	shrl	$3,%ecx			C compute count for unrolled loop
	negl	%eax
	andl	$7,%eax			C get index where to start loop
	jz	L(oopgo)		C necessary special case for 0
	incl	%ecx			C adjust loop count
	C Move the three pointers back by 4 bytes per skipped slot, so that
	C the displacement used at the entry point addresses limb 0.
	shll	$2,%eax			C adjustment for pointers...
	subl	%eax,%edi		C ... since they are offset ...
	subl	%eax,%esi		C ... by a constant when we ...
	subl	%eax,%edx		C ... enter the loop
	shrl	$2,%eax			C restore previous value

	C Entry address = L(oop) - 3 + 9 * index.
ifdef(`PIC',`
	C Calculate start address in loop for PIC.  Due to limitations in
	C old gas, LF(M4_function_n,oop)-L(0a)-3 cannot be put into the leal
	call	L(0a)			C pushes the run-time address of L(0a)
L(0a):	leal	(%eax,%eax,8),%eax	C eax = 9 * index
	addl	(%esp),%eax		C add the address of L(0a)
	addl	$L(oop)-L(0a)-3,%eax	C add the distance from L(0a) to L(oop)-3
	addl	$4,%esp			C discard the address pushed by the call
',`
	C Calculate start address in loop for non-PIC.
	leal	L(oop)-3(%eax,%eax,8),%eax
')

	C These lines initialize carry from the 5th parameter.  Should be
	C possible to simplify.
	C ebp is only borrowed as scratch: it is saved and restored around
	C the shift, and neither the popl nor the jmp changes the carry flag.
	pushl	%ebp		FRAME_pushl()
	movl	PARAM_CARRY,%ebp
	shrl	%ebp			C shift bit 0 into carry
	popl	%ebp		FRAME_popl()

	jmp	*%eax			C jump into loop

EPILOGUE()


C M4_function_n -- combine the size-limb vectors at src1 and src2 with M4_inst
C (adcl or sbbl), store the result limbs at dst, and return the final carry or
C borrow (0 or 1) in eax.
C
C Register use:
C   edi  dst pointer
C   esi  src1 pointer
C   edx  src2 pointer
C   ecx  passes remaining through the 8-limb unrolled loop
C   eax  entry index and computed entry address, then limb scratch
C
C When size is not a multiple of 8 the loop is entered part way through, so
C the first pass handles size mod 8 limbs and every later pass handles 8.
C The carry flag is the only carry state, so nothing executed between two
C M4_inst instructions may modify it.
C
C The labels L(oopgo) and L(oop) are also jump targets of M4_function_nc.
C
C NOTE(review): size 0 is not handled specially; ecx would start at 0 and the
C decl/jnz pair at the bottom of the loop would wrap.  Presumably callers
C guarantee size >= 1 - confirm.
	ALIGN(16)
PROLOGUE(M4_function_n)
deflit(`FRAME',0)

	C edi and esi are saved here and restored before the ret.
	pushl	%edi		FRAME_pushl()
	pushl	%esi		FRAME_pushl()

	movl	PARAM_DST,%edi
	movl	PARAM_SRC1,%esi
	movl	PARAM_SRC2,%edx
	movl	PARAM_SIZE,%ecx

	C ecx = size / 8 whole passes.  eax = (-size) mod 8 is the number of
	C limb slots to skip at the start of the first pass.  When it is zero
	C the loop is entered at the top, and the andl has left the carry flag
	C clear.
	movl	%ecx,%eax
	shrl	$3,%ecx			C compute count for unrolled loop
	negl	%eax
	andl	$7,%eax			C get index where to start loop
	jz	L(oop)			C necessary special case for 0
	incl	%ecx			C adjust loop count
	C Move the three pointers back by 4 bytes per skipped slot, so that
	C the displacement used at the entry point addresses limb 0.
	shll	$2,%eax			C adjustment for pointers...
	subl	%eax,%edi		C ... since they are offset ...
	subl	%eax,%esi		C ... by a constant when we ...
	subl	%eax,%edx		C ... enter the loop
	shrl	$2,%eax			C restore previous value

	C Entry address = L(oop) - 3 + 9 * index.
	C NOTE(review): there is no explicit clc on this path.  The shrl above
	C shifts out only zero bits, and the loop relies on the carry flag
	C still being clear when the jump below is taken.
ifdef(`PIC',`
	C Calculate start address in loop for PIC.  Due to limitations in
	C some assemblers, L(oop)-L(0b)-3 cannot be put into the leal
	call	L(0b)			C pushes the run-time address of L(0b)
L(0b):	leal	(%eax,%eax,8),%eax	C eax = 9 * index
	addl	(%esp),%eax		C add the address of L(0b)
	addl	$L(oop)-L(0b)-3,%eax	C add the distance from L(0b) to L(oop)-3
	addl	$4,%esp			C discard the address pushed by the call
',`
	C Calculate start address in loop for non-PIC.
	leal	L(oop)-3(%eax,%eax,8),%eax
')
	jmp	*%eax			C jump into loop

	C Reached only from M4_function_nc when its size is a multiple of 8:
	C load the carry flag from bit 0 of the carry argument, then fall
	C through into the top of the loop.  ebp is only borrowed as scratch.
L(oopgo):
	pushl	%ebp		FRAME_pushl()
	movl	PARAM_CARRY,%ebp
	shrl	%ebp			C shift bit 0 into carry
	popl	%ebp		FRAME_popl()

	C Eight limbs per pass.  The computed entry address, L(oop)-3 plus 9
	C times the entry index, relies on each load/M4_inst/store group below
	C assembling to 9 bytes, with the first group 3 bytes shorter since its
	C operands carry no displacement.  Do not add, remove or reorder
	C instructions in the loop body without revisiting that arithmetic.
	ALIGN(16)
L(oop):	movl	(%esi),%eax
	M4_inst	(%edx),%eax
	movl	%eax,(%edi)
	movl	4(%esi),%eax
	M4_inst	4(%edx),%eax
	movl	%eax,4(%edi)
	movl	8(%esi),%eax
	M4_inst	8(%edx),%eax
	movl	%eax,8(%edi)
	movl	12(%esi),%eax
	M4_inst	12(%edx),%eax
	movl	%eax,12(%edi)
	movl	16(%esi),%eax
	M4_inst	16(%edx),%eax
	movl	%eax,16(%edi)
	movl	20(%esi),%eax
	M4_inst	20(%edx),%eax
	movl	%eax,20(%edi)
	movl	24(%esi),%eax
	M4_inst	24(%edx),%eax
	movl	%eax,24(%edi)
	movl	28(%esi),%eax
	M4_inst	28(%edx),%eax
	movl	%eax,28(%edi)
	C Advance the pointers by 32 bytes and count the pass.  leal and decl
	C are used because neither changes the carry flag, so the carry from
	C the last M4_inst survives into the next pass and to the exit code.
	leal	32(%edi),%edi
	leal	32(%esi),%esi
	leal	32(%edx),%edx
	decl	%ecx
	jnz	L(oop)

	C Turn the carry flag into the return value.
	sbbl	%eax,%eax		C eax = 0 - carry, that is 0 or -1
	negl	%eax			C eax = 0 or 1

	popl	%esi
	popl	%edi
	ret

EPILOGUE()