Blame sysdeps/x86_64/add_n.S

Packit 6c4009
/* x86-64 __mpn_add_n -- Add two limb vectors of the same length > 0 and store
Packit 6c4009
   sum in a third limb vector.
Packit 6c4009
   Copyright (C) 2006-2018 Free Software Foundation, Inc.
Packit 6c4009
   This file is part of the GNU MP Library.
Packit 6c4009
Packit 6c4009
   The GNU MP Library is free software; you can redistribute it and/or modify
Packit 6c4009
   it under the terms of the GNU Lesser General Public License as published by
Packit 6c4009
   the Free Software Foundation; either version 2.1 of the License, or (at your
Packit 6c4009
   option) any later version.
Packit 6c4009
Packit 6c4009
   The GNU MP Library is distributed in the hope that it will be useful, but
Packit 6c4009
   WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
Packit 6c4009
   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
Packit 6c4009
   License for more details.
Packit 6c4009
Packit 6c4009
   You should have received a copy of the GNU Lesser General Public License
Packit 6c4009
   along with the GNU MP Library; see the file COPYING.LIB.  If not,
Packit 6c4009
   see <http://www.gnu.org/licenses/>.  */
Packit 6c4009
Packit 6c4009
#include "sysdep.h"
Packit 6c4009
#include "asm-syntax.h"
Packit 6c4009
Packit 6c4009
/* Register aliases.  rp, up, vp and n are %rdi, %rsi, %rdx and %rcx -- the
   first four integer argument registers of the System V AMD64 calling
   convention, in that order.  The code below stores result limbs through
   rp, loads source limbs through up and vp, and uses n as the limb count
   (later as a negative loop index).  */
#define rp	%rdi
Packit 6c4009
#define up	%rsi
Packit 6c4009
#define vp	%rdx
Packit 6c4009
#define n	%rcx
Packit 6c4009
/* cy aliases %r8.  The instructions visible in this file name %r8 directly
   rather than going through this macro.  */
#define cy	%r8
Packit 6c4009
Packit 6c4009
/* Defaults: unless func is already defined when this point is reached, the
   routine is named __mpn_add_n and ADCSBB (the per-limb instruction) is adc.
   NOTE(review): the guard presumably lets another file pre-define func and
   ADCSBB (e.g. for a subtract variant) before including this one -- confirm
   against the includers.  */
#ifndef func
Packit 6c4009
# define func __mpn_add_n
Packit 6c4009
# define ADCSBB adc
Packit 6c4009
#endif
Packit 6c4009
Packit 6c4009
	.text
Packit 6c4009
/* func (rp, up, vp, n) -- combine the n-limb vectors at up and vp limb by
   limb (8 bytes each) with ADCSBB, propagating the carry flag from one limb
   to the next, and store the n result limbs at rp.  With the default macros
   this is __mpn_add_n and ADCSBB is adc.

   In:   rp (%rdi), up (%rsi), vp (%rdx), n (%rcx).  The file header requires
	 n > 0; limb 0 of both sources is read unconditionally.
   Out:  %eax = carry flag left by the last ADCSBB (0 or 1).
   Uses: %rax, %r8-%r11 and the flags; the four argument registers are
	 modified.  No stack is used.

   The main loop is unrolled four limbs per pass and software-pipelined:
   each stage loads the next limb pair while adding and storing the previous
   one, alternating between the register pairs %r10/%r11 and %r8/%r9.  The
   prologue enters the loop at the stage selected by n mod 4.  Between
   consecutive ADCSBBs there are only mov, lea, jmp and jrcxz, none of which
   writes the flags, so CF carries across stages and passes.  Do not insert
   flag-writing instructions into the loop.

   NOTE(review): the interleaved "Packit 6c4009" lines look like blame-view
   residue (the file starts with a "Blame ..." line), not assembly --
   confirm and strip them before assembling.  */
ENTRY (func)
Packit 6c4009
	xor	%r8, %r8		/* %r8 = 0; shifted into CF further down */
Packit 6c4009
	mov	(up), %r10		/* %r10 = up[0] */
Packit 6c4009
	mov	(vp), %r11		/* %r11 = vp[0] */
Packit 6c4009
Packit 6c4009
	/* Rebase the pointers near the ends of the vectors so that a
	   negative index in n, counting upwards, reaches 0 exactly when a
	   single limb pair is left to add.  */
	lea	-8(up,n,8), up		/* up = &up[n-1] */
Packit 6c4009
	lea	-8(vp,n,8), vp		/* vp = &vp[n-1] */
Packit 6c4009
	lea	-16(rp,n,8), rp		/* rp = &rp[n-2] */
Packit 6c4009
	mov	%ecx, %eax		/* only the low two bits are used below */
Packit 6c4009
	neg	n			/* n = -count */
Packit 6c4009
	and	$3, %eax		/* %eax = count mod 4 */
Packit 6c4009
	je	L(b00)			/* count mod 4 == 0: n is already a multiple of 4 */
Packit 6c4009
	add	%rax, n		/* n = -(count - count mod 4): clear low rcx bits for jrcxz */
Packit 6c4009
	cmp	$2, %eax
Packit 6c4009
	jl	L(b01)			/* count mod 4 == 1 */
Packit 6c4009
	je	L(b10)			/* count mod 4 == 2 */
Packit 6c4009
Packit 6c4009
	/* Fall through: count mod 4 == 3.  In each b* stub the shr moves
	   bit 0 of %r8 into CF; %r8 was zeroed at entry, so the carry chain
	   starts with CF = 0 regardless of what and/add/cmp left behind.  */
L(b11):	shr	%r8		/* set cy: CF = bit 0 of %r8 (0 here) */
Packit 6c4009
	jmp	L(e11)
Packit 6c4009
Packit 6c4009
	/* count mod 4 == 0.  Stage e00 adds the %r8/%r9 pair, so move the
	   first limbs there.  */
L(b00):	shr	%r8		/* set cy: CF = bit 0 of %r8 (0 here) */
Packit 6c4009
	mov	%r10, %r8
Packit 6c4009
	mov	%r11, %r9
Packit 6c4009
	lea	4(n), n			/* advance the index by one pass; lea keeps CF */
Packit 6c4009
	jmp	L(e00)
Packit 6c4009
Packit 6c4009
	/* count mod 4 == 1.  The first limbs stay in %r10/%r11, which is the
	   pair both L(top) and L(end) add.  */
L(b01):	shr	%r8		/* set cy: CF = bit 0 of %r8 (0 here) */
Packit 6c4009
	jmp	L(e01)
Packit 6c4009
Packit 6c4009
	/* count mod 4 == 2.  Stage e10 adds the %r8/%r9 pair, so move the
	   first limbs there.  */
L(b10):	shr	%r8		/* set cy: CF = bit 0 of %r8 (0 here) */
Packit 6c4009
	mov	%r10, %r8
Packit 6c4009
	mov	%r11, %r9
Packit 6c4009
	jmp	L(e10)
Packit 6c4009
Packit 6c4009
	/* Exit, reached from L(e01) once the index is 0: add the final limb
	   pair, store it, and return the carry.  */
L(end):	ADCSBB	%r11, %r10
Packit 6c4009
	mov	%r10, 8(rp)		/* rp was rebased to &rp[n-2], so this is rp[n-1] */
Packit 6c4009
	mov	%ecx, %eax	/* clear eax, ecx contains 0 (mov leaves CF intact) */
Packit 6c4009
	adc	%eax, %eax		/* %eax = 0 + 0 + CF */
Packit 6c4009
	ret
Packit 6c4009
Packit 6c4009
	.p2align 4			/* 16-byte align the loop head */
Packit 6c4009
	/* Stage 0: fetch the next pair into %r8/%r9, then add and store the
	   pending %r10/%r11 pair.  */
L(top):
Packit 6c4009
	mov	-24(up,n,8), %r8
Packit 6c4009
	mov	-24(vp,n,8), %r9
Packit 6c4009
	ADCSBB	%r11, %r10
Packit 6c4009
	mov	%r10, -24(rp,n,8)
Packit 6c4009
	/* Stage 1 (entry for count mod 4 == 0): fetch into %r10/%r11, add
	   and store the pending %r8/%r9 pair.  */
L(e00):
Packit 6c4009
	mov	-16(up,n,8), %r10
Packit 6c4009
	mov	-16(vp,n,8), %r11
Packit 6c4009
	ADCSBB	%r9, %r8
Packit 6c4009
	mov	%r8, -16(rp,n,8)
Packit 6c4009
	/* Stage 2 (entry for count mod 4 == 3): fetch into %r8/%r9, add and
	   store the pending %r10/%r11 pair.  */
L(e11):
Packit 6c4009
	mov	-8(up,n,8), %r8
Packit 6c4009
	mov	-8(vp,n,8), %r9
Packit 6c4009
	ADCSBB	%r11, %r10
Packit 6c4009
	mov	%r10, -8(rp,n,8)
Packit 6c4009
	/* Stage 3 (entry for count mod 4 == 2): fetch into %r10/%r11, add
	   and store the pending %r8/%r9 pair.  */
L(e10):
Packit 6c4009
	mov	(up,n,8), %r10
Packit 6c4009
	mov	(vp,n,8), %r11
Packit 6c4009
	ADCSBB	%r9, %r8
Packit 6c4009
	mov	%r8, (rp,n,8)
Packit 6c4009
	/* Loop tail (entry for count mod 4 == 1).  One pair is pending in
	   %r10/%r11.  jrcxz tests %rcx without reading or writing the flags,
	   so the carry chain survives the loop control.  */
L(e01):
Packit 6c4009
	jrcxz	L(end)			/* index == 0: only the pending pair remains */
Packit 6c4009
	lea	4(n), n			/* next four limbs; lea keeps CF */
Packit 6c4009
	jmp	L(top)
Packit 6c4009
END (func)