Blame sysdeps/x86_64/mul_1.S

Packit 6c4009
/* AMD64 __mpn_mul_1 -- Multiply a limb vector with a limb and store
Packit 6c4009
   the result in a second limb vector.
Packit 6c4009
   Copyright (C) 2003-2018 Free Software Foundation, Inc.
Packit 6c4009
   This file is part of the GNU MP Library.
Packit 6c4009
Packit 6c4009
   The GNU MP Library is free software; you can redistribute it and/or modify
Packit 6c4009
   it under the terms of the GNU Lesser General Public License as published by
Packit 6c4009
   the Free Software Foundation; either version 2.1 of the License, or (at your
Packit 6c4009
   option) any later version.
Packit 6c4009
Packit 6c4009
   The GNU MP Library is distributed in the hope that it will be useful, but
Packit 6c4009
   WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
Packit 6c4009
   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
Packit 6c4009
   License for more details.
Packit 6c4009
Packit 6c4009
   You should have received a copy of the GNU Lesser General Public License
Packit 6c4009
   along with the GNU MP Library; see the file COPYING.LIB.  If not,
Packit 6c4009
   see <http://www.gnu.org/licenses/>.  */
Packit 6c4009
Packit 6c4009
#include <sysdep.h>
Packit 6c4009
#include "asm-syntax.h"
Packit 6c4009
Packit 6c4009
/* Symbolic names for the registers used by __mpn_mul_1.  The first four
   are the registers in which the arguments arrive; the routine later
   overwrites rp and up with biased copies of themselves.  */
/* rp: base of the result limb vector; every store goes through it.  */
#define rp	%rdi
Packit 6c4009
/* up: base of the source limb vector; every limb load goes through it.  */
#define up	%rsi
Packit 6c4009
/* n_param: limb count as passed in.  It is copied out of %rdx before the
   first mul, because mul writes the high half of its product to %rdx.  */
#define n_param	%rdx
Packit 6c4009
/* vl: the single multiplier limb, the explicit operand of every mul.  */
#define vl	%rcx
Packit 6c4009
Packit 6c4009
/* n: working copy of the limb count; negated and used as the loop index.  */
#define n	%r11
Packit 6c4009
Packit 6c4009
	.text
Packit 6c4009
/* __mpn_mul_1: multiply the n-limb vector at up by the single limb vl,
   store the n low limbs of the result at rp, and return the remaining
   most significant limb in %rax.

   In:    rp = %rdi, up = %rsi, n_param = %rdx, vl = %rcx.
   Out:   %rax = high limb left over after the last store.
   Uses:  %rax, %rdx, %rsi, %rdi, %r8-%r11 and the flags as scratch;
          %rbx is pushed on entry and popped before returning.

   The main loop is unrolled four ways.  n mod 4 selects one of the
   feed-in blocks L(b0)..L(b3).  Each of them biases up and rp, negates
   n so that it counts upward toward zero, and jumps to the matching
   entry label inside the loop, so that the first load there addresses
   source limb 1 and the first store addresses result limb 0.

   NOTE(review): n is assumed to be >= 1 -- the first source limb is
   loaded and multiplied before n is examined.  */
ENTRY (__mpn_mul_1)
Packit 6c4009
	/* %rbx is callee-saved but used as scratch below, so save it.  */
	push	%rbx
Packit 6c4009
	/* Unwind annotations describing the push above.  */
	cfi_adjust_cfa_offset (8)
Packit 6c4009
	cfi_rel_offset (%rbx, 0)
Packit 6c4009
	/* %r10 = 0; it is added to the first product below.  */
	xor	%r10, %r10
Packit 6c4009
	mov	(up), %rax		/* read first u limb early */
Packit 6c4009
	mov	n_param, %rbx		/* move away n from rdx, mul uses it */
Packit 6c4009
	/* %rdx:%rax = up[0] * vl.  */
	mul	vl
Packit 6c4009
	/* n (%r11) = limb count; %rbx keeps a second copy for the mod-4 test.  */
	mov	%rbx, %r11
Packit 6c4009
Packit 6c4009
	/* %r10 is zero here, so this add/adc pair leaves %rdx:%rax unchanged.
	   NOTE(review): looks like a hook for a carry-in limb -- confirm.  */
	add	%r10, %rax
Packit 6c4009
	adc	$0, %rdx
Packit 6c4009
Packit 6c4009
	/* Dispatch on n mod 4: 0 -> L(b0), 2 -> L(b2), 3 -> L(b3),
	   1 -> fall through into L(b1).  */
	and	$3, %ebx
Packit 6c4009
	jz	L(b0)
Packit 6c4009
	cmp	$2, %ebx
Packit 6c4009
	jz	L(b2)
Packit 6c4009
	jg	L(b3)
Packit 6c4009
Packit 6c4009
/* n mod 4 == 1.  After the dec, n is the number of limbs still to be
   multiplied; zero means n was 1 and the product already computed is
   the whole result.  */
L(b1):	dec	n
Packit 6c4009
	jne	L(gt1)
Packit 6c4009
	/* n == 1: store the low limb; the high limb in %rdx is returned.  */
	mov	%rax, (rp)
Packit 6c4009
	jmp	L(ret)
Packit 6c4009
/* Feed-in for n mod 4 == 1 with more limbs to go.  %r9 takes the low
   half of the first product (stored at L(L1)), %r8 the high half (the
   next product's low half is added to it at L(L1)); %rbx and %r10 start
   at zero.  The next source limb is preloaded into %rax.  */
L(gt1):	lea	8(up,n,8), up
Packit 6c4009
	lea	-8(rp,n,8), rp
Packit 6c4009
	neg	n
Packit 6c4009
	xor	%r10, %r10
Packit 6c4009
	xor	%ebx, %ebx
Packit 6c4009
	mov	%rax, %r9
Packit 6c4009
	mov	(up,n,8), %rax
Packit 6c4009
	mov	%rdx, %r8
Packit 6c4009
	jmp	L(L1)
Packit 6c4009
Packit 6c4009
/* Feed-in for n mod 4 == 0.  %r8 takes the low half of the first
   product (stored at L(L0)), %rbx the high half; %r10 starts at zero.  */
L(b0):	lea	(up,n,8), up
Packit 6c4009
	lea	-16(rp,n,8), rp
Packit 6c4009
	neg	n
Packit 6c4009
	xor	%r10, %r10
Packit 6c4009
	mov	%rax, %r8
Packit 6c4009
	mov	%rdx, %rbx
Packit 6c4009
	jmp	L(L0)
Packit 6c4009
Packit 6c4009
/* Feed-in for n mod 4 == 3.  %rbx takes the low half of the first
   product (stored at L(L3)), %r10 the high half.  %r8 and %r9 need no
   setup here because L(L3) initialises both before they are read.  */
L(b3):	lea	-8(up,n,8), up
Packit 6c4009
	lea	-24(rp,n,8), rp
Packit 6c4009
	neg	n
Packit 6c4009
	mov	%rax, %rbx
Packit 6c4009
	mov	%rdx, %r10
Packit 6c4009
	jmp	L(L3)
Packit 6c4009
Packit 6c4009
/* Feed-in for n mod 4 == 2.  %r10 takes the low half of the first
   product, %r9 the high half; %r8 and %rbx start at zero.  The next
   source limb is preloaded into %rax for the mul at L(L2).  */
L(b2):	lea	-16(up,n,8), up
Packit 6c4009
	lea	-32(rp,n,8), rp
Packit 6c4009
	neg	n
Packit 6c4009
	xor	%r8, %r8
Packit 6c4009
	xor	%ebx, %ebx
Packit 6c4009
	mov	%rax, %r10
Packit 6c4009
	mov	24(up,n,8), %rax
Packit 6c4009
	mov	%rdx, %r9
Packit 6c4009
	jmp	L(L2)
Packit 6c4009
Packit 6c4009
	.p2align 4
Packit 6c4009
/* Main loop, four limbs per iteration.  Each stage stores one finished
   result limb, adds the low half of the newest product into the pending
   limb, and adds the high half plus carry into the limb after that.
   The accumulators %r10, %r9, %r8 and %rbx take these roles in turn.
   n is negative throughout the loop and is advanced by 4 at L(L2).  */
L(top): mov	%r10, (rp,n,8)
Packit 6c4009
	add	%rax, %r9
Packit 6c4009
	mov	(up,n,8), %rax
Packit 6c4009
	adc	%rdx, %r8
Packit 6c4009
	/* Clear %r10 so it can collect the high half after L(L0).  */
	mov	$0, %r10d
Packit 6c4009
L(L1):	mul	vl
Packit 6c4009
	mov	%r9, 8(rp,n,8)
Packit 6c4009
	add	%rax, %r8
Packit 6c4009
	adc	%rdx, %rbx
Packit 6c4009
L(L0):	mov	8(up,n,8), %rax
Packit 6c4009
	mul	vl
Packit 6c4009
	mov	%r8, 16(rp,n,8)
Packit 6c4009
	add	%rax, %rbx
Packit 6c4009
	adc	%rdx, %r10
Packit 6c4009
L(L3):	mov	16(up,n,8), %rax
Packit 6c4009
	mul	vl
Packit 6c4009
	mov	%rbx, 24(rp,n,8)
Packit 6c4009
	/* %r8, %rbx and %r9 are cleared with mov, which leaves the flags
	   alone, so the carry from the add below reaches the adc.  */
	mov	$0, %r8d                # zero
Packit 6c4009
	mov	%r8, %rbx               # zero
Packit 6c4009
	add	%rax, %r10
Packit 6c4009
	mov	24(up,n,8), %rax
Packit 6c4009
	mov	%r8, %r9                # zero
Packit 6c4009
	adc	%rdx, %r9
Packit 6c4009
L(L2):	mul	vl
Packit 6c4009
	/* Advance the index; keep looping while it is still negative.  */
	add	$4, n
Packit 6c4009
	js	L(top)
Packit 6c4009
Packit 6c4009
	/* Wind-down: store the last two result limbs.  %r8 is zero on every
	   path that reaches this point, so the adc only folds the carry
	   into %rdx and the final add leaves %rdx unchanged.  */
	mov	%r10, (rp,n,8)
Packit 6c4009
	add	%rax, %r9
Packit 6c4009
	adc	%r8, %rdx
Packit 6c4009
	mov	%r9, 8(rp,n,8)
Packit 6c4009
	add	%r8, %rdx
Packit 6c4009
/* Common exit: the most significant limb is returned in %rax.  */
L(ret):	mov	%rdx, %rax
Packit 6c4009
Packit 6c4009
	/* Restore the caller's %rbx and undo the unwind annotations.  */
	pop	%rbx
Packit 6c4009
	cfi_adjust_cfa_offset (-8)
Packit 6c4009
	cfi_restore (%rbx)
Packit 6c4009
	ret
Packit 6c4009
END (__mpn_mul_1)