Blame sysdeps/x86_64/lshift.S

Packit 6c4009
/* x86-64 __mpn_lshift --
Packit 6c4009
   Copyright (C) 2007-2018 Free Software Foundation, Inc.
Packit 6c4009
   This file is part of the GNU MP Library.
Packit 6c4009
Packit 6c4009
   The GNU MP Library is free software; you can redistribute it and/or modify
Packit 6c4009
   it under the terms of the GNU Lesser General Public License as published by
Packit 6c4009
   the Free Software Foundation; either version 2.1 of the License, or (at your
Packit 6c4009
   option) any later version.
Packit 6c4009
Packit 6c4009
   The GNU MP Library is distributed in the hope that it will be useful, but
Packit 6c4009
   WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
Packit 6c4009
   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
Packit 6c4009
   License for more details.
Packit 6c4009
Packit 6c4009
   You should have received a copy of the GNU Lesser General Public License
Packit 6c4009
   along with the GNU MP Library; see the file COPYING.LIB.  If not,
Packit 6c4009
   see <http://www.gnu.org/licenses/>.  */
Packit 6c4009
Packit 6c4009
#include "sysdep.h"
Packit 6c4009
#include "asm-syntax.h"
Packit 6c4009
Packit 6c4009
/* Register aliases for the arguments of __mpn_lshift.  */
/* rp: pointer the result limbs are stored through.  */
#define rp	%rdi
Packit 6c4009
/* up: pointer the source limbs are loaded through.  */
#define up	%rsi
Packit 6c4009
/* n: limb count; also serves as the loop counter.  */
#define n	%rdx
Packit 6c4009
/* cnt: shift count.  The code in this file spells %cl directly rather
   than using this alias.  */
#define cnt	%cl
Packit 6c4009
Packit 6c4009
	.text
Packit 6c4009
/* __mpn_lshift: shift the n-limb number at up left by %cl bits and store
   the n result limbs at rp.  Limbs are 64-bit words; they are processed
   from the highest address (most significant limb) down to the lowest.

   In:   rp (%rdi) = destination limb pointer
         up (%rsi) = source limb pointer
         n  (%rdx) = limb count; must be at least 1 (there is no n == 0
                     path: n == 0 would take the b00 branch and access
                     memory below both arrays)
         %cl       = shift count
   Out:  %rax      = the bits shifted out of the most significant limb
   Uses: %r8-%r11 as a rotating window of source limbs; no stack.

   NOTE(review): presumably 1 <= shift count <= 63, as in GMP's
   mpn_lshift contract -- confirm against callers.

   The main loop is unrolled four times.  The prologue dispatches on
   n mod 4 and enters the loop at L(00), L(01), L(10) or L(top), with
   rp/up biased so that the fixed offsets inside the loop land on the
   right limbs.  */
ENTRY (__mpn_lshift)
Packit 6c4009
	lea	-8(rp,n,8), rp		/* rp = &rp[n-1], most significant result limb */
Packit 6c4009
	lea	-8(up,n,8), up		/* up = &up[n-1], most significant source limb */
Packit 6c4009
Packit 6c4009
	mov	%edx, %eax		/* low 32 bits of n are enough for n mod 4 */
Packit 6c4009
	and	$3, %eax		/* eax = n mod 4 */
Packit 6c4009
	jne	L(nb00)
Packit 6c4009
L(b00):	/* n = 4, 8, 12, ... */
Packit 6c4009
	mov	(up), %r10		/* r10 = up[n-1] */
Packit 6c4009
	mov	-8(up), %r11		/* r11 = up[n-2] */
Packit 6c4009
	xor	%eax, %eax
Packit 6c4009
	shld	%cl, %r10, %rax		/* rax = bits shifted out of the top limb */
Packit 6c4009
	mov	-16(up), %r8		/* r8 = up[n-3] */
Packit 6c4009
	lea	24(rp), rp		/* bias so the -24(rp) store at L(00) hits rp[n-1] */
Packit 6c4009
	/* Pre-decrement n so that the loop's own "sub $4, n" borrows once
	   only the three limbs finished at L(end) are left.  */
	sub	$4, n
Packit 6c4009
	jmp	L(00)
Packit 6c4009
Packit 6c4009
L(nb00):/* n mod 4 != 0; falls through to b01 for n = 1, 5, 9, ... */
Packit 6c4009
	cmp	$2, %eax
Packit 6c4009
	jae	L(nb01)			/* n mod 4 is 2 or 3 */
Packit 6c4009
L(b01):	mov	(up), %r9		/* r9 = up[n-1] */
Packit 6c4009
	xor	%eax, %eax
Packit 6c4009
	shld	%cl, %r9, %rax		/* rax = bits shifted out of the top limb */
Packit 6c4009
	sub	$2, n			/* borrows only when n == 1 */
Packit 6c4009
	jb	L(le1)
Packit 6c4009
	mov	-8(up), %r10		/* r10 = up[n-2] */
Packit 6c4009
	mov	-16(up), %r11		/* r11 = up[n-3] */
Packit 6c4009
	lea	-8(up), up		/* so the -16(up) load at L(01) fetches up[n-4] */
Packit 6c4009
	lea	16(rp), rp		/* so the -16(rp) store at L(01) hits rp[n-1] */
Packit 6c4009
	jmp	L(01)
Packit 6c4009
L(le1):	shl	%cl, %r9		/* n == 1: the only limb, zeros shifted in */
Packit 6c4009
	mov	%r9, (rp)
Packit 6c4009
	ret
Packit 6c4009
Packit 6c4009
L(nb01):/* n mod 4 is 2 or 3; falls through to b10 for n = 2, 6, 10, ... */
Packit 6c4009
	jne	L(b11)			/* flags are still those of "cmp $2, %eax" */
Packit 6c4009
L(b10):	mov	(up), %r8		/* r8 = up[n-1] */
Packit 6c4009
	mov	-8(up), %r9		/* r9 = up[n-2] */
Packit 6c4009
	xor	%eax, %eax
Packit 6c4009
	shld	%cl, %r8, %rax		/* rax = bits shifted out of the top limb */
Packit 6c4009
	sub	$3, n			/* borrows only when n == 2 */
Packit 6c4009
	jb	L(le2)
Packit 6c4009
	mov	-16(up), %r10		/* r10 = up[n-3] */
Packit 6c4009
	lea	-16(up), up		/* so the -8(up) load at L(10) fetches up[n-4] */
Packit 6c4009
	lea	8(rp), rp		/* so the -8(rp) store at L(10) hits rp[n-1] */
Packit 6c4009
	jmp	L(10)
Packit 6c4009
L(le2):	shld	%cl, %r9, %r8		/* n == 2: high limb takes the top bits of the low limb */
Packit 6c4009
	mov	%r8, (rp)
Packit 6c4009
	shl	%cl, %r9		/* low limb, zeros shifted in */
Packit 6c4009
	mov	%r9, -8(rp)
Packit 6c4009
	ret
Packit 6c4009
Packit 6c4009
	.p2align 4		/* performance critical! */
Packit 6c4009
L(b11):	/* n = 3, 7, 11, ... */
Packit 6c4009
	mov	(up), %r11		/* r11 = up[n-1] */
Packit 6c4009
	mov	-8(up), %r8		/* r8 = up[n-2] */
Packit 6c4009
	xor	%eax, %eax
Packit 6c4009
	shld	%cl, %r11, %rax		/* rax = bits shifted out of the top limb */
Packit 6c4009
	mov	-16(up), %r9		/* r9 = up[n-3] */
Packit 6c4009
	lea	-24(up), up		/* up = &up[n-4], the limb L(top) loads next */
Packit 6c4009
	sub	$4, n			/* borrows only when n == 3 */
Packit 6c4009
	jb	L(end)
Packit 6c4009
Packit 6c4009
	/* Main loop, four limbs per iteration.  Each step merges one limb
	   with the top bits of the next lower limb (shld), loads a source
	   limb needed by a later step, and stores the finished result limb.
	   r11, r8, r9 and r10 take turns as the shld destination.  */
	.p2align 4
Packit 6c4009
L(top):	shld	%cl, %r8, %r11		/* r11 = (r11 << cnt) | (r8 >> (64 - cnt)) */
Packit 6c4009
	mov	(up), %r10		/* loaded ahead of its use at L(01) */
Packit 6c4009
	mov	%r11, (rp)
Packit 6c4009
L(10):	shld	%cl, %r9, %r8
Packit 6c4009
	mov	-8(up), %r11		/* loaded ahead of its use at L(00) */
Packit 6c4009
	mov	%r8, -8(rp)
Packit 6c4009
L(01):	shld	%cl, %r10, %r9
Packit 6c4009
	mov	-16(up), %r8		/* used by the next L(top) or by L(end) */
Packit 6c4009
	mov	%r9, -16(rp)
Packit 6c4009
L(00):	shld	%cl, %r11, %r10
Packit 6c4009
	mov	-24(up), %r9		/* used by the next L(10) or by L(end) */
Packit 6c4009
	mov	%r10, -24(rp)
Packit 6c4009
	add	$-32, up		/* step both pointers down by four limbs */
Packit 6c4009
	lea	-32(rp), rp
Packit 6c4009
	sub	$4, n
Packit 6c4009
	jnc	L(top)			/* loop until the subtraction borrows */
Packit 6c4009
Packit 6c4009
	/* Tail: r11, r8 and r9 hold the three lowest source limbs (highest
	   first) and rp points at the result slot for the first of them.  */
L(end):	shld	%cl, %r8, %r11
Packit 6c4009
	mov	%r11, (rp)
Packit 6c4009
	shld	%cl, %r9, %r8
Packit 6c4009
	mov	%r8, -8(rp)
Packit 6c4009
	shl	%cl, %r9		/* least significant limb, zeros shifted in */
Packit 6c4009
	mov	%r9, -16(rp)
Packit 6c4009
	ret
Packit 6c4009
END (__mpn_lshift)