dnl  ARM64 mpn_lshift.

dnl  Copyright 2013, 2014 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.

dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of the GNU Lesser General Public License as published
dnl  by the Free Software Foundation; either version 3 of the License, or (at
dnl  your option) any later version.

dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
dnl  License for more details.

dnl  You should have received a copy of the GNU Lesser General Public License
dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.

include(`../config.m4')

C	     cycles/limb
C Cortex-A53	 ?
C Cortex-A57	 ?

changecom(@&*$)

dnl  Incoming arguments, in AAPCS64 argument-register order.
dnl  rp_arg: destination limb pointer as passed by the caller (x0).
dnl  up:     source limb pointer (x1).
dnl  n:      limb count (x2).
dnl  cnt:    left shift count in bits (x3).
define(`rp_arg', `x0')
define(`up',     `x1')
define(`n',      `x2')
define(`cnt',    `x3')

dnl  Working destination pointer.  The code computes it from rp_arg on entry
dnl  and then overwrites x0 with the return value, so the destination pointer
dnl  is kept in a separate register.
define(`rp',     `x16')

dnl  Negated shift count (0 - cnt).  Used as the register shift amount of lsr;
dnl  register shifts use the amount modulo 64, giving a right shift by 64-cnt.
define(`tnc',`x8')

C mp_limb_t mpn_lshift (mp_ptr rp, mp_srcptr up, mp_size_t n, unsigned cnt)
C
C Shift the n-limb operand at up left by cnt bits and store the n-limb result
C at rp.  The bits shifted out of the most significant limb are returned in
C x0, i.e. the return value is up[n-1] >> (64-cnt).
C
C The code assumes n >= 1 and 1 <= cnt <= 63 (the mpn_lshift contract); there
C is no handling for n = 0 or cnt = 0.
C
C Limbs are processed from the most significant end downwards: rp and up are
C first advanced to one-past-the-end and all accesses use negative offsets.
C The main loop handles 4 limbs per iteration and is entered at L(lo2) or
C L(lo0) depending on n mod 4.
C
C Register use:
C   x0         return value (written before rp_arg is needed again)
C   x4-x7      source limbs
C   x10, x11   right-shifted parts / combined result limbs
C   x9, x12    left-shifted low source limb carried into the next half
C   x13        left-shifted high source limb of the current pair
C
C Only caller-saved temporaries are used.  x18 is deliberately avoided since
C AAPCS64 reserves it as the platform register on several systems.

ASM_START()
PROLOGUE(mpn_lshift)
	add	rp, rp_arg, n, lsl #3	C rp = one past the last result limb
	add	up, up, n, lsl #3	C up = one past the last source limb
	sub	tnc, xzr, cnt		C tnc = -cnt, acts as 64-cnt in lsr
	tbz	n, #0, L(bx0)		C even n

C n is odd: fetch the top limb on its own.
L(bx1):	ldr	x4, [up,#-8]
	tbnz	n, #1, L(b11)

C n mod 4 = 1
L(b01):	lsr	x0, x4, tnc		C return value: bits shifted out
	lsl	x12, x4, cnt		C pending high part of the top result limb
	sub	n, n, #1
	cbnz	n, L(gt1)
	str	x12, [rp,#-8]		C n was 1: single result limb
	ret
L(gt1):	ldp	x4, x5, [up,#-24]
	sub	up, up, #8		C bias pointers for entry at L(lo2)
	add	rp, rp, #16
	b	L(lo2)

C n mod 4 = 3
L(b11):	lsr	x0, x4, tnc		C return value: bits shifted out
	lsl	x9, x4, cnt		C pending high part of the top result limb
	ldp	x6, x7, [up,#-24]
	add	n, n, #1		C round n up to a multiple of 4
	add	up, up, #8		C bias pointers for entry at L(lo0)
	add	rp, rp, #32
	b	L(lo0)

C n is even: fetch the top two limbs together.
L(bx0):	ldp	x4, x5, [up,#-16]
	tbz	n, #1, L(b00)

C n mod 4 = 2
L(b10):	lsr	x0, x5, tnc		C return value: bits shifted out
	lsl	x13, x5, cnt
	lsr	x10, x4, tnc
	lsl	x12, x4, cnt		C pending high part of the next result limb
	sub	n, n, #2
	cbnz	n, L(gt2)
	orr	x10, x10, x13
	stp	x12, x10, [rp,#-16]	C n was 2: store both result limbs
	ret
L(gt2):	ldp	x4, x5, [up,#-32]
	orr	x10, x10, x13
	str	x10, [rp,#-8]		C top result limb
	sub	up, up, #16		C bias pointers for entry at L(lo2)
	add	rp, rp, #8
	b	L(lo2)

C n mod 4 = 0
L(b00):	lsr	x0, x5, tnc		C return value: bits shifted out
	lsl	x13, x5, cnt
	lsr	x10, x4, tnc
	lsl	x9, x4, cnt		C pending high part of the next result limb
	ldp	x6, x7, [up,#-32]
	orr	x10, x10, x13
	str	x10, [rp,#-8]		C top result limb
	add	rp, rp, #24		C bias rp for entry at L(lo0)
	b	L(lo0)

C Main loop, 4 limbs per iteration.  x9 carries the left-shifted low limb from
C the L(lo2) half into the L(lo0) half, x12 carries it from the L(lo0) half
C into the next L(lo2) half (or into the final store at L(end)).
	ALIGN(16)
L(top):	ldp	x4, x5, [up,#-48]
	sub	rp, rp, #32		C integrate with stp?
	sub	up, up, #32		C integrate with ldp?
	orr	x11, x11, x9
	orr	x10, x10, x13
	stp	x10, x11, [rp,#-16]
L(lo2):	lsr	x11, x5, tnc
	lsl	x13, x5, cnt
	lsr	x10, x4, tnc
	lsl	x9, x4, cnt
	ldp	x6, x7, [up,#-32]
	orr	x11, x11, x12
	orr	x10, x10, x13
	stp	x10, x11, [rp,#-32]
L(lo0):	sub	n, n, #4
	lsr	x11, x7, tnc
	lsl	x13, x7, cnt
	lsr	x10, x6, tnc
	lsl	x12, x6, cnt
	cbnz	n, L(top)

C Wind-down: store the last combined pair, then the least significant result
C limb, which only receives the left-shifted bits of the lowest source limb.
L(end):	orr	x11, x11, x9
	orr	x10, x10, x13
	stp	x10, x11, [rp,#-48]
	str	x12, [rp,#-56]
	ret
EPILOGUE()