Blame sysdeps/sparc/sparc32/sub_n.S

Packit 6c4009
! SPARC __mpn_sub_n -- Subtract two limb vectors of the same length > 0 and
Packit 6c4009
! store difference in a third limb vector.
Packit 6c4009
!
Packit 6c4009
! Copyright (C) 1995-2018 Free Software Foundation, Inc.
Packit 6c4009
!
Packit 6c4009
! This file is part of the GNU MP Library.
Packit 6c4009
!
Packit 6c4009
! The GNU MP Library is free software; you can redistribute it and/or modify
Packit 6c4009
! it under the terms of the GNU Lesser General Public License as published by
Packit 6c4009
! the Free Software Foundation; either version 2.1 of the License, or (at your
Packit 6c4009
! option) any later version.
Packit 6c4009
!
Packit 6c4009
! The GNU MP Library is distributed in the hope that it will be useful, but
Packit 6c4009
! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
Packit 6c4009
! or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
Packit 6c4009
! License for more details.
Packit 6c4009
!
Packit 6c4009
! You should have received a copy of the GNU Lesser General Public License
Packit 6c4009
! along with the GNU MP Library; see the file COPYING.LIB.  If not,
Packit 6c4009
! see <http://www.gnu.org/licenses/>.
Packit 6c4009
Packit 6c4009
Packit 6c4009
! INPUT PARAMETERS
/* Register aliases for the four arguments of __mpn_sub_n.  The roles
   below are the ones the function body gives them.  */
Packit 6c4009
/* Destination vector: receives one 32-bit difference limb per input limb.  */
#define RES_PTR	%o0
Packit 6c4009
/* First source vector (minuend): its limbs are the left operand of each
   subtract.  */
#define S1_PTR	%o1
Packit 6c4009
/* Second source vector (subtrahend): its limbs are the right operand of
   each subtract.  */
#define S2_PTR	%o2
Packit 6c4009
/* Number of limbs to process; used as the loop counter and modified.  */
#define SIZE	%o3
Packit 6c4009
Packit 6c4009
#include <sysdep.h>
Packit 6c4009
Packit 6c4009
/* __mpn_sub_n -- limb-wise multi-precision subtraction.

   Computes {RES_PTR, SIZE} = {S1_PTR, SIZE} - {S2_PTR, SIZE}, one 32-bit
   limb at a time starting at the lowest address, propagating the borrow
   between limbs through the carry flag (cy).  The borrow out of the last
   limb (0 or 1) is returned in %o0.

   Three code variants are selected from bit 2 of the pointers, i.e. from
   whether two vectors sit at the same offset within a doubleword, so that
   the doubleword instructions ldd/std can be used where possible:

     V1a  S2_PTR and RES_PTR agree: S2 is read with ldd and the result is
	  written with std; S1 is read with single-word ld.
     V1b  otherwise S1_PTR and RES_PTR agree: same as V1a with the roles
	  of the two sources swapped (operand order still gives S1 - S2).
     V2   otherwise S1_PTR and S2_PTR agree: both sources are read with
	  ldd and the result is written with single-word st.

   In every variant a leading limb is handled on its own when needed to
   bring the ldd/std pointers to a doubleword boundary.  NOTE(review):
   this presumes all three pointers are at least word aligned -- confirm
   against callers.

   Carry handling idiom used throughout: instructions such as addcc, andcc
   and cmp clobber cy, so cy is first parked in %o4 with
   "addx %g0,%g0,%o4" and afterwards re-created with
   "subcc %g0,%o4,%g0" (0 - 1 borrows, 0 - 0 does not).  The restore is
   usually placed in a branch delay slot.

   Registers written by the visible body: %g1-%g4, %o4, %o5, the integer
   condition codes, and the argument registers %o0-%o3.  The body executes
   no save and returns with retl.  */
ENTRY(__mpn_sub_n)
Packit 6c4009
	xor	S2_PTR,RES_PTR,%g1	! bit 2 set iff S2_PTR/RES_PTR differ in it
Packit 6c4009
	andcc	%g1,4,%g0
Packit 6c4009
	bne	LOC(1)			! branch if alignment differs
Packit 6c4009
	nop
Packit 6c4009
! **  V1a  **
Packit 6c4009
	andcc	RES_PTR,4,%g0		! RES_PTR unaligned? Side effect: cy=0
Packit 6c4009
	be	LOC(v1)			! if no, branch
Packit 6c4009
	nop
Packit 6c4009
/* Subtract least significant limb separately to align RES_PTR and S2_PTR */
Packit 6c4009
	ld	[S1_PTR],%g4
Packit 6c4009
	add	S1_PTR,4,S1_PTR
Packit 6c4009
	ld	[S2_PTR],%g2
Packit 6c4009
	add	S2_PTR,4,S2_PTR
Packit 6c4009
	add	SIZE,-1,SIZE
Packit 6c4009
	subcc	%g4,%g2,%o4		! first limb: no incoming borrow, sets cy
Packit 6c4009
	st	%o4,[RES_PTR]
Packit 6c4009
	add	RES_PTR,4,RES_PTR
Packit 6c4009
LOC(v1):
Packit 6c4009
	addx	%g0,%g0,%o4		! save cy in register
Packit 6c4009
	cmp	SIZE,2			! if SIZE < 2 ...
Packit 6c4009
	bl	LOC(end2)		! ... branch to tail code
Packit 6c4009
	subcc	%g0,%o4,%g0		! restore cy
Packit 6c4009
Packit 6c4009
/* Preload the first limb pair of each source.  The loops below always
   hold the next pair in %g4/%g1 (S1) and %g2/%g3 (S2) before using it.  */
	ld	[S1_PTR+0],%g4
Packit 6c4009
	addcc	SIZE,-10,SIZE		! need 8 limbs per pass + 2 loaded ahead
Packit 6c4009
	ld	[S1_PTR+4],%g1
Packit 6c4009
	ldd	[S2_PTR+0],%g2
Packit 6c4009
	blt	LOC(fin1)
Packit 6c4009
	subcc	%g0,%o4,%g0		! restore cy
Packit 6c4009
/* Subtract blocks of 8 limbs until less than 8 limbs remain */
Packit 6c4009
LOC(loop1):
Packit 6c4009
	subxcc	%g4,%g2,%o4
Packit 6c4009
	ld	[S1_PTR+8],%g4
Packit 6c4009
	subxcc	%g1,%g3,%o5
Packit 6c4009
	ld	[S1_PTR+12],%g1
Packit 6c4009
	ldd	[S2_PTR+8],%g2
Packit 6c4009
	std	%o4,[RES_PTR+0]
Packit 6c4009
	subxcc	%g4,%g2,%o4
Packit 6c4009
	ld	[S1_PTR+16],%g4
Packit 6c4009
	subxcc	%g1,%g3,%o5
Packit 6c4009
	ld	[S1_PTR+20],%g1
Packit 6c4009
	ldd	[S2_PTR+16],%g2
Packit 6c4009
	std	%o4,[RES_PTR+8]
Packit 6c4009
	subxcc	%g4,%g2,%o4
Packit 6c4009
	ld	[S1_PTR+24],%g4
Packit 6c4009
	subxcc	%g1,%g3,%o5
Packit 6c4009
	ld	[S1_PTR+28],%g1
Packit 6c4009
	ldd	[S2_PTR+24],%g2
Packit 6c4009
	std	%o4,[RES_PTR+16]
Packit 6c4009
	subxcc	%g4,%g2,%o4
Packit 6c4009
	ld	[S1_PTR+32],%g4		! limb pair for the next pass / the tail
Packit 6c4009
	subxcc	%g1,%g3,%o5
Packit 6c4009
	ld	[S1_PTR+36],%g1
Packit 6c4009
	ldd	[S2_PTR+32],%g2
Packit 6c4009
	std	%o4,[RES_PTR+24]
Packit 6c4009
	addx	%g0,%g0,%o4		! save cy in register
Packit 6c4009
	addcc	SIZE,-8,SIZE
Packit 6c4009
	add	S1_PTR,32,S1_PTR
Packit 6c4009
	add	S2_PTR,32,S2_PTR
Packit 6c4009
	add	RES_PTR,32,RES_PTR
Packit 6c4009
	bge	LOC(loop1)
Packit 6c4009
	subcc	%g0,%o4,%g0		! restore cy
Packit 6c4009
Packit 6c4009
LOC(fin1):
Packit 6c4009
	addcc	SIZE,8-2,SIZE		! rebias count for the 2-limb loop
Packit 6c4009
	blt	LOC(end1)
Packit 6c4009
	subcc	%g0,%o4,%g0		! restore cy
Packit 6c4009
/* Subtract blocks of 2 limbs until less than 2 limbs remain */
Packit 6c4009
LOC(loope1):
Packit 6c4009
	subxcc	%g4,%g2,%o4
Packit 6c4009
	ld	[S1_PTR+8],%g4
Packit 6c4009
	subxcc	%g1,%g3,%o5
Packit 6c4009
	ld	[S1_PTR+12],%g1
Packit 6c4009
	ldd	[S2_PTR+8],%g2
Packit 6c4009
	std	%o4,[RES_PTR+0]
Packit 6c4009
	addx	%g0,%g0,%o4		! save cy in register
Packit 6c4009
	addcc	SIZE,-2,SIZE
Packit 6c4009
	add	S1_PTR,8,S1_PTR
Packit 6c4009
	add	S2_PTR,8,S2_PTR
Packit 6c4009
	add	RES_PTR,8,RES_PTR
Packit 6c4009
	bge	LOC(loope1)
Packit 6c4009
	subcc	%g0,%o4,%g0		! restore cy
Packit 6c4009
/* Subtract the limb pair that is already loaded in registers */
LOC(end1):
Packit 6c4009
	subxcc	%g4,%g2,%o4
Packit 6c4009
	subxcc	%g1,%g3,%o5
Packit 6c4009
	std	%o4,[RES_PTR+0]
Packit 6c4009
	addx	%g0,%g0,%o4		! save cy in register
Packit 6c4009
Packit 6c4009
	andcc	SIZE,1,%g0		! one odd limb left over?
Packit 6c4009
	be	LOC(ret1)
Packit 6c4009
	subcc	%g0,%o4,%g0		! restore cy
Packit 6c4009
/* Subtract last limb.  The pointers were not advanced past the pair just
   stored, hence the offset of 8.  */
Packit 6c4009
	ld	[S1_PTR+8],%g4
Packit 6c4009
	ld	[S2_PTR+8],%g2
Packit 6c4009
	subxcc	%g4,%g2,%o4
Packit 6c4009
	st	%o4,[RES_PTR+8]
Packit 6c4009
Packit 6c4009
LOC(ret1):
Packit 6c4009
	retl
Packit 6c4009
	addx	%g0,%g0,%o0	! return borrow out of most sign. limb
Packit 6c4009
Packit 6c4009
LOC(1):	xor	S1_PTR,RES_PTR,%g1
Packit 6c4009
	andcc	%g1,4,%g0
Packit 6c4009
	bne	LOC(2)			! branch if alignment differs here too
Packit 6c4009
	nop
Packit 6c4009
! **  V1b  **
Packit 6c4009
	andcc	RES_PTR,4,%g0		! RES_PTR unaligned? Side effect: cy=0
Packit 6c4009
	be	LOC(v1b)		! if no, branch
Packit 6c4009
	nop
Packit 6c4009
/* Subtract least significant limb separately to align RES_PTR and S1_PTR */
Packit 6c4009
	ld	[S2_PTR],%g4
Packit 6c4009
	add	S2_PTR,4,S2_PTR
Packit 6c4009
	ld	[S1_PTR],%g2
Packit 6c4009
	add	S1_PTR,4,S1_PTR
Packit 6c4009
	add	SIZE,-1,SIZE
Packit 6c4009
	subcc	%g2,%g4,%o4		! S1 limb - S2 limb; registers swapped vs V1a
Packit 6c4009
	st	%o4,[RES_PTR]
Packit 6c4009
	add	RES_PTR,4,RES_PTR
Packit 6c4009
LOC(v1b):
Packit 6c4009
	addx	%g0,%g0,%o4		! save cy in register
Packit 6c4009
	cmp	SIZE,2			! if SIZE < 2 ...
Packit 6c4009
	bl	LOC(end2)		! ... branch to tail code
Packit 6c4009
	subcc	%g0,%o4,%g0		! restore cy
Packit 6c4009
Packit 6c4009
/* Preload the first limb pair of each source: S2 into %g4/%g1 with ld,
   S1 into %g2/%g3 with ldd.  */
	ld	[S2_PTR+0],%g4
Packit 6c4009
	addcc	SIZE,-10,SIZE		! need 8 limbs per pass + 2 loaded ahead
Packit 6c4009
	ld	[S2_PTR+4],%g1
Packit 6c4009
	ldd	[S1_PTR+0],%g2
Packit 6c4009
	blt	LOC(fin1b)
Packit 6c4009
	subcc	%g0,%o4,%g0		! restore cy
Packit 6c4009
/* Subtract blocks of 8 limbs until less than 8 limbs remain */
Packit 6c4009
LOC(loop1b):
Packit 6c4009
	subxcc	%g2,%g4,%o4
Packit 6c4009
	ld	[S2_PTR+8],%g4
Packit 6c4009
	subxcc	%g3,%g1,%o5
Packit 6c4009
	ld	[S2_PTR+12],%g1
Packit 6c4009
	ldd	[S1_PTR+8],%g2
Packit 6c4009
	std	%o4,[RES_PTR+0]
Packit 6c4009
	subxcc	%g2,%g4,%o4
Packit 6c4009
	ld	[S2_PTR+16],%g4
Packit 6c4009
	subxcc	%g3,%g1,%o5
Packit 6c4009
	ld	[S2_PTR+20],%g1
Packit 6c4009
	ldd	[S1_PTR+16],%g2
Packit 6c4009
	std	%o4,[RES_PTR+8]
Packit 6c4009
	subxcc	%g2,%g4,%o4
Packit 6c4009
	ld	[S2_PTR+24],%g4
Packit 6c4009
	subxcc	%g3,%g1,%o5
Packit 6c4009
	ld	[S2_PTR+28],%g1
Packit 6c4009
	ldd	[S1_PTR+24],%g2
Packit 6c4009
	std	%o4,[RES_PTR+16]
Packit 6c4009
	subxcc	%g2,%g4,%o4
Packit 6c4009
	ld	[S2_PTR+32],%g4		! limb pair for the next pass / the tail
Packit 6c4009
	subxcc	%g3,%g1,%o5
Packit 6c4009
	ld	[S2_PTR+36],%g1
Packit 6c4009
	ldd	[S1_PTR+32],%g2
Packit 6c4009
	std	%o4,[RES_PTR+24]
Packit 6c4009
	addx	%g0,%g0,%o4		! save cy in register
Packit 6c4009
	addcc	SIZE,-8,SIZE
Packit 6c4009
	add	S1_PTR,32,S1_PTR
Packit 6c4009
	add	S2_PTR,32,S2_PTR
Packit 6c4009
	add	RES_PTR,32,RES_PTR
Packit 6c4009
	bge	LOC(loop1b)
Packit 6c4009
	subcc	%g0,%o4,%g0		! restore cy
Packit 6c4009
Packit 6c4009
LOC(fin1b):
Packit 6c4009
	addcc	SIZE,8-2,SIZE		! rebias count for the 2-limb loop
Packit 6c4009
	blt	LOC(end1b)
Packit 6c4009
	subcc	%g0,%o4,%g0		! restore cy
Packit 6c4009
/* Subtract blocks of 2 limbs until less than 2 limbs remain */
Packit 6c4009
LOC(loope1b):
Packit 6c4009
	subxcc	%g2,%g4,%o4
Packit 6c4009
	ld	[S2_PTR+8],%g4
Packit 6c4009
	subxcc	%g3,%g1,%o5
Packit 6c4009
	ld	[S2_PTR+12],%g1
Packit 6c4009
	ldd	[S1_PTR+8],%g2
Packit 6c4009
	std	%o4,[RES_PTR+0]
Packit 6c4009
	addx	%g0,%g0,%o4		! save cy in register
Packit 6c4009
	addcc	SIZE,-2,SIZE
Packit 6c4009
	add	S1_PTR,8,S1_PTR
Packit 6c4009
	add	S2_PTR,8,S2_PTR
Packit 6c4009
	add	RES_PTR,8,RES_PTR
Packit 6c4009
	bge	LOC(loope1b)
Packit 6c4009
	subcc	%g0,%o4,%g0		! restore cy
Packit 6c4009
/* Subtract the limb pair that is already loaded in registers */
LOC(end1b):
Packit 6c4009
	subxcc	%g2,%g4,%o4
Packit 6c4009
	subxcc	%g3,%g1,%o5
Packit 6c4009
	std	%o4,[RES_PTR+0]
Packit 6c4009
	addx	%g0,%g0,%o4		! save cy in register
Packit 6c4009
Packit 6c4009
	andcc	SIZE,1,%g0		! one odd limb left over?
Packit 6c4009
	be	LOC(ret1b)
Packit 6c4009
	subcc	%g0,%o4,%g0		! restore cy
Packit 6c4009
/* Subtract last limb.  The pointers were not advanced past the pair just
   stored, hence the offset of 8.  */
Packit 6c4009
	ld	[S2_PTR+8],%g4
Packit 6c4009
	ld	[S1_PTR+8],%g2
Packit 6c4009
	subxcc	%g2,%g4,%o4
Packit 6c4009
	st	%o4,[RES_PTR+8]
Packit 6c4009
Packit 6c4009
LOC(ret1b):
Packit 6c4009
	retl
Packit 6c4009
	addx	%g0,%g0,%o0	! return borrow out of most sign. limb
Packit 6c4009
Packit 6c4009
! **  V2  **
Packit 6c4009
/* If we come here, the alignment of S1_PTR and RES_PTR as well as the
Packit 6c4009
   alignment of S2_PTR and RES_PTR differ.  Since there are only two ways
Packit 6c4009
   things can be aligned (that we care about) we now know that the alignment
Packit 6c4009
   of S1_PTR and S2_PTR are the same.  */
Packit 6c4009
Packit 6c4009
/* A single limb is handled directly at jone.  When the compare below is
   equal it leaves cy=0, so the subxcc there starts without a borrow.  */
LOC(2):	cmp	SIZE,1
Packit 6c4009
	be	LOC(jone)
Packit 6c4009
	nop
Packit 6c4009
	andcc	S1_PTR,4,%g0		! S1_PTR unaligned? Side effect: cy=0
Packit 6c4009
	be	LOC(v2)			! if no, branch
Packit 6c4009
	nop
Packit 6c4009
/* Subtract least significant limb separately to align S1_PTR and S2_PTR */
Packit 6c4009
	ld	[S1_PTR],%g4
Packit 6c4009
	add	S1_PTR,4,S1_PTR
Packit 6c4009
	ld	[S2_PTR],%g2
Packit 6c4009
	add	S2_PTR,4,S2_PTR
Packit 6c4009
	add	SIZE,-1,SIZE
Packit 6c4009
	subcc	%g4,%g2,%o4		! first limb: no incoming borrow, sets cy
Packit 6c4009
	st	%o4,[RES_PTR]
Packit 6c4009
	add	RES_PTR,4,RES_PTR
Packit 6c4009
Packit 6c4009
LOC(v2):
Packit 6c4009
	addx	%g0,%g0,%o4		! save cy in register
Packit 6c4009
	addcc	SIZE,-8,SIZE		! nothing is loaded ahead here: bias is 8
Packit 6c4009
	blt	LOC(fin2)
Packit 6c4009
	subcc	%g0,%o4,%g0		! restore cy
Packit 6c4009
/* Subtract blocks of 8 limbs until less than 8 limbs remain.  Each pair
   is loaded, subtracted and stored word by word within the same pass;
   %o4/%o5 hold S2 limbs here and %o4 is reused for the saved cy.  */
Packit 6c4009
LOC(loop2):
Packit 6c4009
	ldd	[S1_PTR+0],%g2
Packit 6c4009
	ldd	[S2_PTR+0],%o4
Packit 6c4009
	subxcc	%g2,%o4,%g2
Packit 6c4009
	st	%g2,[RES_PTR+0]
Packit 6c4009
	subxcc	%g3,%o5,%g3
Packit 6c4009
	st	%g3,[RES_PTR+4]
Packit 6c4009
	ldd	[S1_PTR+8],%g2
Packit 6c4009
	ldd	[S2_PTR+8],%o4
Packit 6c4009
	subxcc	%g2,%o4,%g2
Packit 6c4009
	st	%g2,[RES_PTR+8]
Packit 6c4009
	subxcc	%g3,%o5,%g3
Packit 6c4009
	st	%g3,[RES_PTR+12]
Packit 6c4009
	ldd	[S1_PTR+16],%g2
Packit 6c4009
	ldd	[S2_PTR+16],%o4
Packit 6c4009
	subxcc	%g2,%o4,%g2
Packit 6c4009
	st	%g2,[RES_PTR+16]
Packit 6c4009
	subxcc	%g3,%o5,%g3
Packit 6c4009
	st	%g3,[RES_PTR+20]
Packit 6c4009
	ldd	[S1_PTR+24],%g2
Packit 6c4009
	ldd	[S2_PTR+24],%o4
Packit 6c4009
	subxcc	%g2,%o4,%g2
Packit 6c4009
	st	%g2,[RES_PTR+24]
Packit 6c4009
	subxcc	%g3,%o5,%g3
Packit 6c4009
	st	%g3,[RES_PTR+28]
Packit 6c4009
	addx	%g0,%g0,%o4		! save cy in register
Packit 6c4009
	addcc	SIZE,-8,SIZE
Packit 6c4009
	add	S1_PTR,32,S1_PTR
Packit 6c4009
	add	S2_PTR,32,S2_PTR
Packit 6c4009
	add	RES_PTR,32,RES_PTR
Packit 6c4009
	bge	LOC(loop2)
Packit 6c4009
	subcc	%g0,%o4,%g0		! restore cy
Packit 6c4009
Packit 6c4009
LOC(fin2):
Packit 6c4009
	addcc	SIZE,8-2,SIZE		! rebias count for the 2-limb loop
Packit 6c4009
	blt	LOC(end2)
Packit 6c4009
	subcc	%g0,%o4,%g0		! restore cy
Packit 6c4009
/* Subtract blocks of 2 limbs until less than 2 limbs remain */
LOC(loope2):
Packit 6c4009
	ldd	[S1_PTR+0],%g2
Packit 6c4009
	ldd	[S2_PTR+0],%o4
Packit 6c4009
	subxcc	%g2,%o4,%g2
Packit 6c4009
	st	%g2,[RES_PTR+0]
Packit 6c4009
	subxcc	%g3,%o5,%g3
Packit 6c4009
	st	%g3,[RES_PTR+4]
Packit 6c4009
	addx	%g0,%g0,%o4		! save cy in register
Packit 6c4009
	addcc	SIZE,-2,SIZE
Packit 6c4009
	add	S1_PTR,8,S1_PTR
Packit 6c4009
	add	S2_PTR,8,S2_PTR
Packit 6c4009
	add	RES_PTR,8,RES_PTR
Packit 6c4009
	bge	LOC(loope2)
Packit 6c4009
	subcc	%g0,%o4,%g0		! restore cy
Packit 6c4009
/* Shared tail.  Besides falling in from above, this is the branch target
   of v1 and v1b when fewer than 2 limbs remain; in every case %o4 holds
   the saved cy and the pointers address the next unprocessed limb.  */
LOC(end2):
Packit 6c4009
	andcc	SIZE,1,%g0		! one odd limb left over?
Packit 6c4009
	be	LOC(ret2)
Packit 6c4009
	subcc	%g0,%o4,%g0		! restore cy
Packit 6c4009
/* Subtract last limb */
Packit 6c4009
LOC(jone):
Packit 6c4009
	ld	[S1_PTR],%g4
Packit 6c4009
	ld	[S2_PTR],%g2
Packit 6c4009
	subxcc	%g4,%g2,%o4
Packit 6c4009
	st	%o4,[RES_PTR]
Packit 6c4009
Packit 6c4009
LOC(ret2):
Packit 6c4009
	retl
Packit 6c4009
	addx	%g0,%g0,%o0	! return borrow out of most sign. limb
Packit 6c4009
Packit 6c4009
END(__mpn_sub_n)