Blame sysdeps/alpha/divqu.S

Packit 6c4009
/* Copyright (C) 2004-2018 Free Software Foundation, Inc.
Packit 6c4009
   This file is part of the GNU C Library.
Packit 6c4009
Packit 6c4009
   The GNU C Library is free software; you can redistribute it and/or
Packit 6c4009
   modify it under the terms of the GNU Lesser General Public
Packit 6c4009
   License as published by the Free Software Foundation; either
Packit 6c4009
   version 2.1 of the License, or (at your option) any later version.
Packit 6c4009
Packit 6c4009
   The GNU C Library is distributed in the hope that it will be useful,
Packit 6c4009
   but WITHOUT ANY WARRANTY; without even the implied warranty of
Packit 6c4009
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
Packit 6c4009
   Lesser General Public License for more details.
Packit 6c4009
Packit 6c4009
   You should have received a copy of the GNU Lesser General Public
Packit 6c4009
   License along with the GNU C Library.  If not, see
Packit 6c4009
   <http://www.gnu.org/licenses/>.  */
Packit 6c4009
Packit 6c4009
#include "div_libc.h"
Packit 6c4009
Packit 6c4009
Packit 6c4009
/* 64-bit unsigned long divide.  These are not normal C functions.  Argument
Packit 6c4009
   registers are t10 and t11, the result goes in t12.  Only t12 and AT may be
Packit 6c4009
   clobbered.
Packit 6c4009
Packit 6c4009
   Theory of operation here is that we can use the FPU divider for virtually
Packit 6c4009
   all operands that we see: all dividend values between 0 and 2**53-1
Packit 6c4009
   can be computed directly.  Note that divisor values need not be checked
Packit 6c4009
   against that range because the rounded fp value will be close enough such
Packit 6c4009
   that the quotient is < 1, which will properly be truncated to zero when we
Packit 6c4009
   convert back to integer.
Packit 6c4009
Packit 6c4009
   When the dividend is outside the range for which we can compute exact
Packit 6c4009
   results, we use the fp quotient as an estimate from which we begin refining
Packit 6c4009
   an exact integral value.  This reduces the number of iterations in the
Packit 6c4009
   shift-and-subtract loop significantly.
Packit 6c4009
Packit 6c4009
   The FPCR save/restore is due to the fact that the EV6 _will_ set FPCR_INE
Packit 6c4009
   for cvttq/c even without /sui being set.  It will not, however, properly
Packit 6c4009
   raise the exception, so we don't have to worry about FPCR_INED being clear
Packit 6c4009
   and so dying by SIGFPE.  */
Packit 6c4009
Packit 6c4009
	/* __divqu: 64-bit unsigned division, RV = X / Y.  The quotient (or an
	   estimate of it) comes from the FP divider; integer fix-up code
	   corrects the estimate when it may be inexact.  X, Y, RV, RA, FRAME,
	   CALL_MCOUNT and the _ITOFT2, _ITOFS and _FTOIT helpers are not
	   defined in this file -- presumably div_libc.h provides them.  */
	.text
Packit 6c4009
	.align	4
Packit 6c4009
	.globl	__divqu
Packit 6c4009
	.type	__divqu, @funcnoplt
Packit 6c4009
	.usepv	__divqu, no
Packit 6c4009
Packit 6c4009
	cfi_startproc
Packit 6c4009
	cfi_return_column (RA)
Packit 6c4009
__divqu:
Packit 6c4009
	/* Allocate FRAME bytes of stack for the save slots and scratch
	   space used below.  */
	lda	sp, -FRAME(sp)
Packit 6c4009
	cfi_def_cfa_offset (FRAME)
Packit 6c4009
	CALL_MCOUNT
Packit 6c4009
Packit 6c4009
	/* Get the fp divide insn issued as quickly as possible.  After
Packit 6c4009
	   that's done, we have at least 22 cycles until its results are
Packit 6c4009
	   ready -- all the time in the world to figure out how we're
Packit 6c4009
	   going to use the results.  */
Packit 6c4009
	stt	$f0, 0(sp)
Packit 6c4009
	excb
Packit 6c4009
	/* A zero divisor is handled out of line.  The DIVBYZERO target is
	   presumably emitted by DO_DIVBYZERO at the end of this file --
	   confirm in div_libc.h.  */
	beq	Y, DIVBYZERO
Packit 6c4009
Packit 6c4009
	stt	$f1, 8(sp)
Packit 6c4009
	stt	$f3, 48(sp)
Packit 6c4009
	cfi_rel_offset ($f0, 0)
Packit 6c4009
	cfi_rel_offset ($f1, 8)
Packit 6c4009
	cfi_rel_offset ($f3, 48)
Packit 6c4009
	/* Snapshot the FPCR in $f3; each return path below writes it back
	   with mt_fpcr before $f3 itself is reloaded.  */
	mf_fpcr	$f3
Packit 6c4009
Packit 6c4009
	/* Move X and Y into $f0 and $f1 (16 and 24 look like sp-relative
	   scratch offsets for the transfer -- confirm in div_libc.h) and
	   convert both from quadword integer to T-floating.  */
	_ITOFT2	X, $f0, 16, Y, $f1, 24
Packit 6c4009
	cvtqt	$f0, $f0
Packit 6c4009
	cvtqt	$f1, $f1
Packit 6c4009
	/* The conversion is signed, so an X with bit 63 set must be
	   corrected at $x_is_neg before the divide is issued.  */
	blt	X, $x_is_neg
Packit 6c4009
	divt/c	$f0, $f1, $f0
Packit 6c4009
Packit 6c4009
	/* Check to see if Y was mis-converted as signed value.  */
Packit 6c4009
	ldt	$f1, 8(sp)
Packit 6c4009
	blt	Y, $y_is_neg
Packit 6c4009
Packit 6c4009
	/* Check to see if X fit in the double as an exact value.  */
Packit 6c4009
	/* AT is nonzero iff X >= 2**53.  */
	srl	X, 53, AT
Packit 6c4009
	bne	AT, $x_big
Packit 6c4009
Packit 6c4009
	/* If we get here, we're expecting exact results from the division.
Packit 6c4009
	   Do nothing else besides convert and clean up.  */
Packit 6c4009
	cvttq/c	$f0, $f0
Packit 6c4009
	excb
Packit 6c4009
	mt_fpcr	$f3
Packit 6c4009
	_FTOIT	$f0, RV, 16
Packit 6c4009
Packit 6c4009
	/* Reload the caller's $f0 and $f3 ($f1 was already reloaded above)
	   and pop the frame.  */
	ldt	$f0, 0(sp)
Packit 6c4009
	ldt	$f3, 48(sp)
Packit 6c4009
	cfi_remember_state
Packit 6c4009
	cfi_restore ($f0)
Packit 6c4009
	cfi_restore ($f1)
Packit 6c4009
	cfi_restore ($f3)
Packit 6c4009
	cfi_def_cfa_offset (0)
Packit 6c4009
	lda	sp, FRAME(sp)
Packit 6c4009
	ret	$31, (RA), 1
Packit 6c4009
Packit 6c4009
	.align	4
Packit 6c4009
	cfi_restore_state
Packit 6c4009
$x_is_neg:
Packit 6c4009
	/* If we get here, X is so big that bit 63 is set, which made the
Packit 6c4009
	   conversion come out negative.  Fix it up lest we not even get
Packit 6c4009
	   a good estimate.  */
Packit 6c4009
	/* $f2 receives the 2**64 constant, so the caller's value is saved
	   first; slot 24 is free again once _ITOFT2 above has consumed Y.  */
	ldah	AT, 0x5f80		/* 2**64 as float.  */
Packit 6c4009
	stt	$f2, 24(sp)
Packit 6c4009
	cfi_rel_offset ($f2, 24)
Packit 6c4009
	_ITOFS	AT, $f2, 16
Packit 6c4009
Packit 6c4009
	.align	4
Packit 6c4009
	/* Adding 2**64 turns the negative conversion of X back into its
	   unsigned magnitude before dividing.  */
	addt	$f0, $f2, $f0
Packit 6c4009
	unop
Packit 6c4009
	divt/c	$f0, $f1, $f0
Packit 6c4009
	unop
Packit 6c4009
Packit 6c4009
	/* Ok, we've now the divide issued.  Continue with other checks.  */
Packit 6c4009
	/* $f1 and $f2 are reloaded here, before either $y_is_neg or $x_big
	   runs, so neither path has to restore them.  */
	ldt	$f1, 8(sp)
Packit 6c4009
	unop
Packit 6c4009
	ldt	$f2, 24(sp)
Packit 6c4009
	blt	Y, $y_is_neg
Packit 6c4009
	cfi_restore ($f1)
Packit 6c4009
	cfi_restore ($f2)
Packit 6c4009
	cfi_remember_state	/* for y_is_neg */
Packit 6c4009
Packit 6c4009
	/* Fall through into $x_big: with bit 63 set, X cannot be exact.  */
	.align	4
Packit 6c4009
$x_big:
Packit 6c4009
	/* If we get here, X is large enough that we don't expect exact
Packit 6c4009
	   results, and neither X nor Y got mis-translated for the fp
Packit 6c4009
	   division.  Our task is to take the fp result, figure out how
Packit 6c4009
	   far it's off from the correct result and compute a fixup.  */
Packit 6c4009
	/* Save the integer temporaries used by the fix-up code.  Slot 24 is
	   reused: any $f2 saved there has already been reloaded.  */
	stq	t0, 16(sp)
Packit 6c4009
	stq	t1, 24(sp)
Packit 6c4009
	stq	t2, 32(sp)
Packit 6c4009
	stq	t3, 40(sp)
Packit 6c4009
	cfi_rel_offset (t0, 16)
Packit 6c4009
	cfi_rel_offset (t1, 24)
Packit 6c4009
	cfi_rel_offset (t2, 32)
Packit 6c4009
	cfi_rel_offset (t3, 40)
Packit 6c4009
Packit 6c4009
#define Q	RV		/* quotient */
Packit 6c4009
#define R	t0		/* remainder */
Packit 6c4009
#define SY	t1		/* scaled Y */
Packit 6c4009
#define S	t2		/* scalar */
Packit 6c4009
#define QY	t3		/* Q*Y */
Packit 6c4009
Packit 6c4009
	/* Q = truncated fp quotient (the estimate); QY = Q*Y.  */
	cvttq/c	$f0, $f0
Packit 6c4009
	_FTOIT	$f0, Q, 8
Packit 6c4009
	mulq	Q, Y, QY
Packit 6c4009
Packit 6c4009
	.align	4
Packit 6c4009
	/* $f1 was reloaded from 8(sp) before we got here, so that slot now
	   holds t4.  Then restore $f0 and the FPCR; everything after this
	   point is integer-only.  */
	stq	t4, 8(sp)
Packit 6c4009
	excb
Packit 6c4009
	ldt	$f0, 0(sp)
Packit 6c4009
	mt_fpcr	$f3
Packit 6c4009
	cfi_rel_offset (t4, 8)
Packit 6c4009
	cfi_restore ($f0)
Packit 6c4009
Packit 6c4009
	/* R = Q*Y - X.  A positive value means the estimate overshot.  */
	subq	QY, X, R
Packit 6c4009
	mov	Y, SY
Packit 6c4009
	mov	1, S
Packit 6c4009
	bgt	R, $q_high
Packit 6c4009
Packit 6c4009
$q_high_ret:
Packit 6c4009
	/* R = X - Q*Y.  A positive remainder is passed to $q_low, which
	   adds to Q for every scaled Y that still fits in R.  */
	subq	X, QY, R
Packit 6c4009
	mov	Y, SY
Packit 6c4009
	mov	1, S
Packit 6c4009
	bgt	R, $q_low
Packit 6c4009
Packit 6c4009
$q_low_ret:
Packit 6c4009
	/* Q (in RV) is final.  Reload the saved integer temporaries and
	   $f3, pop the frame and return.  */
	ldq	t4, 8(sp)
Packit 6c4009
	ldq	t0, 16(sp)
Packit 6c4009
	ldq	t1, 24(sp)
Packit 6c4009
	ldq	t2, 32(sp)
Packit 6c4009
Packit 6c4009
	ldq	t3, 40(sp)
Packit 6c4009
	ldt	$f3, 48(sp)
Packit 6c4009
	lda	sp, FRAME(sp)
Packit 6c4009
	cfi_remember_state
Packit 6c4009
	cfi_restore (t0)
Packit 6c4009
	cfi_restore (t1)
Packit 6c4009
	cfi_restore (t2)
Packit 6c4009
	cfi_restore (t3)
Packit 6c4009
	cfi_restore (t4)
Packit 6c4009
	cfi_restore ($f3)
Packit 6c4009
	cfi_def_cfa_offset (0)
Packit 6c4009
	ret	$31, (RA), 1
Packit 6c4009
Packit 6c4009
	.align	4
Packit 6c4009
	cfi_restore_state
Packit 6c4009
	/* The quotient that we computed was too large.  We need to reduce
Packit 6c4009
	   it by S such that Y*S >= R.  Obviously the closer we get to the
Packit 6c4009
	   correct value the better, but overshooting high is ok, as we'll
Packit 6c4009
	   fix that up later.  */
Packit 6c4009
0:
Packit 6c4009
	addq	SY, SY, SY
Packit 6c4009
	addq	S, S, S
Packit 6c4009
$q_high:
Packit 6c4009
	/* Keep doubling SY and S together while SY < R (unsigned).  */
	cmpult	SY, R, AT
Packit 6c4009
	bne	AT, 0b
Packit 6c4009
Packit 6c4009
	/* Now SY >= R: back the quotient off by S and keep QY equal to
	   Q*Y by subtracting the matching SY.  */
	subq	Q, S, Q
Packit 6c4009
	unop
Packit 6c4009
	subq	QY, SY, QY
Packit 6c4009
	br	$q_high_ret
Packit 6c4009
Packit 6c4009
	.align	4
Packit 6c4009
	/* The quotient that we computed was too small.  Divide the
Packit 6c4009
	   current remainder (R) by Y and add that to the existing quotient (Q).
Packit 6c4009
	   The expectation, of course, is that R is much smaller than X.  */
Packit 6c4009
	/* Begin with a shift-up loop.  Compute S such that Y*S >= R.  We
Packit 6c4009
	   already have a copy of Y in SY and the value 1 in S.  */
Packit 6c4009
0:
Packit 6c4009
	addq	SY, SY, SY
Packit 6c4009
	addq	S, S, S
Packit 6c4009
$q_low:
Packit 6c4009
	cmpult	SY, R, AT
Packit 6c4009
	bne	AT, 0b
Packit 6c4009
Packit 6c4009
	/* Shift-down and subtract loop.  Each iteration compares our scaled
Packit 6c4009
	   Y (SY) with the remainder (R); if SY <= R then X is divisible by
Packit 6c4009
	   Y's scalar (S) so add it to the quotient (Q).  */
Packit 6c4009
	/* t3 (no longer needed as QY) and t4 hold the tentative new Q and R;
	   the cmovne pair commits them only when SY <= R.  S is halved
	   before the test, so the loop ends after the S == 1 step.  */
2:	addq	Q, S, t3
Packit 6c4009
	srl	S, 1, S
Packit 6c4009
	cmpule	SY, R, AT
Packit 6c4009
	subq	R, SY, t4
Packit 6c4009
Packit 6c4009
	cmovne	AT, t3, Q
Packit 6c4009
	cmovne	AT, t4, R
Packit 6c4009
	srl	SY, 1, SY
Packit 6c4009
	bne	S, 2b
Packit 6c4009
Packit 6c4009
	br	$q_low_ret
Packit 6c4009
Packit 6c4009
	.align	4
Packit 6c4009
	cfi_restore_state
Packit 6c4009
$y_is_neg:
Packit 6c4009
	/* If we get here, Y is so big that bit 63 is set.  The results
Packit 6c4009
	   from the divide will be completely wrong.  Fortunately, the
Packit 6c4009
	   quotient must be either 0 or 1, so just compute it directly.  */
Packit 6c4009
	/* RV = (Y <= X) as an unsigned compare.  $f1 (and $f2 on the
	   $x_is_neg path) were reloaded before branching here, so only
	   the FPCR, $f0 and $f3 remain to be restored.  */
	cmpule	Y, X, RV
Packit 6c4009
	excb
Packit 6c4009
	mt_fpcr	$f3
Packit 6c4009
	ldt	$f0, 0(sp)
Packit 6c4009
	ldt	$f3, 48(sp)
Packit 6c4009
	lda	sp, FRAME(sp)
Packit 6c4009
	cfi_restore ($f0)
Packit 6c4009
	cfi_restore ($f3)
Packit 6c4009
	cfi_def_cfa_offset (0)
Packit 6c4009
	ret	$31, (RA), 1
Packit 6c4009
Packit 6c4009
	cfi_endproc
Packit 6c4009
	.size	__divqu, .-__divqu
Packit 6c4009
Packit 6c4009
	/* Macro not defined in this file; presumably expands to the
	   out-of-line divide-by-zero handler targeted by the beq near the
	   top -- confirm in div_libc.h.  */
	DO_DIVBYZERO