Tree - source-git/glibc - CentOS Git server

source-git / glibc

Blame sysdeps/alpha/divq.S

Blob History Raw

Packit	6c4009	`/* Copyright (C) 2004-2018 Free Software Foundation, Inc.`
Packit	6c4009	`This file is part of the GNU C Library.`
Packit	6c4009
Packit	6c4009	`The GNU C Library is free software; you can redistribute it and/or`
Packit	6c4009	`modify it under the terms of the GNU Lesser General Public`
Packit	6c4009	`License as published by the Free Software Foundation; either`
Packit	6c4009	`version 2.1 of the License, or (at your option) any later version.`
Packit	6c4009
Packit	6c4009	`The GNU C Library is distributed in the hope that it will be useful,`
Packit	6c4009	`but WITHOUT ANY WARRANTY; without even the implied warranty of`
Packit	6c4009	`MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU`
Packit	6c4009	`Lesser General Public License for more details.`
Packit	6c4009
Packit	6c4009	`You should have received a copy of the GNU Lesser General Public`
Packit	6c4009	`License along with the GNU C Library. If not, see`
Packit	6c4009	`<http://www.gnu.org/licenses/>. */`
Packit	6c4009
Packit	6c4009	`#include "div_libc.h"`
Packit	6c4009
Packit	6c4009
Packit	6c4009	`/* 64-bit signed long divide. These are not normal C functions. Argument`
Packit	6c4009	`registers are t10 and t11, the result goes in t12. Only t12 and AT may`
Packit	6c4009	`be clobbered.`
Packit	6c4009
Packit	6c4009	`Theory of operation here is that we can use the FPU divider for virtually`
Packit	6c4009	`all operands that we see: all dividend values between -2**53 and 2**53-1`
Packit	6c4009	`can be computed directly. Note that divisor values need not be checked`
Packit	6c4009	`against that range because the rounded fp value will be close enough such`
Packit	6c4009	`that the quotient is < 1, which will properly be truncated to zero when we`
Packit	6c4009	`convert back to integer.`
Packit	6c4009
Packit	6c4009	`When the dividend is outside the range for which we can compute exact`
Packit	6c4009	`results, we use the fp quotient as an estimate from which we begin refining`
Packit	6c4009	`an exact integral value. This reduces the number of iterations in the`
Packit	6c4009	`shift-and-subtract loop significantly.`
Packit	6c4009
Packit	6c4009	`The FPCR save/restore is due to the fact that the EV6 _will_ set FPCR_INE`
Packit	6c4009	`for cvttq/c even without /sui being set. It will not, however, properly`
Packit	6c4009	`raise the exception, so we don't have to worry about FPCR_INED being clear`
Packit	6c4009	`and so dying by SIGFPE. */`
Packit	6c4009
Packit	6c4009	`.text`
Packit	6c4009	`.align 4`
Packit	6c4009	`.globl __divq`
Packit	6c4009	`.type __divq, @funcnoplt`
Packit	6c4009	`.usepv __divq, no`
Packit	6c4009
Packit	6c4009	`cfi_startproc`
Packit	6c4009	`cfi_return_column (RA)`
Packit	6c4009	`__divq:`
Packit	6c4009	`lda sp, -FRAME(sp)`
Packit	6c4009	`cfi_def_cfa_offset (FRAME)`
Packit	6c4009	`CALL_MCOUNT`
Packit	6c4009
Packit	6c4009	`/* Get the fp divide insn issued as quickly as possible. After`
Packit	6c4009	`that's done, we have at least 22 cycles until its results are`
Packit	6c4009	`ready -- all the time in the world to figure out how we're`
Packit	6c4009	`going to use the results. */`
Packit	6c4009	`stt $f0, 0(sp)`
Packit	6c4009	`excb`
Packit	6c4009	`beq Y, DIVBYZERO`
Packit	6c4009
Packit	6c4009	`stt $f1, 8(sp)`
Packit	6c4009	`stt $f3, 48(sp)`
Packit	6c4009	`cfi_rel_offset ($f0, 0)`
Packit	6c4009	`cfi_rel_offset ($f1, 8)`
Packit	6c4009	`cfi_rel_offset ($f3, 48)`
Packit	6c4009	`mf_fpcr $f3`
Packit	6c4009
Packit	6c4009	`_ITOFT2 X, $f0, 16, Y, $f1, 24`
Packit	6c4009	`cvtqt $f0, $f0`
Packit	6c4009	`cvtqt $f1, $f1`
Packit	6c4009	`divt/c $f0, $f1, $f0`
Packit	6c4009
Packit	6c4009	`/* Check to see if X fit in the double as an exact value. */`
Packit	6c4009	`sll X, (64-53), AT`
Packit	6c4009	`ldt $f1, 8(sp)`
Packit	6c4009	`sra AT, (64-53), AT`
Packit	6c4009	`cmpeq X, AT, AT`
Packit	6c4009	`beq AT, $x_big`
Packit	6c4009
Packit	6c4009	`/* If we get here, we're expecting exact results from the division.`
Packit	6c4009	`Do nothing else besides convert and clean up. */`
Packit	6c4009	`cvttq/c $f0, $f0`
Packit	6c4009	`excb`
Packit	6c4009	`mt_fpcr $f3`
Packit	6c4009	`_FTOIT $f0, RV, 16`
Packit	6c4009
Packit	6c4009	`ldt $f0, 0(sp)`
Packit	6c4009	`ldt $f3, 48(sp)`
Packit	6c4009	`cfi_restore ($f1)`
Packit	6c4009	`cfi_remember_state`
Packit	6c4009	`cfi_restore ($f0)`
Packit	6c4009	`cfi_restore ($f3)`
Packit	6c4009	`cfi_def_cfa_offset (0)`
Packit	6c4009	`lda sp, FRAME(sp)`
Packit	6c4009	`ret $31, (RA), 1`
Packit	6c4009
Packit	6c4009	`.align 4`
Packit	6c4009	`cfi_restore_state`
Packit	6c4009	`$x_big:`
Packit	6c4009	`/* If we get here, X is large enough that we don't expect exact`
Packit	6c4009	`results, and neither X nor Y got mis-translated for the fp`
Packit	6c4009	`division. Our task is to take the fp result, figure out how`
Packit	6c4009	`far it's off from the correct result and compute a fixup. */`
Packit	6c4009	`stq t0, 16(sp)`
Packit	6c4009	`stq t1, 24(sp)`
Packit	6c4009	`stq t2, 32(sp)`
Packit	6c4009	`stq t5, 40(sp)`
Packit	6c4009	`cfi_rel_offset (t0, 16)`
Packit	6c4009	`cfi_rel_offset (t1, 24)`
Packit	6c4009	`cfi_rel_offset (t2, 32)`
Packit	6c4009	`cfi_rel_offset (t5, 40)`
Packit	6c4009
Packit	6c4009	`#define Q RV /* quotient */`
Packit	6c4009	`#define R t0 /* remainder */`
Packit	6c4009	`#define SY t1 /* scaled Y */`
Packit	6c4009	`#define S t2 /* scalar */`
Packit	6c4009	`#define QY t3 /* Q*Y */`
Packit	6c4009
Packit	6c4009	`/* The fixup code below can only handle unsigned values. */`
Packit	6c4009	`or X, Y, AT`
Packit	6c4009	`mov $31, t5`
Packit	6c4009	`blt AT, $fix_sign_in`
Packit	6c4009	`$fix_sign_in_ret1:`
Packit	6c4009	`cvttq/c $f0, $f0`
Packit	6c4009
Packit	6c4009	`_FTOIT $f0, Q, 8`
Packit	6c4009	`.align 3`
Packit	6c4009	`$fix_sign_in_ret2:`
Packit	6c4009	`ldt $f0, 0(sp)`
Packit	6c4009	`stq t3, 0(sp)`
Packit	6c4009	`cfi_restore ($f0)`
Packit	6c4009	`cfi_rel_offset (t3, 0)`
Packit	6c4009
Packit	6c4009	`mulq Q, Y, QY`
Packit	6c4009	`excb`
Packit	6c4009	`stq t4, 8(sp)`
Packit	6c4009	`mt_fpcr $f3`
Packit	6c4009	`cfi_rel_offset (t4, 8)`
Packit	6c4009
Packit	6c4009	`subq QY, X, R`
Packit	6c4009	`mov Y, SY`
Packit	6c4009	`mov 1, S`
Packit	6c4009	`bgt R, $q_high`
Packit	6c4009
Packit	6c4009	`$q_high_ret:`
Packit	6c4009	`subq X, QY, R`
Packit	6c4009	`mov Y, SY`
Packit	6c4009	`mov 1, S`
Packit	6c4009	`bgt R, $q_low`
Packit	6c4009
Packit	6c4009	`$q_low_ret:`
Packit	6c4009	`ldq t0, 16(sp)`
Packit	6c4009	`ldq t1, 24(sp)`
Packit	6c4009	`ldq t2, 32(sp)`
Packit	6c4009	`bne t5, $fix_sign_out`
Packit	6c4009
Packit	6c4009	`$fix_sign_out_ret:`
Packit	6c4009	`ldq t3, 0(sp)`
Packit	6c4009	`ldq t4, 8(sp)`
Packit	6c4009	`ldq t5, 40(sp)`
Packit	6c4009	`ldt $f3, 48(sp)`
Packit	6c4009	`lda sp, FRAME(sp)`
Packit	6c4009	`cfi_remember_state`
Packit	6c4009	`cfi_restore (t0)`
Packit	6c4009	`cfi_restore (t1)`
Packit	6c4009	`cfi_restore (t2)`
Packit	6c4009	`cfi_restore (t3)`
Packit	6c4009	`cfi_restore (t4)`
Packit	6c4009	`cfi_restore (t5)`
Packit	6c4009	`cfi_restore ($f3)`
Packit	6c4009	`cfi_def_cfa_offset (0)`
Packit	6c4009	`ret $31, (RA), 1`
Packit	6c4009
Packit	6c4009	`.align 4`
Packit	6c4009	`cfi_restore_state`
Packit	6c4009	`/* The quotient that we computed was too large. We need to reduce`
Packit	6c4009	`it by S such that Y*S >= R. Obviously the closer we get to the`
Packit	6c4009	`correct value the better, but overshooting high is ok, as we'll`
Packit	6c4009	`fix that up later. */`
Packit	6c4009	`0:`
Packit	6c4009	`addq SY, SY, SY`
Packit	6c4009	`addq S, S, S`
Packit	6c4009	`$q_high:`
Packit	6c4009	`cmpult SY, R, AT`
Packit	6c4009	`bne AT, 0b`
Packit	6c4009
Packit	6c4009	`subq Q, S, Q`
Packit	6c4009	`unop`
Packit	6c4009	`subq QY, SY, QY`
Packit	6c4009	`br $q_high_ret`
Packit	6c4009
Packit	6c4009	`.align 4`
Packit	6c4009	`/* The quotient that we computed was too small. Divide the current`
Packit	6c4009	`remainder (R) by Y and add that to the existing quotient (Q).`
Packit	6c4009	`The expectation, of course, is that R is much smaller than X. */`
Packit	6c4009	`/* Begin with a shift-up loop. Compute S such that Y*S >= R. We`
Packit	6c4009	`already have a copy of Y in SY and the value 1 in S. */`
Packit	6c4009	`0:`
Packit	6c4009	`addq SY, SY, SY`
Packit	6c4009	`addq S, S, S`
Packit	6c4009	`$q_low:`
Packit	6c4009	`cmpult SY, R, AT`
Packit	6c4009	`bne AT, 0b`
Packit	6c4009
Packit	6c4009	`/* Shift-down and subtract loop. Each iteration compares our scaled`
Packit	6c4009	`Y (SY) with the remainder (R); if SY <= R then R still contains S more`
Packit	6c4009	`multiples of Y, so add S to the quotient (Q) and subtract SY from R. */`
Packit	6c4009	`2: addq Q, S, t3`
Packit	6c4009	`srl S, 1, S`
Packit	6c4009	`cmpule SY, R, AT`
Packit	6c4009	`subq R, SY, t4`
Packit	6c4009
Packit	6c4009	`cmovne AT, t3, Q`
Packit	6c4009	`cmovne AT, t4, R`
Packit	6c4009	`srl SY, 1, SY`
Packit	6c4009	`bne S, 2b`
Packit	6c4009
Packit	6c4009	`br $q_low_ret`
Packit	6c4009
Packit	6c4009	`.align 4`
Packit	6c4009	`$fix_sign_in:`
Packit	6c4009	`/* If we got here, then X|Y is negative. Need to adjust everything`
Packit	6c4009	`such that we're doing unsigned division in the fixup loop. */`
Packit	6c4009	`/* T5 records the changes we had to make:`
Packit	6c4009	`bit 0: set if result should be negative.`
Packit	6c4009	`bit 2: set if X was negated.`
Packit	6c4009	`bit 3: set if Y was negated.`
Packit	6c4009	`*/`
Packit	6c4009	`xor X, Y, AT`
Packit	6c4009	`cmplt AT, 0, t5`
Packit	6c4009	`cmplt X, 0, AT`
Packit	6c4009	`negq X, t0`
Packit	6c4009
Packit	6c4009	`s4addq AT, t5, t5`
Packit	6c4009	`cmovne AT, t0, X`
Packit	6c4009	`cmplt Y, 0, AT`
Packit	6c4009	`negq Y, t0`
Packit	6c4009
Packit	6c4009	`s8addq AT, t5, t5`
Packit	6c4009	`cmovne AT, t0, Y`
Packit	6c4009	`unop`
Packit	6c4009	`blbc t5, $fix_sign_in_ret1`
Packit	6c4009
Packit	6c4009	`cvttq/c $f0, $f0`
Packit	6c4009	`_FTOIT $f0, Q, 8`
Packit	6c4009	`.align 3`
Packit	6c4009	`negq Q, Q`
Packit	6c4009	`br $fix_sign_in_ret2`
Packit	6c4009
Packit	6c4009	`.align 4`
Packit	6c4009	`$fix_sign_out:`
Packit	6c4009	`/* Now we get to undo what we did above. */`
Packit	6c4009	`/* ??? Is this really faster than just increasing the size of`
Packit	6c4009	`the stack frame and storing X and Y in memory? */`
Packit	6c4009	`and t5, 8, AT`
Packit	6c4009	`negq Y, t4`
Packit	6c4009	`cmovne AT, t4, Y`
Packit	6c4009
Packit	6c4009	`and t5, 4, AT`
Packit	6c4009	`negq X, t4`
Packit	6c4009	`cmovne AT, t4, X`
Packit	6c4009
Packit	6c4009	`negq RV, t4`
Packit	6c4009	`cmovlbs t5, t4, RV`
Packit	6c4009
Packit	6c4009	`br $fix_sign_out_ret`
Packit	6c4009
Packit	6c4009	`cfi_endproc`
Packit	6c4009	`.size __divq, .-__divq`
Packit	6c4009
Packit	6c4009	`DO_DIVBYZERO`

source-git / glibc

Source Code

Blame sysdeps/alpha/divq.S