Tree - source-git/glibc - CentOS Git server

source-git / glibc

Blame sysdeps/alpha/divqu.S

Blob History Raw

Packit	6c4009	`/* Copyright (C) 2004-2018 Free Software Foundation, Inc.`
Packit	6c4009	`This file is part of the GNU C Library.`
Packit	6c4009
Packit	6c4009	`The GNU C Library is free software; you can redistribute it and/or`
Packit	6c4009	`modify it under the terms of the GNU Lesser General Public`
Packit	6c4009	`License as published by the Free Software Foundation; either`
Packit	6c4009	`version 2.1 of the License, or (at your option) any later version.`
Packit	6c4009
Packit	6c4009	`The GNU C Library is distributed in the hope that it will be useful,`
Packit	6c4009	`but WITHOUT ANY WARRANTY; without even the implied warranty of`
Packit	6c4009	`MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU`
Packit	6c4009	`Lesser General Public License for more details.`
Packit	6c4009
Packit	6c4009	`You should have received a copy of the GNU Lesser General Public`
Packit	6c4009	`License along with the GNU C Library. If not, see`
Packit	6c4009	`<http://www.gnu.org/licenses/>. */`
Packit	6c4009
Packit	6c4009	`#include "div_libc.h"`
Packit	6c4009
Packit	6c4009
Packit	6c4009	`/* 64-bit unsigned long divide. These are not normal C functions. Argument`
Packit	6c4009	`registers are t10 and t11, the result goes in t12. Only t12 and AT may be`
Packit	6c4009	`clobbered.`
Packit	6c4009
Packit	6c4009	`Theory of operation here is that we can use the FPU divider for virtually`
Packit	6c4009	`all operands that we see: all dividend values between -2**53 and 2**53-1`
Packit	6c4009	`can be computed directly. Note that divisor values need not be checked`
Packit	6c4009	`against that range because the rounded fp value will be close enough such`
Packit	6c4009	`that the quotient is < 1, which will properly be truncated to zero when we`
Packit	6c4009	`convert back to integer.`
Packit	6c4009
Packit	6c4009	`When the dividend is outside the range for which we can compute exact`
Packit	6c4009	`results, we use the fp quotient as an estimate from which we begin refining`
Packit	6c4009	`an exact integral value. This reduces the number of iterations in the`
Packit	6c4009	`shift-and-subtract loop significantly.`
Packit	6c4009
Packit	6c4009	`The FPCR save/restore is due to the fact that the EV6 _will_ set FPCR_INE`
Packit	6c4009	`for cvttq/c even without /sui being set. It will not, however, properly`
Packit	6c4009	`raise the exception, so we don't have to worry about FPCR_INED being clear`
Packit	6c4009	`and so dying by SIGFPE. */`
Packit	6c4009
Packit	6c4009	`.text`
Packit	6c4009	`.align 4`
Packit	6c4009	`.globl __divqu`
Packit	6c4009	`.type __divqu, @funcnoplt`
Packit	6c4009	`.usepv __divqu, no`
Packit	6c4009
Packit	6c4009	`cfi_startproc`
Packit	6c4009	`cfi_return_column (RA)`
Packit	6c4009	`__divqu: /* Unsigned 64-bit divide: RV = X / Y. X, Y, RV, RA and FRAME come from div_libc.h. */`
Packit	6c4009	`lda sp, -FRAME(sp) /* Allocate FRAME bytes of scratch and save space. */`
Packit	6c4009	`cfi_def_cfa_offset (FRAME)`
Packit	6c4009	`CALL_MCOUNT`
Packit	6c4009
Packit	6c4009	`/* Get the fp divide insn issued as quickly as possible. After`
Packit	6c4009	`that's done, we have at least 22 cycles until its results are`
Packit	6c4009	`ready -- all the time in the world to figure out how we're`
Packit	6c4009	`going to use the results. */`
Packit	6c4009	`stt $f0, 0(sp) /* Save $f0 in frame slot 0, it is reloaded on every return path below. */`
Packit	6c4009	`excb`
Packit	6c4009	`beq Y, DIVBYZERO /* Zero divisor: branch to the DIVBYZERO path. */`
Packit	6c4009
Packit	6c4009	`stt $f1, 8(sp)`
Packit	6c4009	`stt $f3, 48(sp)`
Packit	6c4009	`cfi_rel_offset ($f0, 0)`
Packit	6c4009	`cfi_rel_offset ($f1, 8)`
Packit	6c4009	`cfi_rel_offset ($f3, 48)`
Packit	6c4009	`mf_fpcr $f3 /* Snapshot FPCR in $f3, each return path below puts it back with mt_fpcr. */`
Packit	6c4009
Packit	6c4009	`_ITOFT2 X, $f0, 16, Y, $f1, 24 /* Macro from div_libc.h, presumably copies integer X and Y into $f0 and $f1 using frame slots 16 and 24 -- confirm. */`
Packit	6c4009	`cvtqt $f0, $f0 /* Convert X to double, treating it as signed. */`
Packit	6c4009	`cvtqt $f1, $f1 /* Convert Y to double, treating it as signed. */`
Packit	6c4009	`blt X, $x_is_neg /* Bit 63 of X is set, so the signed conversion needs a fix-up first. */`
Packit	6c4009	`divt/c $f0, $f1, $f0 /* Start the fp divide, chopped rounding. */`
Packit	6c4009
Packit	6c4009	`/* Check to see if Y was mis-converted as signed value. */`
Packit	6c4009	`ldt $f1, 8(sp) /* $f1 is no longer needed, reload the saved value. */`
Packit	6c4009	`blt Y, $y_is_neg`
Packit	6c4009
Packit	6c4009	`/* Check to see if X fit in the double as an exact value. */`
Packit	6c4009	`srl X, 53, AT /* AT is nonzero exactly when X >= 2**53. */`
Packit	6c4009	`bne AT, $x_big`
Packit	6c4009
Packit	6c4009	`/* If we get here, we're expecting exact results from the division.`
Packit	6c4009	`Do nothing else besides convert and clean up. */`
Packit	6c4009	`cvttq/c $f0, $f0 /* Truncate the fp quotient to a 64-bit integer. */`
Packit	6c4009	`excb`
Packit	6c4009	`mt_fpcr $f3 /* Restore the FPCR captured at entry. */`
Packit	6c4009	`_FTOIT $f0, RV, 16 /* Macro from div_libc.h, presumably moves the integer bits of $f0 into RV via slot 16 -- confirm. */`
Packit	6c4009
Packit	6c4009	`ldt $f0, 0(sp)`
Packit	6c4009	`ldt $f3, 48(sp)`
Packit	6c4009	`cfi_remember_state`
Packit	6c4009	`cfi_restore ($f0)`
Packit	6c4009	`cfi_restore ($f1)`
Packit	6c4009	`cfi_restore ($f3)`
Packit	6c4009	`cfi_def_cfa_offset (0)`
Packit	6c4009	`lda sp, FRAME(sp)`
Packit	6c4009	`ret $31, (RA), 1`
Packit	6c4009
Packit	6c4009	`.align 4`
Packit	6c4009	`cfi_restore_state`
Packit	6c4009	`$x_is_neg:`
Packit	6c4009	`/* If we get here, X is so big that bit 63 is set, which made the`
Packit	6c4009	`conversion come out negative. Fix it up lest we not even get`
Packit	6c4009	`a good estimate. */`
Packit	6c4009	`ldah AT, 0x5f80 /* 2**64 as float. */`
Packit	6c4009	`stt $f2, 24(sp) /* Save $f2, it is borrowed below to hold the 2**64 correction. */`
Packit	6c4009	`cfi_rel_offset ($f2, 24)`
Packit	6c4009	`_ITOFS AT, $f2, 16 /* Macro from div_libc.h, presumably loads the bit pattern in AT into $f2 as a single-precision value via slot 16 -- confirm. */`
Packit	6c4009
Packit	6c4009	`.align 4`
Packit	6c4009	`addt $f0, $f2, $f0 /* Add the correction so $f0 approximates X as an unsigned value. */`
Packit	6c4009	`unop`
Packit	6c4009	`divt/c $f0, $f1, $f0 /* Start the fp divide, chopped rounding. */`
Packit	6c4009	`unop`
Packit	6c4009
Packit	6c4009	`/* Ok, we've now the divide issued. Continue with other checks. */`
Packit	6c4009	`ldt $f1, 8(sp) /* Reload the saved $f1. */`
Packit	6c4009	`unop`
Packit	6c4009	`ldt $f2, 24(sp) /* Reload the saved $f2. */`
Packit	6c4009	`blt Y, $y_is_neg /* Otherwise execution falls through into $x_big. */`
Packit	6c4009	`cfi_restore ($f1)`
Packit	6c4009	`cfi_restore ($f2)`
Packit	6c4009	`cfi_remember_state /* for y_is_neg */`
Packit	6c4009
Packit	6c4009	`.align 4`
Packit	6c4009	`$x_big:`
Packit	6c4009	`/* If we get here, X is large enough that we don't expect exact`
Packit	6c4009	`results, and neither X nor Y got mis-translated for the fp`
Packit	6c4009	`division. Our task is to take the fp result, figure out how`
Packit	6c4009	`far it's off from the correct result and compute a fixup. */`
Packit	6c4009	`stq t0, 16(sp) /* Spill t0 through t3, they are used as R, SY, S and QY in the fix-up code. */`
Packit	6c4009	`stq t1, 24(sp)`
Packit	6c4009	`stq t2, 32(sp)`
Packit	6c4009	`stq t3, 40(sp)`
Packit	6c4009	`cfi_rel_offset (t0, 16)`
Packit	6c4009	`cfi_rel_offset (t1, 24)`
Packit	6c4009	`cfi_rel_offset (t2, 32)`
Packit	6c4009	`cfi_rel_offset (t3, 40)`
Packit	6c4009
Packit	6c4009	`#define Q RV /* quotient */`
Packit	6c4009	`#define R t0 /* remainder */`
Packit	6c4009	`#define SY t1 /* scaled Y */`
Packit	6c4009	`#define S t2 /* scalar */`
Packit	6c4009	`#define QY t3 /* Q*Y */`
Packit	6c4009
Packit	6c4009	`cvttq/c $f0, $f0 /* Truncate the fp estimate to a 64-bit integer. */`
Packit	6c4009	`_FTOIT $f0, Q, 8 /* Q = estimated quotient. Macro from div_libc.h, presumably bounces through slot 8 -- confirm. */`
Packit	6c4009	`mulq Q, Y, QY /* QY = Q * Y. */`
Packit	6c4009
Packit	6c4009	`.align 4`
Packit	6c4009	`stq t4, 8(sp) /* Spill t4, the shift-down loop in $q_low uses it as a temporary. */`
Packit	6c4009	`excb`
Packit	6c4009	`ldt $f0, 0(sp)`
Packit	6c4009	`mt_fpcr $f3 /* Restore the FPCR captured at entry. */`
Packit	6c4009	`cfi_rel_offset (t4, 8)`
Packit	6c4009	`cfi_restore ($f0)`
Packit	6c4009
Packit	6c4009	`subq QY, X, R /* R = Q*Y - X, a positive value means the estimate was too high. */`
Packit	6c4009	`mov Y, SY /* Seed the scaling state: SY = Y and S = 1. */`
Packit	6c4009	`mov 1, S`
Packit	6c4009	`bgt R, $q_high`
Packit	6c4009
Packit	6c4009	`$q_high_ret:`
Packit	6c4009	`subq X, QY, R /* R = X - Q*Y, a positive value is handed to $q_low to be folded into Q. */`
Packit	6c4009	`mov Y, SY /* Seed the scaling state again: SY = Y and S = 1. */`
Packit	6c4009	`mov 1, S`
Packit	6c4009	`bgt R, $q_low`
Packit	6c4009
Packit	6c4009	`$q_low_ret:`
Packit	6c4009	`ldq t4, 8(sp) /* Q is final here. Reload the spilled integer registers and $f3, then return. */`
Packit	6c4009	`ldq t0, 16(sp)`
Packit	6c4009	`ldq t1, 24(sp)`
Packit	6c4009	`ldq t2, 32(sp)`
Packit	6c4009
Packit	6c4009	`ldq t3, 40(sp)`
Packit	6c4009	`ldt $f3, 48(sp)`
Packit	6c4009	`lda sp, FRAME(sp)`
Packit	6c4009	`cfi_remember_state`
Packit	6c4009	`cfi_restore (t0)`
Packit	6c4009	`cfi_restore (t1)`
Packit	6c4009	`cfi_restore (t2)`
Packit	6c4009	`cfi_restore (t3)`
Packit	6c4009	`cfi_restore (t4)`
Packit	6c4009	`cfi_restore ($f3)`
Packit	6c4009	`cfi_def_cfa_offset (0)`
Packit	6c4009	`ret $31, (RA), 1`
Packit	6c4009
Packit	6c4009	`.align 4`
Packit	6c4009	`cfi_restore_state`
Packit	6c4009	`/* The quotient that we computed was too large. We need to reduce`
Packit	6c4009	`it by S such that Y*S >= R. Obviously the closer we get to the`
Packit	6c4009	`correct value the better, but overshooting high is ok, as we'll`
Packit	6c4009	`fix that up later. */`
Packit	6c4009	`0:`
Packit	6c4009	`addq SY, SY, SY /* Double SY. */`
Packit	6c4009	`addq S, S, S /* Double S in step, so SY stays equal to Y * S. */`
Packit	6c4009	`$q_high:`
Packit	6c4009	`cmpult SY, R, AT /* Unsigned test SY < R, keep doubling while it holds. */`
Packit	6c4009	`bne AT, 0b`
Packit	6c4009
Packit	6c4009	`subq Q, S, Q /* Lower the quotient by S. */`
Packit	6c4009	`unop`
Packit	6c4009	`subq QY, SY, QY /* Lower QY by Y * S so that it still equals Q * Y. */`
Packit	6c4009	`br $q_high_ret`
Packit	6c4009
Packit	6c4009	`.align 4`
Packit	6c4009	`/* The quotient that we computed was too small. Divide the current`
Packit	6c4009	`remainder (R) by Y and add that to the existing quotient (Q).`
Packit	6c4009	`The expectation, of course, is that R is much smaller than X. */`
Packit	6c4009	`/* Begin with a shift-up loop. Compute S such that Y*S >= R. We`
Packit	6c4009	`already have a copy of Y in SY and the value 1 in S. */`
Packit	6c4009	`0:`
Packit	6c4009	`addq SY, SY, SY /* Double SY. */`
Packit	6c4009	`addq S, S, S /* Double S in step, so SY stays equal to Y * S. */`
Packit	6c4009	`$q_low:`
Packit	6c4009	`cmpult SY, R, AT /* Unsigned test SY < R, keep doubling while it holds. */`
Packit	6c4009	`bne AT, 0b`
Packit	6c4009
Packit	6c4009	`/* Shift-down and subtract loop. Each iteration compares our scaled`
Packit	6c4009	`Y (SY) with the remainder (R), and if SY <= R then subtract SY from`
Packit	6c4009	`R and add its scalar (S) to the quotient (Q). */`
Packit	6c4009	`2: addq Q, S, t3 /* t3 = candidate quotient Q + S. t3 was QY, which is not read again. */`
Packit	6c4009	`srl S, 1, S /* Halve S for the next round, t3 already used the old value. */`
Packit	6c4009	`cmpule SY, R, AT /* AT = 1 when SY <= R, unsigned. */`
Packit	6c4009	`subq R, SY, t4 /* t4 = candidate remainder R - SY. */`
Packit	6c4009
Packit	6c4009	`cmovne AT, t3, Q /* Commit both candidates only when SY <= R. */`
Packit	6c4009	`cmovne AT, t4, R`
Packit	6c4009	`srl SY, 1, SY /* Halve SY to match S. */`
Packit	6c4009	`bne S, 2b /* Loop until S has been shifted down to zero. */`
Packit	6c4009
Packit	6c4009	`br $q_low_ret`
Packit	6c4009
Packit	6c4009	`.align 4`
Packit	6c4009	`cfi_restore_state`
Packit	6c4009	`$y_is_neg:`
Packit	6c4009	`/* If we get here, Y is so big that bit 63 is set. The results`
Packit	6c4009	`from the divide will be completely wrong. Fortunately, the`
Packit	6c4009	`quotient must be either 0 or 1, so just compute it directly. */`
Packit	6c4009	`cmpule Y, X, RV /* RV = 1 when Y <= X (unsigned), otherwise 0. */`
Packit	6c4009	`excb /* Exception barrier ahead of the FPCR restore. */`
Packit	6c4009	`mt_fpcr $f3 /* Restore the FPCR captured at entry. */`
Packit	6c4009	`ldt $f0, 0(sp) /* The fp quotient in $f0 is discarded, reload the saved value. */`
Packit	6c4009	`ldt $f3, 48(sp)`
Packit	6c4009	`lda sp, FRAME(sp)`
Packit	6c4009	`cfi_restore ($f0)`
Packit	6c4009	`cfi_restore ($f3)`
Packit	6c4009	`cfi_def_cfa_offset (0)`
Packit	6c4009	`ret $31, (RA), 1`
Packit	6c4009
Packit	6c4009	`cfi_endproc`
Packit	6c4009	`.size __divqu, .-__divqu`
Packit	6c4009
Packit	6c4009	`DO_DIVBYZERO /* Macro from div_libc.h, presumably emits the DIVBYZERO path targeted by the beq at entry -- confirm. */`

source-git / glibc

Source Code

Blame sysdeps/alpha/divqu.S