Blame sysdeps/ia64/fpu/s_atanl.S

Packit 6c4009
.file "atanl.s"
Packit 6c4009
Packit 6c4009
Packit 6c4009
// Copyright (c) 2000 - 2005, Intel Corporation
Packit 6c4009
// All rights reserved.
Packit 6c4009
//
Packit 6c4009
// Contributed 2000 by the Intel Numerics Group, Intel Corporation
Packit 6c4009
//
Packit 6c4009
// Redistribution and use in source and binary forms, with or without
Packit 6c4009
// modification, are permitted provided that the following conditions are
Packit 6c4009
// met:
Packit 6c4009
//
Packit 6c4009
// * Redistributions of source code must retain the above copyright
Packit 6c4009
// notice, this list of conditions and the following disclaimer.
Packit 6c4009
//
Packit 6c4009
// * Redistributions in binary form must reproduce the above copyright
Packit 6c4009
// notice, this list of conditions and the following disclaimer in the
Packit 6c4009
// documentation and/or other materials provided with the distribution.
Packit 6c4009
//
Packit 6c4009
// * The name of Intel Corporation may not be used to endorse or promote
Packit 6c4009
// products derived from this software without specific prior written
Packit 6c4009
// permission.
Packit 6c4009
Packit 6c4009
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
Packit 6c4009
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
Packit 6c4009
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
Packit 6c4009
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
Packit 6c4009
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
Packit 6c4009
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
Packit 6c4009
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
Packit 6c4009
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
Packit 6c4009
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
Packit 6c4009
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
Packit 6c4009
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
Packit 6c4009
//
Packit 6c4009
// Intel Corporation is the author of this code, and requests that all
Packit 6c4009
// problem reports or change requests be submitted to it directly at
Packit 6c4009
// http://www.intel.com/software/products/opensource/libraries/num.htm.
Packit 6c4009
//
Packit 6c4009
//
Packit 6c4009
//*********************************************************************
Packit 6c4009
//
Packit 6c4009
// History
Packit 6c4009
// 02/02/00 (hand-optimized)
Packit 6c4009
// 04/04/00 Unwind support added
Packit 6c4009
// 08/15/00 Bundle added after call to __libm_error_support to properly
Packit 6c4009
//          set [the previously overwritten] GR_Parameter_RESULT.
Packit 6c4009
// 03/13/01 Fixed flags when denormal raised on intermediate result
Packit 6c4009
// 01/08/02 Improved speed.
Packit 6c4009
// 02/06/02 Corrected .section statement
Packit 6c4009
// 05/20/02 Cleaned up namespace and sf0 syntax
Packit 6c4009
// 02/10/03 Reordered header: .section, .global, .proc, .align;
Packit 6c4009
//          used data8 for long double table values
Packit 6c4009
// 03/31/05 Reformatted delimiters between data tables
Packit 6c4009
//
Packit 6c4009
//*********************************************************************
Packit 6c4009
//
Packit 6c4009
// Function:   atanl(x) = inverse tangent(x), for double extended x values
Packit 6c4009
// Function:   atan2l(y,x) = atan(y/x), for double extended y, x values
Packit 6c4009
//
Packit 6c4009
// API
Packit 6c4009
//
Packit 6c4009
//  long double atanl  (long double x)
Packit 6c4009
//  long double atan2l (long double y, long double x)
Packit 6c4009
//
Packit 6c4009
//*********************************************************************
Packit 6c4009
//
Packit 6c4009
// Resources Used:
Packit 6c4009
//
Packit 6c4009
//    Floating-Point Registers: f8 (Input and Return Value)
Packit 6c4009
//                              f9 (Input for atan2l)
Packit 6c4009
//                              f10-f15, f32-f83
Packit 6c4009
//
Packit 6c4009
//    General Purpose Registers:
Packit 6c4009
//      r32-r51
Packit 6c4009
//      r49-r52 (Arguments to error support for 0,0 case)
Packit 6c4009
//
Packit 6c4009
//    Predicate Registers:      p6-p15
Packit 6c4009
//
Packit 6c4009
//*********************************************************************
Packit 6c4009
//
Packit 6c4009
// IEEE Special Conditions:
Packit 6c4009
//
Packit 6c4009
//    Denormal fault raised on denormal inputs
Packit 6c4009
//    Underflow exceptions may occur
Packit 6c4009
//    Special error handling for the y=0 and x=0 case
Packit 6c4009
//    Inexact raised when appropriate by algorithm
Packit 6c4009
//
Packit 6c4009
//    atanl(SNaN) = QNaN
Packit 6c4009
//    atanl(QNaN) = QNaN
Packit 6c4009
//    atanl(+/-0) = +/- 0
Packit 6c4009
//    atanl(+/-Inf) = +/-pi/2
Packit 6c4009
//
Packit 6c4009
//    atan2l(Any NaN for x or y) = QNaN
Packit 6c4009
//    atan2l(+/-0,x) = +/-0 for x > 0
Packit 6c4009
//    atan2l(+/-0,x) = +/-pi for x < 0
Packit 6c4009
//    atan2l(+/-0,+0) = +/-0
Packit 6c4009
//    atan2l(+/-0,-0) = +/-pi
Packit 6c4009
//    atan2l(y,+/-0) = pi/2 for y > 0
Packit 6c4009
//    atan2l(y,+/-0) = -pi/2 for y < 0
Packit 6c4009
//    atan2l(+/-y, Inf) = +/-0 for finite y > 0
Packit 6c4009
//    atan2l(+/-Inf, x) = +/-pi/2 for finite x
Packit 6c4009
//    atan2l(+/-y, -Inf) = +/-pi for finite  y > 0
Packit 6c4009
//    atan2l(+/-Inf, Inf) = +/-pi/4
Packit 6c4009
//    atan2l(+/-Inf, -Inf) = +/-3pi/4
Packit 6c4009
//
Packit 6c4009
//*********************************************************************
Packit 6c4009
//
Packit 6c4009
// Mathematical Description
Packit 6c4009
// ---------------------------
Packit 6c4009
//
Packit 6c4009
// The function ATANL( Arg_Y, Arg_X ) returns the "argument"
Packit 6c4009
// or the "phase" of the complex number
Packit 6c4009
//
Packit 6c4009
//           Arg_X + i Arg_Y
Packit 6c4009
//
Packit 6c4009
// or equivalently, the angle in radians from the positive
Packit 6c4009
// x-axis to the line joining the origin and the point
Packit 6c4009
// (Arg_X,Arg_Y)
Packit 6c4009
//
Packit 6c4009
//
Packit 6c4009
//        (Arg_X, Arg_Y) x
Packit 6c4009
//                        \
Packit 6c4009
//                \
Packit 6c4009
//                 \
Packit 6c4009
//                  \
Packit 6c4009
//                   \ angle between is ATANL(Arg_Y,Arg_X)
Packit 6c4009
Packit 6c4009
Packit 6c4009
Packit 6c4009
Packit 6c4009
//                    \
Packit 6c4009
//                     ------------------> X-axis
Packit 6c4009
Packit 6c4009
//                   Origin
Packit 6c4009
//
Packit 6c4009
// Moreover, this angle is reported in the range [-pi,pi] thus
Packit 6c4009
//
Packit 6c4009
//      -pi <= ATANL( Arg_Y, Arg_X ) <= pi.
Packit 6c4009
//
Packit 6c4009
// From the geometry, it is easy to define ATANL when one of
Packit 6c4009
// Arg_X or Arg_Y is +-0 or +-inf:
Packit 6c4009
//
Packit 6c4009
//
Packit 6c4009
//      \ Y |
Packit 6c4009
//     X \  |  +0  | -0  |  +inf |  -inf  |  finite non-zero
Packit 6c4009
//        \ |      |     |       |        |
Packit 6c4009
//    ______________________________________________________
Packit 6c4009
//          |            |       |        |
Packit 6c4009
//     +-0  |   Invalid/ |  pi/2 | -pi/2  |  sign(Y)*pi/2
Packit 6c4009
//          |    qNaN    |       |        |
Packit 6c4009
//  --------------------------------------------------------
Packit 6c4009
//          |      |     |       |        |
Packit 6c4009
//     +inf |  +0  | -0  |  pi/4 | -pi/4  |  sign(Y)*0
Packit 6c4009
//  --------------------------------------------------------
Packit 6c4009
//          |      |     |       |        |
Packit 6c4009
//     -inf |  +pi | -pi | 3pi/4 | -3pi/4 |  sign(Y)*pi
Packit 6c4009
//  --------------------------------------------------------
Packit 6c4009
//   finite |    X>0?    |  pi/2 | -pi/2  |  normal case
Packit 6c4009
//  non-zero| sign(Y)*0: |       |        |
Packit 6c4009
//       | sign(Y)*pi |       |        |
Packit 6c4009
//
Packit 6c4009
//
Packit 6c4009
// One must take note that ATANL is NOT the arctangent of the
Packit 6c4009
// value Arg_Y/Arg_X; but rather ATANL and arctan are related
Packit 6c4009
// in a slightly more complicated way as follows:
Packit 6c4009
//
Packit 6c4009
// Let U := max(|Arg_X|, |Arg_Y|);  V := min(|Arg_X|, |Arg_Y|);
Packit 6c4009
// sign_X be the sign bit of Arg_X, i.e., sign_X is 0 or 1;
Packit 6c4009
// s_X    be the sign     of Arg_X, i.e., s_X = (-1)^sign_X;
Packit 6c4009
//
Packit 6c4009
// sign_Y be the sign bit of Arg_Y, i.e., sign_Y is 0 or 1;
Packit 6c4009
// s_Y    be the sign     of Arg_Y, i.e., s_Y = (-1)^sign_Y;
Packit 6c4009
//
Packit 6c4009
// swap   be 0  if |Arg_X| >= |Arg_Y|  and 1 otherwise.
Packit 6c4009
//
Packit 6c4009
// Then, ATANL(Arg_Y, Arg_X) =
Packit 6c4009
//
Packit 6c4009
//       /    arctan(V/U)     \      sign_X = 0 & swap = 0
Packit 6c4009
//       | pi/2 - arctan(V/U) |      sign_X = 0 & swap = 1
Packit 6c4009
// s_Y * |                    |
Packit 6c4009
//       |  pi  - arctan(V/U) |      sign_X = 1 & swap = 0
Packit 6c4009
//       \ pi/2 + arctan(V/U) /      sign_X = 1 & swap = 1
Packit 6c4009
//
Packit 6c4009
//
Packit 6c4009
// This relationship also suggests that the algorithm's major
Packit 6c4009
// task is to calculate arctan(V/U) for 0 < V <= U; and the
Packit 6c4009
// final Result is given by
Packit 6c4009
//
Packit 6c4009
//      s_Y * { (P_hi + P_lo) + sigma * arctan(V/U) }
Packit 6c4009
//
Packit 6c4009
// where
Packit 6c4009
//
Packit 6c4009
//   (P_hi,P_lo) represents M(sign_X,swap)*(pi/2) accurately
Packit 6c4009
//
Packit 6c4009
//   M(sign_X,swap) = 0  for sign_X = 0 and swap = 0
Packit 6c4009
//              1  for swap   = 1
Packit 6c4009
//              2  for sign_X = 1 and swap = 0
Packit 6c4009
//
Packit 6c4009
// and
Packit 6c4009
//
Packit 6c4009
//   sigma = { (sign_X  XOR  swap) :  -1.0 : 1.0 }
Packit 6c4009
//
Packit 6c4009
//      =  (-1) ^ ( sign_X XOR swap )
Packit 6c4009
//
Packit 6c4009
// Both (P_hi,P_lo) and sigma can be stored in a table and fetched
Packit 6c4009
// using (sign_X,swap) as an index. (P_hi, P_lo) can be stored as a
Packit 6c4009
// double-precision, and single-precision pair; and sigma can
Packit 6c4009
// obviously be just a single-precision number.
Packit 6c4009
//
Packit 6c4009
// In the algorithm we propose, arctan(V/U) is calculated to high accuracy
Packit 6c4009
// as A_hi + A_lo. Consequently, the Result ATANL( Arg_Y, Arg_X ) is
Packit 6c4009
// given by
Packit 6c4009
//
Packit 6c4009
//    s_Y*P_hi + s_Y*sigma*A_hi + s_Y*(sigma*A_lo + P_lo)
Packit 6c4009
//
Packit 6c4009
// We now discuss the calculation of arctan(V/U) for 0 < V <= U.
Packit 6c4009
//
Packit 6c4009
// For (V/U) < 2^(-3), we use a simple polynomial of the form
Packit 6c4009
//
Packit 6c4009
//      z + z^3*(P_1 + z^2*(P_2 + z^2*(P_3 + ... + P_8)))
Packit 6c4009
//
Packit 6c4009
// where z = V/U.
Packit 6c4009
//
Packit 6c4009
// For the sake of accuracy, the first term "z" must approximate V/U to
Packit 6c4009
// extra precision. For z^3 and higher power, a working precision
Packit 6c4009
// approximation to V/U suffices. Thus, we obtain:
Packit 6c4009
//
Packit 6c4009
//      z_hi + z_lo = V/U  to extra precision and
Packit 6c4009
//      z           = V/U  to working precision
Packit 6c4009
//
Packit 6c4009
// The value arctan(V/U) is delivered as two pieces (A_hi, A_lo)
Packit 6c4009
//
Packit 6c4009
//      (A_hi,A_lo) = (z_hi, z^3*(P_1 + ... + P_8) + z_lo).
Packit 6c4009
//
Packit 6c4009
//
Packit 6c4009
// For 2^(-3) <= (V/U) <= 1, we use a table-driven approach.
Packit 6c4009
// Consider
Packit 6c4009
//
Packit 6c4009
//      (V/U) = 2^k * 1.b_1 b_2 .... b_63 b_64 b_65 ....
Packit 6c4009
//
Packit 6c4009
// Define
Packit 6c4009
//
Packit 6c4009
//       z_hi = 2^k * 1.b_1 b_2 b_3 b_4 1
Packit 6c4009
//
Packit 6c4009
// then
Packit 6c4009
//                                            /                \
Packit 6c4009
//                                            |  (V/U) - z_hi  |
Packit 6c4009
Packit 6c4009
//      arctan(V/U) = arctan(z_hi) + arctan| -------------- |
Packit 6c4009
//                                            | 1 + (V/U)*z_hi |
Packit 6c4009
//                                            \                /
Packit 6c4009
//
Packit 6c4009
//                                            /                \
Packit 6c4009
//                                            |   V - z_hi*U   |
Packit 6c4009
Packit 6c4009
//                  = arctan(z_hi) + arctan| -------------- |
Packit 6c4009
//                                            |   U + V*z_hi   |
Packit 6c4009
//                                            \                /
Packit 6c4009
//
Packit 6c4009
//                  = arctan(z_hi) + arctan( V' / U' )
Packit 6c4009
//
Packit 6c4009
//
Packit 6c4009
// where
Packit 6c4009
//
Packit 6c4009
//      V' = V - U*z_hi;   U' = U + V*z_hi.
Packit 6c4009
//
Packit 6c4009
// Let
Packit 6c4009
//
Packit 6c4009
//      w_hi + w_lo  = V'/U' to extra precision and
Packit 6c4009
//           w       = V'/U' to working precision
Packit 6c4009
//
Packit 6c4009
// then we can approximate arctan(V'/U') by
Packit 6c4009
//
Packit 6c4009
//      arctan(V'/U') = w_hi + w_lo
Packit 6c4009
//                     + w^3*(Q_1 + w^2*(Q_2 + w^2*(Q_3 + w^2*Q_4)))
Packit 6c4009
//
Packit 6c4009
//                       = w_hi + w_lo + poly
Packit 6c4009
//
Packit 6c4009
// Finally, arctan(z_hi) is calculated beforehand and stored in a table
Packit 6c4009
// as Tbl_hi, Tbl_lo. Thus,
Packit 6c4009
//
Packit 6c4009
//      (A_hi, A_lo) = (Tbl_hi, w_hi+(poly+(w_lo+Tbl_lo)))
Packit 6c4009
//
Packit 6c4009
// This completes the mathematical description.
Packit 6c4009
//
Packit 6c4009
//
Packit 6c4009
// Algorithm
Packit 6c4009
// -------------
Packit 6c4009
//
Packit 6c4009
// Step 0. Check for unsupported format.
Packit 6c4009
//
Packit 6c4009
//    If
Packit 6c4009
//       ( expo(Arg_X) not zero AND msb(Arg_X) = 0 ) OR
Packit 6c4009
//       ( expo(Arg_Y) not zero AND msb(Arg_Y) = 0 )
Packit 6c4009
//
Packit 6c4009
//    then one of the arguments is unsupported. Generate an
Packit 6c4009
//    invalid and return qNaN.
Packit 6c4009
//
Packit 6c4009
// Step 1. Initialize
Packit 6c4009
//
Packit 6c4009
//    Normalize Arg_X and Arg_Y and set the following
Packit 6c4009
//
Packit 6c4009
//    sign_X :=  sign_bit(Arg_X)
Packit 6c4009
//    s_Y    := (sign_bit(Arg_Y)==0? 1.0 : -1.0)
Packit 6c4009
//    swap   := (|Arg_X| >= |Arg_Y|?   0 :  1  )
Packit 6c4009
//    U      := max( |Arg_X|, |Arg_Y| )
Packit 6c4009
//    V      := min( |Arg_X|, |Arg_Y| )
Packit 6c4009
//
Packit 6c4009
//    execute: frcpa E, pred, V, U
Packit 6c4009
//    If pred is 0, go to Step 5 for special cases handling.
Packit 6c4009
//
Packit 6c4009
// Step 2. Decide on branch.
Packit 6c4009
//
Packit 6c4009
//    Q := E * V
Packit 6c4009
//    If Q < 2^(-3) go to Step 4 for simple polynomial case.
Packit 6c4009
//
Packit 6c4009
// Step 3. Table-driven algorithm.
Packit 6c4009
//
Packit 6c4009
//    Q is represented as
Packit 6c4009
//
Packit 6c4009
//      2^(-k) * 1.b_1 b_2 b_3 ... b_63; k = 0,1,2,3
Packit 6c4009
//
Packit 6c4009
// and that if k = 0, b_1 = b_2 = b_3 = b_4 = 0.
Packit 6c4009
//
Packit 6c4009
// Define
Packit 6c4009
//
Packit 6c4009
//      z_hi := 2^(-k) * 1.b_1 b_2 b_3 b_4 1
Packit 6c4009
//
Packit 6c4009
// (note that there are 49 possible values of z_hi).
Packit 6c4009
//
Packit 6c4009
//      ...We now calculate V' and U'. While V' is representable
Packit 6c4009
//      ...as a 64-bit number because of cancellation, U' is
Packit 6c4009
//      ...not in general a 64-bit number. Obtaining U' accurately
Packit 6c4009
//      ...requires two working precision numbers
Packit 6c4009
//
Packit 6c4009
//      U_prime_hi := U + V * z_hi            ...WP approx. to U'
Packit 6c4009
//      U_prime_lo := ( U - U_prime_hi ) + V*z_hi ...observe order
Packit 6c4009
//      V_prime    := V - U * z_hi             ...this is exact
Packit 6c4009
//
Packit 6c4009
//         C_hi := frcpa (1.0, U_prime_hi)  ...C_hi approx 1/U'_hi
Packit 6c4009
//
Packit 6c4009
//      loop 3 times
Packit 6c4009
//         C_hi := C_hi + C_hi*(1.0 - C_hi*U_prime_hi)
Packit 6c4009
//
Packit 6c4009
//      ...at this point C_hi is (1/U_prime_hi) to roughly 64 bits
Packit 6c4009
//
Packit 6c4009
//      w_hi := V_prime * C_hi     ...w_hi is V_prime/U_prime to
Packit 6c4009
//                     ...roughly working precision
Packit 6c4009
//
Packit 6c4009
//         ...note that we want w_hi + w_lo to approximate
Packit 6c4009
//      ...V_prime/(U_prime_hi + U_prime_lo) to extra precision
Packit 6c4009
//         ...but for now, w_hi is good enough for the polynomial
Packit 6c4009
//      ...calculation.
Packit 6c4009
//
Packit 6c4009
//         wsq  := w_hi*w_hi
Packit 6c4009
//      poly := w_hi*wsq*(Q_1 + wsq*(Q_2 + wsq*(Q_3 + wsq*Q_4)))
Packit 6c4009
//
Packit 6c4009
//      Fetch
Packit 6c4009
//      (Tbl_hi, Tbl_lo) = atan(z_hi) indexed by (k,b_1,b_2,b_3,b_4)
Packit 6c4009
//      ...Tbl_hi is a double-precision number
Packit 6c4009
//      ...Tbl_lo is a single-precision number
Packit 6c4009
//
Packit 6c4009
//         (P_hi, P_lo) := M(sign_X,swap)*(Pi_by_2_hi, Pi_by_2_lo)
Packit 6c4009
//      ...as discussed previously. Again, the implementation can
Packit 6c4009
//      ...choose to fetch P_hi and P_lo from a table indexed by
Packit 6c4009
//      ...(sign_X, swap).
Packit 6c4009
//      ...P_hi is a double-precision number;
Packit 6c4009
//      ...P_lo is a single-precision number.
Packit 6c4009
//
Packit 6c4009
//      ...calculate w_lo so that w_hi + w_lo is V'/U' accurately
Packit 6c4009
//         w_lo := ((V_prime - w_hi*U_prime_hi) -
Packit 6c4009
//              w_hi*U_prime_lo) * C_hi     ...observe order
Packit 6c4009
//
Packit 6c4009
//
Packit 6c4009
//      ...Ready to deliver arctan(V/U) as A_hi, A_lo
Packit 6c4009
//      A_hi := Tbl_hi
Packit 6c4009
//      A_lo := w_hi + (poly + (Tbl_lo + w_lo)) ...observe order
Packit 6c4009
//
Packit 6c4009
//      ...Deliver final Result
Packit 6c4009
//      ...s_Y*P_hi + s_Y*sigma*A_hi + s_Y*(sigma*A_lo + P_lo)
Packit 6c4009
//
Packit 6c4009
//      sigma := ( (sign_X XOR swap) ? -1.0 : 1.0 )
Packit 6c4009
//      ...sigma can be obtained by a table lookup using
Packit 6c4009
//      ...(sign_X,swap) as index and stored as single precision
Packit 6c4009
//         ...sigma should be calculated earlier
Packit 6c4009
//
Packit 6c4009
//      P_hi := s_Y*P_hi
Packit 6c4009
//      A_hi := s_Y*A_hi
Packit 6c4009
//
Packit 6c4009
//      Res_hi := P_hi + sigma*A_hi     ...this is exact because
Packit 6c4009
//                          ...both P_hi and Tbl_hi
Packit 6c4009
//                          ...are double-precision
Packit 6c4009
//                          ...and |Tbl_hi| > 2^(-4)
Packit 6c4009
//                          ...P_hi is either 0 or
Packit 6c4009
//                          ...between (1,4)
Packit 6c4009
//
Packit 6c4009
//      Res_lo := sigma*A_lo + P_lo
Packit 6c4009
//
Packit 6c4009
//      Return Res_hi + s_Y*Res_lo in user-defined rounding control
Packit 6c4009
//
Packit 6c4009
// Step 4. Simple polynomial case.
Packit 6c4009
//
Packit 6c4009
//    ...E and Q are inherited from Step 2.
Packit 6c4009
//
Packit 6c4009
//    A_hi := Q     ...Q is inherited from Step 2; Q approximates V/U
Packit 6c4009
//
Packit 6c4009
//    loop 3 times
Packit 6c4009
//       E := E + E*(1.0 - E*U)
Packit 6c4009
//    ...at this point E approximates 1/U to roughly working precision
Packit 6c4009
//
Packit 6c4009
//    z := V * E     ...z approximates V/U to roughly working precision
Packit 6c4009
//    zsq := z * z
Packit 6c4009
//    z4 := zsq * zsq; z8 := z4 * z4
Packit 6c4009
//
Packit 6c4009
//    poly1 := P_4 + zsq*(P_5 + zsq*(P_6 + zsq*(P_7 + zsq*P_8)))
Packit 6c4009
//    poly2 := zsq*(P_1 + zsq*(P_2 + zsq*P_3))
Packit 6c4009
//
Packit 6c4009
//    poly  := poly2 + z8*poly1
Packit 6c4009
//
Packit 6c4009
//    z_lo := (V - A_hi*U)*E
Packit 6c4009
//
Packit 6c4009
//    A_lo := z*poly + z_lo
Packit 6c4009
//    ...A_hi, A_lo approximate arctan(V/U) accurately
Packit 6c4009
//
Packit 6c4009
//    (P_hi, P_lo) := M(sign_X,swap)*(Pi_by_2_hi, Pi_by_2_lo)
Packit 6c4009
//    ...one can store the M(sign_X,swap) as single precision
Packit 6c4009
//    ...values
Packit 6c4009
//
Packit 6c4009
//    ...Deliver final Result
Packit 6c4009
//    ...s_Y*P_hi + s_Y*sigma*A_hi + s_Y*(sigma*A_lo + P_lo)
Packit 6c4009
//
Packit 6c4009
//    sigma := ( (sign_X XOR swap) ? -1.0 : 1.0 )
Packit 6c4009
//    ...sigma can be obtained by a table lookup using
Packit 6c4009
//    ...(sign_X,swap) as index and stored as single precision
Packit 6c4009
//    ...sigma should be calculated earlier
Packit 6c4009
//
Packit 6c4009
//    P_hi := s_Y*P_hi
Packit 6c4009
//    A_hi := s_Y*A_hi
Packit 6c4009
//
Packit 6c4009
//    Res_hi := P_hi + sigma*A_hi          ...need to compute
Packit 6c4009
//                          ...P_hi + sigma*A_hi
Packit 6c4009
//                          ...exactly
Packit 6c4009
//
Packit 6c4009
//    tmp    := (P_hi - Res_hi) + sigma*A_hi
Packit 6c4009
//
Packit 6c4009
//    Res_lo := s_Y*(sigma*A_lo + P_lo) + tmp
Packit 6c4009
//
Packit 6c4009
//    Return Res_hi + Res_lo in user-defined rounding control
Packit 6c4009
//
Packit 6c4009
// Step 5. Special Cases
Packit 6c4009
//
Packit 6c4009
//    These are detected early in the function by fclass instructions.
Packit 6c4009
//
Packit 6c4009
//    We are in one of those special cases when X or Y is 0,+-inf or NaN
Packit 6c4009
//
Packit 6c4009
//    If one of X and Y is NaN, return X+Y (which will generate
Packit 6c4009
//    invalid in case one is a signaling NaN). Otherwise,
Packit 6c4009
//    return the Result as described in the table
Packit 6c4009
//
Packit 6c4009
//
Packit 6c4009
//
Packit 6c4009
//      \ Y |
Packit 6c4009
//     X \  |  +0  | -0  |  +inf |  -inf  |  finite non-zero
Packit 6c4009
//        \ |      |     |       |        |
Packit 6c4009
//    ______________________________________________________
Packit 6c4009
//          |            |       |        |
Packit 6c4009
//     +-0  |   Invalid/ |  pi/2 | -pi/2  |  sign(Y)*pi/2
Packit 6c4009
//          |    qNaN    |       |        |
Packit 6c4009
//  --------------------------------------------------------
Packit 6c4009
//          |      |     |       |        |
Packit 6c4009
//     +inf |  +0  | -0  |  pi/4 | -pi/4  |  sign(Y)*0
Packit 6c4009
//  --------------------------------------------------------
Packit 6c4009
//          |      |     |       |        |
Packit 6c4009
//     -inf |  +pi | -pi | 3pi/4 | -3pi/4 |  sign(Y)*pi
Packit 6c4009
//  --------------------------------------------------------
Packit 6c4009
//   finite |    X>0?    |  pi/2 | -pi/2  |
Packit 6c4009
//  non-zero| sign(Y)*0: |       |        |      N/A
Packit 6c4009
//       | sign(Y)*pi |       |        |
Packit 6c4009
//
Packit 6c4009
//
Packit 6c4009
Packit 6c4009
// Floating-point register aliases.
//
// f8 holds the first argument (y) on entry and the result on return, so
// ArgY_orig, Result and FR_RESULT all name the same register.  f9 holds the
// second argument (x) of atan2l; the atanl entry point writes 1.0 into it.
// The remaining names mirror the quantities used in the "Algorithm" section
// of the header comment (U, V, E, Q, z_hi, w_hi, poly, P_hi, sigma, ...).
ArgY_orig   =   f8          // y as supplied by the caller
Packit 6c4009
Result      =   f8          // return value (same register as ArgY_orig)
Packit 6c4009
FR_RESULT   =   f8          // second name for the result register
Packit 6c4009
ArgX_orig   =   f9          // x as supplied by the caller (atan2l)
Packit 6c4009
ArgX        =   f10         // normalized x
Packit 6c4009
FR_X        =   f10         // second name for ArgX
Packit 6c4009
ArgY        =   f11         // normalized y
Packit 6c4009
FR_Y        =   f11         // second name for ArgY
Packit 6c4009
s_Y         =   f12
Packit 6c4009
U           =   f13
Packit 6c4009
V           =   f14
Packit 6c4009
E           =   f15
Packit 6c4009
Q           =   f32
Packit 6c4009
z_hi        =   f33
Packit 6c4009
U_prime_hi  =   f34
Packit 6c4009
U_prime_lo  =   f35
Packit 6c4009
V_prime     =   f36
Packit 6c4009
C_hi        =   f37
Packit 6c4009
w_hi        =   f38
Packit 6c4009
w_lo        =   f39
Packit 6c4009
wsq         =   f40
Packit 6c4009
poly        =   f41
Packit 6c4009
Tbl_hi      =   f42
Packit 6c4009
Tbl_lo      =   f43
Packit 6c4009
P_hi        =   f44
Packit 6c4009
P_lo        =   f45
Packit 6c4009
A_hi        =   f46
Packit 6c4009
A_lo        =   f47
Packit 6c4009
sigma       =   f48
Packit 6c4009
Res_hi      =   f49
Packit 6c4009
Res_lo      =   f50
Packit 6c4009
Z           =   f52
Packit 6c4009
zsq         =   f53
Packit 6c4009
z4          =   f54
Packit 6c4009
z8          =   f54         // NOTE(review): shares f54 with z4 - presumably z4 is dead once z8 is formed; confirm
Packit 6c4009
poly1       =   f55
Packit 6c4009
poly2       =   f56
Packit 6c4009
z_lo        =   f57
Packit 6c4009
tmp         =   f58
Packit 6c4009
// Polynomial coefficients; P_n and Q_n are assigned interleaved registers.
P_1         =   f59
Packit 6c4009
Q_1         =   f60
Packit 6c4009
P_2         =   f61
Packit 6c4009
Q_2         =   f62
Packit 6c4009
P_3         =   f63
Packit 6c4009
Q_3         =   f64
Packit 6c4009
P_4         =   f65
Packit 6c4009
Q_4         =   f66
Packit 6c4009
P_5         =   f67
Packit 6c4009
P_6         =   f68
Packit 6c4009
P_7         =   f69
Packit 6c4009
P_8         =   f70
Packit 6c4009
U_hold      =   f71
Packit 6c4009
TWO_TO_NEG3 =   f72
Packit 6c4009
C_hi_hold   =   f73
Packit 6c4009
E_hold      =   f74
Packit 6c4009
M           =   f75
Packit 6c4009
ArgX_abs    =   f76         // |x|
Packit 6c4009
ArgY_abs    =   f77         // |y|
Packit 6c4009
Result_lo   =   f78
Packit 6c4009
A_temp      =   f79
Packit 6c4009
FR_temp     =   f80
Packit 6c4009
Xsq         =   f81         // x*x
Packit 6c4009
Ysq         =   f82         // y*y
Packit 6c4009
tmp_small   =   f83
Packit 6c4009
Packit 6c4009
// General-register aliases (stacked registers r33-r51).
//
// NOTE(review): GR_SAVE_PFS/B0/GP presumably preserve ar.pfs, b0 and gp
// around the error-support call; their use is not visible in this part of
// the file - confirm.
GR_SAVE_PFS   = r33
Packit 6c4009
GR_SAVE_B0    = r34
Packit 6c4009
GR_SAVE_GP    = r35
Packit 6c4009
sign_X        = r36     // receives the sign/exponent field of x (getf.exp)
Packit 6c4009
sign_Y        = r37     // receives the sign/exponent field of y (getf.exp)
Packit 6c4009
swap          = r38
Packit 6c4009
table_ptr1    = r39     // pointer into Constants_atan
Packit 6c4009
table_ptr2    = r40
Packit 6c4009
k             = r41
Packit 6c4009
lookup        = r42
Packit 6c4009
exp_ArgX      = r43
Packit 6c4009
exp_ArgY      = r44
Packit 6c4009
exponent_Q    = r45
Packit 6c4009
significand_Q = r46
Packit 6c4009
special       = r47
Packit 6c4009
sp_exp_Q      = r48
Packit 6c4009
// The next three scratch names occupy r49-r51, the same registers that are
// named GR_Parameter_X / GR_Parameter_Y / GR_Parameter_RESULT below.
sp_exp_4sig_Q = r49
Packit 6c4009
table_base    = r50     // saved copy of the Constants_atan base pointer
Packit 6c4009
int_temp      = r51
Packit 6c4009
Packit 6c4009
// Registers holding the arguments to error support for the (0,0) case
// (r49-r52, as listed under "Resources Used" in the header).  With the
// "alloc r32 = ar.pfs, 0, 17, 4, 0" frame used by atanl, r32-r48 are the 17
// locals and r49-r52 are the 4 output registers.  r49-r51 are also used as
// scratch under the names sp_exp_4sig_Q, table_base and int_temp.
GR_Parameter_X      = r49
Packit 6c4009
GR_Parameter_Y      = r50
Packit 6c4009
GR_Parameter_RESULT = r51
Packit 6c4009
GR_Parameter_TAG    = r52
Packit 6c4009
GR_temp             = r52   // shares r52 with GR_Parameter_TAG
Packit 6c4009
Packit 6c4009
// Constants_atan: read-only constant pool for atanl/atan2l, 16-byte aligned.
//
// Layout, in order (every record occupies 16 bytes):
//   1. pi/2 as a double, followed by two singles: the low part of pi/2
//      and 2^(-3).
//   2. Polynomial coefficients P_1..P_8, then Q_1..Q_4.  Each long double is
//      written as two data8 words: the 64-bit significand, then the
//      sign/exponent word.
//   3. 49 entries of arctan(B): Tbl_hi (double), Tbl_lo (single) and one
//      zero data4 word of padding.  One entry for B = 1+1/32, then 16
//      entries each for the scales 2^(-1), 2^(-2) and 2^(-3).
//   4. pi, pi/2, pi/4 and 3pi/4, each stored as two doubles.
RODATA
Packit 6c4009
.align 16
Packit 6c4009
Packit 6c4009
LOCAL_OBJECT_START(Constants_atan)
Packit 6c4009
//       double pi/2
Packit 6c4009
data8 0x3FF921FB54442D18
Packit 6c4009
//       single lo_pi/2, two**(-3)
Packit 6c4009
data4 0x248D3132, 0x3E000000
Packit 6c4009
//       Polynomial coefficients: significand, sign/exponent word
data8 0xAAAAAAAAAAAAAAA3, 0xBFFD // P_1
Packit 6c4009
data8 0xCCCCCCCCCCCC54B2, 0x3FFC // P_2
Packit 6c4009
data8 0x9249249247E4D0C2, 0xBFFC // P_3
Packit 6c4009
data8 0xE38E38E058870889, 0x3FFB // P_4
Packit 6c4009
data8 0xBA2E895B290149F8, 0xBFFB // P_5
Packit 6c4009
data8 0x9D88E6D4250F733D, 0x3FFB // P_6
Packit 6c4009
data8 0x884E51FFFB8745A0, 0xBFFB // P_7
Packit 6c4009
data8 0xE1C7412B394396BD, 0x3FFA // P_8
Packit 6c4009
data8 0xAAAAAAAAAAAAA52F, 0xBFFD // Q_1
Packit 6c4009
data8 0xCCCCCCCCC75B60D3, 0x3FFC // Q_2
Packit 6c4009
data8 0x924923AD011F1940, 0xBFFC // Q_3
Packit 6c4009
data8 0xE36F716D2A5F89BD, 0x3FFB // Q_4
Packit 6c4009
//
Packit 6c4009
//    Entries Tbl_hi  (double precision)
Packit 6c4009
//    B = 1+Index/16+1/32  Index = 0
Packit 6c4009
//    Entries Tbl_lo (single precision)
Packit 6c4009
//    B = 1+Index/16+1/32  Index = 0
Packit 6c4009
//
Packit 6c4009
data8 0x3FE9A000A935BD8E
Packit 6c4009
data4 0x23ACA08F, 0x00000000
Packit 6c4009
//
Packit 6c4009
//    Entries Tbl_hi  (double precision) Index = 0,1,...,15
Packit 6c4009
//    B = 2^(-1)*(1+Index/16+1/32)
Packit 6c4009
//    Entries Tbl_lo (single precision)
Packit 6c4009
//    Index = 0,1,...,15  B = 2^(-1)*(1+Index/16+1/32)
Packit 6c4009
//
Packit 6c4009
data8 0x3FDE77EB7F175A34
Packit 6c4009
data4 0x238729EE, 0x00000000
Packit 6c4009
data8 0x3FE0039C73C1A40B
Packit 6c4009
data4 0x249334DB, 0x00000000
Packit 6c4009
data8 0x3FE0C6145B5B43DA
Packit 6c4009
data4 0x22CBA7D1, 0x00000000
Packit 6c4009
data8 0x3FE1835A88BE7C13
Packit 6c4009
data4 0x246310E7, 0x00000000
Packit 6c4009
data8 0x3FE23B71E2CC9E6A
Packit 6c4009
data4 0x236210E5, 0x00000000
Packit 6c4009
data8 0x3FE2EE628406CBCA
Packit 6c4009
data4 0x2462EAF5, 0x00000000
Packit 6c4009
data8 0x3FE39C391CD41719
Packit 6c4009
data4 0x24B73EF3, 0x00000000
Packit 6c4009
data8 0x3FE445065B795B55
Packit 6c4009
data4 0x24C11260, 0x00000000
Packit 6c4009
data8 0x3FE4E8DE5BB6EC04
Packit 6c4009
data4 0x242519EE, 0x00000000
Packit 6c4009
data8 0x3FE587D81F732FBA
Packit 6c4009
data4 0x24D4346C, 0x00000000
Packit 6c4009
data8 0x3FE6220D115D7B8D
Packit 6c4009
data4 0x24ED487B, 0x00000000
Packit 6c4009
data8 0x3FE6B798920B3D98
Packit 6c4009
data4 0x2495FF1E, 0x00000000
Packit 6c4009
data8 0x3FE748978FBA8E0F
Packit 6c4009
data4 0x223D9531, 0x00000000
Packit 6c4009
data8 0x3FE7D528289FA093
Packit 6c4009
data4 0x242B0411, 0x00000000
Packit 6c4009
data8 0x3FE85D69576CC2C5
Packit 6c4009
data4 0x2335B374, 0x00000000
Packit 6c4009
data8 0x3FE8E17AA99CC05D
Packit 6c4009
data4 0x24C27CFB, 0x00000000
Packit 6c4009
//
Packit 6c4009
//    Entries Tbl_hi  (double precision) Index = 0,1,...,15
Packit 6c4009
//    B = 2^(-2)*(1+Index/16+1/32)
Packit 6c4009
//    Entries Tbl_lo (single precision)
Packit 6c4009
//    Index = 0,1,...,15  B = 2^(-2)*(1+Index/16+1/32)
Packit 6c4009
//
Packit 6c4009
data8 0x3FD025FA510665B5
Packit 6c4009
data4 0x24263482, 0x00000000
Packit 6c4009
data8 0x3FD1151A362431C9
Packit 6c4009
data4 0x242C8DC9, 0x00000000
Packit 6c4009
data8 0x3FD2025567E47C95
Packit 6c4009
data4 0x245CF9BA, 0x00000000
Packit 6c4009
data8 0x3FD2ED987A823CFE
Packit 6c4009
data4 0x235C892C, 0x00000000
Packit 6c4009
data8 0x3FD3D6D129271134
Packit 6c4009
data4 0x2389BE52, 0x00000000
Packit 6c4009
data8 0x3FD4BDEE586890E6
Packit 6c4009
data4 0x24436471, 0x00000000
Packit 6c4009
data8 0x3FD5A2E0175E0F4E
Packit 6c4009
data4 0x2389DBD4, 0x00000000
Packit 6c4009
data8 0x3FD685979F5FA6FD
Packit 6c4009
data4 0x2476D43F, 0x00000000
Packit 6c4009
data8 0x3FD7660752817501
Packit 6c4009
data4 0x24711774, 0x00000000
Packit 6c4009
data8 0x3FD84422B8DF95D7
Packit 6c4009
data4 0x23EBB501, 0x00000000
Packit 6c4009
data8 0x3FD91FDE7CD0C662
Packit 6c4009
data4 0x23883A0C, 0x00000000
Packit 6c4009
data8 0x3FD9F93066168001
Packit 6c4009
data4 0x240DF63F, 0x00000000
Packit 6c4009
data8 0x3FDAD00F5422058B
Packit 6c4009
data4 0x23FE261A, 0x00000000
Packit 6c4009
data8 0x3FDBA473378624A5
Packit 6c4009
data4 0x23A8CD0E, 0x00000000
Packit 6c4009
data8 0x3FDC76550AAD71F8
Packit 6c4009
data4 0x2422D1D0, 0x00000000
Packit 6c4009
data8 0x3FDD45AEC9EC862B
Packit 6c4009
data4 0x2344A109, 0x00000000
Packit 6c4009
//
Packit 6c4009
//    Entries Tbl_hi  (double precision) Index = 0,1,...,15
Packit 6c4009
//    B = 2^(-3)*(1+Index/16+1/32)
Packit 6c4009
//    Entries Tbl_lo (single precision)
Packit 6c4009
//    Index = 0,1,...,15  B = 2^(-3)*(1+Index/16+1/32)
Packit 6c4009
//
Packit 6c4009
data8 0x3FC068D584212B3D
Packit 6c4009
data4 0x239874B6, 0x00000000
Packit 6c4009
data8 0x3FC1646541060850
Packit 6c4009
data4 0x2335E774, 0x00000000
Packit 6c4009
data8 0x3FC25F6E171A535C
Packit 6c4009
data4 0x233E36BE, 0x00000000
Packit 6c4009
data8 0x3FC359E8EDEB99A3
Packit 6c4009
data4 0x239680A3, 0x00000000
Packit 6c4009
data8 0x3FC453CEC6092A9E
Packit 6c4009
data4 0x230FB29E, 0x00000000
Packit 6c4009
data8 0x3FC54D18BA11570A
Packit 6c4009
data4 0x230C1418, 0x00000000
Packit 6c4009
data8 0x3FC645BFFFB3AA73
Packit 6c4009
data4 0x23F0564A, 0x00000000
Packit 6c4009
data8 0x3FC73DBDE8A7D201
Packit 6c4009
data4 0x23D4A5E1, 0x00000000
Packit 6c4009
data8 0x3FC8350BE398EBC7
Packit 6c4009
data4 0x23D4ADDA, 0x00000000
Packit 6c4009
data8 0x3FC92BA37D050271
Packit 6c4009
data4 0x23BCB085, 0x00000000
Packit 6c4009
data8 0x3FCA217E601081A5
Packit 6c4009
data4 0x23BC841D, 0x00000000
Packit 6c4009
data8 0x3FCB1696574D780B
Packit 6c4009
data4 0x23CF4A8E, 0x00000000
Packit 6c4009
data8 0x3FCC0AE54D768466
Packit 6c4009
data4 0x23BECC90, 0x00000000
Packit 6c4009
data8 0x3FCCFE654E1D5395
Packit 6c4009
data4 0x2323DCD2, 0x00000000
Packit 6c4009
data8 0x3FCDF110864C9D9D
Packit 6c4009
data4 0x23F53F3A, 0x00000000
Packit 6c4009
data8 0x3FCEE2E1451D980C
Packit 6c4009
data4 0x23CCB11F, 0x00000000
Packit 6c4009
//
Packit 6c4009
//    Multiples of pi/4, each as a leading double followed by a second,
//    much smaller double
data8 0x400921FB54442D18, 0x3CA1A62633145C07 // PI two doubles
Packit 6c4009
data8 0x3FF921FB54442D18, 0x3C91A62633145C07 // PI_by_2 two dbles
Packit 6c4009
data8 0x3FE921FB54442D18, 0x3C81A62633145C07 // PI_by_4 two dbles
Packit 6c4009
data8 0x4002D97C7F3321D2, 0x3C9A79394C9E8A0A // 3PI_by_4 two dbles
Packit 6c4009
LOCAL_OBJECT_END(Constants_atan)
Packit 6c4009
Packit 6c4009
Packit 6c4009
.section .text
Packit 6c4009
// atanl(y): long double arctangent entry point.
//
// Handled as atan2l(y, 1.0).  This prologue mirrors the atan2l prologue that
// follows it, but uses f1 (1.0) wherever atan2l reads ArgX_orig, and then
// joins the shared code at ATANL_COMMON.
//
// State established here before the branch to ATANL_COMMON:
//   Ysq, Xsq            y*y and x*x (x*x is 1.0 on this path)
//   ArgY, ArgX          fnorm'ed copies of y and of 1.0
//   ArgX_orig           written with 1.0, because the shared code reads it
//   ArgY_abs, ArgX_abs  |y| and |x|
//   sign_Y, sign_X      signexp words from getf.exp (shifted later)
//   table_base          address of Constants_atan
//   table_ptr1          table_base + 16 after the two post-increment loads
//   P_hi, P_lo          hi and lo parts of pi (per the load comments below)
//   TWO_TO_NEG3         2^-3
//   M                   1.0
//   V                   ArgX_abs, provisional value for the |x| < |y| case
//   p6 / p7             |x| >= |y|  /  |x| < |y|
//   p12                 x matches no fclass category (unsupported)
// A y that is natval, nan, inf or zero leaves early to ATANL_Y_SPECIAL.
GLOBAL_IEEE754_ENTRY(atanl)
Packit 6c4009
Packit 6c4009
// Use common code with atan2l after setting x=1.0
Packit 6c4009
{ .mfi
Packit 6c4009
      alloc r32 = ar.pfs, 0, 17, 4, 0
Packit 6c4009
      fma.s1 Ysq = ArgY_orig, ArgY_orig, f0          // Form y*y
Packit 6c4009
      nop.i 999
Packit 6c4009
}
Packit 6c4009
{ .mfi
Packit 6c4009
      addl table_ptr1 = @ltoff(Constants_atan#), gp  // Address of table pointer
Packit 6c4009
      fma.s1 Xsq = f1, f1, f0                        // Form x*x
Packit 6c4009
      nop.i 999
Packit 6c4009
}
Packit 6c4009
;;
Packit 6c4009
Packit 6c4009
{ .mfi
Packit 6c4009
      ld8 table_ptr1 = [table_ptr1]                  // Get table pointer
Packit 6c4009
      fnorm.s1 ArgY = ArgY_orig
Packit 6c4009
      nop.i 999
Packit 6c4009
}
Packit 6c4009
{ .mfi
Packit 6c4009
      nop.m 999
Packit 6c4009
      fnorm.s1 ArgX = f1
Packit 6c4009
      nop.i 999
Packit 6c4009
}
Packit 6c4009
;;
Packit 6c4009
Packit 6c4009
// x is the constant 1.0 here; sign_X is reduced to its sign bit by the
// shr at ATANL_COMMON.
{ .mfi
Packit 6c4009
      getf.exp sign_X = f1               // Get signexp of x
Packit 6c4009
      fmerge.s ArgX_abs = f0, f1         // Form |x|
Packit 6c4009
      nop.i 999
Packit 6c4009
}
Packit 6c4009
// The shared code (denormal-flag compare, special-case handling) reads
// ArgX_orig, so give it the value 1.0 on this path.
{ .mfi
Packit 6c4009
      nop.m 999
Packit 6c4009
      fnorm.s1 ArgX_orig = f1
Packit 6c4009
      nop.i 999
Packit 6c4009
}
Packit 6c4009
;;
Packit 6c4009
Packit 6c4009
{ .mfi
Packit 6c4009
      getf.exp sign_Y = ArgY_orig        // Get signexp of y
Packit 6c4009
      fmerge.s ArgY_abs = f0, ArgY_orig  // Form |y|
Packit 6c4009
      mov table_base = table_ptr1        // Save base pointer to tables
Packit 6c4009
}
Packit 6c4009
;;
Packit 6c4009
Packit 6c4009
{ .mfi
Packit 6c4009
      ldfd P_hi = [table_ptr1],8         // Load double precision hi part of pi
Packit 6c4009
      fclass.m p8,p0 = ArgY_orig, 0x1e7  // Test y natval, nan, inf, zero
Packit 6c4009
      nop.i 999
Packit 6c4009
}
Packit 6c4009
;;
Packit 6c4009
Packit 6c4009
{ .mfi
Packit 6c4009
      ldfps P_lo, TWO_TO_NEG3 = [table_ptr1], 8 // Load P_lo and constant 2^-3
Packit 6c4009
      nop.f 999
Packit 6c4009
      nop.i 999
Packit 6c4009
}
Packit 6c4009
{ .mfi
Packit 6c4009
      nop.m 999
Packit 6c4009
      fma.s1 M = f1, f1, f0              // Set M = 1.0
Packit 6c4009
      nop.i 999
Packit 6c4009
}
Packit 6c4009
;;
Packit 6c4009
Packit 6c4009
//
Packit 6c4009
//     Check for everything - if false, then must be pseudo-zero
Packit 6c4009
//     or pseudo-nan (IA unsupporteds).
Packit 6c4009
//
Packit 6c4009
// The operand tested is f1, but p12 still has to be written on this path:
// the shared code branches to ATANL_UNSUPPORTED on p12.
{ .mfb
Packit 6c4009
      nop.m 999
Packit 6c4009
      fclass.m p0,p12 = f1, 0x1FF        // Test x unsupported
Packit 6c4009
(p8)  br.cond.spnt ATANL_Y_SPECIAL       // Branch if y natval, nan, inf, zero
Packit 6c4009
}
Packit 6c4009
;;
Packit 6c4009
Packit 6c4009
//     U = max(ArgX_abs,ArgY_abs)
Packit 6c4009
//     V = min(ArgX_abs,ArgY_abs)
Packit 6c4009
// Only the compare and the provisional V are done here; U and the final V
// are selected under p6/p7 at ATANL_COMMON.
{ .mfi
Packit 6c4009
      nop.m 999
Packit 6c4009
      fcmp.ge.s1 p6,p7 = Xsq, Ysq        // Test for |x| >= |y| using squares
Packit 6c4009
      nop.i 999
Packit 6c4009
}
Packit 6c4009
{ .mfb
Packit 6c4009
      nop.m 999
Packit 6c4009
      fma.s1 V = ArgX_abs, f1, f0        // Set V assuming |x| < |y|
Packit 6c4009
      br.cond.sptk ATANL_COMMON          // Branch to common code
Packit 6c4009
}
Packit 6c4009
;;
Packit 6c4009
Packit 6c4009
GLOBAL_IEEE754_END(atanl)
Packit 6c4009
libm_alias_ldouble_other (__atan, atan)
Packit 6c4009
Packit 6c4009
GLOBAL_IEEE754_ENTRY(atan2l)
Packit 6c4009
Packit 6c4009
{ .mfi
Packit 6c4009
      alloc r32 = ar.pfs, 0, 17, 4, 0
Packit 6c4009
      fma.s1 Ysq = ArgY_orig, ArgY_orig, f0          // Form y*y
Packit 6c4009
      nop.i 999
Packit 6c4009
}
Packit 6c4009
{ .mfi
Packit 6c4009
      addl table_ptr1 = @ltoff(Constants_atan#), gp  // Address of table pointer
Packit 6c4009
      fma.s1 Xsq = ArgX_orig, ArgX_orig, f0          // Form x*x
Packit 6c4009
      nop.i 999
Packit 6c4009
}
Packit 6c4009
;;
Packit 6c4009
Packit 6c4009
{ .mfi
Packit 6c4009
      ld8 table_ptr1 = [table_ptr1]                  // Get table pointer
Packit 6c4009
      fnorm.s1 ArgY = ArgY_orig
Packit 6c4009
      nop.i 999
Packit 6c4009
}
Packit 6c4009
{ .mfi
Packit 6c4009
      nop.m 999
Packit 6c4009
      fnorm.s1 ArgX = ArgX_orig
Packit 6c4009
      nop.i 999
Packit 6c4009
}
Packit 6c4009
;;
Packit 6c4009
Packit 6c4009
{ .mfi
Packit 6c4009
      getf.exp sign_X = ArgX_orig        // Get signexp of x
Packit 6c4009
      fmerge.s ArgX_abs = f0, ArgX_orig  // Form |x|
Packit 6c4009
      nop.i 999
Packit 6c4009
}
Packit 6c4009
;;
Packit 6c4009
Packit 6c4009
{ .mfi
Packit 6c4009
      getf.exp sign_Y = ArgY_orig        // Get signexp of y
Packit 6c4009
      fmerge.s ArgY_abs = f0, ArgY_orig  // Form |y|
Packit 6c4009
      mov table_base = table_ptr1        // Save base pointer to tables
Packit 6c4009
}
Packit 6c4009
;;
Packit 6c4009
Packit 6c4009
{ .mfi
Packit 6c4009
      ldfd P_hi = [table_ptr1],8         // Load double precision hi part of pi
Packit 6c4009
      fclass.m p8,p0 = ArgY_orig, 0x1e7  // Test y natval, nan, inf, zero
Packit 6c4009
      nop.i 999
Packit 6c4009
}
Packit 6c4009
;;
Packit 6c4009
Packit 6c4009
{ .mfi
Packit 6c4009
      ldfps P_lo, TWO_TO_NEG3 = [table_ptr1], 8 // Load P_lo and constant 2^-3
Packit 6c4009
      fclass.m p9,p0 = ArgX_orig, 0x1e7  // Test x natval, nan, inf, zero
Packit 6c4009
      nop.i 999
Packit 6c4009
}
Packit 6c4009
{ .mfi
Packit 6c4009
      nop.m 999
Packit 6c4009
      fma.s1 M = f1, f1, f0              // Set M = 1.0
Packit 6c4009
      nop.i 999
Packit 6c4009
}
Packit 6c4009
;;
Packit 6c4009
Packit 6c4009
//
Packit 6c4009
//     Check for everything - if false, then must be pseudo-zero
Packit 6c4009
//     or pseudo-nan (IA unsupporteds).
Packit 6c4009
//
Packit 6c4009
{ .mfb
Packit 6c4009
      nop.m 999
Packit 6c4009
      fclass.m p0,p12 = ArgX_orig, 0x1FF // Test x unsupported
Packit 6c4009
(p8)  br.cond.spnt ATANL_Y_SPECIAL       // Branch if y natval, nan, inf, zero
Packit 6c4009
}
Packit 6c4009
;;
Packit 6c4009
Packit 6c4009
//     U = max(ArgX_abs,ArgY_abs)
Packit 6c4009
//     V = min(ArgX_abs,ArgY_abs)
Packit 6c4009
{ .mfi
Packit 6c4009
      nop.m 999
Packit 6c4009
      fcmp.ge.s1 p6,p7 = Xsq, Ysq        // Test for |x| >= |y| using squares
Packit 6c4009
      nop.i 999
Packit 6c4009
}
Packit 6c4009
{ .mfb
Packit 6c4009
      nop.m 999
Packit 6c4009
      fma.s1 V = ArgX_abs, f1, f0        // Set V assuming |x| < |y|
Packit 6c4009
(p9)  br.cond.spnt ATANL_X_SPECIAL       // Branch if x natval, nan, inf, zero
Packit 6c4009
}
Packit 6c4009
;;
Packit 6c4009
Packit 6c4009
// Now common code for atanl and atan2l
Packit 6c4009
ATANL_COMMON:
Packit 6c4009
{ .mfi
Packit 6c4009
      nop.m 999
Packit 6c4009
      fclass.m p0,p13 = ArgY_orig, 0x1FF // Test y unsupported
Packit 6c4009
      shr sign_X = sign_X, 17            // Get sign bit of x
Packit 6c4009
}
Packit 6c4009
{ .mfi
Packit 6c4009
      nop.m 999
Packit 6c4009
      fma.s1 U = ArgY_abs, f1, f0        // Set U assuming |x| < |y|
Packit 6c4009
      adds table_ptr1 = 176, table_ptr1  // Point to Q4
Packit 6c4009
}
Packit 6c4009
;;
Packit 6c4009
Packit 6c4009
{ .mfi
Packit 6c4009
(p6)  add swap = r0, r0                  // Set swap=0 if |x| >= |y|
Packit 6c4009
(p6)  frcpa.s1 E, p0 = ArgY_abs, ArgX_abs // Compute E if |x| >= |y|
Packit 6c4009
      shr sign_Y = sign_Y, 17            // Get sign bit of y
Packit 6c4009
}
Packit 6c4009
{ .mfb
Packit 6c4009
      nop.m 999
Packit 6c4009
(p6)  fma.s1 V = ArgY_abs, f1, f0        // Set V if |x| >= |y|
Packit 6c4009
(p12) br.cond.spnt ATANL_UNSUPPORTED     // Branch if x unsupported
Packit 6c4009
}
Packit 6c4009
;;
Packit 6c4009
Packit 6c4009
// Set p8 if y >=0
Packit 6c4009
// Set p9 if y < 0
Packit 6c4009
// Set p10 if |x| >= |y| and x >=0
Packit 6c4009
// Set p11 if |x| >= |y| and x < 0
Packit 6c4009
{ .mfi
Packit 6c4009
      cmp.eq p8, p9 = 0, sign_Y          // Test for y >= 0
Packit 6c4009
(p7)  frcpa.s1 E, p0 = ArgX_abs, ArgY_abs // Compute E if |x| < |y|
Packit 6c4009
(p7)  add swap = 1, r0                   // Set swap=1 if |x| < |y|
Packit 6c4009
}
Packit 6c4009
{ .mfb
Packit 6c4009
(p6)  cmp.eq.unc p10, p11 = 0, sign_X    // If |x| >= |y|, test for x >= 0
Packit 6c4009
(p6)  fma.s1 U = ArgX_abs, f1, f0        // Set U if |x| >= |y|
Packit 6c4009
(p13) br.cond.spnt ATANL_UNSUPPORTED     // Branch if y unsupported
Packit 6c4009
}
Packit 6c4009
;;
Packit 6c4009
Packit 6c4009
//
Packit 6c4009
//     if p8, s_Y = 1.0
Packit 6c4009
//     if p9, s_Y = -1.0
Packit 6c4009
//
Packit 6c4009
.pred.rel "mutex",p8,p9
Packit 6c4009
{ .mfi
Packit 6c4009
      nop.m 999
Packit 6c4009
(p8)  fadd.s1 s_Y = f0, f1               // If y >= 0 set s_Y = 1.0
Packit 6c4009
      nop.i 999
Packit 6c4009
}
Packit 6c4009
{ .mfi
Packit 6c4009
      nop.m 999
Packit 6c4009
(p9)  fsub.s1 s_Y = f0, f1               // If y < 0 set s_Y = -1.0
Packit 6c4009
      nop.i 999
Packit 6c4009
}
Packit 6c4009
;;
Packit 6c4009
Packit 6c4009
.pred.rel "mutex",p10,p11
Packit 6c4009
{ .mfi
Packit 6c4009
      nop.m 999
Packit 6c4009
(p10) fsub.s1 M = M, f1                  // If |x| >= |y| and x >=0, set M=0
Packit 6c4009
      nop.i 999
Packit 6c4009
}
Packit 6c4009
{ .mfi
Packit 6c4009
      nop.m 999
Packit 6c4009
(p11) fadd.s1 M = M, f1                  // If |x| >= |y| and x < 0, set M=2.0
Packit 6c4009
      nop.i 999
Packit 6c4009
}
Packit 6c4009
;;
Packit 6c4009
Packit 6c4009
{ .mfi
Packit 6c4009
      nop.m 999
Packit 6c4009
      fcmp.eq.s0 p0, p9 = ArgX_orig, ArgY_orig // Dummy to set denormal flag
Packit 6c4009
      nop.i 999
Packit 6c4009
}
Packit 6c4009
// *************************************************
Packit 6c4009
// ********************* STEP2 *********************
Packit 6c4009
// *************************************************
Packit 6c4009
//
Packit 6c4009
//     Q = E * V
Packit 6c4009
//
Packit 6c4009
{ .mfi
Packit 6c4009
      nop.m 999
Packit 6c4009
      fmpy.s1 Q = E, V
Packit 6c4009
      nop.i 999
Packit 6c4009
}
Packit 6c4009
;;
Packit 6c4009
Packit 6c4009
{ .mfi
Packit 6c4009
      nop.m 999
Packit 6c4009
      fnma.s1 E_hold = E, U, f1           // E_hold = 1.0 - E*U (1) if POLY path
Packit 6c4009
      nop.i 999
Packit 6c4009
}
Packit 6c4009
;;
Packit 6c4009
Packit 6c4009
// Create a single precision representation of the signexp of Q with the
Packit 6c4009
// 4 most significant bits of the significand followed by a 1 and then 18 0's
Packit 6c4009
{ .mfi
Packit 6c4009
      nop.m 999
Packit 6c4009
      fmpy.s1 P_hi = M, P_hi
Packit 6c4009
      dep.z special = 0x1, 18, 1           // Form 0x0000000000040000
Packit 6c4009
}
Packit 6c4009
{ .mfi
Packit 6c4009
      nop.m 999
Packit 6c4009
      fmpy.s1 P_lo = M, P_lo
Packit 6c4009
      add table_ptr2 = 32, table_ptr1
Packit 6c4009
}
Packit 6c4009
;;
Packit 6c4009
Packit 6c4009
{ .mfi
Packit 6c4009
      nop.m 999
Packit 6c4009
      fma.s1 A_temp = Q, f1, f0            // Set A_temp if POLY path
Packit 6c4009
      nop.i 999
Packit 6c4009
}
Packit 6c4009
{ .mfi
Packit 6c4009
      nop.m 999
Packit 6c4009
      fma.s1 E = E, E_hold, E              // E = E + E*E_hold (1) if POLY path
Packit 6c4009
      nop.i 999
Packit 6c4009
}
Packit 6c4009
;;
Packit 6c4009
Packit 6c4009
//
Packit 6c4009
//     Is Q < 2**(-3)?
Packit 6c4009
//     swap = xor(swap,sign_X)
Packit 6c4009
//
Packit 6c4009
{ .mfi
Packit 6c4009
      nop.m 999
Packit 6c4009
      fcmp.lt.s1 p9, p0 = Q, TWO_TO_NEG3    // Test Q < 2^-3
Packit 6c4009
      xor swap = sign_X, swap
Packit 6c4009
}
Packit 6c4009
;;
Packit 6c4009
Packit 6c4009
//     P_hi = s_Y * P_hi
Packit 6c4009
{ .mmf
Packit 6c4009
      getf.exp exponent_Q =  Q              // Get signexp of Q
Packit 6c4009
      cmp.eq.unc p7, p6 = 0x00000, swap
Packit 6c4009
      fmpy.s1 P_hi = s_Y, P_hi
Packit 6c4009
}
Packit 6c4009
;;
Packit 6c4009
Packit 6c4009
//
Packit 6c4009
//     if (p6) sigma = -1.0     (swap xor sign_X is nonzero)
Packit 6c4009
//     if (p7) sigma =  1.0     (swap xor sign_X is zero)
Packit 6c4009
//
Packit 6c4009
{ .mfi
Packit 6c4009
      getf.sig significand_Q = Q            // Get significand of Q
Packit 6c4009
(p6)  fsub.s1 sigma = f0, f1
Packit 6c4009
      nop.i 999
Packit 6c4009
}
Packit 6c4009
{ .mfb
Packit 6c4009
(p9)  add table_ptr1 = 128, table_base      // Point to P8 if POLY path
Packit 6c4009
(p7)  fadd.s1 sigma = f0, f1
Packit 6c4009
(p9)  br.cond.spnt ATANL_POLY               // Branch to POLY if 0 < Q < 2^-3
Packit 6c4009
}
Packit 6c4009
;;
Packit 6c4009
Packit 6c4009
//
Packit 6c4009
// *************************************************
Packit 6c4009
// ******************** STEP3 **********************
Packit 6c4009
// *************************************************
Packit 6c4009
//
Packit 6c4009
//     lookup = b_1 b_2 b_3 b_4
Packit 6c4009
//
Packit 6c4009
{ .mmi
Packit 6c4009
      nop.m 999
Packit 6c4009
      nop.m 999
Packit 6c4009
      andcm k = 0x0003, exponent_Q  // k=0,1,2,3 for exp_Q=0,-1,-2,-3
Packit 6c4009
}
Packit 6c4009
;;
Packit 6c4009
Packit 6c4009
//
Packit 6c4009
//  Generate sign_exp_Q b_1 b_2 b_3 b_4 1 0 0 0 ... 0  in single precision
Packit 6c4009
//  representation.  Note sign of Q is always 0.
Packit 6c4009
//
Packit 6c4009
{ .mfi
Packit 6c4009
      cmp.eq p8, p9 = 0x0000, k             // Test k=0
Packit 6c4009
      nop.f 999
Packit 6c4009
      extr.u lookup = significand_Q, 59, 4  // Extract b_1 b_2 b_3 b_4 for index
Packit 6c4009
}
Packit 6c4009
{ .mfi
Packit 6c4009
      sub sp_exp_Q = 0x7f, k                // Form single prec biased exp of Q
Packit 6c4009
      nop.f 999
Packit 6c4009
      sub k = k, r0, 1                      // Decrement k
Packit 6c4009
}
Packit 6c4009
;;
Packit 6c4009
Packit 6c4009
//     Form pointer to B index table
Packit 6c4009
{ .mfi
Packit 6c4009
      ldfe Q_4 = [table_ptr1], -16          // Load Q_4
Packit 6c4009
      nop.f 999
Packit 6c4009
(p9)  shl k = k, 8                          // k = 0, 256, or 512
Packit 6c4009
}
Packit 6c4009
{ .mfi
Packit 6c4009
(p9)  shladd table_ptr2 = lookup, 4, table_ptr2
Packit 6c4009
      nop.f 999
Packit 6c4009
      shladd sp_exp_4sig_Q = sp_exp_Q, 4, lookup // Shift and add in 4 high bits
Packit 6c4009
}
Packit 6c4009
;;
Packit 6c4009
Packit 6c4009
{ .mmi
Packit 6c4009
(p8)  add table_ptr2 = -16, table_ptr2      // Pointer if original k was 0
Packit 6c4009
(p9)  add table_ptr2 = k, table_ptr2        // Pointer if k was 1, 2, 3
Packit 6c4009
      dep special = sp_exp_4sig_Q, special, 19, 13 // Form z_hi as single prec
Packit 6c4009
}
Packit 6c4009
;;
Packit 6c4009
Packit 6c4009
//     z_hi = s exp 1.b_1 b_2 b_3 b_4 1 0 0 0 ... 0
Packit 6c4009
{ .mmi
Packit 6c4009
      ldfd Tbl_hi = [table_ptr2], 8         // Load Tbl_hi from index table
Packit 6c4009
;;
Packit 6c4009
      setf.s z_hi = special                 // Form z_hi
Packit 6c4009
      nop.i 999
Packit 6c4009
}
Packit 6c4009
{ .mmi
Packit 6c4009
      ldfs Tbl_lo = [table_ptr2], 8         // Load Tbl_lo from index table
Packit 6c4009
;;
Packit 6c4009
      ldfe Q_3 = [table_ptr1], -16          // Load Q_3
Packit 6c4009
      nop.i 999
Packit 6c4009
}
Packit 6c4009
;;
Packit 6c4009
Packit 6c4009
{ .mmi
Packit 6c4009
      ldfe Q_2 = [table_ptr1], -16          // Load Q_2
Packit 6c4009
      nop.m 999
Packit 6c4009
      nop.i 999
Packit 6c4009
}
Packit 6c4009
;;
Packit 6c4009
Packit 6c4009
{ .mmf
Packit 6c4009
      ldfe Q_1 = [table_ptr1], -16          // Load Q_1
Packit 6c4009
      nop.m 999
Packit 6c4009
      nop.f 999
Packit 6c4009
}
Packit 6c4009
;;
Packit 6c4009
Packit 6c4009
{ .mfi
Packit 6c4009
      nop.m 999
Packit 6c4009
      fma.s1 U_prime_hi = V, z_hi, U        // U_prime_hi = U + V * z_hi
Packit 6c4009
      nop.i 999
Packit 6c4009
}
Packit 6c4009
{ .mfi
Packit 6c4009
      nop.m 999
Packit 6c4009
      fnma.s1 V_prime = U, z_hi, V          // V_prime =  V - U * z_hi
Packit 6c4009
      nop.i 999
Packit 6c4009
}
Packit 6c4009
;;
Packit 6c4009
Packit 6c4009
{ .mfi
Packit 6c4009
      nop.m 999
Packit 6c4009
      mov A_hi = Tbl_hi                     // Start with A_hi = Tbl_hi
Packit 6c4009
      nop.i 999
Packit 6c4009
}
Packit 6c4009
;;
Packit 6c4009
Packit 6c4009
{ .mfi
Packit 6c4009
      nop.m 999
Packit 6c4009
      fsub.s1 U_hold = U, U_prime_hi        // U_hold = U - U_prime_hi
Packit 6c4009
      nop.i 999
Packit 6c4009
}
Packit 6c4009
;;
Packit 6c4009
Packit 6c4009
{ .mfi
Packit 6c4009
      nop.m 999
Packit 6c4009
      frcpa.s1 C_hi, p0 = f1, U_prime_hi    // C_hi = frcpa(1,U_prime_hi)
Packit 6c4009
      nop.i 999
Packit 6c4009
}
Packit 6c4009
;;
Packit 6c4009
Packit 6c4009
{ .mfi
Packit 6c4009
      nop.m 999
Packit 6c4009
      fmpy.s1 A_hi = s_Y, A_hi              // A_hi = s_Y * A_hi
Packit 6c4009
      nop.i 999
Packit 6c4009
}
Packit 6c4009
;;
Packit 6c4009
Packit 6c4009
{ .mfi
Packit 6c4009
      nop.m 999
Packit 6c4009
      fma.s1 U_prime_lo = z_hi, V, U_hold   // U_prime_lo =  U_hold + V * z_hi
Packit 6c4009
      nop.i 999
Packit 6c4009
}
Packit 6c4009
;;
Packit 6c4009
Packit 6c4009
//     C_hi_hold = 1 - C_hi * U_prime_hi (1)
Packit 6c4009
{ .mfi
Packit 6c4009
      nop.m 999
Packit 6c4009
      fnma.s1 C_hi_hold = C_hi, U_prime_hi, f1
Packit 6c4009
      nop.i 999
Packit 6c4009
}
Packit 6c4009
;;
Packit 6c4009
Packit 6c4009
{ .mfi
Packit 6c4009
      nop.m 999
Packit 6c4009
      fma.s1 Res_hi = sigma, A_hi, P_hi   // Res_hi = P_hi + sigma * A_hi
Packit 6c4009
      nop.i 999
Packit 6c4009
}
Packit 6c4009
;;
Packit 6c4009
Packit 6c4009
{ .mfi
Packit 6c4009
      nop.m 999
Packit 6c4009
      fma.s1 C_hi = C_hi_hold, C_hi, C_hi // C_hi = C_hi + C_hi * C_hi_hold (1)
Packit 6c4009
      nop.i 999
Packit 6c4009
}
Packit 6c4009
;;
Packit 6c4009
Packit 6c4009
//     C_hi_hold = 1 - C_hi * U_prime_hi (2)
Packit 6c4009
{ .mfi
Packit 6c4009
      nop.m 999
Packit 6c4009
      fnma.s1 C_hi_hold = C_hi, U_prime_hi, f1
Packit 6c4009
      nop.i 999
Packit 6c4009
}
Packit 6c4009
;;
Packit 6c4009
Packit 6c4009
{ .mfi
Packit 6c4009
      nop.m 999
Packit 6c4009
      fma.s1 C_hi = C_hi_hold, C_hi, C_hi // C_hi = C_hi + C_hi * C_hi_hold (2)
Packit 6c4009
      nop.i 999
Packit 6c4009
}
Packit 6c4009
;;
Packit 6c4009
Packit 6c4009
//     C_hi_hold = 1 - C_hi * U_prime_hi (3)
Packit 6c4009
{ .mfi
Packit 6c4009
      nop.m 999
Packit 6c4009
      fnma.s1 C_hi_hold = C_hi, U_prime_hi, f1
Packit 6c4009
      nop.i 999
Packit 6c4009
}
Packit 6c4009
;;
Packit 6c4009
Packit 6c4009
{ .mfi
Packit 6c4009
      nop.m 999
Packit 6c4009
      fma.s1 C_hi = C_hi_hold, C_hi, C_hi // C_hi = C_hi + C_hi * C_hi_hold (3)
Packit 6c4009
      nop.i 999
Packit 6c4009
}
Packit 6c4009
;;
Packit 6c4009
Packit 6c4009
{ .mfi
Packit 6c4009
      nop.m 999
Packit 6c4009
      fmpy.s1 w_hi = V_prime, C_hi           // w_hi = V_prime * C_hi
Packit 6c4009
      nop.i 999
Packit 6c4009
}
Packit 6c4009
;;
Packit 6c4009
Packit 6c4009
{ .mfi
Packit 6c4009
      nop.m 999
Packit 6c4009
      fmpy.s1 wsq = w_hi, w_hi               // wsq = w_hi * w_hi
Packit 6c4009
      nop.i 999
Packit 6c4009
}
Packit 6c4009
{ .mfi
Packit 6c4009
      nop.m 999
Packit 6c4009
      fnma.s1 w_lo = w_hi, U_prime_hi, V_prime // w_lo = V_prime-w_hi*U_prime_hi
Packit 6c4009
      nop.i 999
Packit 6c4009
}
Packit 6c4009
;;
Packit 6c4009
Packit 6c4009
{ .mfi
Packit 6c4009
      nop.m 999
Packit 6c4009
      fma.s1 poly =  wsq, Q_4, Q_3           // poly = Q_3 + wsq * Q_4
Packit 6c4009
      nop.i 999
Packit 6c4009
}
Packit 6c4009
{ .mfi
Packit 6c4009
      nop.m 999
Packit 6c4009
      fnma.s1 w_lo = w_hi, U_prime_lo, w_lo  // w_lo = w_lo - w_hi * U_prime_lo
Packit 6c4009
      nop.i 999
Packit 6c4009
}
Packit 6c4009
;;
Packit 6c4009
Packit 6c4009
{ .mfi
Packit 6c4009
      nop.m 999
Packit 6c4009
      fma.s1 poly = wsq, poly, Q_2           // poly = Q_2 + wsq * poly
Packit 6c4009
      nop.i 999
Packit 6c4009
}
Packit 6c4009
// Scale the accumulated residual by C_hi, the refined reciprocal of
// U_prime_hi computed above.
{ .mfi
Packit 6c4009
      nop.m 999
Packit 6c4009
      fmpy.s1 w_lo = C_hi, w_lo              // w_lo = w_lo * C_hi
Packit 6c4009
      nop.i 999
Packit 6c4009
}
Packit 6c4009
;;
Packit 6c4009
Packit 6c4009
{ .mfi
Packit 6c4009
      nop.m 999
Packit 6c4009
      fma.s1 poly = wsq, poly, Q_1           // poly = Q_1 + wsq * poly
Packit 6c4009
      nop.i 999
Packit 6c4009
}
Packit 6c4009
{ .mfi
Packit 6c4009
      nop.m 999
Packit 6c4009
      fadd.s1 A_lo = Tbl_lo, w_lo            // A_lo = Tbl_lo + w_lo
Packit 6c4009
      nop.i 999
Packit 6c4009
}
Packit 6c4009
;;
Packit 6c4009
Packit 6c4009
{ .mfi
Packit 6c4009
      nop.m 999
Packit 6c4009
      fmpy.s0 Q_1 =  Q_1, Q_1                // Dummy operation to raise inexact
Packit 6c4009
      nop.i 999
Packit 6c4009
}
Packit 6c4009
;;
Packit 6c4009
Packit 6c4009
{ .mfi
Packit 6c4009
      nop.m 999
Packit 6c4009
      fmpy.s1 poly = wsq, poly               // poly = wsq * poly
Packit 6c4009
      nop.i 999
Packit 6c4009
}
Packit 6c4009
;;
Packit 6c4009
Packit 6c4009
{ .mfi
Packit 6c4009
      nop.m 999
Packit 6c4009
      fmpy.s1 poly = w_hi, poly              // poly = w_hi * poly
Packit 6c4009
      nop.i 999
Packit 6c4009
}
Packit 6c4009
;;
Packit 6c4009
Packit 6c4009
{ .mfi
Packit 6c4009
      nop.m 999
Packit 6c4009
      fadd.s1 A_lo = A_lo, poly              // A_lo = A_lo + poly
Packit 6c4009
      nop.i 999
Packit 6c4009
}
Packit 6c4009
;;
Packit 6c4009
Packit 6c4009
{ .mfi
Packit 6c4009
      nop.m 999
Packit 6c4009
      fadd.s1 A_lo = A_lo, w_hi              // A_lo = A_lo + w_hi
Packit 6c4009
      nop.i 999
Packit 6c4009
}
Packit 6c4009
;;
Packit 6c4009
Packit 6c4009
{ .mfi
Packit 6c4009
      nop.m 999
Packit 6c4009
      fma.s1 Res_lo = sigma, A_lo, P_lo      // Res_lo = P_lo + sigma * A_lo
Packit 6c4009
      nop.i 999
Packit 6c4009
}
Packit 6c4009
;;
Packit 6c4009
Packit 6c4009
//
Packit 6c4009
//     Result  =  Res_hi + Res_lo * s_Y  (User Supplied Rounding Mode)
Packit 6c4009
//
Packit 6c4009
{ .mfb
Packit 6c4009
      nop.m 999
Packit 6c4009
      fma.s0 Result = Res_lo, s_Y, Res_hi
Packit 6c4009
      br.ret.sptk   b0                        // Exit table path 2^-3 <= V/U < 1
Packit 6c4009
}
Packit 6c4009
;;
Packit 6c4009
Packit 6c4009
Packit 6c4009
ATANL_POLY:
Packit 6c4009
// Here if 0 < V/U < 2^-3
Packit 6c4009
//
Packit 6c4009
// ***********************************************
Packit 6c4009
// ******************** STEP4 ********************
Packit 6c4009
// ***********************************************
Packit 6c4009
Packit 6c4009
//
Packit 6c4009
//     Following:
Packit 6c4009
//     Complete 3 iterations of E = E + E*(1.0 - E*U); iteration (1) was
//     already done in STEP2, iterations (2) and (3) are done here.
Packit 6c4009
//     Also load P_8, P_7, ..., P_1
Packit 6c4009
//
Packit 6c4009
{ .mfi
Packit 6c4009
      ldfe P_8 = [table_ptr1], -16            // Load P_8
Packit 6c4009
      fnma.s1 z_lo = A_temp, U, V             // z_lo = V - A_temp * U
Packit 6c4009
      nop.i 999
Packit 6c4009
}
Packit 6c4009
{ .mfi
Packit 6c4009
      nop.m 999
Packit 6c4009
      fnma.s1 E_hold = E, U, f1               // E_hold = 1.0 - E*U (2)
Packit 6c4009
      nop.i 999
Packit 6c4009
}
Packit 6c4009
;;
Packit 6c4009
Packit 6c4009
{ .mmi
Packit 6c4009
      ldfe P_7 = [table_ptr1], -16            // Load P_7
Packit 6c4009
;;
Packit 6c4009
      ldfe P_6 = [table_ptr1], -16            // Load P_6
Packit 6c4009
      nop.i 999
Packit 6c4009
}
Packit 6c4009
;;
Packit 6c4009
Packit 6c4009
{ .mfi
Packit 6c4009
      ldfe P_5 = [table_ptr1], -16            // Load P_5
Packit 6c4009
      fma.s1 E = E, E_hold, E                 // E = E + E_hold*E (2)
Packit 6c4009
      nop.i 999
Packit 6c4009
}
Packit 6c4009
;;
Packit 6c4009
Packit 6c4009
{ .mmi
Packit 6c4009
      ldfe P_4 = [table_ptr1], -16            // Load P_4
Packit 6c4009
;;
Packit 6c4009
      ldfe P_3 = [table_ptr1], -16            // Load P_3
Packit 6c4009
      nop.i 999
Packit 6c4009
}
Packit 6c4009
;;
Packit 6c4009
Packit 6c4009
{ .mfi
Packit 6c4009
      ldfe P_2 = [table_ptr1], -16            // Load P_2
Packit 6c4009
      fnma.s1 E_hold = E, U, f1               // E_hold = 1.0 - E*U (3)
Packit 6c4009
      nop.i 999
Packit 6c4009
}
Packit 6c4009
{ .mlx
Packit 6c4009
      nop.m 999
Packit 6c4009
      movl         int_temp = 0x24005         // Signexp for small neg number
Packit 6c4009
}
Packit 6c4009
;;
Packit 6c4009
Packit 6c4009
{ .mmf
Packit 6c4009
      ldfe P_1 = [table_ptr1], -16            // Load P_1
Packit 6c4009
      setf.exp     tmp_small = int_temp       // Form small neg number
Packit 6c4009
      fma.s1 E = E, E_hold, E                 // E = E + E_hold*E (3)
Packit 6c4009
}
Packit 6c4009
;;
Packit 6c4009
Packit 6c4009
//
Packit 6c4009
//
Packit 6c4009
// At this point E approximates 1/U to roughly working precision
Packit 6c4009
// Z = V*E approximates V/U
Packit 6c4009
//
Packit 6c4009
{ .mfi
Packit 6c4009
      nop.m 999
Packit 6c4009
      fmpy.s1 Z = V, E                         // Z = V * E
Packit 6c4009
      nop.i 999
Packit 6c4009
}
Packit 6c4009
{ .mfi
Packit 6c4009
      nop.m 999
Packit 6c4009
      fmpy.s1 z_lo = z_lo, E                   // z_lo = z_lo * E
Packit 6c4009
      nop.i 999
Packit 6c4009
}
Packit 6c4009
;;
Packit 6c4009
Packit 6c4009
//
Packit 6c4009
//     Now what we want to do is
Packit 6c4009
//     poly1 = P_4 + zsq*(P_5 + zsq*(P_6 + zsq*(P_7 + zsq*P_8)))
Packit 6c4009
//     poly2 = zsq*(P_1 + zsq*(P_2 + zsq*P_3))
Packit 6c4009
//
Packit 6c4009
//
Packit 6c4009
//     Fixup added to force inexact later -
Packit 6c4009
//     A_hi = A_temp + z_lo
Packit 6c4009
//     z_lo = (A_temp - A_hi) + z_lo
Packit 6c4009
//
Packit 6c4009
{ .mfi
Packit 6c4009
      nop.m 999
Packit 6c4009
      fmpy.s1 zsq = Z, Z                        // zsq = Z * Z
Packit 6c4009
      nop.i 999
Packit 6c4009
}
Packit 6c4009
{ .mfi
Packit 6c4009
      nop.m 999
Packit 6c4009
      fadd.s1 A_hi = A_temp, z_lo               // A_hi = A_temp + z_lo
Packit 6c4009
      nop.i 999
Packit 6c4009
}
Packit 6c4009
;;
Packit 6c4009
Packit 6c4009
{ .mfi
Packit 6c4009
      nop.m 999
Packit 6c4009
      fma.s1 poly1 = zsq, P_8, P_7              // poly1 = P_7 + zsq * P_8
Packit 6c4009
      nop.i 999
Packit 6c4009
}
Packit 6c4009
{ .mfi
Packit 6c4009
      nop.m 999
Packit 6c4009
      fma.s1 poly2 = zsq, P_3, P_2              // poly2 = P_2 + zsq * P_3
Packit 6c4009
      nop.i 999
Packit 6c4009
}
Packit 6c4009
;;
Packit 6c4009
Packit 6c4009
{ .mfi
Packit 6c4009
      nop.m 999
Packit 6c4009
      fmpy.s1 z4 = zsq, zsq                     // z4 = zsq * zsq
Packit 6c4009
      nop.i 999
Packit 6c4009
}
Packit 6c4009
{ .mfi
Packit 6c4009
      nop.m 999
Packit 6c4009
      fsub.s1 A_temp = A_temp, A_hi             // A_temp = A_temp - A_hi
Packit 6c4009
      nop.i 999
Packit 6c4009
}
Packit 6c4009
;;
Packit 6c4009
Packit 6c4009
{ .mfi
Packit 6c4009
      nop.m 999
Packit 6c4009
      fmerge.s     tmp = A_hi, A_hi             // Copy tmp = A_hi
Packit 6c4009
      nop.i 999
Packit 6c4009
}
Packit 6c4009
;;
Packit 6c4009
Packit 6c4009
{ .mfi
Packit 6c4009
      nop.m 999
Packit 6c4009
      fma.s1 poly1 = zsq, poly1, P_6            // poly1 = P_6 + zsq * poly1
Packit 6c4009
      nop.i 999
Packit 6c4009
}
Packit 6c4009
// Second Horner step of poly2; the previous step formed P_2 + zsq * P_3.
{ .mfi
Packit 6c4009
      nop.m 999
Packit 6c4009
      fma.s1 poly2 = zsq, poly2, P_1            // poly2 = P_1 + zsq * poly2
Packit 6c4009
      nop.i 999
Packit 6c4009
}
Packit 6c4009
;;
Packit 6c4009
Packit 6c4009
{ .mfi
Packit 6c4009
      nop.m 999
Packit 6c4009
      fmpy.s1 z8 = z4, z4                       // z8 = z4 * z4
Packit 6c4009
      nop.i 999
Packit 6c4009
}
Packit 6c4009
{ .mfi
Packit 6c4009
      nop.m 999
Packit 6c4009
      fadd.s1 z_lo = A_temp, z_lo               // z_lo = (A_temp - A_hi) + z_lo
Packit 6c4009
      nop.i 999
Packit 6c4009
}
Packit 6c4009
;;
Packit 6c4009
Packit 6c4009
{ .mfi
Packit 6c4009
      nop.m 999
Packit 6c4009
      fma.s1 poly1 = zsq, poly1, P_5            // poly1 = P_5 + zsq * poly1
Packit 6c4009
      nop.i 999
Packit 6c4009
}
Packit 6c4009
{ .mfi
Packit 6c4009
      nop.m 999
Packit 6c4009
      fmpy.s1 poly2 = poly2, zsq                // poly2 = zsq * poly2
Packit 6c4009
      nop.i 999
Packit 6c4009
}
Packit 6c4009
;;
Packit 6c4009
Packit 6c4009
//     Create small GR double in case need to raise underflow
Packit 6c4009
{ .mfi
Packit 6c4009
      nop.m 999
Packit 6c4009
      fma.s1 poly1 = zsq, poly1, P_4            // poly1 = P_4 + zsq * poly1
Packit 6c4009
      dep GR_temp = -1,r0,0,53
Packit 6c4009
}
Packit 6c4009
;;
Packit 6c4009
Packit 6c4009
//     Create small double in case need to raise underflow
Packit 6c4009
{ .mfi
Packit 6c4009
      setf.d FR_temp = GR_temp
Packit 6c4009
      fma.s1 poly = z8, poly1, poly2            // poly = poly2 + z8 * poly1
Packit 6c4009
      nop.i 999
Packit 6c4009
}
Packit 6c4009
;;
Packit 6c4009
Packit 6c4009
{ .mfi
Packit 6c4009
      nop.m 999
Packit 6c4009
      fma.s1 A_lo = Z, poly, z_lo               // A_lo = z_lo + Z * poly
Packit 6c4009
      nop.i 999
Packit 6c4009
}
Packit 6c4009
;;
Packit 6c4009
Packit 6c4009
{ .mfi
Packit 6c4009
      nop.m 999
Packit 6c4009
      fadd.s1      A_hi = tmp, A_lo             // A_hi = tmp + A_lo
Packit 6c4009
      nop.i 999
Packit 6c4009
}
Packit 6c4009
;;
Packit 6c4009
Packit 6c4009
{ .mfi
Packit 6c4009
      nop.m 999
Packit 6c4009
      fsub.s1      tmp = tmp, A_hi              // tmp = tmp - A_hi
Packit 6c4009
      nop.i 999
Packit 6c4009
}
Packit 6c4009
{ .mfi
Packit 6c4009
      nop.m 999
Packit 6c4009
      fmpy.s1 A_hi = s_Y, A_hi                  // A_hi = s_Y * A_hi
Packit 6c4009
      nop.i 999
Packit 6c4009
}
Packit 6c4009
;;
Packit 6c4009
Packit 6c4009
{ .mfi
Packit 6c4009
      nop.m 999
Packit 6c4009
      fadd.s1      A_lo = tmp, A_lo             // A_lo = tmp + A_lo
Packit 6c4009
      nop.i 999
Packit 6c4009
}
Packit 6c4009
{ .mfi
Packit 6c4009
      nop.m 999
Packit 6c4009
      fma.s1 Res_hi = sigma, A_hi, P_hi         // Res_hi = P_hi + sigma * A_hi
Packit 6c4009
      nop.i 999
Packit 6c4009
}
Packit 6c4009
;;
Packit 6c4009
Packit 6c4009
{ .mfi
Packit 6c4009
      nop.m 999
Packit 6c4009
      fsub.s1 tmp =  P_hi, Res_hi               // tmp = P_hi - Res_hi
Packit 6c4009
      nop.i 999
Packit 6c4009
}
Packit 6c4009
;;
Packit 6c4009
Packit 6c4009
//
Packit 6c4009
//     Test if A_lo is zero
Packit 6c4009
//
Packit 6c4009
{ .mfi
Packit 6c4009
      nop.m 999
Packit 6c4009
      fclass.m p6,p0 = A_lo, 0x007              // Test A_lo = 0
Packit 6c4009
      nop.i 999
Packit 6c4009
}
Packit 6c4009
;;
Packit 6c4009
Packit 6c4009
{ .mfi
Packit 6c4009
      nop.m 999
Packit 6c4009
(p6)  mov          A_lo = tmp_small             // If A_lo zero, make very small
Packit 6c4009
      nop.i 999
Packit 6c4009
}
Packit 6c4009
;;
Packit 6c4009
Packit 6c4009
{ .mfi
Packit 6c4009
      nop.m 999
Packit 6c4009
      fma.s1 tmp = A_hi, sigma, tmp             // tmp = sigma * A_hi  + tmp
Packit 6c4009
      nop.i 999
Packit 6c4009
}
Packit 6c4009
{ .mfi
Packit 6c4009
      nop.m 999
Packit 6c4009
      fma.s1 sigma =  A_lo, sigma, P_lo         // sigma = A_lo * sigma  + P_lo
Packit 6c4009
      nop.i 999
Packit 6c4009
}
Packit 6c4009
;;
Packit 6c4009
Packit 6c4009
{ .mfi
Packit 6c4009
      nop.m 999
Packit 6c4009
      fma.s1 Res_lo = s_Y, sigma, tmp           // Res_lo = s_Y * sigma + tmp
Packit 6c4009
      nop.i 999
Packit 6c4009
}
Packit 6c4009
;;
Packit 6c4009
Packit 6c4009
//
Packit 6c4009
//     Test if Res_lo is denormal
Packit 6c4009
//
Packit 6c4009
{ .mfi
Packit 6c4009
      nop.m 999
Packit 6c4009
      fclass.m p14, p15 = Res_lo, 0x0b
Packit 6c4009
      nop.i 999
Packit 6c4009
}
Packit 6c4009
;;
Packit 6c4009
Packit 6c4009
//
Packit 6c4009
//     Compute Result = Res_lo + Res_hi.  Use s3 if Res_lo is denormal.
Packit 6c4009
//
Packit 6c4009
{ .mfi
Packit 6c4009
      nop.m 999
Packit 6c4009
(p14) fadd.s3 Result = Res_lo, Res_hi     // Result for Res_lo denormal
Packit 6c4009
      nop.i 999
Packit 6c4009
}
Packit 6c4009
{ .mfi
Packit 6c4009
      nop.m 999
Packit 6c4009
(p15) fadd.s0 Result = Res_lo, Res_hi     // Result for Res_lo normal
Packit 6c4009
      nop.i 999
Packit 6c4009
}
Packit 6c4009
;;
Packit 6c4009
Packit 6c4009
//
Packit 6c4009
//     If Res_lo is denormal test if Result equals zero
Packit 6c4009
//
Packit 6c4009
{ .mfi
Packit 6c4009
      nop.m 999
Packit 6c4009
(p14) fclass.m.unc p14, p0 = Result, 0x07
Packit 6c4009
      nop.i 999
Packit 6c4009
}
Packit 6c4009
;;
Packit 6c4009
Packit 6c4009
//
Packit 6c4009
//     If Res_lo is denormal and Result equals zero, raise inexact, underflow
Packit 6c4009
//     by squaring small double
Packit 6c4009
//
Packit 6c4009
{ .mfb
Packit 6c4009
      nop.m 999
Packit 6c4009
(p14) fmpy.d.s0 FR_temp = FR_temp, FR_temp
Packit 6c4009
      br.ret.sptk   b0                     // Exit POLY path, 0 < Q < 2^-3
Packit 6c4009
}
Packit 6c4009
;;
Packit 6c4009
Packit 6c4009
Packit 6c4009
ATANL_UNSUPPORTED:
Packit 6c4009
{ .mfb
Packit 6c4009
      nop.m 999
Packit 6c4009
      fmpy.s0 Result = ArgX,ArgY
Packit 6c4009
      br.ret.sptk   b0
Packit 6c4009
}
Packit 6c4009
;;
Packit 6c4009
Packit 6c4009
// Here if y natval, nan, inf, zero
Packit 6c4009
ATANL_Y_SPECIAL:
Packit 6c4009
// Here if x natval, nan, inf, zero
Packit 6c4009
ATANL_X_SPECIAL:
Packit 6c4009
{ .mfi
Packit 6c4009
      nop.m 999
Packit 6c4009
      fclass.m p13,p12 = ArgY_orig, 0x0c3  // Test y nan
Packit 6c4009
      nop.i 999
Packit 6c4009
}
Packit 6c4009
;;
Packit 6c4009
Packit 6c4009
{ .mfi
Packit 6c4009
      nop.m 999
Packit 6c4009
      fclass.m p15,p14 = ArgY_orig, 0x103  // Test y natval
Packit 6c4009
      nop.i 999
Packit 6c4009
}
Packit 6c4009
;;
Packit 6c4009
Packit 6c4009
{ .mfi
Packit 6c4009
      nop.m 999
Packit 6c4009
(p12) fclass.m p13,p0 = ArgX_orig, 0x0c3  // Test x nan
Packit 6c4009
      nop.i 999
Packit 6c4009
}
Packit 6c4009
;;
Packit 6c4009
Packit 6c4009
{ .mfi
Packit 6c4009
      nop.m 999
Packit 6c4009
(p14) fclass.m p15,p0 = ArgX_orig, 0x103  // Test x natval
Packit 6c4009
      nop.i 999
Packit 6c4009
}
Packit 6c4009
;;
Packit 6c4009
Packit 6c4009
{ .mfb
Packit 6c4009
      nop.m 999
Packit 6c4009
(p13) fmpy.s0 Result = ArgX_orig, ArgY_orig // Result nan if x or y nan
Packit 6c4009
(p13) br.ret.spnt b0                      // Exit if x or y nan
Packit 6c4009
}
Packit 6c4009
;;
Packit 6c4009
Packit 6c4009
{ .mfb
Packit 6c4009
      nop.m 999
Packit 6c4009
(p15) fmpy.s0 Result = ArgX_orig, ArgY_orig // Result natval if x or y natval
Packit 6c4009
(p15) br.ret.spnt b0                      // Exit if x or y natval
Packit 6c4009
}
Packit 6c4009
;;
Packit 6c4009
Packit 6c4009
Packit 6c4009
// Here if x or y inf or zero
Packit 6c4009
ATANL_SPECIAL_HANDLING:
Packit 6c4009
{ .mfi
Packit 6c4009
      nop.m 999
Packit 6c4009
      fclass.m p6, p7 = ArgY_orig, 0x007        // Test y zero
Packit 6c4009
      mov special = 992                         // Offset to table
Packit 6c4009
}
Packit 6c4009
;;
Packit 6c4009
Packit 6c4009
{ .mfb
Packit 6c4009
      add table_ptr1 = table_base, special      // Point to 3pi/4
Packit 6c4009
      fcmp.eq.s0 p0, p9 = ArgX_orig, ArgY_orig  // Dummy to set denormal flag
Packit 6c4009
(p7)  br.cond.spnt ATANL_ArgY_Not_ZERO          // Branch if y not zero
Packit 6c4009
}
Packit 6c4009
;;
Packit 6c4009
Packit 6c4009
// Here if y zero
Packit 6c4009
{ .mmf
Packit 6c4009
      ldfd  Result = [table_ptr1], 8            // Get pi high
Packit 6c4009
      nop.m 999
Packit 6c4009
      fclass.m p14, p0 = ArgX, 0x035            // Test for x>=+0
Packit 6c4009
}
Packit 6c4009
;;
Packit 6c4009
Packit 6c4009
{ .mmf
Packit 6c4009
      nop.m 999
Packit 6c4009
      ldfd  Result_lo = [table_ptr1], -8        // Get pi lo
Packit 6c4009
      fclass.m p15, p0 = ArgX, 0x036            // Test for x<=-0
Packit 6c4009
}
Packit 6c4009
;;
Packit 6c4009
Packit 6c4009
//
Packit 6c4009
//     Return sign_Y * 0 when  ArgX >= +0
Packit 6c4009
//
Packit 6c4009
{ .mfi
Packit 6c4009
      nop.m 999
Packit 6c4009
(p14) fmerge.s Result = ArgY, f0               // If x>=+0, y=0, hi sgn(y)*0
Packit 6c4009
      nop.i 999
Packit 6c4009
}
Packit 6c4009
;;
Packit 6c4009
Packit 6c4009
{ .mfi
Packit 6c4009
      nop.m 999
Packit 6c4009
      fclass.m p13, p0 = ArgX, 0x007           // Test for x=0
Packit 6c4009
      nop.i 999
Packit 6c4009
}
Packit 6c4009
;;
Packit 6c4009
Packit 6c4009
{ .mfi
Packit 6c4009
      nop.m 999
Packit 6c4009
(p14) fmerge.s Result_lo = ArgY, f0            // If x>=+0, y=0, lo sgn(y)*0
Packit 6c4009
      nop.i 999
Packit 6c4009
}
Packit 6c4009
;;
Packit 6c4009
Packit 6c4009
{ .mfi
Packit 6c4009
(p13) mov GR_Parameter_TAG = 36                // Error tag for x=0, y=0
Packit 6c4009
      nop.f 999
Packit 6c4009
      nop.i 999
Packit 6c4009
}
Packit 6c4009
;;
Packit 6c4009
Packit 6c4009
//
Packit 6c4009
//     Return sign_Y * pi when  ArgX <= -0
Packit 6c4009
//
Packit 6c4009
{ .mfi
Packit 6c4009
      nop.m 999
Packit 6c4009
(p15) fmerge.s Result = ArgY, Result           // If x<0, y=0, hi=sgn(y)*pi
Packit 6c4009
      nop.i 999
Packit 6c4009
}
Packit 6c4009
;;
Packit 6c4009
Packit 6c4009
{ .mfi
Packit 6c4009
      nop.m 999
Packit 6c4009
(p15) fmerge.s Result_lo = ArgY, Result_lo     // If x<0, y=0, lo=sgn(y)*pi
Packit 6c4009
      nop.i 999
Packit 6c4009
}
Packit 6c4009
;;
Packit 6c4009
Packit 6c4009
//
Packit 6c4009
//     Call error support function for atan(0,0)
Packit 6c4009
//
Packit 6c4009
{ .mfb
Packit 6c4009
      nop.m 999
Packit 6c4009
      fadd.s0 Result = Result, Result_lo
Packit 6c4009
(p13) br.cond.spnt __libm_error_region         // Branch if atan(0,0)
Packit 6c4009
}
Packit 6c4009
;;
Packit 6c4009
Packit 6c4009
{ .mib
Packit 6c4009
      nop.m 999
Packit 6c4009
      nop.i 999
Packit 6c4009
      br.ret.sptk   b0                         // Exit for y=0, x not 0
Packit 6c4009
}
Packit 6c4009
;;
Packit 6c4009
Packit 6c4009
// Here if y not zero
Packit 6c4009
ATANL_ArgY_Not_ZERO:
Packit 6c4009
{ .mfi
Packit 6c4009
      nop.m 999
Packit 6c4009
      fclass.m p0, p10 = ArgY, 0x023           // Test y inf
Packit 6c4009
      nop.i 999
Packit 6c4009
}
Packit 6c4009
;;
Packit 6c4009
Packit 6c4009
{ .mfb
Packit 6c4009
      nop.m 999
Packit 6c4009
      fclass.m p6, p0 = ArgX, 0x017            // Test for 0 <= |x| < inf
Packit 6c4009
(p10) br.cond.spnt  ATANL_ArgY_Not_INF         // Branch if 0 < |y| < inf
Packit 6c4009
}
Packit 6c4009
;;
Packit 6c4009
Packit 6c4009
// Here if y=inf
Packit 6c4009
//
Packit 6c4009
//     Return +PI/2 when ArgY = +Inf and ArgX = +/-0 or normal
Packit 6c4009
//     Return -PI/2 when ArgY = -Inf and ArgX = +/-0 or normal
Packit 6c4009
//     Return +PI/4 when ArgY = +Inf and ArgX = +Inf
Packit 6c4009
//     Return -PI/4 when ArgY = -Inf and ArgX = +Inf
Packit 6c4009
//     Return +3PI/4 when ArgY = +Inf and ArgX = -Inf
Packit 6c4009
//     Return -3PI/4 when ArgY = -Inf and ArgX = -Inf
Packit 6c4009
//
Packit 6c4009
{ .mfi
Packit 6c4009
      nop.m 999
Packit 6c4009
      fclass.m p7, p0 = ArgX, 0x021            // Test for x=+inf
Packit 6c4009
      nop.i 999
Packit 6c4009
}
Packit 6c4009
;;
Packit 6c4009
Packit 6c4009
{ .mfi
Packit 6c4009
(p6)  add table_ptr1 =  16, table_ptr1         // Point to pi/2, if x finite
Packit 6c4009
      fclass.m p8, p0 = ArgX, 0x022            // Test for x=-inf
Packit 6c4009
      nop.i 999
Packit 6c4009
}
Packit 6c4009
;;
Packit 6c4009
Packit 6c4009
{ .mmi
Packit 6c4009
(p7)  add table_ptr1 =  32, table_ptr1         // Point to pi/4 if x=+inf
Packit 6c4009
;;
Packit 6c4009
(p8)  add table_ptr1 =  48, table_ptr1         // Point to 3pi/4 if x=-inf
Packit 6c4009
Packit 6c4009
      nop.i 999
Packit 6c4009
}
Packit 6c4009
;;
Packit 6c4009
Packit 6c4009
{ .mmi
Packit 6c4009
      ldfd Result = [table_ptr1], 8            // Load pi/2, pi/4, or 3pi/4 hi
Packit 6c4009
;;
Packit 6c4009
      ldfd Result_lo = [table_ptr1], -8        // Load pi/2, pi/4, or 3pi/4 lo
Packit 6c4009
      nop.i 999
Packit 6c4009
}
Packit 6c4009
;;
Packit 6c4009
Packit 6c4009
{ .mfi
Packit 6c4009
      nop.m 999
Packit 6c4009
      fmerge.s Result = ArgY, Result           // Merge sgn(y) in hi
Packit 6c4009
      nop.i 999
Packit 6c4009
}
Packit 6c4009
;;
Packit 6c4009
Packit 6c4009
{ .mfi
Packit 6c4009
      nop.m 999
Packit 6c4009
      fmerge.s Result_lo = ArgY, Result_lo     // Merge sgn(y) in lo
Packit 6c4009
      nop.i 999
Packit 6c4009
}
Packit 6c4009
;;
Packit 6c4009
Packit 6c4009
{ .mfb
Packit 6c4009
      nop.m 999
Packit 6c4009
      fadd.s0 Result = Result, Result_lo       // Compute complete result
Packit 6c4009
      br.ret.sptk   b0                         // Exit for y=inf
Packit 6c4009
}
Packit 6c4009
;;
Packit 6c4009
Packit 6c4009
// Here if y not INF, and x=0 or INF
Packit 6c4009
ATANL_ArgY_Not_INF:
Packit 6c4009
//
Packit 6c4009
//     Return +PI/2 when ArgY NOT Inf, ArgY > 0 and ArgX = +/-0
Packit 6c4009
//     Return -PI/2 when ArgY NOT Inf, ArgY < 0 and ArgX = +/-0
Packit 6c4009
//     Return +0    when ArgY NOT Inf, ArgY > 0 and ArgX = +Inf
Packit 6c4009
//     Return -0    when ArgY NOT Inf, ArgY < 0 and ArgX = +Inf
Packit 6c4009
//     Return +PI   when ArgY NOT Inf, ArgY > 0 and ArgX = -Inf
Packit 6c4009
//     Return -PI   when ArgY NOT Inf, ArgY < 0 and ArgX = -Inf
Packit 6c4009
//
Packit 6c4009
{ .mfi
Packit 6c4009
      nop.m 999
Packit 6c4009
      fclass.m p7, p9 = ArgX, 0x021            // Test for x=+inf
Packit 6c4009
      nop.i 999
Packit 6c4009
}
Packit 6c4009
;;
Packit 6c4009
Packit 6c4009
{ .mfi
Packit 6c4009
      nop.m 999
Packit 6c4009
      fclass.m p6, p0 = ArgX, 0x007            // Test for x=0
Packit 6c4009
      nop.i 999
Packit 6c4009
}
Packit 6c4009
;;
Packit 6c4009
Packit 6c4009
{ .mfi
Packit 6c4009
(p6)  add table_ptr1 = 16, table_ptr1          // Point to pi/2
Packit 6c4009
      fclass.m p8, p0 = ArgX, 0x022            // Test for x=-inf
Packit 6c4009
      nop.i 999
Packit 6c4009
}
Packit 6c4009
;;
Packit 6c4009
Packit 6c4009
.pred.rel "mutex",p7,p9
Packit 6c4009
{ .mfi
Packit 6c4009
(p9)  ldfd Result = [table_ptr1], 8           // Load pi or pi/2 hi
Packit 6c4009
(p7)  fmerge.s Result = ArgY, f0              // If y not inf, x=+inf, sgn(y)*0
Packit 6c4009
      nop.i 999
Packit 6c4009
}
Packit 6c4009
;;
Packit 6c4009
Packit 6c4009
{ .mfi
Packit 6c4009
(p9)  ldfd Result_lo = [table_ptr1], -8       // Load pi or pi/2 lo
Packit 6c4009
(p7)  fnorm.s0 Result = Result                // If y not inf, x=+inf normalize
Packit 6c4009
      nop.i 999
Packit 6c4009
}
Packit 6c4009
;;
Packit 6c4009
Packit 6c4009
{ .mfi
Packit 6c4009
      nop.m 999
Packit 6c4009
(p9)  fmerge.s Result = ArgY, Result          // Merge sgn(y) in hi
Packit 6c4009
      nop.i 999
Packit 6c4009
}
Packit 6c4009
;;
Packit 6c4009
Packit 6c4009
{ .mfi
Packit 6c4009
      nop.m 999
Packit 6c4009
(p9)  fmerge.s Result_lo = ArgY, Result_lo    // Merge sgn(y) in lo
Packit 6c4009
      nop.i 999
Packit 6c4009
}
Packit 6c4009
;;
Packit 6c4009
Packit 6c4009
{ .mfb
Packit 6c4009
      nop.m 999
Packit 6c4009
(p9)  fadd.s0 Result = Result, Result_lo      // Compute complete result
Packit 6c4009
      br.ret.spnt   b0                        // Exit for y not inf, x=0,inf
Packit 6c4009
}
Packit 6c4009
;;
Packit 6c4009
Packit 6c4009
GLOBAL_IEEE754_END(atan2l)
Packit 6c4009
libm_alias_ldouble_other (__atan2, atan2)
Packit 6c4009
Packit 6c4009
//
//     __libm_error_region: out-of-line call to __libm_error_support.
//
//     Reached by branch from the y=0, x=0 special case above, after
//     GR_Parameter_TAG has been loaded with the error tag.  Stores FR_Y,
//     FR_X and FR_RESULT with stfe (extended-format stores) into a fresh
//     64-byte stack frame, calls __libm_error_support, reloads the result
//     slot into f8 and returns through the saved b0.
//
//     Frame layout relative to the new sp (= incoming sp - 64):
//       sp+16  FR_X       (address in GR_Parameter_X)
//       sp+32  FR_Y       (address in GR_Parameter_Y at the call)
//       sp+48  FR_RESULT  (address in GR_Parameter_RESULT), read back to f8
//
//     NOTE(review): the GR_Parameter_* registers are presumably the outgoing
//     argument registers of __libm_error_support, and sp+0..sp+15 is
//     presumably left free as callee scratch space; neither the register
//     definitions nor the callee are visible here -- confirm.
//
LOCAL_LIBM_ENTRY(__libm_error_region)
Packit 6c4009
.prologue
Packit 6c4009
{ .mfi
Packit 6c4009
        add   GR_Parameter_Y=-32,sp             // Parameter 2 slot: incoming sp - 32 (sp is lowered in the same group)
Packit 6c4009
        nop.f 0
Packit 6c4009
.save   ar.pfs,GR_SAVE_PFS
Packit 6c4009
        mov  GR_SAVE_PFS=ar.pfs                 // Save ar.pfs; restored before the return
Packit 6c4009
}
Packit 6c4009
{ .mfi
Packit 6c4009
.fframe 64
Packit 6c4009
        add sp=-64,sp                           // Allocate the 64-byte frame
Packit 6c4009
        nop.f 0
Packit 6c4009
        mov GR_SAVE_GP=gp                       // Save gp; restored after the call
Packit 6c4009
};;
Packit 6c4009
{ .mmi
Packit 6c4009
        stfe [GR_Parameter_Y] = FR_Y,16         // Store Parameter 2 at sp+32, then advance pointer to sp+48
Packit 6c4009
        add GR_Parameter_X = 16,sp              // Parameter 1 address: sp+16
Packit 6c4009
.save   b0, GR_SAVE_B0
Packit 6c4009
        mov GR_SAVE_B0=b0                       // Save return address; br.call below overwrites b0
Packit 6c4009
};;
Packit 6c4009
.body
Packit 6c4009
{ .mib
Packit 6c4009
        stfe [GR_Parameter_X] = FR_X            // Store Parameter 1 on stack
Packit 6c4009
        add   GR_Parameter_RESULT = 0,GR_Parameter_Y  // Parameter 3 address: sp+48
Packit 6c4009
        nop.b 0
Packit 6c4009
}
Packit 6c4009
{ .mib
Packit 6c4009
        stfe [GR_Parameter_Y] = FR_RESULT      // Store Parameter 3 on stack (at sp+48)
Packit 6c4009
        add   GR_Parameter_Y = -16,GR_Parameter_Y  // Point back at Parameter 2 (sp+32)
Packit 6c4009
        br.call.sptk b0=__libm_error_support#  // Call error handling function
Packit 6c4009
};;
Packit 6c4009
{ .mmi
Packit 6c4009
        nop.m 0
Packit 6c4009
        nop.m 0
        // Address of the result slot is rebuilt from sp rather than reused,
        // presumably because the callee may have overwritten the register.
        add   GR_Parameter_RESULT = 48,sp
Packit 6c4009
};;
Packit 6c4009
{ .mmi
Packit 6c4009
        ldfe  f8 = [GR_Parameter_RESULT]       // Load the result slot (sp+48) into f8
Packit 6c4009
.restore sp
Packit 6c4009
        add   sp = 64,sp                       // Release the 64-byte frame
Packit 6c4009
        mov   b0 = GR_SAVE_B0                  // Restore return address
Packit 6c4009
};;
Packit 6c4009
{ .mib
Packit 6c4009
        mov   gp = GR_SAVE_GP                  // Restore gp
Packit 6c4009
        mov   ar.pfs = GR_SAVE_PFS             // Restore ar.pfs
Packit 6c4009
        br.ret.sptk     b0                     // Return via the restored b0
Packit 6c4009
};;
Packit 6c4009
Packit 6c4009
LOCAL_LIBM_END(__libm_error_region#)
Packit 6c4009
// Symbol declarations for __libm_error_support, the routine called from
// __libm_error_region: mark it as a function and make the symbol global.
// No definition appears in this part of the file; presumably it is provided
// by another object -- confirm.
.type   __libm_error_support#,@function
Packit 6c4009
.global __libm_error_support#