Tree - source-git/glibc - CentOS Git server

source-git / glibc

Blame sysdeps/x86_64/fpu/multiarch/s_sincosf-fma.c

Blob History Raw

Packit	6c4009	`/* Compute sine and cosine of argument optimized with vector.`
Packit	6c4009	`Copyright (C) 2017 Free Software Foundation, Inc.`
Packit	6c4009	`This file is part of the GNU C Library.`
Packit	6c4009
Packit	6c4009	`The GNU C Library is free software; you can redistribute it and/or`
Packit	6c4009	`modify it under the terms of the GNU Lesser General Public`
Packit	6c4009	`License as published by the Free Software Foundation; either`
Packit	6c4009	`version 2.1 of the License, or (at your option) any later version.`
Packit	6c4009
Packit	6c4009	`The GNU C Library is distributed in the hope that it will be useful,`
Packit	6c4009	`but WITHOUT ANY WARRANTY; without even the implied warranty of`
Packit	6c4009	`MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU`
Packit	6c4009	`Lesser General Public License for more details.`
Packit	6c4009
Packit	6c4009	`You should have received a copy of the GNU Lesser General Public`
Packit	6c4009	`License along with the GNU C Library; if not, see`
Packit	6c4009	`<http://www.gnu.org/licenses/>. */`
Packit	6c4009
Packit	6c4009	`#include <errno.h>`
Packit	6c4009	`#include <math.h>`
Packit	6c4009	`#include <math_private.h>`
Packit	6c4009	`#include <x86intrin.h>`
Packit	6c4009	`#include <libm-alias-float.h>`
Packit	6c4009	`#include "s_sincosf.h"`
Packit	6c4009
Packit	6c4009	`#define SINCOSF __sincosf_fma /* Name given to the function defined in this file. */`
Packit	6c4009
Packit	6c4009	`#ifndef SINCOSF`
Packit	6c4009	`# define SINCOSF_FUNC __sincosf /* Not selected here: SINCOSF is defined just above. */`
Packit	6c4009	`#else`
Packit	6c4009	`# define SINCOSF_FUNC SINCOSF`
Packit	6c4009	`#endif /* SINCOSF */`
Packit	6c4009
Packit	6c4009	`/* Chebyshev constants for sin and cos, range -PI/4 - PI/4. Each pair is { sin coefficient, cos coefficient }. */`
Packit	6c4009	`static const __v2df V0 = { -0x1.5555555551cd9p-3, -0x1.ffffffffe98aep-2};`
Packit	6c4009	`static const __v2df V1 = { 0x1.1111110c2688bp-7, 0x1.55555545c50c7p-5 };`
Packit	6c4009	`static const __v2df V2 = { -0x1.a019f8b4bd1f9p-13, -0x1.6c16b348b6874p-10 };`
Packit	6c4009	`static const __v2df V3 = { 0x1.71d7264e6b5b4p-19, 0x1.a00eb9ac43ccp-16 };`
Packit	6c4009	`static const __v2df V4 = { -0x1.a947e1674b58ap-26, -0x1.23c97dd8844d7p-22 };`
Packit	6c4009
Packit	6c4009	`/* Chebyshev constants for sin and cos, range 2^-27 - 2^-5. Same { sin, cos } pairing as above. */`
Packit	6c4009	`static const __v2df VC0 = { -0x1.555555543d49dp-3, -0x1.fffffff5cc6fdp-2 };`
Packit	6c4009	`static const __v2df VC1 = { 0x1.110f475cec8c5p-7, 0x1.55514b178dac5p-5 };`
Packit	6c4009
Packit	6c4009	`static const __v2df v2ones = { 1.0, 1.0 }; /* Added to the polynomial term; only the cosine element of that sum is kept. */`
Packit	6c4009
Packit	6c4009	`/* Compute the sine and cosine values using Chebyshev polynomials where`
Packit	6c4009	`THETA is the range reduced absolute value of the input`
Packit	6c4009	`and it is less than Pi/4,`
Packit	6c4009	`N is calculated as trunc(\|x\|/(Pi/4)) + 1 and it is used to decide`
Packit	6c4009	`whether a sine or cosine approximation is more accurate and`
Packit	6c4009	`SIGNBIT is used to add the correct sign after the Chebyshev`
Packit	6c4009	`polynomial is computed. */`
Packit	6c4009	`static void`
Packit	6c4009	`reduced_sincos (const double theta, const unsigned int n,`
Packit	6c4009	`const unsigned int signbit, float sinx, float cosx)`
Packit	6c4009	`{`
Packit	6c4009	`__v2df v2x, v2sx, v2cx;`
Packit	6c4009	`const __v2df v2theta = { theta, theta };`
Packit	6c4009	`const __v2df v2theta2 = v2theta * v2theta;`
Packit	6c4009	`/* Here sinf() and cosf() are calculated using sin Chebyshev polynomial:`
Packit	6c4009	`x+x^3(S0+x^2(S1+x^2(S2+x^2(S3+x^2S4)))). /`
Packit	6c4009	`v2x = V3 + v2theta2 * V4; /* S3+x^2S4. /`
Packit	6c4009	`v2x = V2 + v2theta2 * v2x; /* S2+x^2(S3+x^2S4). */`
Packit	6c4009	`v2x = V1 + v2theta2 * v2x; /* S1+x^2(S2+x^2(S3+x^2S4)). /`
Packit	6c4009	`v2x = V0 + v2theta2 * v2x; /* S0+x^2(S1+x^2(S2+x^2(S3+x^2S4))). */`
Packit	6c4009	`v2x = v2theta2 * v2x;`
Packit	6c4009	`v2cx = v2ones + v2x;`
Packit	6c4009	`v2sx = v2theta + v2theta * v2x;`
Packit	6c4009	`/* We are operating on \|x\|, so we need to add back the original`
Packit	6c4009	`signbit for sinf. */`
Packit	6c4009	`/* Determine positive or negative primary interval. */`
Packit	6c4009	`/* Are we in the primary interval of sin or cos? */`
Packit	6c4009	`if ((n & 2) == 0)`
Packit	6c4009	`{`
Packit	6c4009	`const __v2df v2sign =`
Packit	6c4009	`{`
Packit	6c4009	`ones[((n >> 2) & 1) ^ signbit],`
Packit	6c4009	`ones[((n + 2) >> 2) & 1]`
Packit	6c4009	`};`
Packit	6c4009	`v2cx[0] = v2sx[0];`
Packit	6c4009	`v2cx *= v2sign;`
Packit	6c4009	`__v4sf v4sx = _mm_cvtpd_ps (v2cx);`
Packit	6c4009	`*sinx = v4sx[0];`
Packit	6c4009	`*cosx = v4sx[1];`
Packit	6c4009	`}`
Packit	6c4009	`else`
Packit	6c4009	`{`
Packit	6c4009	`const __v2df v2sign =`
Packit	6c4009	`{`
Packit	6c4009	`ones[((n + 2) >> 2) & 1],`
Packit	6c4009	`ones[((n >> 2) & 1) ^ signbit]`
Packit	6c4009	`};`
Packit	6c4009	`v2cx[0] = v2sx[0];`
Packit	6c4009	`v2cx *= v2sign;`
Packit	6c4009	`__v4sf v4sx = _mm_cvtpd_ps (v2cx);`
Packit	6c4009	`*sinx = v4sx[1];`
Packit	6c4009	`*cosx = v4sx[0];`
Packit	6c4009	`}`
Packit	6c4009	`}`
Packit	6c4009
Packit	6c4009	`/* Compute the sine and cosine of X in one call and store them through`
Packit	6c4009	`   SINX and COSX.  X is widened to double and the work is dispatched on`
Packit	6c4009	`   the magnitude of its bit pattern:`
Packit	6c4009	`     |x| < 2^-27          results are formed directly, no polynomial,`
Packit	6c4009	`     2^-27 <= |x| < 2^-5  short Chebyshev polynomial (VC0, VC1),`
Packit	6c4009	`     2^-5 <= |x| < Pi/4   full Chebyshev polynomial (V0 ... V4),`
Packit	6c4009	`     Pi/4 <= |x| < Inf    range reduction, then reduced_sincos,`
Packit	6c4009	`     Inf or NaN           both results are NaN; errno is set to EDOM`
Packit	6c4009	`                          when X is infinite.  */`
Packit	6c4009	`void`
Packit	6c4009	`SINCOSF_FUNC (float x, float *sinx, float *cosx)`
Packit	6c4009	`{`
Packit	6c4009	`  double theta = x;`
Packit	6c4009	`  double abstheta = fabs (theta);`
Packit	6c4009	`  uint32_t ix, xi;`
Packit	6c4009	`  GET_FLOAT_WORD (xi, x);`
Packit	6c4009	`  /* |x| */`
Packit	6c4009	`  ix = xi & 0x7fffffff;`
Packit	6c4009	`  /* If |x|< Pi/4.  */`
Packit	6c4009	`  if (ix < 0x3f490fdb)`
Packit	6c4009	`    {`
Packit	6c4009	`      if (ix >= 0x3d000000) /* |x| >= 2^-5.  */`
Packit	6c4009	`        {`
Packit	6c4009	`          __v2df v2x, v2sx, v2cx;`
Packit	6c4009	`          const __v2df v2theta = { theta, theta };`
Packit	6c4009	`          const __v2df v2theta2 = v2theta * v2theta;`
Packit	6c4009	`          /* Chebyshev polynomial of the form for sin and cos.  */`
Packit	6c4009	`          v2x = V3 + v2theta2 * V4;`
Packit	6c4009	`          v2x = V2 + v2theta2 * v2x;`
Packit	6c4009	`          v2x = V1 + v2theta2 * v2x;`
Packit	6c4009	`          v2x = V0 + v2theta2 * v2x;`
Packit	6c4009	`          v2x = v2theta2 * v2x;`
Packit	6c4009	`          v2cx = v2ones + v2x;`
Packit	6c4009	`          v2sx = v2theta + v2theta * v2x;`
Packit	6c4009	`          /* Pack { sin, cos } so one conversion yields both floats.  */`
Packit	6c4009	`          v2cx[0] = v2sx[0];`
Packit	6c4009	`          __v4sf v4sx = _mm_cvtpd_ps (v2cx);`
Packit	6c4009	`          *sinx = v4sx[0];`
Packit	6c4009	`          *cosx = v4sx[1];`
Packit	6c4009	`        }`
Packit	6c4009	`      else if (ix >= 0x32000000) /* |x| >= 2^-27.  */`
Packit	6c4009	`        {`
Packit	6c4009	`          /* A simpler Chebyshev approximation is close enough for this range:`
Packit	6c4009	`             for sin: x+x^3*(SS0+x^2*SS1)`
Packit	6c4009	`             for cos: 1.0+x^2*(CC0+x^3*CC1).  */`
Packit	6c4009	`          __v2df v2x, v2sx, v2cx;`
Packit	6c4009	`          const __v2df v2theta = { theta, theta };`
Packit	6c4009	`          const __v2df v2theta2 = v2theta * v2theta;`
Packit	6c4009	`          v2x = VC0 + v2theta * v2theta2 * VC1;`
Packit	6c4009	`          v2x = v2theta2 * v2x;`
Packit	6c4009	`          v2cx = v2ones + v2x;`
Packit	6c4009	`          v2sx = v2theta + v2theta * v2x;`
Packit	6c4009	`          v2cx[0] = v2sx[0];`
Packit	6c4009	`          __v4sf v4sx = _mm_cvtpd_ps (v2cx);`
Packit	6c4009	`          *sinx = v4sx[0];`
Packit	6c4009	`          *cosx = v4sx[1];`
Packit	6c4009	`        }`
Packit	6c4009	`      else`
Packit	6c4009	`        {`
Packit	6c4009	`          /* Handle some special cases.  */`
Packit	6c4009	`          /* NOTE(review): SMALL is defined outside this file; the`
Packit	6c4009	`             subtraction presumably exists to raise the inexact`
Packit	6c4009	`             exception for tiny non-zero arguments -- confirm.  */`
Packit	6c4009	`          if (ix)`
Packit	6c4009	`            *sinx = theta - (theta * SMALL);`
Packit	6c4009	`          else`
Packit	6c4009	`            *sinx = theta;`
Packit	6c4009	`          *cosx = 1.0 - abstheta;`
Packit	6c4009	`        }`
Packit	6c4009	`    }`
Packit	6c4009	`  else /* |x| >= Pi/4.  */`
Packit	6c4009	`    {`
Packit	6c4009	`      unsigned int signbit = xi >> 31;`
Packit	6c4009	`      if (ix < 0x40e231d6) /* |x| < 9*Pi/4.  */`
Packit	6c4009	`        {`
Packit	6c4009	`          /* There are cases where FE_UPWARD rounding mode can`
Packit	6c4009	`             produce a result of abstheta * inv_PI_4 == 9,`
Packit	6c4009	`             where abstheta < 9pi/4, so the domain for`
Packit	6c4009	`             pio2_table must go to 5 (9 / 2 + 1).  */`
Packit	6c4009	`          unsigned int n = (abstheta * inv_PI_4) + 1;`
Packit	6c4009	`          theta = abstheta - pio2_table[n / 2];`
Packit	6c4009	`          reduced_sincos (theta, n, signbit, sinx, cosx);`
Packit	6c4009	`        }`
Packit	6c4009	`      else if (ix < 0x7f800000)`
Packit	6c4009	`        {`
Packit	6c4009	`          if (ix < 0x4b000000) /* |x| < 2^23.  */`
Packit	6c4009	`            {`
Packit	6c4009	`              unsigned int n = ((unsigned int) (abstheta * inv_PI_4)) + 1;`
Packit	6c4009	`              /* Integer division on purpose: K is the whole number of`
Packit	6c4009	`                 Pi/2 steps to remove.  Named K rather than X so that it`
Packit	6c4009	`                 does not shadow the function argument.  */`
Packit	6c4009	`              const double k = n / 2;`
Packit	6c4009	`              theta = (abstheta - k * PI_2_hi) - k * PI_2_lo;`
Packit	6c4009	`              /* Argument reduction needed.  */`
Packit	6c4009	`              reduced_sincos (theta, n, signbit, sinx, cosx);`
Packit	6c4009	`            }`
Packit	6c4009	`          else /* |x| >= 2^23.  */`
Packit	6c4009	`            {`
Packit	6c4009	`              x = fabsf (x);`
Packit	6c4009	`              int exponent`
Packit	6c4009	`                = (ix >> FLOAT_EXPONENT_SHIFT) - FLOAT_EXPONENT_BIAS;`
Packit	6c4009	`              exponent += 3;`
Packit	6c4009	`              exponent /= 28;`
Packit	6c4009	`              /* NOTE(review): invpio4_table is declared outside this`
Packit	6c4009	`                 file; presumably consecutive entries hold successive`
Packit	6c4009	`                 pieces of 4/Pi -- confirm.  */`
Packit	6c4009	`              double a = invpio4_table[exponent] * x;`
Packit	6c4009	`              double b = invpio4_table[exponent + 1] * x;`
Packit	6c4009	`              double c = invpio4_table[exponent + 2] * x;`
Packit	6c4009	`              double d = invpio4_table[exponent + 3] * x;`
Packit	6c4009	`              uint64_t l = a;`
Packit	6c4009	`              /* Drop whole multiples of 8 from the leading product.  */`
Packit	6c4009	`              l &= ~0x7;`
Packit	6c4009	`              a -= l;`
Packit	6c4009	`              double e = a + b;`
Packit	6c4009	`              l = e;`
Packit	6c4009	`              e = a - l;`
Packit	6c4009	`              if (l & 1)`
Packit	6c4009	`                {`
Packit	6c4009	`                  e -= 1.0;`
Packit	6c4009	`                  e += b;`
Packit	6c4009	`                  e += c;`
Packit	6c4009	`                  e += d;`
Packit	6c4009	`                  e *= M_PI_4;`
Packit	6c4009	`                  reduced_sincos (e, l + 1, signbit, sinx, cosx);`
Packit	6c4009	`                }`
Packit	6c4009	`              else`
Packit	6c4009	`                {`
Packit	6c4009	`                  e += b;`
Packit	6c4009	`                  e += c;`
Packit	6c4009	`                  e += d;`
Packit	6c4009	`                  if (e <= 1.0)`
Packit	6c4009	`                    {`
Packit	6c4009	`                      e *= M_PI_4;`
Packit	6c4009	`                      reduced_sincos (e, l + 1, signbit, sinx, cosx);`
Packit	6c4009	`                    }`
Packit	6c4009	`                  else`
Packit	6c4009	`                    {`
Packit	6c4009	`                      l++;`
Packit	6c4009	`                      e -= 2.0;`
Packit	6c4009	`                      e *= M_PI_4;`
Packit	6c4009	`                      reduced_sincos (e, l + 1, signbit, sinx, cosx);`
Packit	6c4009	`                    }`
Packit	6c4009	`                }`
Packit	6c4009	`            }`
Packit	6c4009	`        }`
Packit	6c4009	`      else`
Packit	6c4009	`        {`
Packit	6c4009	`          if (ix == 0x7f800000)`
Packit	6c4009	`            __set_errno (EDOM);`
Packit	6c4009	`          /* sin/cos(Inf or NaN) is NaN.  */`
Packit	6c4009	`          *sinx = *cosx = x - x;`
Packit	6c4009	`        }`
Packit	6c4009	`    }`
Packit	6c4009	`}`
Packit	6c4009
Packit	6c4009	`#ifndef SINCOSF`
Packit	6c4009	`libm_alias_float (__sincos, sincos)`
Packit	6c4009	`#endif /* !SINCOSF -- not emitted here while SINCOSF stays defined by this file. */`

source-git / glibc

Source Code

Blame sysdeps/x86_64/fpu/multiarch/s_sincosf-fma.c