Blame sysdeps/i386/fpu/s_cbrtf.S

Packit 6c4009
/* Compute cubic root of float value.
Packit 6c4009
   Copyright (C) 1997-2018 Free Software Foundation, Inc.
Packit 6c4009
   This file is part of the GNU C Library.
Packit 6c4009
   Contributed by Dirk Alboth <dirka@uni-paderborn.de> and
Packit 6c4009
   Ulrich Drepper <drepper@cygnus.com>, 1997.
Packit 6c4009
Packit 6c4009
   The GNU C Library is free software; you can redistribute it and/or
Packit 6c4009
   modify it under the terms of the GNU Lesser General Public
Packit 6c4009
   License as published by the Free Software Foundation; either
Packit 6c4009
   version 2.1 of the License, or (at your option) any later version.
Packit 6c4009
Packit 6c4009
   The GNU C Library is distributed in the hope that it will be useful,
Packit 6c4009
   but WITHOUT ANY WARRANTY; without even the implied warranty of
Packit 6c4009
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
Packit 6c4009
   Lesser General Public License for more details.
Packit 6c4009
Packit 6c4009
   You should have received a copy of the GNU Lesser General Public
Packit 6c4009
   License along with the GNU C Library; if not, see
Packit 6c4009
   <http://www.gnu.org/licenses/>.  */
Packit 6c4009
Packit 6c4009
#include <machine/asm.h>
Packit 6c4009
#include <libm-alias-float.h>
Packit 6c4009
Packit 6c4009
        .section .rodata
Packit 6c4009
Packit 6c4009
        .align ALIGNARG(4)
Packit 6c4009
        .type f3,@object
Packit 6c4009
f3:	.double 0.191502161678719066
Packit 6c4009
        ASM_SIZE_DIRECTIVE(f3)
Packit 6c4009
        .type f2,@object
Packit 6c4009
f2:	.double 0.697570460207922770
Packit 6c4009
        ASM_SIZE_DIRECTIVE(f2)
Packit 6c4009
        .type f1,@object
Packit 6c4009
f1:	.double 0.492659620528969547
Packit 6c4009
        ASM_SIZE_DIRECTIVE(f1)
Packit 6c4009
Packit 6c4009
#define CBRT2		1.2599210498948731648
Packit 6c4009
#define ONE_CBRT2	0.793700525984099737355196796584
Packit 6c4009
#define SQR_CBRT2	1.5874010519681994748
Packit 6c4009
#define ONE_SQR_CBRT2	0.629960524947436582364439673883
Packit 6c4009
Packit 6c4009
	.type factor,@object
Packit 6c4009
        .align ALIGNARG(4)
Packit 6c4009
factor:	.double ONE_SQR_CBRT2
Packit 6c4009
	.double ONE_CBRT2
Packit 6c4009
	.double 1.0
Packit 6c4009
	.double CBRT2
Packit 6c4009
	.double SQR_CBRT2
Packit 6c4009
	ASM_SIZE_DIRECTIVE(factor)
Packit 6c4009
Packit 6c4009
        .type two25,@object
Packit 6c4009
two25:	.byte 0, 0, 0, 0x4c
Packit 6c4009
        ASM_SIZE_DIRECTIVE(two25)
Packit 6c4009
Packit 6c4009
#ifdef PIC
Packit 6c4009
#define MO(op) op##@GOTOFF(%ebx)
Packit 6c4009
#define MOX(op,x) op##@GOTOFF(%ebx,x,1)
Packit 6c4009
#else
Packit 6c4009
#define MO(op) op
Packit 6c4009
#define MOX(op,x) op(x)
Packit 6c4009
#endif
Packit 6c4009
Packit 6c4009
	.text
Packit 6c4009
ENTRY(__cbrtf)
Packit 6c4009
	movl	4(%esp), %eax
Packit 6c4009
	xorl	%ecx, %ecx
Packit 6c4009
	movl	%eax, %edx
Packit 6c4009
	andl	$0x7fffffff, %eax
Packit 6c4009
	jz	1f
Packit 6c4009
	cmpl	$0x7f800000, %eax
Packit 6c4009
	jae	1f
Packit 6c4009
Packit 6c4009
#ifdef PIC
Packit 6c4009
	pushl	%ebx
Packit 6c4009
	cfi_adjust_cfa_offset (4)
Packit 6c4009
	cfi_rel_offset (ebx, 0)
Packit 6c4009
	LOAD_PIC_REG (bx)
Packit 6c4009
#endif
Packit 6c4009
Packit 6c4009
	cmpl	$0x00800000, %eax
Packit 6c4009
	jae	2f
Packit 6c4009
Packit 6c4009
#ifdef PIC
Packit 6c4009
	flds	8(%esp)
Packit 6c4009
#else
Packit 6c4009
	flds	4(%esp)
Packit 6c4009
#endif
Packit 6c4009
	fmuls	MO(two25)
Packit 6c4009
	movl	$-25, %ecx
Packit 6c4009
#ifdef PIC
Packit 6c4009
	fstps	8(%esp)
Packit 6c4009
	movl	8(%esp), %eax
Packit 6c4009
#else
Packit 6c4009
	fstps	4(%esp)
Packit 6c4009
	movl	4(%esp), %eax
Packit 6c4009
#endif
Packit 6c4009
	movl	%eax, %edx
Packit 6c4009
	andl	$0x7fffffff, %eax
Packit 6c4009
Packit 6c4009
2:	shrl	$23, %eax
Packit 6c4009
	andl	$0x807fffff, %edx
Packit 6c4009
	subl	$126, %eax
Packit 6c4009
	orl	$0x3f000000, %edx
Packit 6c4009
	addl	%eax, %ecx
Packit 6c4009
#ifdef PIC
Packit 6c4009
	movl	%edx, 8(%esp)
Packit 6c4009
Packit 6c4009
	flds	8(%esp)			/* xm */
Packit 6c4009
#else
Packit 6c4009
	movl	%edx, 4(%esp)
Packit 6c4009
Packit 6c4009
	flds	4(%esp)			/* xm */
Packit 6c4009
#endif
Packit 6c4009
	fabs
Packit 6c4009
Packit 6c4009
	/* The following code has two tracks:
Packit 6c4009
	    a) compute the normalized cbrt value
Packit 6c4009
	    b) compute xe/3 and xe%3
Packit 6c4009
	   The right track computes the value for b) and this is done
Packit 6c4009
	   in an optimized way by avoiding division.
Packit 6c4009
Packit 6c4009
	   But why two tracks at all?  Very easy: efficiency.  Some FP
Packit 6c4009
	   instruction can overlap with a certain amount of integer (and
Packit 6c4009
	   FP) instructions.  So we get (except for the imull) all
Packit 6c4009
	   instructions for free.  */
Packit 6c4009
Packit 6c4009
	fld	%st(0)			/* xm : xm */
Packit 6c4009
	fmull	MO(f3)			/* f3*xm : xm */
Packit 6c4009
			movl	$1431655766, %eax
Packit 6c4009
	fsubrl	MO(f2)			/* f2-f3*xm : xm */
Packit 6c4009
			imull	%ecx
Packit 6c4009
	fmul	%st(1)			/* (f2-f3*xm)*xm : xm */
Packit 6c4009
			movl	%ecx, %eax
Packit 6c4009
	faddl	MO(f1)			/* u:=f1+(f2-f3*xm)*xm : xm */
Packit 6c4009
			sarl	$31, %eax
Packit 6c4009
	fld	%st			/* u : u : xm */
Packit 6c4009
			subl	%eax, %edx
Packit 6c4009
	fmul	%st(1)			/* u*u : u : xm */
Packit 6c4009
	fld	%st(2)			/* xm : u*u : u : xm */
Packit 6c4009
	fadd	%st			/* 2*xm : u*u : u : xm */
Packit 6c4009
	fxch	%st(1)			/* u*u : 2*xm : u : xm */
Packit 6c4009
	fmul	%st(2)			/* t2:=u*u*u : 2*xm : u : xm */
Packit 6c4009
			movl	%edx, %eax
Packit 6c4009
	fadd	%st, %st(1)		/* t2 : t2+2*xm : u : xm */
Packit 6c4009
			leal	(%edx,%edx,2),%edx
Packit 6c4009
	fadd	%st(0)			/* 2*t2 : t2+2*xm : u : xm */
Packit 6c4009
			subl	%edx, %ecx
Packit 6c4009
	faddp	%st, %st(3)		/* t2+2*xm : u : 2*t2+xm */
Packit 6c4009
			shll	$3, %ecx
Packit 6c4009
	fmulp				/* u*(t2+2*xm) : 2*t2+xm */
Packit 6c4009
	fdivp	%st, %st(1)		/* u*(t2+2*xm)/(2*t2+xm) */
Packit 6c4009
	fmull	MOX(16+factor,%ecx)	/* u*(t2+2*xm)/(2*t2+xm)*FACT */
Packit 6c4009
	pushl	%eax
Packit 6c4009
	cfi_adjust_cfa_offset (4)
Packit 6c4009
	fildl	(%esp)			/* xe/3 : u*(t2+2*xm)/(2*t2+xm)*FACT */
Packit 6c4009
	fxch				/* u*(t2+2*xm)/(2*t2+xm)*FACT : xe/3 */
Packit 6c4009
	fscale				/* u*(t2+2*xm)/(2*t2+xm)*FACT*2^xe/3 */
Packit 6c4009
	popl	%edx
Packit 6c4009
	cfi_adjust_cfa_offset (-4)
Packit 6c4009
#ifdef PIC
Packit 6c4009
	movl	8(%esp), %eax
Packit 6c4009
	popl	%ebx
Packit 6c4009
	cfi_adjust_cfa_offset (-4)
Packit 6c4009
	cfi_restore (ebx)
Packit 6c4009
#else
Packit 6c4009
	movl	4(%esp), %eax
Packit 6c4009
#endif
Packit 6c4009
	testl	%eax, %eax
Packit 6c4009
	fstp	%st(1)
Packit 6c4009
	jns	4f
Packit 6c4009
	fchs
Packit 6c4009
4:	ret
Packit 6c4009
Packit 6c4009
	/* Return the argument.  */
Packit 6c4009
1:	flds	4(%esp)
Packit 6c4009
	ret
Packit 6c4009
END(__cbrtf)
Packit 6c4009
libm_alias_float (__cbrt, cbrt)