Blame sysdeps/i386/fpu/s_cbrtl.S

Packit 6c4009
/* Compute cubic root of long double value.
Packit 6c4009
   Copyright (C) 1997-2018 Free Software Foundation, Inc.
Packit 6c4009
   This file is part of the GNU C Library.
Packit 6c4009
   Contributed by Dirk Alboth <dirka@uni-paderborn.de> and
Packit 6c4009
   Ulrich Drepper <drepper@cygnus.com>, 1997.
Packit 6c4009
Packit 6c4009
   The GNU C Library is free software; you can redistribute it and/or
Packit 6c4009
   modify it under the terms of the GNU Lesser General Public
Packit 6c4009
   License as published by the Free Software Foundation; either
Packit 6c4009
   version 2.1 of the License, or (at your option) any later version.
Packit 6c4009
Packit 6c4009
   The GNU C Library is distributed in the hope that it will be useful,
Packit 6c4009
   but WITHOUT ANY WARRANTY; without even the implied warranty of
Packit 6c4009
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
Packit 6c4009
   Lesser General Public License for more details.
Packit 6c4009
Packit 6c4009
   You should have received a copy of the GNU Lesser General Public
Packit 6c4009
   License along with the GNU C Library; if not, see
Packit 6c4009
   <http://www.gnu.org/licenses/>.  */
Packit 6c4009
Packit 6c4009
#include <libm-alias-ldouble.h>
Packit 6c4009
#include <machine/asm.h>
Packit 6c4009
Packit 6c4009
        .section .rodata
Packit 6c4009
Packit 6c4009
        .align ALIGNARG(4)
Packit 6c4009
        .type f8,@object
Packit 6c4009
f8:	.tfloat 0.161617097923756032
Packit 6c4009
	ASM_SIZE_DIRECTIVE(f8)
Packit 6c4009
        .align ALIGNARG(4)
Packit 6c4009
        .type f7,@object
Packit 6c4009
f7:	.tfloat -0.988553671195413709
Packit 6c4009
	ASM_SIZE_DIRECTIVE(f7)
Packit 6c4009
        .align ALIGNARG(4)
Packit 6c4009
        .type f6,@object
Packit 6c4009
f6:	.tfloat 2.65298938441952296
Packit 6c4009
	ASM_SIZE_DIRECTIVE(f6)
Packit 6c4009
        .align ALIGNARG(4)
Packit 6c4009
        .type f5,@object
Packit 6c4009
f5:	.tfloat -4.11151425200350531
Packit 6c4009
	ASM_SIZE_DIRECTIVE(f5)
Packit 6c4009
        .align ALIGNARG(4)
Packit 6c4009
        .type f4,@object
Packit 6c4009
f4:	.tfloat 4.09559907378707839
Packit 6c4009
	ASM_SIZE_DIRECTIVE(f4)
Packit 6c4009
        .align ALIGNARG(4)
Packit 6c4009
        .type f3,@object
Packit 6c4009
f3:	.tfloat -2.82414939754975962
Packit 6c4009
	ASM_SIZE_DIRECTIVE(f3)
Packit 6c4009
        .align ALIGNARG(4)
Packit 6c4009
        .type f2,@object
Packit 6c4009
f2:	.tfloat 1.67595307700780102
Packit 6c4009
	ASM_SIZE_DIRECTIVE(f2)
Packit 6c4009
        .align ALIGNARG(4)
Packit 6c4009
        .type f1,@object
Packit 6c4009
f1:	.tfloat 0.338058687610520237
Packit 6c4009
	ASM_SIZE_DIRECTIVE(f1)
Packit 6c4009
Packit 6c4009
#define CBRT2		1.2599210498948731648
Packit 6c4009
#define ONE_CBRT2	0.793700525984099737355196796584
Packit 6c4009
#define SQR_CBRT2	1.5874010519681994748
Packit 6c4009
#define ONE_SQR_CBRT2	0.629960524947436582364439673883
Packit 6c4009
Packit 6c4009
	/* We make the entries in the following table all 16 bytes
Packit 6c4009
	   wide to avoid having to implement a multiplication by 10.  */
Packit 6c4009
	.type factor,@object
Packit 6c4009
        .align ALIGNARG(4)
Packit 6c4009
factor:	.tfloat ONE_SQR_CBRT2
Packit 6c4009
	.byte 0, 0, 0, 0, 0, 0
Packit 6c4009
	.tfloat ONE_CBRT2
Packit 6c4009
	.byte 0, 0, 0, 0, 0, 0
Packit 6c4009
	.tfloat 1.0
Packit 6c4009
	.byte 0, 0, 0, 0, 0, 0
Packit 6c4009
	.tfloat CBRT2
Packit 6c4009
	.byte 0, 0, 0, 0, 0, 0
Packit 6c4009
	.tfloat SQR_CBRT2
Packit 6c4009
	ASM_SIZE_DIRECTIVE(factor)
Packit 6c4009
Packit 6c4009
        .type two64,@object
Packit 6c4009
        .align ALIGNARG(4)
Packit 6c4009
two64:  .byte 0, 0, 0, 0, 0, 0, 0xf0, 0x43
Packit 6c4009
        ASM_SIZE_DIRECTIVE(two64)
Packit 6c4009
Packit 6c4009
#ifdef PIC
Packit 6c4009
#define MO(op) op##@GOTOFF(%ebx)
Packit 6c4009
#define MOX(op,x) op##@GOTOFF(%ebx,x,1)
Packit 6c4009
#else
Packit 6c4009
#define MO(op) op
Packit 6c4009
#define MOX(op,x) op(x)
Packit 6c4009
#endif
Packit 6c4009
Packit 6c4009
	.text
Packit 6c4009
ENTRY(__cbrtl)
Packit 6c4009
	movl	4(%esp), %ecx
Packit 6c4009
	movl	12(%esp), %eax
Packit 6c4009
	orl	8(%esp), %ecx
Packit 6c4009
	movl	%eax, %edx
Packit 6c4009
	andl	$0x7fff, %eax
Packit 6c4009
	orl	%eax, %ecx
Packit 6c4009
	jz	1f
Packit 6c4009
	xorl	%ecx, %ecx
Packit 6c4009
	cmpl	$0x7fff, %eax
Packit 6c4009
	je	1f
Packit 6c4009
Packit 6c4009
#ifdef PIC
Packit 6c4009
	pushl	%ebx
Packit 6c4009
	cfi_adjust_cfa_offset (4)
Packit 6c4009
	cfi_rel_offset (ebx, 0)
Packit 6c4009
	LOAD_PIC_REG (bx)
Packit 6c4009
#endif
Packit 6c4009
Packit 6c4009
	cmpl	$0, %eax
Packit 6c4009
	jne	2f
Packit 6c4009
Packit 6c4009
#ifdef PIC
Packit 6c4009
	fldt	8(%esp)
Packit 6c4009
#else
Packit 6c4009
	fldt	4(%esp)
Packit 6c4009
#endif
Packit 6c4009
	fmull	MO(two64)
Packit 6c4009
	movl	$-64, %ecx
Packit 6c4009
#ifdef PIC
Packit 6c4009
	fstpt	8(%esp)
Packit 6c4009
	movl	16(%esp), %eax
Packit 6c4009
#else
Packit 6c4009
	fstpt	4(%esp)
Packit 6c4009
	movl	12(%esp), %eax
Packit 6c4009
#endif
Packit 6c4009
	movl	%eax, %edx
Packit 6c4009
	andl	$0x7fff, %eax
Packit 6c4009
Packit 6c4009
2:	andl	$0x8000, %edx
Packit 6c4009
	subl	$16382, %eax
Packit 6c4009
	orl	$0x3ffe, %edx
Packit 6c4009
	addl	%eax, %ecx
Packit 6c4009
#ifdef PIC
Packit 6c4009
	movl	%edx, 16(%esp)
Packit 6c4009
Packit 6c4009
	fldt	8(%esp)			/* xm */
Packit 6c4009
#else
Packit 6c4009
	movl	%edx, 12(%esp)
Packit 6c4009
Packit 6c4009
	fldt	4(%esp)			/* xm */
Packit 6c4009
#endif
Packit 6c4009
	fabs
Packit 6c4009
Packit 6c4009
	/* The following code has two tracks:
Packit 6c4009
	    a) compute the normalized cbrt value
Packit 6c4009
	    b) compute xe/3 and xe%3
Packit 6c4009
	   The right track computes the value for b) and this is done
Packit 6c4009
	   in an optimized way by avoiding division.
Packit 6c4009
Packit 6c4009
	   But why two tracks at all?  Very easy: efficiency.  Some FP
Packit 6c4009
	   instruction can overlap with a certain amount of integer (and
Packit 6c4009
	   FP) instructions.  So we get (except for the imull) all
Packit 6c4009
	   instructions for free.  */
Packit 6c4009
Packit 6c4009
	fldt	MO(f8)			/* f8 : xm */
Packit 6c4009
	fmul	%st(1)			/* f8*xm : xm */
Packit 6c4009
Packit 6c4009
	fldt	MO(f7)
Packit 6c4009
	faddp				/* f7+f8*xm : xm */
Packit 6c4009
	fmul	%st(1)			/* (f7+f8*xm)*xm : xm */
Packit 6c4009
			movl	$1431655766, %eax
Packit 6c4009
	fldt	MO(f6)
Packit 6c4009
	faddp				/* f6+(f7+f8*xm)*xm : xm */
Packit 6c4009
			imull	%ecx
Packit 6c4009
	fmul	%st(1)			/* (f6+(f7+f8*xm)*xm)*xm : xm */
Packit 6c4009
			movl	%ecx, %eax
Packit 6c4009
	fldt	MO(f5)
Packit 6c4009
	faddp				/* f5+(f6+(f7+f8*xm)*xm)*xm : xm */
Packit 6c4009
			sarl	$31, %eax
Packit 6c4009
	fmul	%st(1)			/* (f5+(f6+(f7+f8*xm)*xm)*xm)*xm : xm */
Packit 6c4009
			subl	%eax, %edx
Packit 6c4009
	fldt	MO(f4)
Packit 6c4009
	faddp				/* f4+(f5+(f6+(f7+f8*xm)*xm)*xm)*xm : xm */
Packit 6c4009
	fmul	%st(1)			/* (f4+(f5+(f6+(f7+f8*xm)*xm)*xm)*xm)*xm : xm */
Packit 6c4009
	fldt	MO(f3)
Packit 6c4009
	faddp				/* f3+(f4+(f5+(f6+(f7+f8*xm)*xm)*xm)*xm)*xm : xm */
Packit 6c4009
	fmul	%st(1)			/* (f3+(f4+(f5+(f6+(f7+f8*xm)*xm)*xm)*xm)*xm)*xm : xm */
Packit 6c4009
	fldt	MO(f2)
Packit 6c4009
	faddp				/* f2+(f3+(f4+(f5+(f6+(f7+f8*xm)*xm)*xm)*xm)*xm)*xm : xm */
Packit 6c4009
	fmul	%st(1)			/* (f2+(f3+(f4+(f5+(f6+(f7+f8*xm)*xm)*xm)*xm)*xm)*xm)*xm : xm */
Packit 6c4009
	fldt	MO(f1)
Packit 6c4009
	faddp				/* u:=f1+(f2+(f3+(f4+(f5+(f6+(f7+f8*xm)*xm)*xm)*xm)*xm)*xm)*xm : xm */
Packit 6c4009
Packit 6c4009
	fld	%st			/* u : u : xm */
Packit 6c4009
	fmul	%st(1)			/* u*u : u : xm */
Packit 6c4009
	fld	%st(2)			/* xm : u*u : u : xm */
Packit 6c4009
	fadd	%st			/* 2*xm : u*u : u : xm */
Packit 6c4009
	fxch	%st(1)			/* u*u : 2*xm : u : xm */
Packit 6c4009
	fmul	%st(2)			/* t2:=u*u*u : 2*xm : u : xm */
Packit 6c4009
			movl	%edx, %eax
Packit 6c4009
	fadd	%st, %st(1)		/* t2 : t2+2*xm : u : xm */
Packit 6c4009
			leal	(%edx,%edx,2),%edx
Packit 6c4009
	fadd	%st(0)			/* 2*t2 : t2+2*xm : u : xm */
Packit 6c4009
			subl	%edx, %ecx
Packit 6c4009
	faddp	%st, %st(3)		/* t2+2*xm : u : 2*t2+xm */
Packit 6c4009
			shll	$4, %ecx
Packit 6c4009
	fmulp				/* u*(t2+2*xm) : 2*t2+xm */
Packit 6c4009
	fdivp	%st, %st(1)		/* u*(t2+2*xm)/(2*t2+xm) */
Packit 6c4009
	fldt	MOX(32+factor,%ecx)
Packit 6c4009
	fmulp				/* u*(t2+2*xm)/(2*t2+xm)*FACT */
Packit 6c4009
	pushl	%eax
Packit 6c4009
	cfi_adjust_cfa_offset (4)
Packit 6c4009
	fildl	(%esp)			/* xe/3 : u*(t2+2*xm)/(2*t2+xm)*FACT */
Packit 6c4009
	fxch				/* u*(t2+2*xm)/(2*t2+xm)*FACT : xe/3 */
Packit 6c4009
	fscale				/* u*(t2+2*xm)/(2*t2+xm)*FACT*2^xe/3 */
Packit 6c4009
	popl	%edx
Packit 6c4009
	cfi_adjust_cfa_offset (-4)
Packit 6c4009
#ifdef PIC
Packit 6c4009
	movl	16(%esp), %eax
Packit 6c4009
	popl	%ebx
Packit 6c4009
	cfi_adjust_cfa_offset (-4)
Packit 6c4009
	cfi_restore (ebx)
Packit 6c4009
#else
Packit 6c4009
	movl	12(%esp), %eax
Packit 6c4009
#endif
Packit 6c4009
	testl	$0x8000, %eax
Packit 6c4009
	fstp	%st(1)
Packit 6c4009
	jz	4f
Packit 6c4009
	fchs
Packit 6c4009
4:	ret
Packit 6c4009
Packit 6c4009
	/* Return the argument.  */
Packit 6c4009
1:	fldt	4(%esp)
Packit 6c4009
	fadd	%st
Packit 6c4009
	ret
Packit 6c4009
END(__cbrtl)
Packit 6c4009
libm_alias_ldouble (__cbrt, cbrt)