Blame src/libmpg123/dct36_3dnow.S

Packit c32a2d
/*
Packit c32a2d
	dct36_3dnow.S: Replacement of dct36() with AMD's 3DNow! SIMD operations support
Packit c32a2d
Packit c32a2d
	copyright ?-2006 by the mpg123 project - free software under the terms of the LGPL 2.1
Packit c32a2d
	see COPYING and AUTHORS files in distribution or http://mpg123.org
Packit c32a2d
	initially written by Syuuhei Kashiyama
Packit c32a2d
Packit c32a2d
	This code is based on 'dct36_3dnow.s' by Syuuhei Kashiyama
Packit c32a2d
	<squash@mb.kcom.ne.jp>; only two types of changes have been made:
Packit c32a2d
Packit c32a2d
	- remove PREFETCH instruction for speedup
Packit c32a2d
	- change function name to support 3DNow! automatic detection
Packit c32a2d
Packit c32a2d
	You can find Kashiyama's original 3dnow! support patch
Packit c32a2d
	(for mpg123-0.59o) at
Packit c32a2d
	http://user.ecc.u-tokyo.ac.jp/~g810370/linux-simd/ (Japanese).
Packit c32a2d
Packit c32a2d
	by KIMURA Takuhiro <kim@hannah.ipc.miyakyo-u.ac.jp> - until 31.Mar.1999
Packit c32a2d
	                   <kim@comtec.co.jp>               - after  1.Apr.1999
Packit c32a2d
Packit c32a2d
	Replacement of dct36() with AMD's 3DNow! SIMD operations support
Packit c32a2d
Packit c32a2d
	Syuuhei Kashiyama <squash@mb.kcom.ne.jp>
Packit c32a2d
Packit c32a2d
	The author of this program disclaim whole expressed or implied
Packit c32a2d
	warranties with regard to this program, and in no event shall the
Packit c32a2d
	author of this program liable to whatever resulted from the use of
Packit c32a2d
	this program. Use it at your own risk.
Packit c32a2d
*/
Packit c32a2d
Packit c32a2d
#include "mangle.h"
Packit c32a2d
Packit c32a2d
	/*
	 * dct36_3dnow: 3DNow! replacement for dct36().
	 *
	 * Five stack arguments are loaded into registers at the end of this
	 * prologue and used as follows by the body of the function:
	 *    8(%ebp) -> %eax  18 floats (byte offsets 0..71), read and
	 *                     rewritten in place
	 *   12(%ebp) -> %esi  floats at byte offsets 0..68, only read; they
	 *                     are added into the values stored through %ebx
	 *   16(%ebp) -> %ecx  floats at byte offsets 0..68, only written
	 *   20(%ebp) -> %edx  multipliers at byte offsets 0..140, only read
	 *   24(%ebp) -> %ebx  18 floats written at multiples of 128 bytes
	 *                     (byte offsets 0..2176)
	 * NOTE(review): presumably these are inbuf, o1, o2, wintab and tsbuf
	 * of the C dct36() - confirm against the C prototype.
	 */
	.globl ASM_NAME(dct36_3dnow)
Packit c32a2d
/*	.type	 ASM_NAME(dct36_3dnow),@function */
Packit c32a2d
ASM_NAME(dct36_3dnow):
Packit c32a2d
	pushl %ebp
Packit c32a2d
	movl %esp,%ebp
Packit c32a2d
#if defined(PIC) && defined(__APPLE__)
	/* Reserve the local slot -4(%ebp); it holds the tfcos36 pointer. */
Packit c32a2d
	sub $4,%esp
Packit c32a2d
#endif
Packit c32a2d
	pushl %esi
Packit c32a2d
	pushl %ebx
Packit c32a2d
Packit c32a2d
	/*
	 * %ebx is loaded with the fifth argument below, so _EBX_ is
	 * redefined to %edi before the GOT macros are expanded.
	 * NOTE(review): PREPARE_GOT / GET_GOT are defined in mangle.h (not
	 * visible here); presumably they save and set up _EBX_ as the GOT
	 * base register - confirm there.
	 */
	#undef _EBX_
Packit c32a2d
	#define _EBX_ %edi
Packit c32a2d
	PREPARE_GOT
Packit c32a2d
	GET_GOT
Packit c32a2d
#if defined(PIC) && defined(__APPLE__)
	/*
	 * Apple PIC: both tables are reached through pointers.  The COS9
	 * pointer stays in %edi.  The tfcos36 pointer is parked in -4(%ebp)
	 * because %eax is overwritten with the first argument below; the
	 * code reloads it into %eax around every tfcos36 access.
	 */
Packit c32a2d
	#define _COS9_ 0(%edi)
Packit c32a2d
	#define _tfcos36_ 0(%eax)
Packit c32a2d
	mov GLOBAL_VAR_PTR(tfcos36), %eax
Packit c32a2d
	mov GLOBAL_VAR_PTR(COS9), %edi
Packit c32a2d
	mov %eax, -4(%ebp)
Packit c32a2d
#else
	/* Everywhere else the tables are addressed through GLOBAL_VAR(). */
Packit c32a2d
	#define _COS9_ GLOBAL_VAR(COS9)
Packit c32a2d
	#define _tfcos36_ GLOBAL_VAR(tfcos36)
Packit c32a2d
#endif
Packit c32a2d
Packit c32a2d
	/* Load the five stack arguments (roles listed in the header). */
	movl 8(%ebp),%eax
Packit c32a2d
	movl 12(%ebp),%esi
Packit c32a2d
	movl 16(%ebp),%ecx
Packit c32a2d
	movl 20(%ebp),%edx
Packit c32a2d
	movl 24(%ebp),%ebx
Packit c32a2d
Packit c32a2d
	/*
	 * Enter MMX/3DNow! state, then run two in-place addition passes over
	 * the 18 floats x[0..17] at (%eax) (x[k] is at byte offset 4k).
	 * Every sum is formed from values that were loaded before the
	 * matching store, so the passes are equivalent to
	 *   pass 1: x[i] += x[i-1]  for i = 17 down to 1
	 *   pass 2: x[i] += x[i-2]  for i = 17, 15, ..., 3
	 */
	femms
Packit c32a2d
	/* Pass 1: two sums per step; the carried-over high float is */
	/* shifted down (psrlq) and paired with the next load. */
	movq (%eax),%mm0
Packit c32a2d
	movq 4(%eax),%mm1
Packit c32a2d
	pfadd %mm1,%mm0
Packit c32a2d
	movq %mm0,4(%eax)
Packit c32a2d
	psrlq $32,%mm1
Packit c32a2d
	movq 12(%eax),%mm2
Packit c32a2d
	punpckldq %mm2,%mm1
Packit c32a2d
	pfadd %mm2,%mm1
Packit c32a2d
	movq %mm1,12(%eax)
Packit c32a2d
	psrlq $32,%mm2
Packit c32a2d
	movq 20(%eax),%mm3
Packit c32a2d
	punpckldq %mm3,%mm2
Packit c32a2d
	pfadd %mm3,%mm2
Packit c32a2d
	movq %mm2,20(%eax)
Packit c32a2d
	psrlq $32,%mm3
Packit c32a2d
	movq 28(%eax),%mm4
Packit c32a2d
	punpckldq %mm4,%mm3
Packit c32a2d
	pfadd %mm4,%mm3
Packit c32a2d
	movq %mm3,28(%eax)
Packit c32a2d
	psrlq $32,%mm4
Packit c32a2d
	movq 36(%eax),%mm5
Packit c32a2d
	punpckldq %mm5,%mm4
Packit c32a2d
	pfadd %mm5,%mm4
Packit c32a2d
	movq %mm4,36(%eax)
Packit c32a2d
	psrlq $32,%mm5
Packit c32a2d
	movq 44(%eax),%mm6
Packit c32a2d
	punpckldq %mm6,%mm5
Packit c32a2d
	pfadd %mm6,%mm5
Packit c32a2d
	movq %mm5,44(%eax)
Packit c32a2d
	psrlq $32,%mm6
Packit c32a2d
	movq 52(%eax),%mm7
Packit c32a2d
	punpckldq %mm7,%mm6
Packit c32a2d
	pfadd %mm7,%mm6
Packit c32a2d
	movq %mm6,52(%eax)
Packit c32a2d
	psrlq $32,%mm7
Packit c32a2d
	movq 60(%eax),%mm0
Packit c32a2d
	punpckldq %mm0,%mm7
Packit c32a2d
	pfadd %mm0,%mm7
Packit c32a2d
	movq %mm7,60(%eax)
Packit c32a2d
	psrlq $32,%mm0
Packit c32a2d
	/* Last element of pass 1: x[17] = x[16] + x[17] (single float). */
	movd 68(%eax),%mm1
Packit c32a2d
	pfadd %mm1,%mm0
Packit c32a2d
	movd %mm0,68(%eax)
	/* Pass 2: odd-indexed floats only, again two sums per step. */
Packit c32a2d
	movd 4(%eax),%mm0
Packit c32a2d
	movd 12(%eax),%mm1
Packit c32a2d
	punpckldq %mm1,%mm0
Packit c32a2d
	punpckldq 20(%eax),%mm1
Packit c32a2d
	pfadd %mm1,%mm0
Packit c32a2d
	movd %mm0,12(%eax)
Packit c32a2d
	psrlq $32,%mm0
Packit c32a2d
	movd %mm0,20(%eax)
Packit c32a2d
	psrlq $32,%mm1
Packit c32a2d
	movd 28(%eax),%mm2
Packit c32a2d
	punpckldq %mm2,%mm1
Packit c32a2d
	punpckldq 36(%eax),%mm2
Packit c32a2d
	pfadd %mm2,%mm1
Packit c32a2d
	movd %mm1,28(%eax)
Packit c32a2d
	psrlq $32,%mm1
Packit c32a2d
	movd %mm1,36(%eax)
Packit c32a2d
	psrlq $32,%mm2
Packit c32a2d
	movd 44(%eax),%mm3
Packit c32a2d
	punpckldq %mm3,%mm2
Packit c32a2d
	punpckldq 52(%eax),%mm3
Packit c32a2d
	pfadd %mm3,%mm2
Packit c32a2d
	movd %mm2,44(%eax)
Packit c32a2d
	psrlq $32,%mm2
Packit c32a2d
	movd %mm2,52(%eax)
Packit c32a2d
	psrlq $32,%mm3
Packit c32a2d
	movd 60(%eax),%mm4
Packit c32a2d
	punpckldq %mm4,%mm3
Packit c32a2d
	punpckldq 68(%eax),%mm4
Packit c32a2d
	pfadd %mm4,%mm3
Packit c32a2d
	movd %mm3,60(%eax)
Packit c32a2d
	psrlq $32,%mm3
Packit c32a2d
	movd %mm3,68(%eax)
Packit c32a2d
Packit c32a2d
	/*
	 * Notation: x[k], o[k], w[k] = float k (byte offset 4k) at (%eax),
	 * (%esi), (%edx); C9[k], tf[k] = float k of COS9, tfcos36.  Each MMX
	 * register holds a (lo, hi) pair of floats, so x[6..7] means
	 * lo = x[6], hi = x[7].
	 *
	 * mm0 = x[6..7] * C9[3] and mm1 = x[12..13] * C9[6] are computed once
	 * here and are read again by later sections of the function.
	 */
	movq 24(%eax),%mm0
Packit c32a2d
	movq 48(%eax),%mm1
Packit c32a2d
	movd 12+_COS9_,%mm2
Packit c32a2d
	punpckldq %mm2,%mm2
Packit c32a2d
	movd 24+_COS9_,%mm3
Packit c32a2d
	punpckldq %mm3,%mm3
Packit c32a2d
	pfmul %mm2,%mm0
Packit c32a2d
	pfmul %mm3,%mm1
	/*
	 * mm7 = (1.0, 0.0): integer 1 converted by pi2fd.  It is the template
	 * for the (1.0, tf[k]) multipliers built below and in later sections.
	 * %eax is borrowed for the constant and restored right away.
	 */
Packit c32a2d
	pushl %eax
Packit c32a2d
	movl $1,%eax
Packit c32a2d
	movd %eax,%mm7
Packit c32a2d
	pi2fd %mm7,%mm7
Packit c32a2d
	popl %eax
	/* mm2 = x[2..3]*C9[1] + mm0 + x[10..11]*C9[5] + x[14..15]*C9[7] */
Packit c32a2d
	movq 8(%eax),%mm2
Packit c32a2d
	movd 4+_COS9_,%mm3
Packit c32a2d
	punpckldq %mm3,%mm3
Packit c32a2d
	pfmul %mm3,%mm2
Packit c32a2d
	pfadd %mm0,%mm2
Packit c32a2d
	movq 40(%eax),%mm3
Packit c32a2d
	movd 20+_COS9_,%mm4
Packit c32a2d
	punpckldq %mm4,%mm4
Packit c32a2d
	pfmul %mm4,%mm3
Packit c32a2d
	pfadd %mm3,%mm2
Packit c32a2d
	movq 56(%eax),%mm3
Packit c32a2d
	movd 28+_COS9_,%mm4
Packit c32a2d
	punpckldq %mm4,%mm4
Packit c32a2d
	pfmul %mm4,%mm3
Packit c32a2d
	pfadd %mm3,%mm2
	/* mm3 = x[0..1] + x[4..5]*C9[2] + x[8..9]*C9[4] + mm1 */
	/*       + x[16..17]*C9[8] */
Packit c32a2d
	movq (%eax),%mm3
Packit c32a2d
	movq 16(%eax),%mm4
Packit c32a2d
	movd 8+_COS9_,%mm5
Packit c32a2d
	punpckldq %mm5,%mm5
Packit c32a2d
	pfmul %mm5,%mm4
Packit c32a2d
	pfadd %mm4,%mm3
Packit c32a2d
	movq 32(%eax),%mm4
Packit c32a2d
	movd 16+_COS9_,%mm5
Packit c32a2d
	punpckldq %mm5,%mm5
Packit c32a2d
	pfmul %mm5,%mm4
Packit c32a2d
	pfadd %mm4,%mm3
Packit c32a2d
	pfadd %mm1,%mm3
Packit c32a2d
	movq 64(%eax),%mm4
Packit c32a2d
	movd 32+_COS9_,%mm5
Packit c32a2d
	punpckldq %mm5,%mm5
Packit c32a2d
	pfmul %mm5,%mm4
Packit c32a2d
	pfadd %mm4,%mm3
	/*
	 * First output set: mm4 = mm2 + mm3, hi half scaled by tf[0].
	 * With s = lo + hi (pfacc) and d = lo - hi:
	 *   36(%ecx) = s * w[27]          32(%ecx) = s * w[26]
	 *   1024(%ebx) = o[8] + d * w[8]  1152(%ebx) = o[9] + d * w[9]
	 */
Packit c32a2d
	movq %mm2,%mm4
Packit c32a2d
	pfadd %mm3,%mm4
Packit c32a2d
	movq %mm7,%mm5
Packit c32a2d
#if defined(PIC) && defined(__APPLE__)
	/* Apple PIC: %eax temporarily becomes the tfcos36 pointer. */
Packit c32a2d
	mov -4(%ebp),%eax
Packit c32a2d
#endif
Packit c32a2d
	punpckldq 0+_tfcos36_,%mm5
Packit c32a2d
	pfmul %mm5,%mm4
Packit c32a2d
	movq %mm4,%mm5
Packit c32a2d
	pfacc %mm5,%mm5
Packit c32a2d
	movd 108(%edx),%mm6
Packit c32a2d
	punpckldq 104(%edx),%mm6
Packit c32a2d
	pfmul %mm6,%mm5
Packit c32a2d
	movd %mm5,36(%ecx)
Packit c32a2d
	psrlq $32,%mm5
Packit c32a2d
	movd %mm5,32(%ecx)
	/* Build d = lo - hi of mm4 and copy it into both halves of mm5. */
Packit c32a2d
	movq %mm4,%mm6
Packit c32a2d
	punpckldq %mm6,%mm5
Packit c32a2d
	pfsub %mm6,%mm5
Packit c32a2d
	punpckhdq %mm5,%mm5
Packit c32a2d
	movd 32(%edx),%mm6
Packit c32a2d
	punpckldq 36(%edx),%mm6
Packit c32a2d
	pfmul %mm6,%mm5
Packit c32a2d
	movd 32(%esi),%mm6
Packit c32a2d
	punpckldq 36(%esi),%mm6
Packit c32a2d
	pfadd %mm6,%mm5
Packit c32a2d
	movd %mm5,1024(%ebx)
Packit c32a2d
	psrlq $32,%mm5
Packit c32a2d
	movd %mm5,1152(%ebx)
	/*
	 * Second output set: mm4 = mm3 - mm2, hi half scaled by tf[8].
	 *   68(%ecx) = s * w[35]          0(%ecx) = s * w[18]
	 *   0(%ebx) = o[0] + d * w[0]     2176(%ebx) = o[17] + d * w[17]
	 */
Packit c32a2d
	movq %mm3,%mm4
Packit c32a2d
	pfsub %mm2,%mm4
Packit c32a2d
	movq %mm7,%mm5
Packit c32a2d
	punpckldq 32+_tfcos36_,%mm5
Packit c32a2d
#if defined(PIC) && defined(__APPLE__)
	/* Apple PIC: restore %eax to the first argument. */
Packit c32a2d
	mov 8(%ebp),%eax
Packit c32a2d
#endif
Packit c32a2d
	pfmul %mm5,%mm4
Packit c32a2d
	movq %mm4,%mm5
Packit c32a2d
	pfacc %mm5,%mm5
Packit c32a2d
	movd 140(%edx),%mm6
Packit c32a2d
	punpckldq 72(%edx),%mm6
Packit c32a2d
	pfmul %mm6,%mm5
Packit c32a2d
	movd %mm5,68(%ecx)
Packit c32a2d
	psrlq $32,%mm5
Packit c32a2d
	movd %mm5,0(%ecx)
Packit c32a2d
	movq %mm4,%mm6
Packit c32a2d
	punpckldq %mm6,%mm5
Packit c32a2d
	pfsub %mm6,%mm5
Packit c32a2d
	punpckhdq %mm5,%mm5
Packit c32a2d
	movd 0(%edx),%mm6
Packit c32a2d
	punpckldq 68(%edx),%mm6
Packit c32a2d
	pfmul %mm6,%mm5
Packit c32a2d
	movd 0(%esi),%mm6
Packit c32a2d
	punpckldq 68(%esi),%mm6
Packit c32a2d
	pfadd %mm6,%mm5
Packit c32a2d
	movd %mm5,0(%ebx)
Packit c32a2d
	psrlq $32,%mm5
Packit c32a2d
	movd %mm5,2176(%ebx)
Packit c32a2d
	/*
	 * Notation: x[k], o[k], w[k] = float k (byte offset 4k) at (%eax),
	 * (%esi), (%edx); C9[k], tf[k] = float k of COS9, tfcos36.  Each MMX
	 * register holds a (lo, hi) pair of floats.  mm7 still holds the
	 * (1.0, 0.0) constant built earlier in the function.
	 *
	 * mm2 = (x[2..3] - x[10..11] - x[14..15]) * C9[3]
	 */
	movq 8(%eax),%mm2
Packit c32a2d
	movq 40(%eax),%mm3
Packit c32a2d
	pfsub %mm3,%mm2
Packit c32a2d
	movq 56(%eax),%mm3
Packit c32a2d
	pfsub %mm3,%mm2
Packit c32a2d
	movd 12+_COS9_,%mm3
Packit c32a2d
	punpckldq %mm3,%mm3
Packit c32a2d
	pfmul %mm3,%mm2
	/* mm3 = (x[4..5] - x[8..9] - x[16..17]) * C9[6] - x[12..13] */
	/*       + x[0..1] */
Packit c32a2d
	movq 16(%eax),%mm3
Packit c32a2d
	movq 32(%eax),%mm4
Packit c32a2d
	pfsub %mm4,%mm3
Packit c32a2d
	movq 64(%eax),%mm4
Packit c32a2d
	pfsub %mm4,%mm3
Packit c32a2d
	movd 24+_COS9_,%mm4
Packit c32a2d
	punpckldq %mm4,%mm4
Packit c32a2d
	pfmul %mm4,%mm3
Packit c32a2d
	movq 48(%eax),%mm4
Packit c32a2d
	pfsub %mm4,%mm3
Packit c32a2d
	movq (%eax),%mm4
Packit c32a2d
	pfadd %mm4,%mm3
	/*
	 * First output set: mm4 = mm2 + mm3, hi half scaled by tf[1].
	 * With s = lo + hi (pfacc) and d = lo - hi:
	 *   40(%ecx) = s * w[28]          28(%ecx) = s * w[25]
	 *   896(%ebx) = o[7] + d * w[7]   1280(%ebx) = o[10] + d * w[10]
	 */
Packit c32a2d
	movq %mm2,%mm4
Packit c32a2d
	pfadd %mm3,%mm4
Packit c32a2d
	movq %mm7,%mm5
Packit c32a2d
#if defined(PIC) && defined(__APPLE__)
	/* Apple PIC: %eax temporarily becomes the tfcos36 pointer. */
Packit c32a2d
	mov -4(%ebp),%eax
Packit c32a2d
#endif
Packit c32a2d
	punpckldq 4+_tfcos36_,%mm5
Packit c32a2d
	pfmul %mm5,%mm4
Packit c32a2d
	movq %mm4,%mm5
Packit c32a2d
	pfacc %mm5,%mm5
Packit c32a2d
	movd 112(%edx),%mm6
Packit c32a2d
	punpckldq 100(%edx),%mm6
Packit c32a2d
	pfmul %mm6,%mm5
Packit c32a2d
	movd %mm5,40(%ecx)
Packit c32a2d
	psrlq $32,%mm5
Packit c32a2d
	movd %mm5,28(%ecx)
	/* Build d = lo - hi of mm4 and copy it into both halves of mm5. */
Packit c32a2d
	movq %mm4,%mm6
Packit c32a2d
	punpckldq %mm6,%mm5
Packit c32a2d
	pfsub %mm6,%mm5
Packit c32a2d
	punpckhdq %mm5,%mm5
Packit c32a2d
	movd 28(%edx),%mm6
Packit c32a2d
	punpckldq 40(%edx),%mm6
Packit c32a2d
	pfmul %mm6,%mm5
Packit c32a2d
	movd 28(%esi),%mm6
Packit c32a2d
	punpckldq 40(%esi),%mm6
Packit c32a2d
	pfadd %mm6,%mm5
Packit c32a2d
	movd %mm5,896(%ebx)
Packit c32a2d
	psrlq $32,%mm5
Packit c32a2d
	movd %mm5,1280(%ebx)
	/*
	 * Second output set: mm4 = mm3 - mm2, hi half scaled by tf[7].
	 *   64(%ecx) = s * w[34]          4(%ecx) = s * w[19]
	 *   128(%ebx) = o[1] + d * w[1]   2048(%ebx) = o[16] + d * w[16]
	 */
Packit c32a2d
	movq %mm3,%mm4
Packit c32a2d
	pfsub %mm2,%mm4
Packit c32a2d
	movq %mm7,%mm5
Packit c32a2d
	punpckldq 28+_tfcos36_,%mm5
Packit c32a2d
#if defined(PIC) && defined(__APPLE__)
	/* Apple PIC: restore %eax to the first argument. */
Packit c32a2d
	mov 8(%ebp),%eax
Packit c32a2d
#endif
Packit c32a2d
	pfmul %mm5,%mm4
Packit c32a2d
	movq %mm4,%mm5
Packit c32a2d
	pfacc %mm5,%mm5
Packit c32a2d
	movd 136(%edx),%mm6
Packit c32a2d
	punpckldq 76(%edx),%mm6
Packit c32a2d
	pfmul %mm6,%mm5
Packit c32a2d
	movd %mm5,64(%ecx)
Packit c32a2d
	psrlq $32,%mm5
Packit c32a2d
	movd %mm5,4(%ecx)
Packit c32a2d
	movq %mm4,%mm6
Packit c32a2d
	punpckldq %mm6,%mm5
Packit c32a2d
	pfsub %mm6,%mm5
Packit c32a2d
	punpckhdq %mm5,%mm5
Packit c32a2d
	movd 4(%edx),%mm6
Packit c32a2d
	punpckldq 64(%edx),%mm6
Packit c32a2d
	pfmul %mm6,%mm5
Packit c32a2d
	movd 4(%esi),%mm6
Packit c32a2d
	punpckldq 64(%esi),%mm6
Packit c32a2d
	pfadd %mm6,%mm5
Packit c32a2d
	movd %mm5,128(%ebx)
Packit c32a2d
	psrlq $32,%mm5
Packit c32a2d
	movd %mm5,2048(%ebx)
Packit c32a2d
Packit c32a2d
	/*
	 * Notation: x[k], o[k], w[k] = float k (byte offset 4k) at (%eax),
	 * (%esi), (%edx); C9[k], tf[k] = float k of COS9, tfcos36.  Each MMX
	 * register holds a (lo, hi) pair of floats.  Still live from earlier
	 * in the function: mm0 = x[6..7] * C9[3], mm1 = x[12..13] * C9[6],
	 * mm7 = (1.0, 0.0).
	 *
	 * mm2 = x[2..3]*C9[5] - mm0 - x[10..11]*C9[7] + x[14..15]*C9[1]
	 */
	movq 8(%eax),%mm2
Packit c32a2d
	movd 20+_COS9_,%mm3
Packit c32a2d
	punpckldq %mm3,%mm3
Packit c32a2d
	pfmul %mm3,%mm2
Packit c32a2d
	pfsub %mm0,%mm2
Packit c32a2d
	movq 40(%eax),%mm3
Packit c32a2d
	movd 28+_COS9_,%mm4
Packit c32a2d
	punpckldq %mm4,%mm4
Packit c32a2d
	pfmul %mm4,%mm3
Packit c32a2d
	pfsub %mm3,%mm2
Packit c32a2d
	movq 56(%eax),%mm3
Packit c32a2d
	movd 4+_COS9_,%mm4
Packit c32a2d
	punpckldq %mm4,%mm4
Packit c32a2d
	pfmul %mm4,%mm3
Packit c32a2d
	pfadd %mm3,%mm2
	/* mm3 = x[0..1] - x[4..5]*C9[8] - x[8..9]*C9[2] + mm1 */
	/*       + x[16..17]*C9[4] */
Packit c32a2d
	movq (%eax),%mm3
Packit c32a2d
	movq 16(%eax),%mm4
Packit c32a2d
	movd 32+_COS9_,%mm5
Packit c32a2d
	punpckldq %mm5,%mm5
Packit c32a2d
	pfmul %mm5,%mm4
Packit c32a2d
	pfsub %mm4,%mm3
Packit c32a2d
	movq 32(%eax),%mm4
Packit c32a2d
	movd 8+_COS9_,%mm5
Packit c32a2d
	punpckldq %mm5,%mm5
Packit c32a2d
	pfmul %mm5,%mm4
Packit c32a2d
	pfsub %mm4,%mm3
Packit c32a2d
	pfadd %mm1,%mm3
Packit c32a2d
	movq 64(%eax),%mm4
Packit c32a2d
	movd 16+_COS9_,%mm5
Packit c32a2d
	punpckldq %mm5,%mm5
Packit c32a2d
	pfmul %mm5,%mm4
Packit c32a2d
	pfadd %mm4,%mm3
	/*
	 * First output set: mm4 = mm2 + mm3, hi half scaled by tf[2].
	 * With s = lo + hi (pfacc) and d = lo - hi:
	 *   44(%ecx) = s * w[29]          24(%ecx) = s * w[24]
	 *   768(%ebx) = o[6] + d * w[6]   1408(%ebx) = o[11] + d * w[11]
	 */
Packit c32a2d
	movq %mm2,%mm4
Packit c32a2d
	pfadd %mm3,%mm4
Packit c32a2d
	movq %mm7,%mm5
Packit c32a2d
#if defined(PIC) && defined(__APPLE__)
	/* Apple PIC: %eax temporarily becomes the tfcos36 pointer. */
Packit c32a2d
	mov -4(%ebp),%eax
Packit c32a2d
#endif
Packit c32a2d
	punpckldq 8+_tfcos36_,%mm5
Packit c32a2d
	pfmul %mm5,%mm4
Packit c32a2d
	movq %mm4,%mm5
Packit c32a2d
	pfacc %mm5,%mm5
Packit c32a2d
	movd 116(%edx),%mm6
Packit c32a2d
	punpckldq 96(%edx),%mm6
Packit c32a2d
	pfmul %mm6,%mm5
Packit c32a2d
	movd %mm5,44(%ecx)
Packit c32a2d
	psrlq $32,%mm5
Packit c32a2d
	movd %mm5,24(%ecx)
	/* Build d = lo - hi of mm4 and copy it into both halves of mm5. */
Packit c32a2d
	movq %mm4,%mm6
Packit c32a2d
	punpckldq %mm6,%mm5
Packit c32a2d
	pfsub %mm6,%mm5
Packit c32a2d
	punpckhdq %mm5,%mm5
Packit c32a2d
	movd 24(%edx),%mm6
Packit c32a2d
	punpckldq 44(%edx),%mm6
Packit c32a2d
	pfmul %mm6,%mm5
Packit c32a2d
	movd 24(%esi),%mm6
Packit c32a2d
	punpckldq 44(%esi),%mm6
Packit c32a2d
	pfadd %mm6,%mm5
Packit c32a2d
	movd %mm5,768(%ebx)
Packit c32a2d
	psrlq $32,%mm5
Packit c32a2d
	movd %mm5,1408(%ebx)
	/*
	 * Second output set: mm4 = mm3 - mm2, hi half scaled by tf[6].
	 *   60(%ecx) = s * w[33]          8(%ecx) = s * w[20]
	 *   256(%ebx) = o[2] + d * w[2]   1920(%ebx) = o[15] + d * w[15]
	 */
Packit c32a2d
	movq %mm3,%mm4
Packit c32a2d
	pfsub %mm2,%mm4
Packit c32a2d
	movq %mm7,%mm5
Packit c32a2d
	punpckldq 24+_tfcos36_,%mm5
Packit c32a2d
#if defined(PIC) && defined(__APPLE__)
	/* Apple PIC: restore %eax to the first argument. */
Packit c32a2d
	mov 8(%ebp),%eax
Packit c32a2d
#endif
Packit c32a2d
	pfmul %mm5,%mm4
Packit c32a2d
	movq %mm4,%mm5
Packit c32a2d
	pfacc %mm5,%mm5
Packit c32a2d
	movd 132(%edx),%mm6
Packit c32a2d
	punpckldq 80(%edx),%mm6
Packit c32a2d
	pfmul %mm6,%mm5
Packit c32a2d
	movd %mm5,60(%ecx)
Packit c32a2d
	psrlq $32,%mm5
Packit c32a2d
	movd %mm5,8(%ecx)
Packit c32a2d
	movq %mm4,%mm6
Packit c32a2d
	punpckldq %mm6,%mm5
Packit c32a2d
	pfsub %mm6,%mm5
Packit c32a2d
	punpckhdq %mm5,%mm5
Packit c32a2d
	movd 8(%edx),%mm6
Packit c32a2d
	punpckldq 60(%edx),%mm6
Packit c32a2d
	pfmul %mm6,%mm5
Packit c32a2d
	movd 8(%esi),%mm6
Packit c32a2d
	punpckldq 60(%esi),%mm6
Packit c32a2d
	pfadd %mm6,%mm5
Packit c32a2d
	movd %mm5,256(%ebx)
Packit c32a2d
	psrlq $32,%mm5
Packit c32a2d
	movd %mm5,1920(%ebx)
Packit c32a2d
	/*
	 * Notation: x[k], o[k], w[k] = float k (byte offset 4k) at (%eax),
	 * (%esi), (%edx); C9[k], tf[k] = float k of COS9, tfcos36.  Each MMX
	 * register holds a (lo, hi) pair of floats.  Still live from earlier
	 * in the function: mm0 = x[6..7] * C9[3], mm1 = x[12..13] * C9[6],
	 * mm7 = (1.0, 0.0).
	 *
	 * mm2 = x[2..3]*C9[7] - mm0 + x[10..11]*C9[1] - x[14..15]*C9[5]
	 */
	movq 8(%eax),%mm2
Packit c32a2d
	movd 28+_COS9_,%mm3
Packit c32a2d
	punpckldq %mm3,%mm3
Packit c32a2d
	pfmul %mm3,%mm2
Packit c32a2d
	pfsub %mm0,%mm2
Packit c32a2d
	movq 40(%eax),%mm3
Packit c32a2d
	movd 4+_COS9_,%mm4
Packit c32a2d
	punpckldq %mm4,%mm4
Packit c32a2d
	pfmul %mm4,%mm3
Packit c32a2d
	pfadd %mm3,%mm2
Packit c32a2d
	movq 56(%eax),%mm3
Packit c32a2d
	movd 20+_COS9_,%mm4
Packit c32a2d
	punpckldq %mm4,%mm4
Packit c32a2d
	pfmul %mm4,%mm3
Packit c32a2d
	pfsub %mm3,%mm2
	/* mm3 = x[0..1] - x[4..5]*C9[4] + x[8..9]*C9[8] + mm1 */
	/*       - x[16..17]*C9[2] */
Packit c32a2d
	movq (%eax),%mm3
Packit c32a2d
	movq 16(%eax),%mm4
Packit c32a2d
	movd 16+_COS9_,%mm5
Packit c32a2d
	punpckldq %mm5,%mm5
Packit c32a2d
	pfmul %mm5,%mm4
Packit c32a2d
	pfsub %mm4,%mm3
Packit c32a2d
	movq 32(%eax),%mm4
Packit c32a2d
	movd 32+_COS9_,%mm5
Packit c32a2d
	punpckldq %mm5,%mm5
Packit c32a2d
	pfmul %mm5,%mm4
Packit c32a2d
	pfadd %mm4,%mm3
Packit c32a2d
	pfadd %mm1,%mm3
Packit c32a2d
	movq 64(%eax),%mm4
Packit c32a2d
	movd 8+_COS9_,%mm5
Packit c32a2d
	punpckldq %mm5,%mm5
Packit c32a2d
	pfmul %mm5,%mm4
Packit c32a2d
	pfsub %mm4,%mm3
	/*
	 * First output set: mm4 = mm2 + mm3, hi half scaled by tf[3].
	 * With s = lo + hi (pfacc) and d = lo - hi:
	 *   48(%ecx) = s * w[30]          20(%ecx) = s * w[23]
	 *   640(%ebx) = o[5] + d * w[5]   1536(%ebx) = o[12] + d * w[12]
	 */
Packit c32a2d
	movq %mm2,%mm4
Packit c32a2d
	pfadd %mm3,%mm4
Packit c32a2d
	movq %mm7,%mm5
Packit c32a2d
#if defined(PIC) && defined(__APPLE__)
	/* Apple PIC: %eax temporarily becomes the tfcos36 pointer. */
Packit c32a2d
	mov -4(%ebp),%eax
Packit c32a2d
#endif
Packit c32a2d
	punpckldq 12+_tfcos36_,%mm5
Packit c32a2d
	pfmul %mm5,%mm4
Packit c32a2d
	movq %mm4,%mm5
Packit c32a2d
	pfacc %mm5,%mm5
Packit c32a2d
	movd 120(%edx),%mm6
Packit c32a2d
	punpckldq 92(%edx),%mm6
Packit c32a2d
	pfmul %mm6,%mm5
Packit c32a2d
	movd %mm5,48(%ecx)
Packit c32a2d
	psrlq $32,%mm5
Packit c32a2d
	movd %mm5,20(%ecx)
	/* Build d = lo - hi of mm4 and copy it into both halves of mm5. */
Packit c32a2d
	movq %mm4,%mm6
Packit c32a2d
	punpckldq %mm6,%mm5
Packit c32a2d
	pfsub %mm6,%mm5
Packit c32a2d
	punpckhdq %mm5,%mm5
Packit c32a2d
	movd 20(%edx),%mm6
Packit c32a2d
	punpckldq 48(%edx),%mm6
Packit c32a2d
	pfmul %mm6,%mm5
Packit c32a2d
	movd 20(%esi),%mm6
Packit c32a2d
	punpckldq 48(%esi),%mm6
Packit c32a2d
	pfadd %mm6,%mm5
Packit c32a2d
	movd %mm5,640(%ebx)
Packit c32a2d
	psrlq $32,%mm5
Packit c32a2d
	movd %mm5,1536(%ebx)
	/*
	 * Second output set: mm4 = mm3 - mm2, hi half scaled by tf[5].
	 *   56(%ecx) = s * w[32]          12(%ecx) = s * w[21]
	 *   384(%ebx) = o[3] + d * w[3]   1792(%ebx) = o[14] + d * w[14]
	 */
Packit c32a2d
	movq %mm3,%mm4
Packit c32a2d
	pfsub %mm2,%mm4
Packit c32a2d
	movq %mm7,%mm5
Packit c32a2d
	punpckldq 20+_tfcos36_,%mm5
Packit c32a2d
#if defined(PIC) && defined(__APPLE__)
	/* Apple PIC: restore %eax to the first argument. */
Packit c32a2d
	mov 8(%ebp),%eax
Packit c32a2d
#endif
Packit c32a2d
	pfmul %mm5,%mm4
Packit c32a2d
	movq %mm4,%mm5
Packit c32a2d
	pfacc %mm5,%mm5
Packit c32a2d
	movd 128(%edx),%mm6
Packit c32a2d
	punpckldq 84(%edx),%mm6
Packit c32a2d
	pfmul %mm6,%mm5
Packit c32a2d
	movd %mm5,56(%ecx)
Packit c32a2d
	psrlq $32,%mm5
Packit c32a2d
	movd %mm5,12(%ecx)
Packit c32a2d
	movq %mm4,%mm6
Packit c32a2d
	punpckldq %mm6,%mm5
Packit c32a2d
	pfsub %mm6,%mm5
Packit c32a2d
	punpckhdq %mm5,%mm5
Packit c32a2d
	movd 12(%edx),%mm6
Packit c32a2d
	punpckldq 56(%edx),%mm6
Packit c32a2d
	pfmul %mm6,%mm5
Packit c32a2d
	movd 12(%esi),%mm6
Packit c32a2d
	punpckldq 56(%esi),%mm6
Packit c32a2d
	pfadd %mm6,%mm5
Packit c32a2d
	movd %mm5,384(%ebx)
Packit c32a2d
	psrlq $32,%mm5
Packit c32a2d
	movd %mm5,1792(%ebx)
Packit c32a2d
Packit c32a2d
	/*
	 * Notation: x[k], o[k], w[k] = float k (byte offset 4k) at (%eax),
	 * (%esi), (%edx); tf[k] = float k of tfcos36.  Each MMX register
	 * holds a (lo, hi) pair of floats.  mm7 still holds the (1.0, 0.0)
	 * constant built earlier in the function.
	 *
	 * Final output set:
	 *   mm4 = x[0..1] - x[4..5] + x[8..9] - x[12..13] + x[16..17],
	 *   hi half scaled by tf[4].  With s = lo + hi (pfacc), d = lo - hi:
	 *   52(%ecx) = s * w[31]          16(%ecx) = s * w[22]
	 *   512(%ebx) = o[4] + d * w[4]   1664(%ebx) = o[13] + d * w[13]
	 */
	movq (%eax),%mm4
Packit c32a2d
	movq 16(%eax),%mm3
Packit c32a2d
	pfsub %mm3,%mm4
Packit c32a2d
	movq 32(%eax),%mm3
Packit c32a2d
	pfadd %mm3,%mm4
Packit c32a2d
	movq 48(%eax),%mm3
Packit c32a2d
	pfsub %mm3,%mm4
Packit c32a2d
	movq 64(%eax),%mm3
Packit c32a2d
	pfadd %mm3,%mm4
Packit c32a2d
	movq %mm7,%mm5
Packit c32a2d
#if defined(PIC) && defined(__APPLE__)
	/* Apple PIC: %eax becomes the tfcos36 pointer.  It is not reloaded */
	/* afterwards because (%eax) is not read again before the return. */
Packit c32a2d
	mov -4(%ebp),%eax
Packit c32a2d
#endif
Packit c32a2d
	punpckldq 16+_tfcos36_,%mm5
Packit c32a2d
	pfmul %mm5,%mm4
Packit c32a2d
	movq %mm4,%mm5
Packit c32a2d
	pfacc %mm5,%mm5
Packit c32a2d
	movd 124(%edx),%mm6
Packit c32a2d
	punpckldq 88(%edx),%mm6
Packit c32a2d
	pfmul %mm6,%mm5
Packit c32a2d
	movd %mm5,52(%ecx)
Packit c32a2d
	psrlq $32,%mm5
Packit c32a2d
	movd %mm5,16(%ecx)
	/* Build d = lo - hi of mm4 and copy it into both halves of mm5. */
Packit c32a2d
	movq %mm4,%mm6
Packit c32a2d
	punpckldq %mm6,%mm5
Packit c32a2d
	pfsub %mm6,%mm5
Packit c32a2d
	punpckhdq %mm5,%mm5
Packit c32a2d
	movd 16(%edx),%mm6
Packit c32a2d
	punpckldq 52(%edx),%mm6
Packit c32a2d
	pfmul %mm6,%mm5
Packit c32a2d
	movd 16(%esi),%mm6
Packit c32a2d
	punpckldq 52(%esi),%mm6
Packit c32a2d
	pfadd %mm6,%mm5
Packit c32a2d
	movd %mm5,512(%ebx)
Packit c32a2d
	psrlq $32,%mm5
Packit c32a2d
	movd %mm5,1664(%ebx)
Packit c32a2d
Packit c32a2d
	/*
	 * Epilogue: leave MMX/3DNow! state, then undo the prologue in reverse
	 * order.  "movl %ebp,%esp" also drops the Apple-PIC local slot.
	 * NOTE(review): RESTORE_GOT is defined in mangle.h; presumably it is
	 * the counterpart of PREPARE_GOT - confirm there.
	 */
	femms
Packit c32a2d
	RESTORE_GOT
Packit c32a2d
	popl %ebx
Packit c32a2d
	popl %esi
Packit c32a2d
	movl %ebp,%esp
Packit c32a2d
	popl %ebp
Packit c32a2d
	ret
Packit c32a2d
Packit c32a2d
/*
 * Apple PIC builds only: non-lazy symbol pointer slots for tfcos36 and
 * COS9.  Each slot is a zero-initialised .long tied to its symbol with
 * .indirect_symbol.
 * NOTE(review): presumably GLOBAL_VAR_PTR(x), used in dct36_3dnow, resolves
 * to these L_x labels - confirm in mangle.h.
 */
#if defined(PIC) && defined(__APPLE__)
Packit c32a2d
	.section	__IMPORT,__pointers,non_lazy_symbol_pointers
Packit c32a2d
L_tfcos36:
Packit c32a2d
	.indirect_symbol	ASM_NAME(tfcos36)
Packit c32a2d
	.long	0
Packit c32a2d
L_COS9:
Packit c32a2d
	.indirect_symbol	ASM_NAME(COS9)
Packit c32a2d
	.long	0
Packit c32a2d
#endif
Packit c32a2d
Packit c32a2d
/*
 * NOTE(review): NONEXEC_STACK is a macro from mangle.h (not visible here);
 * presumably it marks the object as not needing an executable stack -
 * confirm there.
 */
NONEXEC_STACK