/*
	dct64_3dnow.s: Replacement of dct64() with AMD's 3DNow! SIMD operations support

	copyright ?-2006 by the mpg123 project - free software under the terms of the LGPL 2.1
	see COPYING and AUTHORS files in distribution or http://mpg123.org
	initially written by Syuuhei Kashiyama

	Original "license" statement:
	The author of this program disclaim whole expressed or implied
	warranties with regard to this program, and in no event shall the
	author of this program liable to whatever resulted from the use of
	this program. Use it at your own risk.
*/

#include "mangle.h"

/*
	dct64_3dnow - 3DNow! replacement for dct64() (see the file header).
	All arithmetic is packed single precision, two floats per MMX register.

	Calling convention: 32-bit x86, three pointer arguments on the stack.
	Offsets are relative to %esp after the prologue
	(16 bytes of saved registers + 256 bytes of scratch + return address):
		276(%esp) -> %ebp   1st argument, output pointer (only written)
		280(%esp) -> %edx   2nd argument, output pointer (only written)
		284(%esp) -> %edi   3rd argument, input pointer (only read, 32 floats)
	NOTE(review): presumably (out0, out1, samples) of the C dct64() this
	replaces - confirm against the C prototype.

	Register roles:
		%ebx   scratch buffer A, 32 floats starting at 16(%esp)
		%esi   scratch buffer B, 32 floats directly behind A
		%eax   pointer to the coefficient table of the current phase,
		       and a plain temporary in phases 5 and 6
		%ecx   substituted for _EBX_ (see the #define below); not used
		       by any instruction written out in this file

	Data flow:
		input -> A (1) -> B (2) -> A (3) -> B (4) -> A (5) -> outputs (6)

	pnts is an external table of pointers; pnts[0] .. pnts[4] are fetched in
	phases 1 .. 5 and point at the float coefficients of that phase.
	NOTE(review): presumably cosine tables (the phase 5 comment calls the
	value cos0) - confirm where pnts is defined.

	Outputs are written as single floats with a stride of 64 bytes
	(16 floats): offsets 0 .. 1024 behind %ebp and 0 .. 960 behind %edx.

	The femms instructions are commented out, so this routine returns with
	the MMX/3DNow! state still active.
	NOTE(review): presumably the caller issues femms/emms - verify.
*/
	.globl ASM_NAME(dct64_3dnow)
/*	.type	 ASM_NAME(dct64_3dnow),@function */
ASM_NAME(dct64_3dnow):
	/* 256 bytes of scratch space: work buffers A and B */
	subl $256,%esp
	/* callee-saved registers that are overwritten below */
	pushl %ebp
	pushl %edi
	pushl %esi
	pushl %ebx

	/* %ebx is needed as a buffer pointer, so let the mangle.h macros that
	   refer to _EBX_ use %ecx instead.
	   NOTE(review): presumably GET_GOT loads the GOT address into _EBX_
	   for PIC builds - confirm in mangle.h. */
	#undef _EBX_
	#define _EBX_ %ecx
	GET_GOT

	/* %ebx = buffer A (just above the four saved registers) */
	leal 16(%esp),%ebx
	/* %edi = 3rd argument (input) */
	movl 284(%esp),%edi
	/* %ebp = 1st argument (output) */
	movl 276(%esp),%ebp
	/* %edx = 2nd argument (output) */
	movl 280(%esp),%edx
	/* %esi = buffer B = A + 32 floats */
	leal 128(%ebx),%esi

	/* femms */

	/* 1 */
	/* Butterfly over all 32 input floats, input -> A, coefficients pnts[0].
	   For every pair index i (labels give the float index of the low and
	   of the mirrored high pair):
	     a = in[i], in[i+1]
	     b = in[31-i], in[30-i]       (mirrored pair, built by movd+punpckldq)
	     A[i], A[i+1]       = a + b
	     A[31-i], A[30-i]   = (a - b) * coefficient pair i
	   The two stores via movd / psrlq / movd write the product back in
	   mirrored order. */
#if defined(PIC) && defined(__APPLE__)
	movl GLOBAL_VAR_PTR(pnts),%eax
	movl (%eax),%eax
#else
	movl GLOBAL_VAR(pnts),%eax
#endif
	/* 0,30 */
	movq 0(%edi),%mm0
	movq %mm0,%mm1
	movd 124(%edi),%mm2
	punpckldq 120(%edi),%mm2
	movq 0(%eax),%mm3
	pfadd %mm2,%mm0
	movq %mm0,0(%ebx)
	pfsub %mm2,%mm1
	pfmul %mm3,%mm1
	movd %mm1,124(%ebx)
	psrlq $32,%mm1
	movd %mm1,120(%ebx)
	/* 2,28 */
	movq 8(%edi),%mm4
	movq %mm4,%mm5
	movd 116(%edi),%mm6
	punpckldq 112(%edi),%mm6
	movq 8(%eax),%mm7
	pfadd %mm6,%mm4
	movq %mm4,8(%ebx)
	pfsub %mm6,%mm5
	pfmul %mm7,%mm5
	movd %mm5,116(%ebx)
	psrlq $32,%mm5
	movd %mm5,112(%ebx)
	/* 4,26 */
	movq 16(%edi),%mm0
	movq %mm0,%mm1
	movd 108(%edi),%mm2
	punpckldq 104(%edi),%mm2
	movq 16(%eax),%mm3
	pfadd %mm2,%mm0
	movq %mm0,16(%ebx)
	pfsub %mm2,%mm1
	pfmul %mm3,%mm1
	movd %mm1,108(%ebx)
	psrlq $32,%mm1
	movd %mm1,104(%ebx)
	/* 6,24 */
	movq 24(%edi),%mm4
	movq %mm4,%mm5
	movd 100(%edi),%mm6
	punpckldq 96(%edi),%mm6
	movq 24(%eax),%mm7
	pfadd %mm6,%mm4
	movq %mm4,24(%ebx)
	pfsub %mm6,%mm5
	pfmul %mm7,%mm5
	movd %mm5,100(%ebx)
	psrlq $32,%mm5
	movd %mm5,96(%ebx)
	/* 8,22 */
	movq 32(%edi),%mm0
	movq %mm0,%mm1
	movd 92(%edi),%mm2
	punpckldq 88(%edi),%mm2
	movq 32(%eax),%mm3
	pfadd %mm2,%mm0
	movq %mm0,32(%ebx)
	pfsub %mm2,%mm1
	pfmul %mm3,%mm1
	movd %mm1,92(%ebx)
	psrlq $32,%mm1
	movd %mm1,88(%ebx)
	/* 10,20 */
	movq 40(%edi),%mm4
	movq %mm4,%mm5
	movd 84(%edi),%mm6
	punpckldq 80(%edi),%mm6
	movq 40(%eax),%mm7
	pfadd %mm6,%mm4
	movq %mm4,40(%ebx)
	pfsub %mm6,%mm5
	pfmul %mm7,%mm5
	movd %mm5,84(%ebx)
	psrlq $32,%mm5
	movd %mm5,80(%ebx)
	/* 12,18 */
	movq 48(%edi),%mm0
	movq %mm0,%mm1
	movd 76(%edi),%mm2
	punpckldq 72(%edi),%mm2
	movq 48(%eax),%mm3
	pfadd %mm2,%mm0
	movq %mm0,48(%ebx)
	pfsub %mm2,%mm1
	pfmul %mm3,%mm1
	movd %mm1,76(%ebx)
	psrlq $32,%mm1
	movd %mm1,72(%ebx)
	/* 14,16 */
	movq 56(%edi),%mm4
	movq %mm4,%mm5
	movd 68(%edi),%mm6
	punpckldq 64(%edi),%mm6
	movq 56(%eax),%mm7
	pfadd %mm6,%mm4
	movq %mm4,56(%ebx)
	pfsub %mm6,%mm5
	pfmul %mm7,%mm5
	movd %mm5,68(%ebx)
	psrlq $32,%mm5
	movd %mm5,64(%ebx)

	/* 2 */
	/* Same butterfly on each 16-float half of A, A -> B, coefficients
	   pnts[1]. The upper half (floats 16..31) reuses the coefficient
	   register loaded for the matching lower-half pair and uses pfsubr,
	   which subtracts the other way round (mm1 = mm2 - mm1). */
#if defined(PIC) && defined(__APPLE__)
	movl GLOBAL_VAR_PTR(pnts),%eax
	movl 4(%eax),%eax
#else
	movl 4+GLOBAL_VAR(pnts),%eax
#endif
	/* 0,14 */
	movq 0(%ebx),%mm0
	movq %mm0,%mm1
	movd 60(%ebx),%mm2
	punpckldq 56(%ebx),%mm2
	movq 0(%eax),%mm3
	pfadd %mm2,%mm0
	movq %mm0,0(%esi)
	pfsub %mm2,%mm1
	pfmul %mm3,%mm1
	movd %mm1,60(%esi)
	psrlq $32,%mm1
	movd %mm1,56(%esi)
	/* 16,30 */
	movq 64(%ebx),%mm0
	movq %mm0,%mm1
	movd 124(%ebx),%mm2
	punpckldq 120(%ebx),%mm2
	pfadd %mm2,%mm0
	movq %mm0,64(%esi)
	pfsubr %mm2,%mm1
	pfmul %mm3,%mm1
	movd %mm1,124(%esi)
	psrlq $32,%mm1
	movd %mm1,120(%esi)
	/* 2,12 */
	movq 8(%ebx),%mm4
	movq %mm4,%mm5
	movd 52(%ebx),%mm6
	punpckldq 48(%ebx),%mm6
	movq 8(%eax),%mm7
	pfadd %mm6,%mm4
	movq %mm4,8(%esi)
	pfsub %mm6,%mm5
	pfmul %mm7,%mm5
	movd %mm5,52(%esi)
	psrlq $32,%mm5
	movd %mm5,48(%esi)
	/* 18,28 */
	movq 72(%ebx),%mm4
	movq %mm4,%mm5
	movd 116(%ebx),%mm6
	punpckldq 112(%ebx),%mm6
	pfadd %mm6,%mm4
	movq %mm4,72(%esi)
	pfsubr %mm6,%mm5
	pfmul %mm7,%mm5
	movd %mm5,116(%esi)
	psrlq $32,%mm5
	movd %mm5,112(%esi)
	/* 4,10 */
	movq 16(%ebx),%mm0
	movq %mm0,%mm1
	movd 44(%ebx),%mm2
	punpckldq 40(%ebx),%mm2
	movq 16(%eax),%mm3
	pfadd %mm2,%mm0
	movq %mm0,16(%esi)
	pfsub %mm2,%mm1
	pfmul %mm3,%mm1
	movd %mm1,44(%esi)
	psrlq $32,%mm1
	movd %mm1,40(%esi)
	/* 20,26 */
	movq 80(%ebx),%mm0
	movq %mm0,%mm1
	movd 108(%ebx),%mm2
	punpckldq 104(%ebx),%mm2
	pfadd %mm2,%mm0
	movq %mm0,80(%esi)
	pfsubr %mm2,%mm1
	pfmul %mm3,%mm1
	movd %mm1,108(%esi)
	psrlq $32,%mm1
	movd %mm1,104(%esi)
	/* 6,8 */
	movq 24(%ebx),%mm4
	movq %mm4,%mm5
	movd 36(%ebx),%mm6
	punpckldq 32(%ebx),%mm6
	movq 24(%eax),%mm7
	pfadd %mm6,%mm4
	movq %mm4,24(%esi)
	pfsub %mm6,%mm5
	pfmul %mm7,%mm5
	movd %mm5,36(%esi)
	psrlq $32,%mm5
	movd %mm5,32(%esi)
	/* 22,24 */
	movq 88(%ebx),%mm4
	movq %mm4,%mm5
	movd 100(%ebx),%mm6
	punpckldq 96(%ebx),%mm6
	pfadd %mm6,%mm4
	movq %mm4,88(%esi)
	pfsubr %mm6,%mm5
	pfmul %mm7,%mm5
	movd %mm5,100(%esi)
	psrlq $32,%mm5
	movd %mm5,96(%esi)

	/* 3 */
	/* Butterfly on groups of 8 floats, B -> A. The four coefficients of
	   pnts[2] stay in mm0 (floats 0..1) and mm1 (floats 2..3) for the
	   whole phase. Groups 8..15 and 24..31 use pfsubr. */
#if defined(PIC) && defined(__APPLE__)
	movl GLOBAL_VAR_PTR(pnts),%eax
	movl 8(%eax),%eax
#else
	movl 8+GLOBAL_VAR(pnts),%eax
#endif
	movq 0(%eax),%mm0
	movq 8(%eax),%mm1
	/* 0,6 */
	movq 0(%esi),%mm2
	movq %mm2,%mm3
	movd 28(%esi),%mm4
	punpckldq 24(%esi),%mm4
	pfadd %mm4,%mm2
	pfsub %mm4,%mm3
	pfmul %mm0,%mm3
	movq %mm2,0(%ebx)
	movd %mm3,28(%ebx)
	psrlq $32,%mm3
	movd %mm3,24(%ebx)
	/* 2,4 */
	movq 8(%esi),%mm5
	movq %mm5,%mm6
	movd 20(%esi),%mm7
	punpckldq 16(%esi),%mm7
	pfadd %mm7,%mm5
	pfsub %mm7,%mm6
	pfmul %mm1,%mm6
	movq %mm5,8(%ebx)
	movd %mm6,20(%ebx)
	psrlq $32,%mm6
	movd %mm6,16(%ebx)
	/* 8,14 */
	movq 32(%esi),%mm2
	movq %mm2,%mm3
	movd 60(%esi),%mm4
	punpckldq 56(%esi),%mm4
	pfadd %mm4,%mm2
	pfsubr %mm4,%mm3
	pfmul %mm0,%mm3
	movq %mm2,32(%ebx)
	movd %mm3,60(%ebx)
	psrlq $32,%mm3
	movd %mm3,56(%ebx)
	/* 10,12 */
	movq 40(%esi),%mm5
	movq %mm5,%mm6
	movd 52(%esi),%mm7
	punpckldq 48(%esi),%mm7
	pfadd %mm7,%mm5
	pfsubr %mm7,%mm6
	pfmul %mm1,%mm6
	movq %mm5,40(%ebx)
	movd %mm6,52(%ebx)
	psrlq $32,%mm6
	movd %mm6,48(%ebx)
	/* 16,22 */
	movq 64(%esi),%mm2
	movq %mm2,%mm3
	movd 92(%esi),%mm4
	punpckldq 88(%esi),%mm4
	pfadd %mm4,%mm2
	pfsub %mm4,%mm3
	pfmul %mm0,%mm3
	movq %mm2,64(%ebx)
	movd %mm3,92(%ebx)
	psrlq $32,%mm3
	movd %mm3,88(%ebx)
	/* 18,20 */
	movq 72(%esi),%mm5
	movq %mm5,%mm6
	movd 84(%esi),%mm7
	punpckldq 80(%esi),%mm7
	pfadd %mm7,%mm5
	pfsub %mm7,%mm6
	pfmul %mm1,%mm6
	movq %mm5,72(%ebx)
	movd %mm6,84(%ebx)
	psrlq $32,%mm6
	movd %mm6,80(%ebx)
	/* 24,30 */
	movq 96(%esi),%mm2
	movq %mm2,%mm3
	movd 124(%esi),%mm4
	punpckldq 120(%esi),%mm4
	pfadd %mm4,%mm2
	pfsubr %mm4,%mm3
	pfmul %mm0,%mm3
	movq %mm2,96(%ebx)
	movd %mm3,124(%ebx)
	psrlq $32,%mm3
	movd %mm3,120(%ebx)
	/* 26,28 */
	movq 104(%esi),%mm5
	movq %mm5,%mm6
	movd 116(%esi),%mm7
	punpckldq 112(%esi),%mm7
	pfadd %mm7,%mm5
	pfsubr %mm7,%mm6
	pfmul %mm1,%mm6
	movq %mm5,104(%ebx)
	movd %mm6,116(%ebx)
	psrlq $32,%mm6
	movd %mm6,112(%ebx)

	/* 4 */
	/* Butterfly on groups of 4 floats, A -> B. A single coefficient pair
	   from pnts[3] stays in mm0. Labels give the first float of the group;
	   every second group (4, 12, 20, 28) uses pfsubr. */
#if defined(PIC) && defined(__APPLE__)
	movl GLOBAL_VAR_PTR(pnts),%eax
	movl 12(%eax),%eax
#else
	movl 12+GLOBAL_VAR(pnts),%eax
#endif
	movq 0(%eax),%mm0
	/* 0 */
	movq 0(%ebx),%mm1
	movq %mm1,%mm2
	movd 12(%ebx),%mm3
	punpckldq 8(%ebx),%mm3
	pfadd %mm3,%mm1
	pfsub %mm3,%mm2
	pfmul %mm0,%mm2
	movq %mm1,0(%esi)
	movd %mm2,12(%esi)
	psrlq $32,%mm2
	movd %mm2,8(%esi)
	/* 4 */
	movq 16(%ebx),%mm4
	movq %mm4,%mm5
	movd 28(%ebx),%mm6
	punpckldq 24(%ebx),%mm6
	pfadd %mm6,%mm4
	pfsubr %mm6,%mm5
	pfmul %mm0,%mm5
	movq %mm4,16(%esi)
	movd %mm5,28(%esi)
	psrlq $32,%mm5
	movd %mm5,24(%esi)
	/* 8 */
	movq 32(%ebx),%mm1
	movq %mm1,%mm2
	movd 44(%ebx),%mm3
	punpckldq 40(%ebx),%mm3
	pfadd %mm3,%mm1
	pfsub %mm3,%mm2
	pfmul %mm0,%mm2
	movq %mm1,32(%esi)
	movd %mm2,44(%esi)
	psrlq $32,%mm2
	movd %mm2,40(%esi)
	/* 12 */
	movq 48(%ebx),%mm4
	movq %mm4,%mm5
	movd 60(%ebx),%mm6
	punpckldq 56(%ebx),%mm6
	pfadd %mm6,%mm4
	pfsubr %mm6,%mm5
	pfmul %mm0,%mm5
	movq %mm4,48(%esi)
	movd %mm5,60(%esi)
	psrlq $32,%mm5
	movd %mm5,56(%esi)
	/* 16 */
	movq 64(%ebx),%mm1
	movq %mm1,%mm2
	movd 76(%ebx),%mm3
	punpckldq 72(%ebx),%mm3
	pfadd %mm3,%mm1
	pfsub %mm3,%mm2
	pfmul %mm0,%mm2
	movq %mm1,64(%esi)
	movd %mm2,76(%esi)
	psrlq $32,%mm2
	movd %mm2,72(%esi)
	/* 20 */
	movq 80(%ebx),%mm4
	movq %mm4,%mm5
	movd 92(%ebx),%mm6
	punpckldq 88(%ebx),%mm6
	pfadd %mm6,%mm4
	pfsubr %mm6,%mm5
	pfmul %mm0,%mm5
	movq %mm4,80(%esi)
	movd %mm5,92(%esi)
	psrlq $32,%mm5
	movd %mm5,88(%esi)
	/* 24 */
	movq 96(%ebx),%mm1
	movq %mm1,%mm2
	movd 108(%ebx),%mm3
	punpckldq 104(%ebx),%mm3
	pfadd %mm3,%mm1
	pfsub %mm3,%mm2
	pfmul %mm0,%mm2
	movq %mm1,96(%esi)
	movd %mm2,108(%esi)
	psrlq $32,%mm2
	movd %mm2,104(%esi)
	/* 28 */
	movq 112(%ebx),%mm4
	movq %mm4,%mm5
	movd 124(%ebx),%mm6
	punpckldq 120(%ebx),%mm6
	pfadd %mm6,%mm4
	pfsubr %mm6,%mm5
	pfmul %mm0,%mm5
	movq %mm4,112(%esi)
	movd %mm5,124(%esi)
	psrlq $32,%mm5
	movd %mm5,120(%esi)

	/* 5 */
	/* Final butterfly inside each pair, B -> A.
	   Constants (low lane | high lane):
	     mm0 = 1.0 | -1.0
	     mm1 = 1.0 | cos0     (cos0 = first float behind pnts[4])
	   For a pair x0,x1 in mm2:
	     mm3 = x0 | -x1,  pfacc -> mm2 = x0+x1 | x0-x1,
	     pfmul mm1        -> mm2 = x0+x1 | (x0-x1)*cos0
	   For the following pair x2,x3 in mm4 the extra pfmul by mm0 flips the
	   sign of the difference; with s = x2+x3 and d = (x3-x2)*cos0 the
	   closing psrlq/pfacc leaves mm4 = s+d | d.
	   Groups 4, 12, 20 and 28 additionally fold the two pairs together:
	     mm2 += mm4,  mm4 += (old high lane of mm2 | 0). */
	movl $-1,%eax
	movd %eax,%mm1
	movl $1,%eax
	/* L | H */
	movd %eax,%mm0
	punpckldq %mm1,%mm0
	/* 1.0 | -1.0 */
	pi2fd %mm0,%mm0
	movd %eax,%mm1
	pi2fd %mm1,%mm1
#if defined(PIC) && defined(__APPLE__)
	movl GLOBAL_VAR_PTR(pnts),%eax
	movl 16(%eax),%eax
#else
	movl 16+GLOBAL_VAR(pnts),%eax
#endif
	movd 0(%eax),%mm2
	/* 1.0 | cos0 */
	punpckldq %mm2,%mm1
	/* 0 */
	movq 0(%esi),%mm2
	movq %mm2,%mm3
	pfmul %mm0,%mm3
	pfacc %mm3,%mm2
	pfmul %mm1,%mm2
	movq %mm2,0(%ebx)
	movq 8(%esi),%mm4
	movq %mm4,%mm5
	pfmul %mm0,%mm5
	pfacc %mm5,%mm4
	pfmul %mm0,%mm4
	pfmul %mm1,%mm4
	movq %mm4,%mm5
	psrlq $32,%mm5
	pfacc %mm5,%mm4
	movq %mm4,8(%ebx)
	/* 4 */
	movq 16(%esi),%mm2
	movq %mm2,%mm3
	pfmul %mm0,%mm3
	pfacc %mm3,%mm2
	pfmul %mm1,%mm2
	movq 24(%esi),%mm4
	movq %mm4,%mm5
	pfmul %mm0,%mm5
	pfacc %mm5,%mm4
	pfmul %mm0,%mm4
	pfmul %mm1,%mm4
	movq %mm4,%mm5
	psrlq $32,%mm5
	pfacc %mm5,%mm4
	movq %mm2,%mm3
	psrlq $32,%mm3
	pfadd %mm4,%mm2
	pfadd %mm3,%mm4
	movq %mm2,16(%ebx)
	movq %mm4,24(%ebx)
	/* 8 */
	movq 32(%esi),%mm2
	movq %mm2,%mm3
	pfmul %mm0,%mm3
	pfacc %mm3,%mm2
	pfmul %mm1,%mm2
	movq %mm2,32(%ebx)
	movq 40(%esi),%mm4
	movq %mm4,%mm5
	pfmul %mm0,%mm5
	pfacc %mm5,%mm4
	pfmul %mm0,%mm4
	pfmul %mm1,%mm4
	movq %mm4,%mm5
	psrlq $32,%mm5
	pfacc %mm5,%mm4
	movq %mm4,40(%ebx)
	/* 12 */
	movq 48(%esi),%mm2
	movq %mm2,%mm3
	pfmul %mm0,%mm3
	pfacc %mm3,%mm2
	pfmul %mm1,%mm2
	movq 56(%esi),%mm4
	movq %mm4,%mm5
	pfmul %mm0,%mm5
	pfacc %mm5,%mm4
	pfmul %mm0,%mm4
	pfmul %mm1,%mm4
	movq %mm4,%mm5
	psrlq $32,%mm5
	pfacc %mm5,%mm4
	movq %mm2,%mm3
	psrlq $32,%mm3
	pfadd %mm4,%mm2
	pfadd %mm3,%mm4
	movq %mm2,48(%ebx)
	movq %mm4,56(%ebx)
	/* 16 */
	movq 64(%esi),%mm2
	movq %mm2,%mm3
	pfmul %mm0,%mm3
	pfacc %mm3,%mm2
	pfmul %mm1,%mm2
	movq %mm2,64(%ebx)
	movq 72(%esi),%mm4
	movq %mm4,%mm5
	pfmul %mm0,%mm5
	pfacc %mm5,%mm4
	pfmul %mm0,%mm4
	pfmul %mm1,%mm4
	movq %mm4,%mm5
	psrlq $32,%mm5
	pfacc %mm5,%mm4
	movq %mm4,72(%ebx)
	/* 20 */
	movq 80(%esi),%mm2
	movq %mm2,%mm3
	pfmul %mm0,%mm3
	pfacc %mm3,%mm2
	pfmul %mm1,%mm2
	movq 88(%esi),%mm4
	movq %mm4,%mm5
	pfmul %mm0,%mm5
	pfacc %mm5,%mm4
	pfmul %mm0,%mm4
	pfmul %mm1,%mm4
	movq %mm4,%mm5
	psrlq $32,%mm5
	pfacc %mm5,%mm4
	movq %mm2,%mm3
	psrlq $32,%mm3
	pfadd %mm4,%mm2
	pfadd %mm3,%mm4
	movq %mm2,80(%ebx)
	movq %mm4,88(%ebx)
	/* 24 */
	movq 96(%esi),%mm2
	movq %mm2,%mm3
	pfmul %mm0,%mm3
	pfacc %mm3,%mm2
	pfmul %mm1,%mm2
	movq %mm2,96(%ebx)
	movq 104(%esi),%mm4
	movq %mm4,%mm5
	pfmul %mm0,%mm5
	pfacc %mm5,%mm4
	pfmul %mm0,%mm4
	pfmul %mm1,%mm4
	movq %mm4,%mm5
	psrlq $32,%mm5
	pfacc %mm5,%mm4
	movq %mm4,104(%ebx)
	/* 28 */
	movq 112(%esi),%mm2
	movq %mm2,%mm3
	pfmul %mm0,%mm3
	pfacc %mm3,%mm2
	pfmul %mm1,%mm2
	movq 120(%esi),%mm4
	movq %mm4,%mm5
	pfmul %mm0,%mm5
	pfacc %mm5,%mm4
	pfmul %mm0,%mm4
	pfmul %mm1,%mm4
	movq %mm4,%mm5
	psrlq $32,%mm5
	pfacc %mm5,%mm4
	movq %mm2,%mm3
	psrlq $32,%mm3
	pfadd %mm4,%mm2
	pfadd %mm3,%mm4
	movq %mm2,112(%ebx)
	movq %mm4,120(%ebx)

	/* Phase6 */
	/* Scatter buffer A to the two outputs, one float per 64-byte step.
	   Floats 0..7 are copied unchanged through %eax; float 1 goes to
	   offset 0 of both outputs. */
	movl 0(%ebx),%eax
	movl %eax,1024(%ebp)
	movl 4(%ebx),%eax
	movl %eax,0(%ebp)
	movl %eax,0(%edx)
	movl 8(%ebx),%eax
	movl %eax,512(%ebp)
	movl 12(%ebx),%eax
	movl %eax,512(%edx)

	movl 16(%ebx),%eax
	movl %eax,768(%ebp)
	movl 20(%ebx),%eax
	movl %eax,256(%edx)

	movl 24(%ebx),%eax
	movl %eax,256(%ebp)
	movl 28(%ebx),%eax
	movl %eax,768(%edx)

	/* Floats 8..15: sums of neighbouring pairs; the low lane of each sum
	   goes behind %ebp, the high lane behind %edx. */
	movq 32(%ebx),%mm0
	movq 48(%ebx),%mm1
	pfadd %mm1,%mm0
	movd %mm0,896(%ebp)
	psrlq $32,%mm0
	movd %mm0,128(%edx)
	movq 40(%ebx),%mm2
	pfadd %mm2,%mm1
	movd %mm1,640(%ebp)
	psrlq $32,%mm1
	movd %mm1,384(%edx)

	movq 56(%ebx),%mm3
	pfadd %mm3,%mm2
	movd %mm2,384(%ebp)
	psrlq $32,%mm2
	movd %mm2,640(%edx)

	/* movd loads float 9 alone (high lane zero), so only the low lane
	   of mm3 changes */
	movd 36(%ebx),%mm4
	pfadd %mm4,%mm3
	movd %mm3,128(%ebp)
	psrlq $32,%mm3
	movd %mm3,896(%edx)

	/* Floats 16..31: a running pair sum from floats 24..31 is combined
	   with one pair from floats 16..23 per output pair. */
	movq 96(%ebx),%mm0
	movq 64(%ebx),%mm1

	movq 112(%ebx),%mm2
	pfadd %mm2,%mm0
	movq %mm0,%mm3
	pfadd %mm1,%mm3
	movd %mm3,960(%ebp)
	psrlq $32,%mm3
	movd %mm3,64(%edx)
	movq 80(%ebx),%mm1
	pfadd %mm1,%mm0
	movd %mm0,832(%ebp)
	psrlq $32,%mm0
	movd %mm0,192(%edx)
	movq 104(%ebx),%mm3
	pfadd %mm3,%mm2
	movq %mm2,%mm4
	pfadd %mm1,%mm4
	movd %mm4,704(%ebp)
	psrlq $32,%mm4
	movd %mm4,320(%edx)
	movq 72(%ebx),%mm1
	pfadd %mm1,%mm2
	movd %mm2,576(%ebp)
	psrlq $32,%mm2
	movd %mm2,448(%edx)

	movq 120(%ebx),%mm4
	pfadd %mm4,%mm3
	movq %mm3,%mm5
	pfadd %mm1,%mm5
	movd %mm5,448(%ebp)
	psrlq $32,%mm5
	movd %mm5,576(%edx)
	movq 88(%ebx),%mm1
	pfadd %mm1,%mm3
	movd %mm3,320(%ebp)
	psrlq $32,%mm3
	movd %mm3,704(%edx)

	/* single floats 25 and 17 (movd: high lane zero) close the chain */
	movd 100(%ebx),%mm5
	pfadd %mm5,%mm4
	movq %mm4,%mm6
	pfadd %mm1,%mm6
	movd %mm6,192(%ebp)
	psrlq $32,%mm6
	movd %mm6,832(%edx)
	movd 68(%ebx),%mm1
	pfadd %mm1,%mm4
	movd %mm4,64(%ebp)
	psrlq $32,%mm4
	movd %mm4,960(%edx)

	/* femms */

	/* restore the saved registers in reverse push order, drop the scratch */
	popl %ebx
	popl %esi
	popl %edi
	popl %ebp
	addl $256,%esp

	ret

#if defined(PIC) && defined(__APPLE__)
/* Non-lazy symbol pointer for pnts, only built for PIC on Apple targets:
   a 4-byte slot bound to the external symbol pnts.
   NOTE(review): presumably GLOBAL_VAR_PTR(pnts) in the function above
   resolves to this L_pnts slot - confirm in mangle.h. */
	.section	__IMPORT,__pointers,non_lazy_symbol_pointers
L_pnts:
	.indirect_symbol	ASM_NAME(pnts)
	.long	0
#endif

/* NOTE(review): macro not defined in this file; judging by its name it
   marks the object as not needing an executable stack - confirm in
   mangle.h. */
NONEXEC_STACK