|
Packit |
c32a2d |
/*
	dct36_3dnow.s: Replacement of dct36() with AMD's 3DNow! SIMD operations support

	copyright ?-2006 by the mpg123 project - free software under the terms of the LGPL 2.1
	see COPYING and AUTHORS files in distribution or http://mpg123.org
	initially written by Syuuhei Kashiyama

	This code is based on 'dct36_3dnow.s' by Syuuhei Kashiyama
	<squash@mb.kcom.ne.jp>,only two types of changes have been made:

	- remove PREFETCH instruction for speedup
	- change function name for support 3DNow! automatic detect

	You can find Kashiyama's original 3dnow! support patch
	(for mpg123-0.59o) at
	http://user.ecc.u-tokyo.ac.jp/~g810370/linux-simd/ (Japanese).

	by KIMURA Takuhiro <kim@hannah.ipc.miyakyo-u.ac.jp> - until 31.Mar.1999
	<kim@comtec.co.jp> - after 1.Apr.1999

	Replacement of dct36() with AMD's 3DNow! SIMD operations support

	Syuuhei Kashiyama <squash@mb.kcom.ne.jp>

	The author of this program disclaim whole expressed or implied
	warranties with regard to this program, and in no event shall the
	author of this program liable to whatever resulted from the use of
	this program. Use it at your own risk.
*/
|
|
Packit |
c32a2d |
|
|
Packit |
c32a2d |
#include "mangle.h"
|
|
Packit |
c32a2d |
|
|
Packit |
c32a2d |
/*
	dct36_3dnow(arg0, arg1, arg2, arg3, arg4)

	Syntax/ABI: AT&T syntax, 32-bit x86, preprocessed by cpp; all five
	arguments are taken from the stack relative to %ebp.  Every argument is
	used as the base address of an array of 4-byte floats (the 3DNow! pf*
	instructions work on two packed single-precision values per MMX register).

	Register roles and the shorthand used in the comments below:

	   8(%ebp) -> %eax   in[k]  = 4*k(%eax),   k = 0..17, modified in place
	  12(%ebp) -> %esi   add[k] = 4*k(%esi),   k = 0..17, read only
	  16(%ebp) -> %ecx   out[k] = 4*k(%ecx),   k = 0..17, written
	  20(%ebp) -> %edx   win[k] = 4*k(%edx),   k = 0..35, read only
	  24(%ebp) -> %ebx   ts[k]  = 128*k(%ebx), k = 0..17, written
	  c[k] = 4*k+_COS9_, t[k] = 4*k+_tfcos36_  (external tables)

	NOTE(review): the arguments presumably correspond to inbuf, o1, o2, wintab
	and tsbuf of the C dct36() - confirm against the caller.

	Saved/restored here: %ebp, %esi, %ebx.  %edi is only handled through the
	PREPARE_GOT / RESTORE_GOT macros from mangle.h (not visible in this file;
	presumably push/pop of _EBX_ in PIC builds - confirm).
	Clobbered: %eax, %ecx, %edx, %mm0-%mm7, flags.
	femms is executed on entry and again before returning.

	Lane convention: a movq from 8*m(%eax) loads (in[2m], in[2m+1]) as
	(low, high).  For every output group the code forms
	    a = low lane, b = high lane * t[k]
	and then stores
	    out[..] = (a + b) * win[..]
	    ts[..]  = (a - b) * win[..] + add[..]
*/
	.globl ASM_NAME(dct36_3dnow)
/*	.type ASM_NAME(dct36_3dnow),@function */
ASM_NAME(dct36_3dnow):
	pushl %ebp
	movl %esp,%ebp
#if defined(PIC) && defined(__APPLE__)
	/* one local slot at -4(%ebp): holds the tfcos36 table pointer */
	sub $4,%esp
#endif
	pushl %esi
	pushl %ebx

/* The GOT helper macros operate on _EBX_; point them at %edi because %ebx
   carries the fifth argument in this routine. */
#undef _EBX_
#define _EBX_ %edi
	PREPARE_GOT
	GET_GOT
#if defined(PIC) && defined(__APPLE__)
/* Table addresses are fetched once through the pointer stubs: COS9 stays in
   %edi, tfcos36 is parked at -4(%ebp) and reloaded into %eax around each use
   (the _tfcos36_ operand is %eax-relative in this configuration). */
#define _COS9_ 0(%edi)
#define _tfcos36_ 0(%eax)
	mov GLOBAL_VAR_PTR(tfcos36), %eax
	mov GLOBAL_VAR_PTR(COS9), %edi
	mov %eax, -4(%ebp)
#else
#define _COS9_ GLOBAL_VAR(COS9)
#define _tfcos36_ GLOBAL_VAR(tfcos36)
#endif

	/* load the five array arguments */
	movl 8(%ebp),%eax
	movl 12(%ebp),%esi
	movl 16(%ebp),%ecx
	movl 20(%ebp),%edx
	movl 24(%ebp),%ebx

	femms

	/* ---- pass 1: in[i] = in[i-1] + in[i] (old values) for i = 1..17 ----
	   Two results per step; the next pair is always loaded before the
	   previous store can overwrite it. */
	movq (%eax),%mm0
	movq 4(%eax),%mm1
	pfadd %mm1,%mm0
	movq %mm0,4(%eax)		/* in[1], in[2] */
	psrlq $32,%mm1
	movq 12(%eax),%mm2
	punpckldq %mm2,%mm1
	pfadd %mm2,%mm1
	movq %mm1,12(%eax)		/* in[3], in[4] */
	psrlq $32,%mm2
	movq 20(%eax),%mm3
	punpckldq %mm3,%mm2
	pfadd %mm3,%mm2
	movq %mm2,20(%eax)		/* in[5], in[6] */
	psrlq $32,%mm3
	movq 28(%eax),%mm4
	punpckldq %mm4,%mm3
	pfadd %mm4,%mm3
	movq %mm3,28(%eax)		/* in[7], in[8] */
	psrlq $32,%mm4
	movq 36(%eax),%mm5
	punpckldq %mm5,%mm4
	pfadd %mm5,%mm4
	movq %mm4,36(%eax)		/* in[9], in[10] */
	psrlq $32,%mm5
	movq 44(%eax),%mm6
	punpckldq %mm6,%mm5
	pfadd %mm6,%mm5
	movq %mm5,44(%eax)		/* in[11], in[12] */
	psrlq $32,%mm6
	movq 52(%eax),%mm7
	punpckldq %mm7,%mm6
	pfadd %mm7,%mm6
	movq %mm6,52(%eax)		/* in[13], in[14] */
	psrlq $32,%mm7
	movq 60(%eax),%mm0
	punpckldq %mm0,%mm7
	pfadd %mm0,%mm7
	movq %mm7,60(%eax)		/* in[15], in[16] */
	psrlq $32,%mm0
	movd 68(%eax),%mm1
	pfadd %mm1,%mm0
	movd %mm0,68(%eax)		/* in[17] */

	/* ---- pass 2: in[i] = in[i-2] + in[i] for odd i = 3..17 ----
	   (operands are the values left by pass 1, each read before it is
	   overwritten) */
	movd 4(%eax),%mm0
	movd 12(%eax),%mm1
	punpckldq %mm1,%mm0
	punpckldq 20(%eax),%mm1
	pfadd %mm1,%mm0
	movd %mm0,12(%eax)		/* in[3] */
	psrlq $32,%mm0
	movd %mm0,20(%eax)		/* in[5] */
	psrlq $32,%mm1
	movd 28(%eax),%mm2
	punpckldq %mm2,%mm1
	punpckldq 36(%eax),%mm2
	pfadd %mm2,%mm1
	movd %mm1,28(%eax)		/* in[7] */
	psrlq $32,%mm1
	movd %mm1,36(%eax)		/* in[9] */
	psrlq $32,%mm2
	movd 44(%eax),%mm3
	punpckldq %mm3,%mm2
	punpckldq 52(%eax),%mm3
	pfadd %mm3,%mm2
	movd %mm2,44(%eax)		/* in[11] */
	psrlq $32,%mm2
	movd %mm2,52(%eax)		/* in[13] */
	psrlq $32,%mm3
	movd 60(%eax),%mm4
	punpckldq %mm4,%mm3
	punpckldq 68(%eax),%mm4
	pfadd %mm4,%mm3
	movd %mm3,60(%eax)		/* in[15] */
	psrlq $32,%mm3
	movd %mm3,68(%eax)		/* in[17] */

	/* ---- constants kept live for the rest of the routine ----
	   mm0 = in[6..7]   * c[3]
	   mm1 = in[12..13] * c[6]
	   mm7 = (1.0, 0.0) */
	movq 24(%eax),%mm0
	movq 48(%eax),%mm1
	movd 12+_COS9_,%mm2
	punpckldq %mm2,%mm2
	movd 24+_COS9_,%mm3
	punpckldq %mm3,%mm3
	pfmul %mm2,%mm0
	pfmul %mm3,%mm1
	pushl %eax			/* borrow %eax to materialise integer 1 */
	movl $1,%eax
	movd %eax,%mm7			/* (1, 0) as integers */
	pi2fd %mm7,%mm7			/* -> (1.0, 0.0) as floats */
	popl %eax

	/* ---- group 0/8 ----
	   mm2 = in[2..3]*c[1] + mm0 + in[10..11]*c[5] + in[14..15]*c[7] */
	movq 8(%eax),%mm2
	movd 4+_COS9_,%mm3
	punpckldq %mm3,%mm3
	pfmul %mm3,%mm2
	pfadd %mm0,%mm2
	movq 40(%eax),%mm3
	movd 20+_COS9_,%mm4
	punpckldq %mm4,%mm4
	pfmul %mm4,%mm3
	pfadd %mm3,%mm2
	movq 56(%eax),%mm3
	movd 28+_COS9_,%mm4
	punpckldq %mm4,%mm4
	pfmul %mm4,%mm3
	pfadd %mm3,%mm2
	/* mm3 = in[0..1] + in[4..5]*c[2] + in[8..9]*c[4] + mm1 + in[16..17]*c[8] */
	movq (%eax),%mm3
	movq 16(%eax),%mm4
	movd 8+_COS9_,%mm5
	punpckldq %mm5,%mm5
	pfmul %mm5,%mm4
	pfadd %mm4,%mm3
	movq 32(%eax),%mm4
	movd 16+_COS9_,%mm5
	punpckldq %mm5,%mm5
	pfmul %mm5,%mm4
	pfadd %mm4,%mm3
	pfadd %mm1,%mm3
	movq 64(%eax),%mm4
	movd 32+_COS9_,%mm5
	punpckldq %mm5,%mm5
	pfmul %mm5,%mm4
	pfadd %mm4,%mm3
	/* (a, b) = (mm2 + mm3) * (1.0, t[0]) */
	movq %mm2,%mm4
	pfadd %mm3,%mm4
	movq %mm7,%mm5
#if defined(PIC) && defined(__APPLE__)
	mov -4(%ebp),%eax		/* %eax = tfcos36 pointer for _tfcos36_ */
#endif
	punpckldq 0+_tfcos36_,%mm5
	pfmul %mm5,%mm4
	movq %mm4,%mm5
	pfacc %mm5,%mm5			/* both lanes = a + b */
	movd 108(%edx),%mm6
	punpckldq 104(%edx),%mm6
	pfmul %mm6,%mm5
	movd %mm5,36(%ecx)		/* out[9] = (a+b)*win[27] */
	psrlq $32,%mm5
	movd %mm5,32(%ecx)		/* out[8] = (a+b)*win[26] */
	movq %mm4,%mm6
	punpckldq %mm6,%mm5
	pfsub %mm6,%mm5			/* high lane = a - b */
	punpckhdq %mm5,%mm5		/* both lanes = a - b */
	movd 32(%edx),%mm6
	punpckldq 36(%edx),%mm6
	pfmul %mm6,%mm5
	movd 32(%esi),%mm6
	punpckldq 36(%esi),%mm6
	pfadd %mm6,%mm5
	movd %mm5,1024(%ebx)		/* ts[8] = (a-b)*win[8] + add[8] */
	psrlq $32,%mm5
	movd %mm5,1152(%ebx)		/* ts[9] = (a-b)*win[9] + add[9] */
	/* (a, b) = (mm3 - mm2) * (1.0, t[8]) */
	movq %mm3,%mm4
	pfsub %mm2,%mm4
	movq %mm7,%mm5
	punpckldq 32+_tfcos36_,%mm5
#if defined(PIC) && defined(__APPLE__)
	mov 8(%ebp),%eax		/* %eax = input pointer again */
#endif
	pfmul %mm5,%mm4
	movq %mm4,%mm5
	pfacc %mm5,%mm5
	movd 140(%edx),%mm6
	punpckldq 72(%edx),%mm6
	pfmul %mm6,%mm5
	movd %mm5,68(%ecx)		/* out[17] = (a+b)*win[35] */
	psrlq $32,%mm5
	movd %mm5,0(%ecx)		/* out[0]  = (a+b)*win[18] */
	movq %mm4,%mm6
	punpckldq %mm6,%mm5
	pfsub %mm6,%mm5
	punpckhdq %mm5,%mm5
	movd 0(%edx),%mm6
	punpckldq 68(%edx),%mm6
	pfmul %mm6,%mm5
	movd 0(%esi),%mm6
	punpckldq 68(%esi),%mm6
	pfadd %mm6,%mm5
	movd %mm5,0(%ebx)		/* ts[0]  = (a-b)*win[0]  + add[0] */
	psrlq $32,%mm5
	movd %mm5,2176(%ebx)		/* ts[17] = (a-b)*win[17] + add[17] */

	/* ---- group 1/7 ----
	   mm2 = (in[2..3] - in[10..11] - in[14..15]) * c[3] */
	movq 8(%eax),%mm2
	movq 40(%eax),%mm3
	pfsub %mm3,%mm2
	movq 56(%eax),%mm3
	pfsub %mm3,%mm2
	movd 12+_COS9_,%mm3
	punpckldq %mm3,%mm3
	pfmul %mm3,%mm2
	/* mm3 = (in[4..5] - in[8..9] - in[16..17]) * c[6] - in[12..13] + in[0..1] */
	movq 16(%eax),%mm3
	movq 32(%eax),%mm4
	pfsub %mm4,%mm3
	movq 64(%eax),%mm4
	pfsub %mm4,%mm3
	movd 24+_COS9_,%mm4
	punpckldq %mm4,%mm4
	pfmul %mm4,%mm3
	movq 48(%eax),%mm4
	pfsub %mm4,%mm3
	movq (%eax),%mm4
	pfadd %mm4,%mm3
	/* (a, b) = (mm2 + mm3) * (1.0, t[1]) */
	movq %mm2,%mm4
	pfadd %mm3,%mm4
	movq %mm7,%mm5
#if defined(PIC) && defined(__APPLE__)
	mov -4(%ebp),%eax
#endif
	punpckldq 4+_tfcos36_,%mm5
	pfmul %mm5,%mm4
	movq %mm4,%mm5
	pfacc %mm5,%mm5
	movd 112(%edx),%mm6
	punpckldq 100(%edx),%mm6
	pfmul %mm6,%mm5
	movd %mm5,40(%ecx)		/* out[10] = (a+b)*win[28] */
	psrlq $32,%mm5
	movd %mm5,28(%ecx)		/* out[7]  = (a+b)*win[25] */
	movq %mm4,%mm6
	punpckldq %mm6,%mm5
	pfsub %mm6,%mm5
	punpckhdq %mm5,%mm5
	movd 28(%edx),%mm6
	punpckldq 40(%edx),%mm6
	pfmul %mm6,%mm5
	movd 28(%esi),%mm6
	punpckldq 40(%esi),%mm6
	pfadd %mm6,%mm5
	movd %mm5,896(%ebx)		/* ts[7]  = (a-b)*win[7]  + add[7] */
	psrlq $32,%mm5
	movd %mm5,1280(%ebx)		/* ts[10] = (a-b)*win[10] + add[10] */
	/* (a, b) = (mm3 - mm2) * (1.0, t[7]) */
	movq %mm3,%mm4
	pfsub %mm2,%mm4
	movq %mm7,%mm5
	punpckldq 28+_tfcos36_,%mm5
#if defined(PIC) && defined(__APPLE__)
	mov 8(%ebp),%eax
#endif
	pfmul %mm5,%mm4
	movq %mm4,%mm5
	pfacc %mm5,%mm5
	movd 136(%edx),%mm6
	punpckldq 76(%edx),%mm6
	pfmul %mm6,%mm5
	movd %mm5,64(%ecx)		/* out[16] = (a+b)*win[34] */
	psrlq $32,%mm5
	movd %mm5,4(%ecx)		/* out[1]  = (a+b)*win[19] */
	movq %mm4,%mm6
	punpckldq %mm6,%mm5
	pfsub %mm6,%mm5
	punpckhdq %mm5,%mm5
	movd 4(%edx),%mm6
	punpckldq 64(%edx),%mm6
	pfmul %mm6,%mm5
	movd 4(%esi),%mm6
	punpckldq 64(%esi),%mm6
	pfadd %mm6,%mm5
	movd %mm5,128(%ebx)		/* ts[1]  = (a-b)*win[1]  + add[1] */
	psrlq $32,%mm5
	movd %mm5,2048(%ebx)		/* ts[16] = (a-b)*win[16] + add[16] */

	/* ---- group 2/6 ----
	   mm2 = in[2..3]*c[5] - mm0 - in[10..11]*c[7] + in[14..15]*c[1] */
	movq 8(%eax),%mm2
	movd 20+_COS9_,%mm3
	punpckldq %mm3,%mm3
	pfmul %mm3,%mm2
	pfsub %mm0,%mm2
	movq 40(%eax),%mm3
	movd 28+_COS9_,%mm4
	punpckldq %mm4,%mm4
	pfmul %mm4,%mm3
	pfsub %mm3,%mm2
	movq 56(%eax),%mm3
	movd 4+_COS9_,%mm4
	punpckldq %mm4,%mm4
	pfmul %mm4,%mm3
	pfadd %mm3,%mm2
	/* mm3 = in[0..1] - in[4..5]*c[8] - in[8..9]*c[2] + mm1 + in[16..17]*c[4] */
	movq (%eax),%mm3
	movq 16(%eax),%mm4
	movd 32+_COS9_,%mm5
	punpckldq %mm5,%mm5
	pfmul %mm5,%mm4
	pfsub %mm4,%mm3
	movq 32(%eax),%mm4
	movd 8+_COS9_,%mm5
	punpckldq %mm5,%mm5
	pfmul %mm5,%mm4
	pfsub %mm4,%mm3
	pfadd %mm1,%mm3
	movq 64(%eax),%mm4
	movd 16+_COS9_,%mm5
	punpckldq %mm5,%mm5
	pfmul %mm5,%mm4
	pfadd %mm4,%mm3
	/* (a, b) = (mm2 + mm3) * (1.0, t[2]) */
	movq %mm2,%mm4
	pfadd %mm3,%mm4
	movq %mm7,%mm5
#if defined(PIC) && defined(__APPLE__)
	mov -4(%ebp),%eax
#endif
	punpckldq 8+_tfcos36_,%mm5
	pfmul %mm5,%mm4
	movq %mm4,%mm5
	pfacc %mm5,%mm5
	movd 116(%edx),%mm6
	punpckldq 96(%edx),%mm6
	pfmul %mm6,%mm5
	movd %mm5,44(%ecx)		/* out[11] = (a+b)*win[29] */
	psrlq $32,%mm5
	movd %mm5,24(%ecx)		/* out[6]  = (a+b)*win[24] */
	movq %mm4,%mm6
	punpckldq %mm6,%mm5
	pfsub %mm6,%mm5
	punpckhdq %mm5,%mm5
	movd 24(%edx),%mm6
	punpckldq 44(%edx),%mm6
	pfmul %mm6,%mm5
	movd 24(%esi),%mm6
	punpckldq 44(%esi),%mm6
	pfadd %mm6,%mm5
	movd %mm5,768(%ebx)		/* ts[6]  = (a-b)*win[6]  + add[6] */
	psrlq $32,%mm5
	movd %mm5,1408(%ebx)		/* ts[11] = (a-b)*win[11] + add[11] */
	/* (a, b) = (mm3 - mm2) * (1.0, t[6]) */
	movq %mm3,%mm4
	pfsub %mm2,%mm4
	movq %mm7,%mm5
	punpckldq 24+_tfcos36_,%mm5
#if defined(PIC) && defined(__APPLE__)
	mov 8(%ebp),%eax
#endif
	pfmul %mm5,%mm4
	movq %mm4,%mm5
	pfacc %mm5,%mm5
	movd 132(%edx),%mm6
	punpckldq 80(%edx),%mm6
	pfmul %mm6,%mm5
	movd %mm5,60(%ecx)		/* out[15] = (a+b)*win[33] */
	psrlq $32,%mm5
	movd %mm5,8(%ecx)		/* out[2]  = (a+b)*win[20] */
	movq %mm4,%mm6
	punpckldq %mm6,%mm5
	pfsub %mm6,%mm5
	punpckhdq %mm5,%mm5
	movd 8(%edx),%mm6
	punpckldq 60(%edx),%mm6
	pfmul %mm6,%mm5
	movd 8(%esi),%mm6
	punpckldq 60(%esi),%mm6
	pfadd %mm6,%mm5
	movd %mm5,256(%ebx)		/* ts[2]  = (a-b)*win[2]  + add[2] */
	psrlq $32,%mm5
	movd %mm5,1920(%ebx)		/* ts[15] = (a-b)*win[15] + add[15] */

	/* ---- group 3/5 ----
	   mm2 = in[2..3]*c[7] - mm0 + in[10..11]*c[1] - in[14..15]*c[5] */
	movq 8(%eax),%mm2
	movd 28+_COS9_,%mm3
	punpckldq %mm3,%mm3
	pfmul %mm3,%mm2
	pfsub %mm0,%mm2
	movq 40(%eax),%mm3
	movd 4+_COS9_,%mm4
	punpckldq %mm4,%mm4
	pfmul %mm4,%mm3
	pfadd %mm3,%mm2
	movq 56(%eax),%mm3
	movd 20+_COS9_,%mm4
	punpckldq %mm4,%mm4
	pfmul %mm4,%mm3
	pfsub %mm3,%mm2
	/* mm3 = in[0..1] - in[4..5]*c[4] + in[8..9]*c[8] + mm1 - in[16..17]*c[2] */
	movq (%eax),%mm3
	movq 16(%eax),%mm4
	movd 16+_COS9_,%mm5
	punpckldq %mm5,%mm5
	pfmul %mm5,%mm4
	pfsub %mm4,%mm3
	movq 32(%eax),%mm4
	movd 32+_COS9_,%mm5
	punpckldq %mm5,%mm5
	pfmul %mm5,%mm4
	pfadd %mm4,%mm3
	pfadd %mm1,%mm3
	movq 64(%eax),%mm4
	movd 8+_COS9_,%mm5
	punpckldq %mm5,%mm5
	pfmul %mm5,%mm4
	pfsub %mm4,%mm3
	/* (a, b) = (mm2 + mm3) * (1.0, t[3]) */
	movq %mm2,%mm4
	pfadd %mm3,%mm4
	movq %mm7,%mm5
#if defined(PIC) && defined(__APPLE__)
	mov -4(%ebp),%eax
#endif
	punpckldq 12+_tfcos36_,%mm5
	pfmul %mm5,%mm4
	movq %mm4,%mm5
	pfacc %mm5,%mm5
	movd 120(%edx),%mm6
	punpckldq 92(%edx),%mm6
	pfmul %mm6,%mm5
	movd %mm5,48(%ecx)		/* out[12] = (a+b)*win[30] */
	psrlq $32,%mm5
	movd %mm5,20(%ecx)		/* out[5]  = (a+b)*win[23] */
	movq %mm4,%mm6
	punpckldq %mm6,%mm5
	pfsub %mm6,%mm5
	punpckhdq %mm5,%mm5
	movd 20(%edx),%mm6
	punpckldq 48(%edx),%mm6
	pfmul %mm6,%mm5
	movd 20(%esi),%mm6
	punpckldq 48(%esi),%mm6
	pfadd %mm6,%mm5
	movd %mm5,640(%ebx)		/* ts[5]  = (a-b)*win[5]  + add[5] */
	psrlq $32,%mm5
	movd %mm5,1536(%ebx)		/* ts[12] = (a-b)*win[12] + add[12] */
	/* (a, b) = (mm3 - mm2) * (1.0, t[5]) */
	movq %mm3,%mm4
	pfsub %mm2,%mm4
	movq %mm7,%mm5
	punpckldq 20+_tfcos36_,%mm5
#if defined(PIC) && defined(__APPLE__)
	mov 8(%ebp),%eax
#endif
	pfmul %mm5,%mm4
	movq %mm4,%mm5
	pfacc %mm5,%mm5
	movd 128(%edx),%mm6
	punpckldq 84(%edx),%mm6
	pfmul %mm6,%mm5
	movd %mm5,56(%ecx)		/* out[14] = (a+b)*win[32] */
	psrlq $32,%mm5
	movd %mm5,12(%ecx)		/* out[3]  = (a+b)*win[21] */
	movq %mm4,%mm6
	punpckldq %mm6,%mm5
	pfsub %mm6,%mm5
	punpckhdq %mm5,%mm5
	movd 12(%edx),%mm6
	punpckldq 56(%edx),%mm6
	pfmul %mm6,%mm5
	movd 12(%esi),%mm6
	punpckldq 56(%esi),%mm6
	pfadd %mm6,%mm5
	movd %mm5,384(%ebx)		/* ts[3]  = (a-b)*win[3]  + add[3] */
	psrlq $32,%mm5
	movd %mm5,1792(%ebx)		/* ts[14] = (a-b)*win[14] + add[14] */

	/* ---- group 4 (middle) ----
	   mm4 = in[0..1] - in[4..5] + in[8..9] - in[12..13] + in[16..17] */
	movq (%eax),%mm4
	movq 16(%eax),%mm3
	pfsub %mm3,%mm4
	movq 32(%eax),%mm3
	pfadd %mm3,%mm4
	movq 48(%eax),%mm3
	pfsub %mm3,%mm4
	movq 64(%eax),%mm3
	pfadd %mm3,%mm4
	/* (a, b) = mm4 * (1.0, t[4]) */
	movq %mm7,%mm5
#if defined(PIC) && defined(__APPLE__)
	mov -4(%ebp),%eax		/* input pointer is not needed after this */
#endif
	punpckldq 16+_tfcos36_,%mm5
	pfmul %mm5,%mm4
	movq %mm4,%mm5
	pfacc %mm5,%mm5
	movd 124(%edx),%mm6
	punpckldq 88(%edx),%mm6
	pfmul %mm6,%mm5
	movd %mm5,52(%ecx)		/* out[13] = (a+b)*win[31] */
	psrlq $32,%mm5
	movd %mm5,16(%ecx)		/* out[4]  = (a+b)*win[22] */
	movq %mm4,%mm6
	punpckldq %mm6,%mm5
	pfsub %mm6,%mm5
	punpckhdq %mm5,%mm5
	movd 16(%edx),%mm6
	punpckldq 52(%edx),%mm6
	pfmul %mm6,%mm5
	movd 16(%esi),%mm6
	punpckldq 52(%esi),%mm6
	pfadd %mm6,%mm5
	movd %mm5,512(%ebx)		/* ts[4]  = (a-b)*win[4]  + add[4] */
	psrlq $32,%mm5
	movd %mm5,1664(%ebx)		/* ts[13] = (a-b)*win[13] + add[13] */

	/* ---- epilogue: leave MMX state, undo the prologue in reverse ---- */
	femms
	RESTORE_GOT
	popl %ebx
	popl %esi
	movl %ebp,%esp			/* also drops the Apple/PIC local slot */
	popl %ebp
	ret
|
|
Packit |
c32a2d |
|
|
Packit |
c32a2d |
#if defined(PIC) && defined(__APPLE__)
/* Non-lazy symbol pointer stubs for the two external tables.  Each slot is
   tied to its symbol with .indirect_symbol and initialised to 0; the routine
   above reads the slots through GLOBAL_VAR_PTR(tfcos36) / GLOBAL_VAR_PTR(COS9)
   (macro from mangle.h - presumably expands to an L_<name> reference relative
   to the PIC base; confirm there). */
	.section __IMPORT,__pointers,non_lazy_symbol_pointers
L_tfcos36:
	.indirect_symbol ASM_NAME(tfcos36)
	.long 0
L_COS9:
	.indirect_symbol ASM_NAME(COS9)
	.long 0
#endif

/* Macro supplied by mangle.h; presumably marks the object as not needing an
   executable stack - confirm against mangle.h. */
NONEXEC_STACK