/*
	synth_mmx: MMX optimized synth

	copyright ?-2006 by the mpg123 project - free software under the terms of the LGPL 2.1
	see COPYING and AUTHORS files in distribution or http://mpg123.org
	initially written by the mysterious higway (apparently)

	Thomas' notes about the LGPL transition:

	Initially, I found the note "this code comes under GPL" in this file.
	After asking Michael Hipp about legal status of the MMX files, he said
	that he received them without any comment and thus I believe that the GPL
	comment was made by Michael, since he made mpg123 GPL at some time - and
	marked some files that way, but not all. The contributor accepted the
	license terms of mpg123 back then, which included Michael deciding on
	license issues.

	Based on that thought, I now consider this file along with the other parts
	of higway's MMX optimisation to be licensed under LGPL 2.1 by Michael's
	decision.
*/
|
|
Packit |
c32a2d |
|
|
Packit |
c32a2d |
#include "mangle.h"
|
|
Packit |
c32a2d |
|
|
Packit |
c32a2d |
.text
|
|
Packit |
c32a2d |
|
|
Packit |
c32a2d |
/*
	int synth_1to1_MMX(real *bandPtr, int channel, short *out, short *buffs, int *bo, float *decwins);

	32-bit x86, AT&T syntax, MMX. All arguments are read from the stack and
	the caller removes them (plain ret).

	Calls dct64_MMX on bandPtr with two output pointers inside buffs that are
	chosen from channel and *bo, then stores 32 samples of 16 bit each to
	out[channel], out[channel+2], ... (every second short). Each sample is a
	sum of 16 word products between the table at decwins and the work buffer,
	arithmetically shifted right by 13 bits and saturated to signed 16 bit.
	The first 17 samples are stored as computed, the last 15 are negated.

	pmaddwd reads both the decwins table and the work buffer as packed signed
	16-bit words, whatever pointer types the prototype above declares.

	Side effects: for channel != 1, *bo is replaced by (*bo - 1) & 15.
	%eax is never loaded with a dedicated return value; on return it holds
	whatever the final movd left there.
	%ebx, %esi, %edi and %ebp are saved and restored here, and their values are
	expected to survive the call to dct64_MMX. The MMX state is cleared with
	emms before returning.
*/
.globl ASM_NAME(synth_1to1_MMX)
ASM_NAME(synth_1to1_MMX):
	pushl	%ebp
	pushl	%edi
	pushl	%esi
	pushl	%ebx
	/* stack: 0=ebx, 4=esi, 8=edi, 12=ebp, 16=back, 20=bandPtr, 24=channel, 28=out, 32=buffs, 36=bo, 40=decwins */
	movl	24(%esp), %ecx		/* ecx = channel */
	movl	28(%esp), %edi		/* edi = out */
	movl	$15, %ebx		/* ebx = 15, mask applied to the bo counter */
	movl	36(%esp), %edx		/* edx = bo */
	leal	(%edi,%ecx,2), %edi	/* edi = &out[channel] */
	decl	%ecx			/* ecx = channel - 1 */
	movl	32(%esp), %esi		/* esi = buffs */
	movl	(%edx), %eax		/* eax = *bo */
	jecxz	1f			/* channel == 1: leave *bo and esi untouched */
	decl	%eax
	andl	%ebx, %eax		/* eax = (*bo - 1) & 15 */
	leal	1088(%esi), %esi	/* esi = buffs + 1088 bytes */
	movl	%eax, (%edx)		/* write the stepped counter back to *bo */
1:
	/* From here on "bo" means the counter value now held in eax. */
	leal	(%esi,%eax,2), %edx	/* edx = esi + 2*bo */
	movl	%eax, %ebp		/* ebp = bo */
	incl	%eax
	pushl	20(%esp)		/* last dct64_MMX argument: bandPtr */
	andl	%ebx, %eax		/* eax = (bo + 1) & 15 */
	leal	544(%esi,%eax,2), %ecx	/* ecx = esi + 544 + 2*((bo + 1) & 15) */
	incl	%ebx			/* ebx = 16 */
	testl	$1, %eax
	jnz	2f			/* (bo + 1) & 15 is odd, i.e. bo is even: keep this setup */
	/* bo is odd: swap the two output pointers, count from bo + 1 and read from esi + 544 */
	xchgl	%edx, %ecx
	incl	%ebp
	leal	544(%esi), %esi
2:
	pushl	%edx
	pushl	%ecx
	call	FUNC(dct64_MMX)		/* arguments, first to last: ecx, edx, bandPtr */
	addl	$12, %esp
	/* the three pushed arguments are gone, stack offsets are as listed at the top */
	leal	1(%ebx), %ecx		/* ecx = 17 iterations for the first loop */
	subl	%ebp, %ebx		/* ebx = 16 - ebp */
	movl	40(%esp), %edx		/* decwins */
	leal	(%edx,%ebx,2), %edx	/* edx = decwins + 2*(16 - ebp) bytes */

	/* First loop: 17 samples, work buffer walked upwards in 32-byte steps. */
3:
	movq	(%edx), %mm0
	pmaddwd	(%esi), %mm0
	movq	8(%edx), %mm1
	pmaddwd	8(%esi), %mm1
	movq	16(%edx), %mm2
	pmaddwd	16(%esi), %mm2
	movq	24(%edx), %mm3
	pmaddwd	24(%esi), %mm3
	paddd	%mm1, %mm0
	paddd	%mm2, %mm0
	paddd	%mm3, %mm0
	movq	%mm0, %mm1
	psrlq	$32, %mm1
	paddd	%mm1, %mm0		/* low dword = sum of all 16 products */
	psrad	$13, %mm0
	packssdw	%mm0, %mm0	/* saturate to signed 16 bit */
	movd	%mm0, %eax
	movw	%ax, (%edi)

	leal	32(%esi), %esi		/* next 16 words of the work buffer */
	leal	64(%edx), %edx		/* window table advances 64 bytes per sample */
	leal	4(%edi), %edi		/* skip one short in out */
	decl	%ecx
	jnz	3b

	/* Second loop: 15 samples, work buffer walked downwards, results negated. */
	subl	$64, %esi		/* back up two 32-byte steps from where the first loop stopped */
	movl	$15, %ecx
4:
	movq	(%edx), %mm0
	pmaddwd	(%esi), %mm0
	movq	8(%edx), %mm1
	pmaddwd	8(%esi), %mm1
	movq	16(%edx), %mm2
	pmaddwd	16(%esi), %mm2
	movq	24(%edx), %mm3
	pmaddwd	24(%esi), %mm3
	paddd	%mm1, %mm0
	paddd	%mm2, %mm0
	paddd	%mm3, %mm0
	movq	%mm0, %mm1
	psrlq	$32, %mm1
	paddd	%mm0, %mm1		/* low dword = sum of all 16 products */
	psrad	$13, %mm1
	packssdw	%mm1, %mm1	/* saturate to signed 16 bit */
	psubd	%mm0, %mm0		/* mm0 = 0 */
	psubsw	%mm1, %mm0		/* mm0 = 0 - sample, with signed saturation */
	movd	%mm0, %eax
	movw	%ax, (%edi)

	subl	$32, %esi
	addl	$64, %edx
	leal	4(%edi), %edi
	decl	%ecx
	jnz	4b

	emms
	popl	%ebx
	popl	%esi
	popl	%edi
	popl	%ebp
	ret
|
|
Packit |
c32a2d |
|
|
Packit |
c32a2d |
#if defined(PIC) && defined(__APPLE__)
/*
	Mach-O symbol stub for dct64_MMX, assembled only for PIC builds on Apple.
	The section is declared with the symbol_stubs type, the self_modifying_code
	attribute and a stub size of 5 bytes; the five one-byte hlt instructions
	fill exactly one such slot.
	NOTE(review): presumably the dynamic linker overwrites this slot with a jump
	to the resolved dct64_MMX, and FUNC(dct64_MMX) from mangle.h expands to
	L_dct64_MMX in this configuration - confirm against mangle.h.
*/
	.section __IMPORT,__jump_table,symbol_stubs,self_modifying_code+pure_instructions,5
L_dct64_MMX:
	.indirect_symbol ASM_NAME(dct64_MMX)
	hlt ; hlt ; hlt ; hlt ; hlt
#endif
|
|
Packit |
c32a2d |
|
|
Packit |
c32a2d |
/* NONEXEC_STACK is not defined in this file; presumably a macro from mangle.h that marks this object as not needing an executable stack - confirm. */
NONEXEC_STACK
|