/*
	synth_mmx: MMX optimized synth

	copyright ?-2006 by the mpg123 project - free software under the terms of the LGPL 2.1
	see COPYING and AUTHORS files in distribution or http://mpg123.org
	initially written by the mysterious higway (apparently)

	Thomas' notes about the LGPL transition:

	Initially, I found the note "this code comes under GPL" in this file.
	After asking Michael Hipp about legal status of the MMX files, he said
	that he received them without any comment and thus I believe that the GPL
	comment was made by Michael, since he made mpg123 GPL at some time - and
	marked some files that way, but not all. The contributor accepted the
	license terms of mpg123 back then, which included Michael deciding on
	license issues.

	Based on that thought, I now consider this file along with the other parts
	of higway's MMX optimisation to be licensed under LGPL 2.1 by Michael's
	decision.
*/

#include "mangle.h"

.text

.globl ASM_NAME(synth_1to1_MMX)
/*
	int synth_1to1_MMX(real *bandPtr, int channel, short *out, short *buffs, int *bo, float *decwins);

	32-bit x86, AT&T syntax. All arguments are read from the stack and the
	arguments pushed for dct64_MMX are removed by this function after the call.

	What the code below does:
	  - unless channel == 1, *bo is replaced by (*bo - 1) & 15 and the work
	    area starts 1088 bytes into buffs; for channel == 1 *bo is left
	    alone and the work area starts at buffs itself;
	  - dct64_MMX is called with two destinations inside that work area
	    (544 bytes apart, chosen by the parity of bo) and bandPtr;
	  - 32 output values are written as 16-bit stores, one every 4 bytes,
	    starting at out + 2*channel bytes: 17 from the first loop, then 15
	    negated ones from the second loop.

	NOTE(review): the prototype declares decwins as float*, but the loops read
	it (and buffs) with movq/pmaddwd, i.e. as packed signed 16-bit words.
	NOTE(review): the prototype returns int, but eax is never set as a result;
	at ret it still holds the last movd value. Presumably callers ignore the
	return value - confirm before relying on it.
	NOTE(review): ebx, ebp, esi and edi are live across the call, so this
	relies on dct64_MMX preserving them - confirm against dct64_MMX.
*/
ASM_NAME(synth_1to1_MMX):
	pushl	%ebp
	pushl	%edi
	pushl	%esi
	pushl	%ebx
/* stack: 0=ebx, 4=esi, 8=edi, 12=ebp, 16=back, 20=bandPtr, 24=channel, 28=out, 32=buffs, 36=bo, 40=decwins */
	movl	24(%esp),%ecx		/* ecx = channel */
	movl	28(%esp),%edi		/* edi = out */
	movl	$15,%ebx		/* ebx = 15, mask used to wrap bo */
	movl	36(%esp),%edx		/* edx = bo (pointer) */
	leal	(%edi,%ecx,2),%edi	/* edi = out + 2*channel bytes */
	decl	%ecx			/* ecx = channel - 1, zero only for channel 1 */
	movl	32(%esp),%esi		/* esi = buffs */
	movl	(%edx),%eax		/* eax = *bo */
	jecxz	1f			/* channel 1: keep *bo and use buffs + 0 */
	decl	%eax
	andl	%ebx,%eax		/* eax = (*bo - 1) & 15 */
	leal	1088(%esi),%esi		/* work area = buffs + 1088 bytes */
	movl	%eax,(%edx)		/* write the new value back to *bo */
1:
	leal	(%esi,%eax,2),%edx	/* edx = esi + 2*bo */
	movl	%eax,%ebp		/* ebp = bo */
	incl	%eax
	pushl	20(%esp)		/* last dct64_MMX argument: bandPtr */
	andl	%ebx,%eax		/* eax = (bo + 1) & 15 */
	leal	544(%esi,%eax,2),%ecx	/* ecx = esi + 544 + 2*((bo + 1) & 15) */
	incl	%ebx			/* ebx = 16 */
	testl	$1, %eax
	jnz	2f			/* (bo + 1) odd, i.e. bo even: keep the setup above */
	xchgl	%edx,%ecx		/* bo odd: swap the two dct64_MMX destinations, */
	incl	%ebp			/* use ebp = bo + 1 for the window offset, */
	leal	544(%esi),%esi		/* and read the buffer 544 bytes further on */
2:
	pushl	%edx			/* second dct64_MMX argument */
	pushl	%ecx			/* first dct64_MMX argument */
	call	FUNC(dct64_MMX)
	addl	$12,%esp		/* drop the three pushed arguments */
/* stack like before, pushed 3, incremented again */
	leal	1(%ebx), %ecx		/* ecx = 17, iteration count of loop 3 */
	subl	%ebp,%ebx		/* ebx = 16 - ebp */
	pushl	%eax			/* eax is only borrowed as scratch here */
	movl	44(%esp),%eax		/* decwins (40, plus 4 for the push above) */
	leal	(%eax,%ebx,2), %edx	/* edx = decwins + 2*(16 - ebp) bytes */
	popl	%eax
/*
	Loop 3, 17 iterations: multiply 16 words at (%edx) with 16 words at (%esi),
	add all products, shift right by 13 and saturate to a signed 16-bit value.
	Per iteration: esi += 32, edx += 64, edi += 4.
*/
3:
	movq	(%edx),%mm0
	pmaddwd	(%esi),%mm0
	movq	8(%edx),%mm1
	pmaddwd	8(%esi),%mm1
	movq	16(%edx),%mm2
	pmaddwd	16(%esi),%mm2
	movq	24(%edx),%mm3
	pmaddwd	24(%esi),%mm3
	paddd	%mm1,%mm0
	paddd	%mm2,%mm0
	paddd	%mm3,%mm0		/* mm0 = two 32-bit partial sums */
	movq	%mm0,%mm1
	psrlq	$32,%mm1		/* bring the high partial sum down */
	paddd	%mm1,%mm0		/* low dword of mm0 = full sum */
	psrad	$13,%mm0
	packssdw	%mm0,%mm0		/* saturate to signed 16 bit */
	movd	%mm0,%eax
	movw	%ax, (%edi)		/* store one 16-bit output value */

	leal	32(%esi),%esi
	leal	64(%edx),%edx
	leal	4(%edi),%edi		/* next output slot is 4 bytes further */
	loop	3b


	subl	$64,%esi		/* step esi back by 64 bytes; loop 4 walks it downwards */
	movl	$15,%ecx		/* iteration count of loop 4 */
/*
	Loop 4, 15 iterations: same sum of products as loop 3, but the saturated
	result is negated before it is stored.
	Per iteration: esi -= 32, edx += 64, edi += 4.
*/
4:
	movq	(%edx),%mm0
	pmaddwd	(%esi),%mm0
	movq	8(%edx),%mm1
	pmaddwd	8(%esi),%mm1
	movq	16(%edx),%mm2
	pmaddwd	16(%esi),%mm2
	movq	24(%edx),%mm3
	pmaddwd	24(%esi),%mm3
	paddd	%mm1,%mm0
	paddd	%mm2,%mm0
	paddd	%mm3,%mm0
	movq	%mm0,%mm1
	psrlq	$32,%mm1
	paddd	%mm0,%mm1		/* low dword of mm1 = full sum */
	psrad	$13,%mm1
	packssdw	%mm1,%mm1		/* saturate to signed 16 bit */
	psubd	%mm0,%mm0		/* mm0 = 0 */
	psubsw	%mm1,%mm0		/* mm0 = 0 - mm1 with signed saturation */
	movd	%mm0,%eax
	movw	%ax,(%edi)

	subl	$32,%esi
	addl	$64,%edx
	leal	4(%edi),%edi
	loop	4b
	emms				/* leave MMX state before returning */
	popl	%ebx
	popl	%esi
	popl	%edi
	popl	%ebp
	ret

/*
	Only assembled when both PIC and __APPLE__ are defined: places an indirect
	symbol entry for dct64_MMX, labelled L_dct64_MMX, in the __IMPORT,__jump_table
	section. The five one-byte hlt instructions fill the entry; their count
	matches the trailing 5 of the .section directive (presumably the stub size).
	NOTE(review): FUNC() is not defined in this file; presumably it expands to
	L_dct64_MMX in this configuration - confirm in mangle.h.
*/
#if defined(PIC) && defined(__APPLE__)
	.section __IMPORT,__jump_table,symbol_stubs,self_modifying_code+pure_instructions,5
L_dct64_MMX:
	.indirect_symbol ASM_NAME(dct64_MMX)
	hlt ; hlt ; hlt ; hlt ; hlt
#endif

/* NONEXEC_STACK is not defined in this file; presumably a mangle.h macro that marks the stack as non-executable - TODO confirm. */
NONEXEC_STACK