Blame src/libmpg123/synth_i586.S

Packit c32a2d
/*
Packit c32a2d
 decode_i586: asm synth
Packit c32a2d
Packit c32a2d
 copyright ?-2006 by the mpg123 project - free software under the terms of the LGPL 2.1
Packit c32a2d
 see COPYING and AUTHORS files in distribution or http://mpg123.org
Packit c32a2d
 initially written by Stefan Bieschewski
Packit c32a2d
 
Packit c32a2d
 synth_1to1 works the same way as the c version of this
Packit c32a2d
 file.  only two types of changes have been made:
Packit c32a2d
 - reordered floating point instructions to
Packit c32a2d
   prevent pipeline stalls
Packit c32a2d
 - made WRITE_SAMPLE use integer instead of
Packit c32a2d
   (slower) floating point
Packit c32a2d
 all kinds of x86 processors should benefit from these
Packit c32a2d
 modifications.
Packit c32a2d
Packit c32a2d
 useful sources of information on optimizing x86 code include:
Packit c32a2d
Packit c32a2d
     Intel Architecture Optimization Manual
Packit c32a2d
     http://www.intel.com/design/pentium/manuals/242816.htm
Packit c32a2d
Packit c32a2d
     Cyrix 6x86 Instruction Set Summary
Packit c32a2d
     ftp://ftp.cyrix.com/6x86/6x-dbch6.pdf
Packit c32a2d
Packit c32a2d
     AMD-K5 Processor Software Development
Packit c32a2d
     http://www.amd.com/products/cpg/techdocs/appnotes/20007e.pdf
Packit c32a2d
Packit c32a2d
 Stefan Bieschewski <stb@acm.org>
Packit c32a2d
Packit c32a2d
 $Id: decode_i586.s 1 2004-09-18 13:30:08Z thomas $
Packit c32a2d
*/
Packit c32a2d
Packit c32a2d
#include "mangle.h"
Packit c32a2d
Packit c32a2d
/*
 Constant data (placed in .rodata except on Apple targets, where it stays in .data).
 Read as little-endian IEEE-754 doubles (low word first), .LC0 is 32767.0 and
 .LC1 is -32768.0, i.e. the limits of a signed 16-bit sample.
 Neither label is referenced by the code visible in this file, which clips
 with integer compares instead.
 ALIGN8 is not defined in this file (presumably provided by mangle.h).
*/
.data
Packit c32a2d
#ifndef __APPLE__
Packit c32a2d
.section .rodata
Packit c32a2d
#endif
Packit c32a2d
	ALIGN8
Packit c32a2d
.LC0:
Packit c32a2d
	.long 0x0,0x40dfffc0 /* low word, high word: 32767.0 as a double */
Packit c32a2d
	ALIGN8
Packit c32a2d
.LC1:
Packit c32a2d
	.long 0x0,0xc0e00000 /* low word, high word: -32768.0 as a double */
Packit c32a2d
	ALIGN8
Packit c32a2d
.text
Packit c32a2d
/* int synth_1to1_i586_asm(real *bandPtr, int channel, unsigned char *out, unsigned char *buffs, int *bo, real *decwin); */
/*
 Entry, buffer selection, dct64 call and window pointer setup.

 All arguments are read from the stack (offsets are listed after the prologue).
 Return value: %eax = number of output samples that had to be clipped
 (the %edi counter, copied to %eax just before ret).
 %ebx, %esi, %edi and %ebp are saved here and restored in the epilogue;
 %eax, %ecx, %edx and the x87 register stack are used as scratch.

 Register roles once this setup part is done:
   %ebx  buffer read by the windowing loops (one of the two dct64 outputs)
   %ecx  pointer into decwin
   %esi  output pointer, written with 16-bit stores
   %edi  clip counter, starts at 0
   %ebp  loop counter

 ASM_NAME and FUNC are macros that are not defined in this file
 (presumably provided by mangle.h).
*/
Packit c32a2d
.globl ASM_NAME(synth_1to1_i586_asm)
Packit c32a2d
ASM_NAME(synth_1to1_i586_asm):
Packit c32a2d
	subl $12,%esp /* reserve 12 bytes of locals; only the slot at 16(%esp) is accessed in this file */
Packit c32a2d
	pushl %ebp
Packit c32a2d
	pushl %edi
Packit c32a2d
	pushl %esi
Packit c32a2d
	pushl %ebx
Packit c32a2d
/* stack: 0=ebx, 4=esi, 8=edi, 12=ebp, 16,20,24=local, 28=back, 32=bandPtr, 36=channel, 40=out, 44=buffs, 48=bo, 52=decwin */
Packit c32a2d
	movl 32(%esp),%eax /* *bandPtr */
Packit c32a2d
	movl 40(%esp),%esi /* *out */
Packit c32a2d
	movl 48(%esp),%edi /* *bo */
Packit c32a2d
	movl (%edi),%ebp   /* store bo value in ebp */
Packit c32a2d
	xorl %edi,%edi /* %edi = 0: used for the compare below and as the clip counter */
Packit c32a2d
	cmpl %edi,36(%esp) /* channel == 0 ? */
Packit c32a2d
	jne .L48           /* if(!channel) */
Packit c32a2d
	decl %ebp          /* bo-- */
Packit c32a2d
	andl $15,%ebp      /* bo &= 0xf */
Packit c32a2d
	movl 48(%esp),	%edi /* *bo */
Packit c32a2d
	movl %ebp,(%edi)   /* write back bo */
Packit c32a2d
	xorl %edi,%edi     /* restore %edi to 0; it's used later */
Packit c32a2d
	movl 44(%esp),%ecx /* use buffs */
Packit c32a2d
	jmp .L49
Packit c32a2d
.L48: /* if(channel) use buffs+2176 */
Packit c32a2d
	addl $2,%esi /* out += 2 bytes, i.e. one 16-bit sample (presumably the second slot of an interleaved pair - confirm) */
Packit c32a2d
	movl 44(%esp),%ecx /* *buffs */
Packit c32a2d
	addl $2176,%ecx /* 2176 = 2 * 1088, the half-buffer size used below; bo is left unchanged on this path */
Packit c32a2d
.L49:
Packit c32a2d
	testl $1,%ebp /* is bo odd? */
Packit c32a2d
	je .L50 /* even: handled at .L50 */
Packit c32a2d
	/* bo odd: the windowing loops read from the first half-buffer (%ecx) */
	movl %ecx,%ebx
Packit c32a2d
	movl %ebp,16(%esp) /* local = bo */
Packit c32a2d
	pushl %eax /* third dct64 argument: bandPtr */
Packit c32a2d
	movl 20(%esp),%edx /* the local again (shifted by the push above): bo */
Packit c32a2d
	leal (%ebx,%edx,4),%eax /* %ecx + 4*bo */
Packit c32a2d
	pushl %eax /* second dct64 argument */
Packit c32a2d
	movl 24(%esp),%eax /* the local again (now two pushes deep): bo */
Packit c32a2d
	incl %eax
Packit c32a2d
	andl $15,%eax /* (bo+1) & 0xf */
Packit c32a2d
	leal 1088(,%eax,4),%eax
Packit c32a2d
	addl %ebx,%eax /* %ecx + 1088 + 4*((bo+1)&0xf): first dct64 argument, pushed at .L74 */
Packit c32a2d
	jmp .L74
Packit c32a2d
.L50:
Packit c32a2d
	/* bo even: the windowing loops read from the second half-buffer (%ecx+1088) */
	leal 1088(%ecx),%ebx
Packit c32a2d
	leal 1(%ebp),%edx
Packit c32a2d
	movl %edx,16(%esp) /* local = bo+1 */
Packit c32a2d
	pushl %eax /* third dct64 argument: bandPtr */
Packit c32a2d
	leal 1092(%ecx,%ebp,4),%eax /* %ecx + 1088 + 4*(bo+1) */
Packit c32a2d
	pushl %eax /* second dct64 argument */
Packit c32a2d
	leal (%ecx,%ebp,4),%eax /* %ecx + 4*bo: first dct64 argument */
Packit c32a2d
.L74:
Packit c32a2d
	pushl %eax /* first dct64 argument (computed on either path above) */
Packit c32a2d
	/* %ebx, %esi and %edi are used after this call without being reloaded,
	   so dct64_i386 is expected to preserve them */
	call FUNC(dct64_i386)
Packit c32a2d
	addl $12,%esp /* drop the three pushed arguments */
Packit c32a2d
/* stack now back on track */
Packit c32a2d
	movl 16(%esp),%edx /* the local: bo on the odd path, bo+1 on the even path */
Packit c32a2d
	leal 0(,%edx,4),%edx /* scale by 4 bytes, the element size of the flds/fmuls accesses */
Packit c32a2d
	movl 52(%esp),%eax /* decwin */
Packit c32a2d
	addl $64,%eax /* decwin + 64 bytes */
Packit c32a2d
	movl %eax,%ecx
Packit c32a2d
	subl %edx,%ecx /* %ecx = decwin + 64 - 4*local */
Packit c32a2d
	movl $16,%ebp /* pass count for the loop at .L55 */
Packit c32a2d
/*
 First windowing loop.  %ebp is the pass counter, decremented at the bottom.
 Each pass produces one output sample:
   sum = p0 - p1 + p2 - p3 + ... + p14 - p15
   with p_i = (float at %ecx + 4*i) * (float at %ebx + 4*i)
 The sum is converted to a 32-bit integer with fistpl (using the current x87
 rounding mode), clipped to [-32768, 32767] and stored as 16 bits at (%esi).
 %edi is incremented each time a sample had to be clipped.

 The product for the next term is loaded and multiplied before the previous
 one is accumulated, with fxch bringing the operands into place; this is the
 instruction reordering mentioned in the file header.

 NOTE(review): the signs above assume GNU as semantics for the AT&T form
 `fsubrp %st,%st(1)`, which computes st(1) - st(0) and pops.
*/
.L55:
Packit c32a2d
	flds (%ecx)
Packit c32a2d
	fmuls (%ebx) /* st0 = p0 */
Packit c32a2d
	flds 4(%ecx)
Packit c32a2d
	fmuls 4(%ebx) /* st0 = p1, st1 = p0 */
Packit c32a2d
	fxch %st(1)
Packit c32a2d
	flds 8(%ecx)
Packit c32a2d
	fmuls 8(%ebx) /* st0 = p2, st1 = p0, st2 = p1 */
Packit c32a2d
	fxch %st(2)
Packit c32a2d
	fsubrp %st,%st(1) /* sum = p0 - p1; p2 stays pending below it */
Packit c32a2d
	flds 12(%ecx)
Packit c32a2d
	fmuls 12(%ebx)
Packit c32a2d
	fxch %st(2)
Packit c32a2d
	faddp %st,%st(1) /* sum += p2; p3 pending */
Packit c32a2d
	flds 16(%ecx)
Packit c32a2d
	fmuls 16(%ebx)
Packit c32a2d
	fxch %st(2)
Packit c32a2d
	fsubrp %st,%st(1) /* sum -= p3; the same pattern repeats for the remaining terms */
Packit c32a2d
	flds 20(%ecx)
Packit c32a2d
	fmuls 20(%ebx)
Packit c32a2d
	fxch %st(2)
Packit c32a2d
	faddp %st,%st(1)
Packit c32a2d
	flds 24(%ecx)
Packit c32a2d
	fmuls 24(%ebx)
Packit c32a2d
	fxch %st(2)
Packit c32a2d
	fsubrp %st,%st(1)
Packit c32a2d
	flds 28(%ecx)
Packit c32a2d
	fmuls 28(%ebx)
Packit c32a2d
	fxch %st(2)
Packit c32a2d
	faddp %st,%st(1)
Packit c32a2d
	flds 32(%ecx)
Packit c32a2d
	fmuls 32(%ebx)
Packit c32a2d
	fxch %st(2)
Packit c32a2d
	fsubrp %st,%st(1)
Packit c32a2d
	flds 36(%ecx)
Packit c32a2d
	fmuls 36(%ebx)
Packit c32a2d
	fxch %st(2)
Packit c32a2d
	faddp %st,%st(1)
Packit c32a2d
	flds 40(%ecx)
Packit c32a2d
	fmuls 40(%ebx)
Packit c32a2d
	fxch %st(2)
Packit c32a2d
	fsubrp %st,%st(1)
Packit c32a2d
	flds 44(%ecx)
Packit c32a2d
	fmuls 44(%ebx)
Packit c32a2d
	fxch %st(2)
Packit c32a2d
	faddp %st,%st(1)
Packit c32a2d
	flds 48(%ecx)
Packit c32a2d
	fmuls 48(%ebx)
Packit c32a2d
	fxch %st(2)
Packit c32a2d
	fsubrp %st,%st(1)
Packit c32a2d
	flds 52(%ecx)
Packit c32a2d
	fmuls 52(%ebx)
Packit c32a2d
	fxch %st(2)         
Packit c32a2d
	faddp %st,%st(1)
Packit c32a2d
	flds 56(%ecx)
Packit c32a2d
	fmuls 56(%ebx)
Packit c32a2d
	fxch %st(2)
Packit c32a2d
	fsubrp %st,%st(1)
Packit c32a2d
	flds 60(%ecx)
Packit c32a2d
	fmuls 60(%ebx) /* st0 = p15, last product */
Packit c32a2d
	fxch %st(2)
Packit c32a2d
	subl $4,%esp /* make a 4-byte stack slot for the integer result */
Packit c32a2d
	faddp %st,%st(1) /* sum += p14 */
Packit c32a2d
	fxch %st(1)
Packit c32a2d
	fsubrp %st,%st(1) /* sum -= p15; the x87 stack now holds only the sum */
Packit c32a2d
	fistpl (%esp) /* store the sum as a 32-bit integer and pop it */
Packit c32a2d
	popl %eax /* fetch the integer and release the slot */
Packit c32a2d
	cmpl $32767,%eax
Packit c32a2d
	jg 1f /* too large: clip high */
Packit c32a2d
	cmpl $-32768,%eax
Packit c32a2d
	jl 2f /* too small: clip low */
Packit c32a2d
	movw %ax,(%esi) /* in range: store the low 16 bits */
Packit c32a2d
	jmp 4f
Packit c32a2d
1:	movw $32767,(%esi)
Packit c32a2d
	jmp 3f
Packit c32a2d
2:	movw $-32768,(%esi)
Packit c32a2d
3:	incl %edi /* count the clipped sample */
Packit c32a2d
4:
Packit c32a2d
.L54:
Packit c32a2d
	addl $64,%ebx /* next group of 16 floats in the dct64 output */
Packit c32a2d
	subl $-128,%ecx /* %ecx += 128 bytes (presumably written as a subtraction because -128 fits a one-byte immediate and +128 does not) */
Packit c32a2d
	addl $4,%esi /* advance 4 bytes although 2 were written (the other 2 presumably belong to the other channel - confirm) */
Packit c32a2d
	decl %ebp
Packit c32a2d
	jnz .L55
Packit c32a2d
	/*
	 Single sample between the two windowing loops.  Only every second
	 element is used and all terms are added:
	   sum = p0 + p2 + p4 + ... + p14
	   with p_i = (float at %ecx + 4*i) * (float at %ebx + 4*i)
	 Conversion, clipping and the 16-bit store work as in the loops:
	 fistpl to a stack slot, clip to [-32768, 32767], count clips in %edi.
	 Afterwards %ebx, %esi, %ecx and %ebp are set up for the loop at .L68.
	*/
	flds (%ecx)
Packit c32a2d
	fmuls (%ebx) /* st0 = p0 */
Packit c32a2d
	flds 8(%ecx)
Packit c32a2d
	fmuls 8(%ebx) /* st0 = p2, st1 = p0 */
Packit c32a2d
	flds 16(%ecx)
Packit c32a2d
	fmuls 16(%ebx) /* st0 = p4, st1 = p2, st2 = p0 */
Packit c32a2d
	fxch %st(2)
Packit c32a2d
	faddp %st,%st(1) /* sum = p0 + p2; p4 stays pending below it */
Packit c32a2d
	flds 24(%ecx)
Packit c32a2d
	fmuls 24(%ebx)
Packit c32a2d
	fxch %st(2)
Packit c32a2d
	faddp %st,%st(1)
Packit c32a2d
	flds 32(%ecx)
Packit c32a2d
	fmuls 32(%ebx)
Packit c32a2d
	fxch %st(2)
Packit c32a2d
	faddp %st,%st(1)
Packit c32a2d
	flds 40(%ecx)
Packit c32a2d
	fmuls 40(%ebx)
Packit c32a2d
	fxch %st(2)
Packit c32a2d
	faddp %st,%st(1)
Packit c32a2d
	flds 48(%ecx)
Packit c32a2d
	fmuls 48(%ebx)
Packit c32a2d
	fxch %st(2)
Packit c32a2d
	faddp %st,%st(1)
Packit c32a2d
	flds 56(%ecx)
Packit c32a2d
	fmuls 56(%ebx) /* st0 = p14, last product */
Packit c32a2d
	fxch %st(2)
Packit c32a2d
	subl $4,%esp /* make a 4-byte stack slot for the integer result */
Packit c32a2d
	faddp %st,%st(1)
Packit c32a2d
	fxch %st(1)
Packit c32a2d
	faddp %st,%st(1) /* all eight products summed; the x87 stack holds only the sum */
Packit c32a2d
	fistpl (%esp) /* store the sum as a 32-bit integer and pop it */
Packit c32a2d
	popl %eax /* fetch the integer and release the slot */
Packit c32a2d
	cmpl $32767,%eax
Packit c32a2d
	jg 1f /* too large: clip high */
Packit c32a2d
	cmpl $-32768,%eax
Packit c32a2d
	jl 2f /* too small: clip low */
Packit c32a2d
	movw %ax,(%esi) /* in range: store the low 16 bits */
Packit c32a2d
	jmp 4f
Packit c32a2d
1:	movw $32767,(%esi)
Packit c32a2d
	jmp 3f
Packit c32a2d
2:	movw $-32768,(%esi)
Packit c32a2d
3:	incl %edi /* count the clipped sample */
Packit c32a2d
4:
Packit c32a2d
.L62:
Packit c32a2d
	addl $-64,%ebx /* step back one group of 16 floats; the next loop walks %ebx downwards */
Packit c32a2d
	addl $4,%esi /* next output position (4 bytes per sample written) */
Packit c32a2d
	movl 16(%esp),%edx /* the local stored before the dct64 call (bo or bo+1) */
Packit c32a2d
	leal -128(%ecx,%edx,8),%ecx /* %ecx = %ecx - 128 + 8*local */
Packit c32a2d
	movl $15,%ebp /* pass count for the loop at .L68 */
Packit c32a2d
/*
 Second windowing loop, followed by the function epilogue.
 %ebp is the pass counter, decremented at the bottom.
 Each pass produces one output sample; the window is read backwards from
 %ecx while %ebx is read forwards, and every term is subtracted:
   sum = - w[-1]*b[0] - w[-2]*b[1] - ... - w[-15]*b[14] - w[0]*b[15]
   with w[k] = float at %ecx + 4*k and b[k] = float at %ebx + 4*k
 The first term gets its sign from fchs, the others from fsubrp.
 Conversion, clipping and the 16-bit store work as in the first loop.
 After the loop %ebx and %ecx are not needed any more; the clip count in
 %edi becomes the return value.

 NOTE(review): the signs above assume GNU as semantics for the AT&T form
 `fsubrp %st,%st(1)`, which computes st(1) - st(0) and pops.
*/
.L68:
Packit c32a2d
	flds -4(%ecx)
Packit c32a2d
	fchs /* negate the first window value so the first term enters as -w[-1]*b[0] */
Packit c32a2d
	fmuls (%ebx)
Packit c32a2d
	flds -8(%ecx)
Packit c32a2d
	fmuls 4(%ebx)
Packit c32a2d
	fxch %st(1)
Packit c32a2d
	flds -12(%ecx)
Packit c32a2d
	fmuls 8(%ebx)
Packit c32a2d
	fxch %st(2)
Packit c32a2d
	fsubrp %st,%st(1) /* sum = first term - w[-2]*b[1]; the third product stays pending */
Packit c32a2d
	flds -16(%ecx)
Packit c32a2d
	fmuls 12(%ebx)
Packit c32a2d
	fxch %st(2)
Packit c32a2d
	fsubrp %st,%st(1) /* subtract the pending product; the same pattern repeats for the remaining terms */
Packit c32a2d
	flds -20(%ecx)
Packit c32a2d
	fmuls 16(%ebx)
Packit c32a2d
	fxch %st(2)
Packit c32a2d
	fsubrp %st,%st(1)
Packit c32a2d
	flds -24(%ecx)
Packit c32a2d
	fmuls 20(%ebx)
Packit c32a2d
	fxch %st(2)
Packit c32a2d
	fsubrp %st,%st(1)
Packit c32a2d
	flds -28(%ecx)
Packit c32a2d
	fmuls 24(%ebx)
Packit c32a2d
	fxch %st(2)
Packit c32a2d
	fsubrp %st,%st(1)
Packit c32a2d
	flds -32(%ecx)
Packit c32a2d
	fmuls 28(%ebx)
Packit c32a2d
	fxch %st(2)
Packit c32a2d
	fsubrp %st,%st(1)
Packit c32a2d
	flds -36(%ecx)
Packit c32a2d
	fmuls 32(%ebx)
Packit c32a2d
	fxch %st(2)
Packit c32a2d
	fsubrp %st,%st(1)
Packit c32a2d
	flds -40(%ecx)
Packit c32a2d
	fmuls 36(%ebx)
Packit c32a2d
	fxch %st(2)
Packit c32a2d
	fsubrp %st,%st(1)
Packit c32a2d
	flds -44(%ecx)
Packit c32a2d
	fmuls 40(%ebx)
Packit c32a2d
	fxch %st(2)
Packit c32a2d
	fsubrp %st,%st(1)
Packit c32a2d
	flds -48(%ecx)
Packit c32a2d
	fmuls 44(%ebx)
Packit c32a2d
	fxch %st(2)
Packit c32a2d
	fsubrp %st,%st(1)
Packit c32a2d
	flds -52(%ecx)
Packit c32a2d
	fmuls 48(%ebx)
Packit c32a2d
	fxch %st(2)
Packit c32a2d
	fsubrp %st,%st(1)
Packit c32a2d
	flds -56(%ecx)
Packit c32a2d
	fmuls 52(%ebx)
Packit c32a2d
	fxch %st(2)
Packit c32a2d
	fsubrp %st,%st(1)
Packit c32a2d
	flds -60(%ecx)
Packit c32a2d
	fmuls 56(%ebx)
Packit c32a2d
	fxch %st(2)
Packit c32a2d
	fsubrp %st,%st(1)
Packit c32a2d
	flds (%ecx) /* the last term uses the window value at offset 0 */
Packit c32a2d
	fmuls 60(%ebx)
Packit c32a2d
	fxch %st(2)
Packit c32a2d
	subl $4,%esp /* make a 4-byte stack slot for the integer result */
Packit c32a2d
	fsubrp %st,%st(1)
Packit c32a2d
	fxch %st(1)
Packit c32a2d
	fsubrp %st,%st(1) /* last term subtracted; the x87 stack holds only the sum */
Packit c32a2d
	fistpl (%esp) /* store the sum as a 32-bit integer and pop it */
Packit c32a2d
	popl %eax /* fetch the integer and release the slot */
Packit c32a2d
	cmpl $32767,%eax
Packit c32a2d
	jg 1f /* too large: clip high */
Packit c32a2d
	cmpl $-32768,%eax
Packit c32a2d
	jl 2f /* too small: clip low */
Packit c32a2d
	movw %ax,(%esi) /* in range: store the low 16 bits */
Packit c32a2d
	jmp 4f
Packit c32a2d
1:	movw $32767,(%esi)
Packit c32a2d
	jmp 3f
Packit c32a2d
2:	movw $-32768,(%esi)
Packit c32a2d
3:	incl %edi /* count the clipped sample */
Packit c32a2d
4:
Packit c32a2d
.L67:
Packit c32a2d
	addl $-64,%ebx /* previous group of 16 floats in the dct64 output */
Packit c32a2d
	addl $-128,%ecx /* window pointer back by 128 bytes */
Packit c32a2d
	addl $4,%esi /* next output position (4 bytes per sample written) */
Packit c32a2d
	decl %ebp
Packit c32a2d
	jnz .L68
Packit c32a2d
	movl %edi,%eax /* return value: number of clipped samples */
Packit c32a2d
	popl %ebx /* restore the saved registers in reverse push order */
Packit c32a2d
	popl %esi
Packit c32a2d
	popl %edi
Packit c32a2d
	popl %ebp
Packit c32a2d
	addl $12,%esp /* release the 12 bytes of locals reserved at entry */
Packit c32a2d
	ret
Packit c32a2d
Packit c32a2d
/*
 Apple PIC builds only: a jump-table entry for dct64_i386 in the
 __IMPORT,__jump_table section.  The entry is five hlt bytes tied to the
 symbol via .indirect_symbol; presumably the dynamic linker overwrites them
 with a jump to the real function at load time (the section is flagged
 self_modifying_code) - confirm against the Mach-O documentation.
 NOTE(review): nothing in this file names L_dct64_i386 directly; whether the
 FUNC() macro used at the call site expands to it depends on mangle.h.
*/
#if defined(PIC) && defined(__APPLE__)
Packit c32a2d
	.section __IMPORT,__jump_table,symbol_stubs,self_modifying_code+pure_instructions,5
Packit c32a2d
L_dct64_i386:
Packit c32a2d
	.indirect_symbol ASM_NAME(dct64_i386)
Packit c32a2d
	hlt ; hlt ; hlt ; hlt ; hlt
Packit c32a2d
#endif
Packit c32a2d
Packit c32a2d
/* NONEXEC_STACK is not defined in this file (presumably a mangle.h macro that marks the object as not needing an executable stack - confirm). */
NONEXEC_STACK