|
Packit |
c32a2d |
/*
|
|
Packit |
c32a2d |
decode_i586: asm synth
|
|
Packit |
c32a2d |
|
|
Packit |
c32a2d |
copyright ?-2006 by the mpg123 project - free software under the terms of the LGPL 2.1
|
|
Packit |
c32a2d |
see COPYING and AUTHORS files in distribution or http://mpg123.org
|
|
Packit |
c32a2d |
initially written by Stefan Bieschewski
|
|
Packit |
c32a2d |
|
|
Packit |
c32a2d |
synth_1to1 works the same way as the c version of this
|
|
Packit |
c32a2d |
file. only two types of changes have been made:
|
|
Packit |
c32a2d |
- reordered floating point instructions to
|
|
Packit |
c32a2d |
prevent pipeline stalls
|
|
Packit |
c32a2d |
- made WRITE_SAMPLE use integer instead of
|
|
Packit |
c32a2d |
(slower) floating point
|
|
Packit |
c32a2d |
all kinds of x86 processors should benefit from these
|
|
Packit |
c32a2d |
modifications.
|
|
Packit |
c32a2d |
|
|
Packit |
c32a2d |
useful sources of information on optimizing x86 code include:
|
|
Packit |
c32a2d |
|
|
Packit |
c32a2d |
Intel Architecture Optimization Manual
|
|
Packit |
c32a2d |
http://www.intel.com/design/pentium/manuals/242816.htm
|
|
Packit |
c32a2d |
|
|
Packit |
c32a2d |
Cyrix 6x86 Instruction Set Summary
|
|
Packit |
c32a2d |
ftp://ftp.cyrix.com/6x86/6x-dbch6.pdf
|
|
Packit |
c32a2d |
|
|
Packit |
c32a2d |
AMD-K5 Processor Software Development
|
|
Packit |
c32a2d |
http://www.amd.com/products/cpg/techdocs/appnotes/20007e.pdf
|
|
Packit |
c32a2d |
|
|
Packit |
c32a2d |
Stefan Bieschewski <stb@acm.org>
|
|
Packit |
c32a2d |
|
|
Packit |
c32a2d |
$Id: decode_i586.s 1 2004-09-18 13:30:08Z thomas $
|
|
Packit |
c32a2d |
*/
|
|
Packit |
c32a2d |
|
|
Packit |
c32a2d |
#include "mangle.h"
|
|
Packit |
c32a2d |
|
|
Packit |
c32a2d |
/* Constant data; on non-Apple builds the section is switched to .rodata.
   .LC0 and .LC1 are not referenced by any instruction visible in this file. */
.data
|
|
Packit |
c32a2d |
#ifndef __APPLE__
|
|
Packit |
c32a2d |
.section .rodata
|
|
Packit |
c32a2d |
#endif
|
|
Packit |
c32a2d |
ALIGN8 /* presumably an 8-byte alignment macro from mangle.h - TODO confirm */
|
|
Packit |
c32a2d |
.LC0:
|
|
Packit |
c32a2d |
.long 0x0,0x40dfffc0 /* low word, high word: read as a little-endian IEEE-754 double this is 32767.0 */
|
|
Packit |
c32a2d |
ALIGN8
|
|
Packit |
c32a2d |
.LC1:
|
|
Packit |
c32a2d |
.long 0x0,0xc0e00000 /* low word, high word: read as a little-endian IEEE-754 double this is -32768.0 */
|
|
Packit |
c32a2d |
ALIGN8
|
|
Packit |
c32a2d |
.text
|
|
Packit |
c32a2d |
/*
 * synth_1to1_i586_asm (32-bit x86, AT&T syntax, all arguments read from the stack).
 * Calls dct64_i386 once, then stores 32 clipped 16-bit samples through out:
 * 16 in loop .L55, one after it, 15 in loop .L68; out advances 4 bytes per sample.
 * Returns in %eax the number of samples that were clipped to [-32768, 32767].
 * Register roles after the setup below:
 *   %ebx = cursor into the sample buffer, %ecx = cursor into the decwin window,
 *   %esi = output cursor, %edi = clip counter, %ebp = loop counter.
 */
/* int synth_1to1_i586_asm(real *bandPtr, int channel, unsigned char *out, unsigned char *buffs, int *bo, real *decwin); */
|
|
Packit |
c32a2d |
.globl ASM_NAME(synth_1to1_i586_asm)
|
|
Packit |
c32a2d |
ASM_NAME(synth_1to1_i586_asm):
|
|
Packit |
c32a2d |
subl $12,%esp /* reserve 12 bytes of locals; only the slot at 16(%esp) is used below */
|
|
Packit |
c32a2d |
pushl %ebp /* save the four registers that the epilogue restores */
|
|
Packit |
c32a2d |
pushl %edi
|
|
Packit |
c32a2d |
pushl %esi
|
|
Packit |
c32a2d |
pushl %ebx
|
|
Packit |
c32a2d |
/* stack: 0=ebx, 4=esi, 8=edi, 12=ebp, 16,20,24=local, 28=back, 32=bandPtr, 36=channel, 40=out, 44=buffs, 48=bo, 52=decwin */
|
|
Packit |
c32a2d |
movl 32(%esp),%eax /* *bandPtr */
|
|
Packit |
c32a2d |
movl 40(%esp),%esi /* *out */
|
|
Packit |
c32a2d |
movl 48(%esp),%edi /* *bo */
|
|
Packit |
c32a2d |
movl (%edi),%ebp /* store bo value in ebp */
|
|
Packit |
c32a2d |
xorl %edi,%edi /* edi = 0: compare operand now, clip counter later */
|
|
Packit |
c32a2d |
cmpl %edi,36(%esp) /* channel == 0 ? */
|
|
Packit |
c32a2d |
jne .L48 /* channel != 0: go to .L48; the fall-through is the if(!channel) case */
|
|
Packit |
c32a2d |
decl %ebp /* bo-- */
|
|
Packit |
c32a2d |
andl $15,%ebp /* bo &= 0xf */
|
|
Packit |
c32a2d |
movl 48(%esp), %edi /* *bo */
|
|
Packit |
c32a2d |
movl %ebp,(%edi) /* write back bo */
|
|
Packit |
c32a2d |
xorl %edi,%edi /* restore %edi to 0; it's used later */
|
|
Packit |
c32a2d |
movl 44(%esp),%ecx /* use buffs */
|
|
Packit |
c32a2d |
jmp .L49
|
|
Packit |
c32a2d |
.L48: /* if(channel) use buffs+2176 */
|
|
Packit |
c32a2d |
addl $2,%esi /* out += 2 bytes: the other 16-bit slot of each 4-byte output step */
|
|
Packit |
c32a2d |
movl 44(%esp),%ecx /* *buffs */
|
|
Packit |
c32a2d |
addl $2176,%ecx /* 2176 = 2*1088: skips the two 1088-byte halves addressed from buffs below */
|
|
Packit |
c32a2d |
.L49:
|
|
Packit |
c32a2d |
testl $1,%ebp /* is bo odd? */
|
|
Packit |
c32a2d |
je .L50 /* bo even: go to .L50 */
|
|
Packit |
c32a2d |
movl %ecx,%ebx /* bo odd: the synthesis loops read from the first half (buf) */
|
|
Packit |
c32a2d |
movl %ebp,16(%esp) /* local = bo */
|
|
Packit |
c32a2d |
pushl %eax /* pushed first: bandPtr */
|
|
Packit |
c32a2d |
movl 20(%esp),%edx /* reload local (slot 16, shifted by one push) */
|
|
Packit |
c32a2d |
leal (%ebx,%edx,4),%eax /* buf + bo*4 */
|
|
Packit |
c32a2d |
pushl %eax /* pushed second */
|
|
Packit |
c32a2d |
movl 24(%esp),%eax /* reload local (slot 16, shifted by two pushes) */
|
|
Packit |
c32a2d |
incl %eax
|
|
Packit |
c32a2d |
andl $15,%eax /* (bo+1) & 15 */
|
|
Packit |
c32a2d |
leal 1088(,%eax,4),%eax
|
|
Packit |
c32a2d |
addl %ebx,%eax /* buf + 1088 + ((bo+1)&15)*4, pushed last at .L74 */
|
|
Packit |
c32a2d |
jmp .L74
|
|
Packit |
c32a2d |
.L50:
|
|
Packit |
c32a2d |
leal 1088(%ecx),%ebx /* bo even: the synthesis loops read from the second half (buf+1088) */
|
|
Packit |
c32a2d |
leal 1(%ebp),%edx
|
|
Packit |
c32a2d |
movl %edx,16(%esp) /* local = bo+1 */
|
|
Packit |
c32a2d |
pushl %eax /* pushed first: bandPtr */
|
|
Packit |
c32a2d |
leal 1092(%ecx,%ebp,4),%eax /* buf + 1088 + (bo+1)*4 */
|
|
Packit |
c32a2d |
pushl %eax /* pushed second */
|
|
Packit |
c32a2d |
leal (%ecx,%ebp,4),%eax /* buf + bo*4, pushed last at .L74 */
|
|
Packit |
c32a2d |
.L74:
|
|
Packit |
c32a2d |
pushl %eax /* pushed last (lowest stack address) */
|
|
Packit |
c32a2d |
call FUNC(dct64_i386)
|
|
Packit |
c32a2d |
addl $12,%esp /* drop the three pushed arguments */
|
|
Packit |
c32a2d |
/* stack now back on track */
|
|
Packit |
c32a2d |
movl 16(%esp),%edx /* edx = local (bo when bo was odd, bo+1 when even) */
|
|
Packit |
c32a2d |
leal 0(,%edx,4),%edx /* edx *= 4 (byte offset of that many 4-byte values) */
|
|
Packit |
c32a2d |
movl 52(%esp),%eax /* decwin */
|
|
Packit |
c32a2d |
addl $64,%eax
|
|
Packit |
c32a2d |
movl %eax,%ecx
|
|
Packit |
c32a2d |
subl %edx,%ecx /* ecx = decwin + 64 - local*4: window cursor for loop .L55 */
|
|
Packit |
c32a2d |
movl $16,%ebp /* iteration count for loop .L55 */
|
|
Packit |
c32a2d |
/*
 * Loop .L55: runs while %ebp counts down to zero; each pass produces one output sample.
 * Sixteen single-precision products (%ecx)[i] * (%ebx)[i], byte offsets 0..60, are
 * merged with strictly alternating fsubrp / faddp. fxch keeps the running sum
 * next to the newest product; the loads are interleaved with the merges, presumably
 * the reordering against pipeline stalls that the file header mentions.
 * NOTE(review): GNU as treats fsubp/fsubrp with a register destination specially in
 * AT&T syntax; confirm the sign of each term against the C synth before editing.
 */
.L55:
|
|
Packit |
c32a2d |
flds (%ecx)
|
|
Packit |
c32a2d |
fmuls (%ebx) /* product 0 */
|
|
Packit |
c32a2d |
flds 4(%ecx)
|
|
Packit |
c32a2d |
fmuls 4(%ebx) /* product 1 */
|
|
Packit |
c32a2d |
fxch %st(1)
|
|
Packit |
c32a2d |
flds 8(%ecx)
|
|
Packit |
c32a2d |
fmuls 8(%ebx) /* product 2 is started before products 0 and 1 are merged */
|
|
Packit |
c32a2d |
fxch %st(2)
|
|
Packit |
c32a2d |
fsubrp %st,%st(1) /* merge products 0 and 1 */
|
|
Packit |
c32a2d |
flds 12(%ecx)
|
|
Packit |
c32a2d |
fmuls 12(%ebx)
|
|
Packit |
c32a2d |
fxch %st(2)
|
|
Packit |
c32a2d |
faddp %st,%st(1)
|
|
Packit |
c32a2d |
flds 16(%ecx)
|
|
Packit |
c32a2d |
fmuls 16(%ebx)
|
|
Packit |
c32a2d |
fxch %st(2)
|
|
Packit |
c32a2d |
fsubrp %st,%st(1)
|
|
Packit |
c32a2d |
flds 20(%ecx)
|
|
Packit |
c32a2d |
fmuls 20(%ebx)
|
|
Packit |
c32a2d |
fxch %st(2)
|
|
Packit |
c32a2d |
faddp %st,%st(1)
|
|
Packit |
c32a2d |
flds 24(%ecx)
|
|
Packit |
c32a2d |
fmuls 24(%ebx)
|
|
Packit |
c32a2d |
fxch %st(2)
|
|
Packit |
c32a2d |
fsubrp %st,%st(1)
|
|
Packit |
c32a2d |
flds 28(%ecx)
|
|
Packit |
c32a2d |
fmuls 28(%ebx)
|
|
Packit |
c32a2d |
fxch %st(2)
|
|
Packit |
c32a2d |
faddp %st,%st(1)
|
|
Packit |
c32a2d |
flds 32(%ecx)
|
|
Packit |
c32a2d |
fmuls 32(%ebx)
|
|
Packit |
c32a2d |
fxch %st(2)
|
|
Packit |
c32a2d |
fsubrp %st,%st(1)
|
|
Packit |
c32a2d |
flds 36(%ecx)
|
|
Packit |
c32a2d |
fmuls 36(%ebx)
|
|
Packit |
c32a2d |
fxch %st(2)
|
|
Packit |
c32a2d |
faddp %st,%st(1)
|
|
Packit |
c32a2d |
flds 40(%ecx)
|
|
Packit |
c32a2d |
fmuls 40(%ebx)
|
|
Packit |
c32a2d |
fxch %st(2)
|
|
Packit |
c32a2d |
fsubrp %st,%st(1)
|
|
Packit |
c32a2d |
flds 44(%ecx)
|
|
Packit |
c32a2d |
fmuls 44(%ebx)
|
|
Packit |
c32a2d |
fxch %st(2)
|
|
Packit |
c32a2d |
faddp %st,%st(1)
|
|
Packit |
c32a2d |
flds 48(%ecx)
|
|
Packit |
c32a2d |
fmuls 48(%ebx)
|
|
Packit |
c32a2d |
fxch %st(2)
|
|
Packit |
c32a2d |
fsubrp %st,%st(1)
|
|
Packit |
c32a2d |
flds 52(%ecx)
|
|
Packit |
c32a2d |
fmuls 52(%ebx)
|
|
Packit |
c32a2d |
fxch %st(2)
|
|
Packit |
c32a2d |
faddp %st,%st(1)
|
|
Packit |
c32a2d |
flds 56(%ecx)
|
|
Packit |
c32a2d |
fmuls 56(%ebx)
|
|
Packit |
c32a2d |
fxch %st(2)
|
|
Packit |
c32a2d |
fsubrp %st,%st(1)
|
|
Packit |
c32a2d |
flds 60(%ecx)
|
|
Packit |
c32a2d |
fmuls 60(%ebx) /* product 15, the last one */
|
|
Packit |
c32a2d |
fxch %st(2)
|
|
Packit |
c32a2d |
subl $4,%esp /* temporary 4-byte stack slot for fistpl, placed between FPU ops */
|
|
Packit |
c32a2d |
faddp %st,%st(1)
|
|
Packit |
c32a2d |
fxch %st(1)
|
|
Packit |
c32a2d |
fsubrp %st,%st(1) /* final merge: a single value remains from this pass */
|
|
Packit |
c32a2d |
fistpl (%esp) /* store it as a 32-bit integer (current FPU rounding mode) and pop */
|
|
Packit |
c32a2d |
popl %eax /* eax = integer sample; also releases the temporary slot */
|
|
Packit |
c32a2d |
cmpl $32767,%eax
|
|
Packit |
c32a2d |
jg 1f /* above the signed 16-bit maximum: clip high */
|
|
Packit |
c32a2d |
cmpl $-32768,%eax
|
|
Packit |
c32a2d |
jl 2f /* below the signed 16-bit minimum: clip low */
|
|
Packit |
c32a2d |
movw %ax,(%esi) /* in range: store the low 16 bits */
|
|
Packit |
c32a2d |
jmp 4f
|
|
Packit |
c32a2d |
1: movw $32767,(%esi)
|
|
Packit |
c32a2d |
jmp 3f
|
|
Packit |
c32a2d |
2: movw $-32768,(%esi)
|
|
Packit |
c32a2d |
3: incl %edi /* count one clipped sample */
|
|
Packit |
c32a2d |
4:
|
|
Packit |
c32a2d |
.L54:
|
|
Packit |
c32a2d |
addl $64,%ebx /* sample buffer cursor: forward 64 bytes (16 floats) */
|
|
Packit |
c32a2d |
subl $-128,%ecx /* window cursor += 128 bytes (-128 fits a sign-extended 8-bit immediate) */
|
|
Packit |
c32a2d |
addl $4,%esi /* output cursor: forward 4 bytes, i.e. two 16-bit slots */
|
|
Packit |
c32a2d |
decl %ebp
|
|
Packit |
c32a2d |
jnz .L55
|
|
Packit |
c32a2d |
/*
 * Single sample between the two loops: eight products taken at every second
 * float (byte offsets 0, 8, ..., 56 of both %ecx and %ebx), all merged with faddp.
 * The result is converted, clipped and stored exactly as in loop .L55.
 * .L62 then re-bases both cursors for the second loop.
 */
flds (%ecx)
|
|
Packit |
c32a2d |
fmuls (%ebx)
|
|
Packit |
c32a2d |
flds 8(%ecx)
|
|
Packit |
c32a2d |
fmuls 8(%ebx)
|
|
Packit |
c32a2d |
flds 16(%ecx)
|
|
Packit |
c32a2d |
fmuls 16(%ebx)
|
|
Packit |
c32a2d |
fxch %st(2)
|
|
Packit |
c32a2d |
faddp %st,%st(1)
|
|
Packit |
c32a2d |
flds 24(%ecx)
|
|
Packit |
c32a2d |
fmuls 24(%ebx)
|
|
Packit |
c32a2d |
fxch %st(2)
|
|
Packit |
c32a2d |
faddp %st,%st(1)
|
|
Packit |
c32a2d |
flds 32(%ecx)
|
|
Packit |
c32a2d |
fmuls 32(%ebx)
|
|
Packit |
c32a2d |
fxch %st(2)
|
|
Packit |
c32a2d |
faddp %st,%st(1)
|
|
Packit |
c32a2d |
flds 40(%ecx)
|
|
Packit |
c32a2d |
fmuls 40(%ebx)
|
|
Packit |
c32a2d |
fxch %st(2)
|
|
Packit |
c32a2d |
faddp %st,%st(1)
|
|
Packit |
c32a2d |
flds 48(%ecx)
|
|
Packit |
c32a2d |
fmuls 48(%ebx)
|
|
Packit |
c32a2d |
fxch %st(2)
|
|
Packit |
c32a2d |
faddp %st,%st(1)
|
|
Packit |
c32a2d |
flds 56(%ecx)
|
|
Packit |
c32a2d |
fmuls 56(%ebx) /* eighth and last product */
|
|
Packit |
c32a2d |
fxch %st(2)
|
|
Packit |
c32a2d |
subl $4,%esp /* temporary 4-byte stack slot for fistpl */
|
|
Packit |
c32a2d |
faddp %st,%st(1)
|
|
Packit |
c32a2d |
fxch %st(1)
|
|
Packit |
c32a2d |
faddp %st,%st(1) /* final merge: the sum of all eight products */
|
|
Packit |
c32a2d |
fistpl (%esp) /* store as a 32-bit integer (current FPU rounding mode) and pop */
|
|
Packit |
c32a2d |
popl %eax /* eax = integer sample; also releases the temporary slot */
|
|
Packit |
c32a2d |
cmpl $32767,%eax
|
|
Packit |
c32a2d |
jg 1f /* clip high */
|
|
Packit |
c32a2d |
cmpl $-32768,%eax
|
|
Packit |
c32a2d |
jl 2f /* clip low */
|
|
Packit |
c32a2d |
movw %ax,(%esi) /* in range: store the low 16 bits */
|
|
Packit |
c32a2d |
jmp 4f
|
|
Packit |
c32a2d |
1: movw $32767,(%esi)
|
|
Packit |
c32a2d |
jmp 3f
|
|
Packit |
c32a2d |
2: movw $-32768,(%esi)
|
|
Packit |
c32a2d |
3: incl %edi /* count one clipped sample */
|
|
Packit |
c32a2d |
4:
|
|
Packit |
c32a2d |
.L62:
|
|
Packit |
c32a2d |
addl $-64,%ebx /* sample buffer cursor: back 64 bytes; loop .L68 keeps stepping backwards */
|
|
Packit |
c32a2d |
addl $4,%esi /* output cursor: forward 4 bytes */
|
|
Packit |
c32a2d |
movl 16(%esp),%edx /* edx = the local saved before the dct64 call (bo or bo+1) */
|
|
Packit |
c32a2d |
leal -128(%ecx,%edx,8),%ecx /* window cursor = ecx - 128 + local*8 */
|
|
Packit |
c32a2d |
movl $15,%ebp /* iteration count for loop .L68 */
|
|
Packit |
c32a2d |
/*
 * Loop .L68: runs while %ebp counts down to zero; each pass produces one output sample.
 * Sixteen products per pass. The window is read downwards (-4(%ecx) ... -60(%ecx),
 * then (%ecx) for the last term) while the sample buffer is read upwards
 * ((%ebx) ... 60(%ebx)). The first product is negated with fchs and every merge
 * is an fsubrp.
 * NOTE(review): GNU as treats fsubp/fsubrp with a register destination specially in
 * AT&T syntax; confirm the sign of each term against the C synth before editing.
 */
.L68:
|
|
Packit |
c32a2d |
flds -4(%ecx)
|
|
Packit |
c32a2d |
fchs /* negate the first window value before multiplying */
|
|
Packit |
c32a2d |
fmuls (%ebx)
|
|
Packit |
c32a2d |
flds -8(%ecx)
|
|
Packit |
c32a2d |
fmuls 4(%ebx)
|
|
Packit |
c32a2d |
fxch %st(1)
|
|
Packit |
c32a2d |
flds -12(%ecx)
|
|
Packit |
c32a2d |
fmuls 8(%ebx)
|
|
Packit |
c32a2d |
fxch %st(2)
|
|
Packit |
c32a2d |
fsubrp %st,%st(1)
|
|
Packit |
c32a2d |
flds -16(%ecx)
|
|
Packit |
c32a2d |
fmuls 12(%ebx)
|
|
Packit |
c32a2d |
fxch %st(2)
|
|
Packit |
c32a2d |
fsubrp %st,%st(1)
|
|
Packit |
c32a2d |
flds -20(%ecx)
|
|
Packit |
c32a2d |
fmuls 16(%ebx)
|
|
Packit |
c32a2d |
fxch %st(2)
|
|
Packit |
c32a2d |
fsubrp %st,%st(1)
|
|
Packit |
c32a2d |
flds -24(%ecx)
|
|
Packit |
c32a2d |
fmuls 20(%ebx)
|
|
Packit |
c32a2d |
fxch %st(2)
|
|
Packit |
c32a2d |
fsubrp %st,%st(1)
|
|
Packit |
c32a2d |
flds -28(%ecx)
|
|
Packit |
c32a2d |
fmuls 24(%ebx)
|
|
Packit |
c32a2d |
fxch %st(2)
|
|
Packit |
c32a2d |
fsubrp %st,%st(1)
|
|
Packit |
c32a2d |
flds -32(%ecx)
|
|
Packit |
c32a2d |
fmuls 28(%ebx)
|
|
Packit |
c32a2d |
fxch %st(2)
|
|
Packit |
c32a2d |
fsubrp %st,%st(1)
|
|
Packit |
c32a2d |
flds -36(%ecx)
|
|
Packit |
c32a2d |
fmuls 32(%ebx)
|
|
Packit |
c32a2d |
fxch %st(2)
|
|
Packit |
c32a2d |
fsubrp %st,%st(1)
|
|
Packit |
c32a2d |
flds -40(%ecx)
|
|
Packit |
c32a2d |
fmuls 36(%ebx)
|
|
Packit |
c32a2d |
fxch %st(2)
|
|
Packit |
c32a2d |
fsubrp %st,%st(1)
|
|
Packit |
c32a2d |
flds -44(%ecx)
|
|
Packit |
c32a2d |
fmuls 40(%ebx)
|
|
Packit |
c32a2d |
fxch %st(2)
|
|
Packit |
c32a2d |
fsubrp %st,%st(1)
|
|
Packit |
c32a2d |
flds -48(%ecx)
|
|
Packit |
c32a2d |
fmuls 44(%ebx)
|
|
Packit |
c32a2d |
fxch %st(2)
|
|
Packit |
c32a2d |
fsubrp %st,%st(1)
|
|
Packit |
c32a2d |
flds -52(%ecx)
|
|
Packit |
c32a2d |
fmuls 48(%ebx)
|
|
Packit |
c32a2d |
fxch %st(2)
|
|
Packit |
c32a2d |
fsubrp %st,%st(1)
|
|
Packit |
c32a2d |
flds -56(%ecx)
|
|
Packit |
c32a2d |
fmuls 52(%ebx)
|
|
Packit |
c32a2d |
fxch %st(2)
|
|
Packit |
c32a2d |
fsubrp %st,%st(1)
|
|
Packit |
c32a2d |
flds -60(%ecx)
|
|
Packit |
c32a2d |
fmuls 56(%ebx)
|
|
Packit |
c32a2d |
fxch %st(2)
|
|
Packit |
c32a2d |
fsubrp %st,%st(1)
|
|
Packit |
c32a2d |
flds (%ecx) /* last term pairs offset 0 of the window with 60(%ebx) */
|
|
Packit |
c32a2d |
fmuls 60(%ebx)
|
|
Packit |
c32a2d |
fxch %st(2)
|
|
Packit |
c32a2d |
subl $4,%esp /* temporary 4-byte stack slot for fistpl */
|
|
Packit |
c32a2d |
fsubrp %st,%st(1)
|
|
Packit |
c32a2d |
fxch %st(1)
|
|
Packit |
c32a2d |
fsubrp %st,%st(1) /* final merge: a single value remains from this pass */
|
|
Packit |
c32a2d |
fistpl (%esp) /* store as a 32-bit integer (current FPU rounding mode) and pop */
|
|
Packit |
c32a2d |
popl %eax /* eax = integer sample; also releases the temporary slot */
|
|
Packit |
c32a2d |
cmpl $32767,%eax
|
|
Packit |
c32a2d |
jg 1f /* clip high */
|
|
Packit |
c32a2d |
cmpl $-32768,%eax
|
|
Packit |
c32a2d |
jl 2f /* clip low */
|
|
Packit |
c32a2d |
movw %ax,(%esi) /* in range: store the low 16 bits */
|
|
Packit |
c32a2d |
jmp 4f
|
|
Packit |
c32a2d |
1: movw $32767,(%esi)
|
|
Packit |
c32a2d |
jmp 3f
|
|
Packit |
c32a2d |
2: movw $-32768,(%esi)
|
|
Packit |
c32a2d |
3: incl %edi /* count one clipped sample */
|
|
Packit |
c32a2d |
4:
|
|
Packit |
c32a2d |
.L67:
|
|
Packit |
c32a2d |
addl $-64,%ebx /* sample buffer cursor: back 64 bytes (16 floats) */
|
|
Packit |
c32a2d |
addl $-128,%ecx /* window cursor: back 128 bytes */
|
|
Packit |
c32a2d |
addl $4,%esi /* output cursor: forward 4 bytes */
|
|
Packit |
c32a2d |
decl %ebp
|
|
Packit |
c32a2d |
jnz .L68
|
|
Packit |
c32a2d |
/* Epilogue: return the clip count and undo the prologue in reverse order. */
movl %edi,%eax /* return value = number of clipped samples counted in edi */
|
|
Packit |
c32a2d |
popl %ebx
|
|
Packit |
c32a2d |
popl %esi
|
|
Packit |
c32a2d |
popl %edi
|
|
Packit |
c32a2d |
popl %ebp
|
|
Packit |
c32a2d |
addl $12,%esp /* release the 12 bytes of locals reserved at entry */
|
|
Packit |
c32a2d |
ret
|
|
Packit |
c32a2d |
|
|
Packit |
c32a2d |
/*
 * Assembled only when both PIC and __APPLE__ are defined: one entry in the
 * __IMPORT,__jump_table section bound to dct64_i386 via .indirect_symbol.
 * NOTE(review): whether FUNC(dct64_i386) in the call above resolves to
 * L_dct64_i386 depends on mangle.h, which is not visible here - confirm.
 */
#if defined(PIC) && defined(__APPLE__)
|
|
Packit |
c32a2d |
.section __IMPORT,__jump_table,symbol_stubs,self_modifying_code+pure_instructions,5
|
|
Packit |
c32a2d |
L_dct64_i386:
|
|
Packit |
c32a2d |
.indirect_symbol ASM_NAME(dct64_i386)
|
|
Packit |
c32a2d |
hlt ; hlt ; hlt ; hlt ; hlt /* five placeholder bytes, matching the stub size 5 in the section directive; presumably rewritten at load time - TODO confirm */
|
|
Packit |
c32a2d |
#endif
|
|
Packit |
c32a2d |
|
|
Packit |
c32a2d |
NONEXEC_STACK /* macro not defined in this file; presumably marks the stack non-executable (mangle.h) - TODO confirm */
|