/*
	decode_i586_dither: asm synth with dither noise
	(src/libmpg123/synth_i586_dither.S)

	copyright ?-2007 by the mpg123 project - free software under the terms of the LGPL 2.1
	see COPYING and AUTHORS files in distribution or http://mpg123.org
	initially written by Stefan Bieschewski as decode_i586.s without dither

	This version uses "circular" 64k dither noise.
	(Patch by Adrian <adrian.bacon@xs4all.nl>)

	Thomas learned something about assembler and the stack while making this one thread safe (removing static data).
*/

#include "mangle.h"
/*
	Constant data. Placed in .rodata everywhere except on Apple targets,
	where it stays in .data.

	Each constant is two 32-bit words (low word first). Read as a
	little-endian IEEE-754 double, .LC0 is 32767.0 and .LC1 is -32768.0,
	i.e. the 16-bit sample limits. Neither label is referenced by the code
	in this file, which clips with integer compares instead.
	NOTE(review): they look like leftovers from the non-dither version --
	confirm before removing.

	ALIGN8 is not defined here; presumably it comes from mangle.h.
*/
.data
#ifndef __APPLE__
		.section	.rodata
#endif
	ALIGN8
.LC0:
	.long 0x0,0x40dfffc0
	ALIGN8
.LC1:
	.long 0x0,0xc0e00000
	ALIGN8
.text
/*
	int synth_1to1_i586_asm_dither(real *bandPtr, int channel, unsigned char *out, unsigned char *buffs, int bo_and_ditherindex[2], real *decwin, real* dithernoise);

	Syntax: GNU as, AT&T operand order, run through the C preprocessor.
	ABI:    32-bit x86, all arguments on the stack, result in %eax.
	        %ebx, %esi, %edi and %ebp are saved and restored here.

	In:
	  bandPtr             passed through as third argument to dct64_i386
	  channel             0 selects the first work buffer and advances bo;
	                      non-zero selects the second buffer (buffs+2176),
	                      writes at out+2 and rewinds the dither index by
	                      128 bytes (32 floats) so both channels get the
	                      same noise
	  out                 16-bit samples are stored every 4 bytes
	  buffs               work buffers that dct64_i386 fills
	  bo_and_ditherindex  [0] = bo (updated for channel 0 only),
	                      [1] = byte offset into dithernoise (always
	                      written back on return)
	  decwin              window coefficients
	  dithernoise         float table indexed by a byte offset that is
	                      wrapped with the mask 0x0003fffc

	Out:
	  %eax = number of output samples that had to be clipped to the
	         16-bit range. Earlier revisions of this routine always
	         returned 0 because %edi, the former clip counter, is needed
	         as dither pointer; the count now lives in a stack local.

	32 samples are produced per call: 16 in loop .L55, one in between,
	15 in loop .L68.

	NOTE: with GNU as in AT&T mode, "fsubrp %st,%st(1)" computes
	st(1) = st(1) - st(0) and pops (the fsub/fsubr mnemonics are reversed
	for this operand form), so the sums in .L55 alternate
	+p0 -p1 +p2 -p3 ...
*/
.globl ASM_NAME(synth_1to1_i586_asm_dither)
ASM_NAME(synth_1to1_i586_asm_dither):
	subl $16,%esp        /* four 32-bit locals */
	pushl %ebp
	pushl %edi
	pushl %esi
	pushl %ebx
/* stack: 0(%esp)=%ebx 4=esi 8=edi 12=ebp 16,20,24,28=local 32=back 36=bandptr 40=channel 44=out 48=buffs 52=bo 56=decwin 60=dithernoise */
#define BANDPTR 36(%esp)
#define CHANNEL 40(%esp)
#define OUT     44(%esp)
#define BUFFS   48(%esp)
#define BO      52(%esp)
#define DECWIN  56(%esp)
#define DITHERNOISE 60(%esp)
/*#define DITHERNOISE $(ASM_NAME(dithernoise))*/
#define LOC0    16(%esp)
/* LOC1 and LOC2 are only used while one resp. two arguments for dct64 are
   already pushed; at those points they address the very same slot as LOC0. */
#define LOC1    20(%esp)
#define LOC2    24(%esp)
/* Clip counter. Only valid while the stack is in the layout shown above;
   it shares the offset of LOC1 but not its (shifted) meaning. */
#define CLIPCOUNT    20(%esp)
#define DITHERINDEX  28(%esp)
/* During application of the dithering, we need the shifted locations because there's an additional value on the stack. */
#define DITHERNOISE2 64(%esp)
#define DITHERINDEX2 32(%esp)

	movl BANDPTR,%eax
	movl OUT,%esi
	movl BO, %ebx
	movl (%ebx),%ebp    /* get bo value */
	movl 4(%ebx),%edi   /* get the ditherindex behind bo */
	movl %edi,DITHERINDEX
	movl $0,CLIPCOUNT   /* no clipped samples yet */
	xorl %edi,%edi
	cmpl %edi,CHANNEL
	jne .L48
/* channel 0: bo = (bo-1) & 15, first buffer */
	decl %ebp
	andl $15,%ebp
	movl %ebp,(%ebx)   /* save bo back */
	movl BUFFS,%ecx
	jmp .L49
.L48:
/*       In stereo mode , "rewind" dither pointer 32 samples , so 2nd channel */
/*       has same dither values. Tested OK for mono and stereo MP2 and MP3 */
	subl $128,DITHERINDEX /* better move to %edi for the two calculations? */
	andl $0x0003fffc,DITHERINDEX
	addl $2,%esi          /* second 16-bit slot of each 4-byte output step */
	movl BUFFS,%ecx
	addl $2176,%ecx       /* second work buffer */
.L49:
/* now the call of dct64 is prepared, stuff pushed to the stack, but soon after it's removed again */
/* Here: %eax = bandPtr, %ecx = work buffer base, %ebp = bo.
   Arguments are pushed last-to-first, so the call is
   dct64_i386(<last pushed>, <second pushed>, bandPtr). */
	testl $1,%ebp
	je .L50
/* bo odd: windowing reads from base, LOC0 = bo */
	movl %ecx,%ebx
	movl %ebp,LOC0
	pushl %eax
	movl LOC1,%edx                 /* = bo (LOC0 shifted by one push) */
	leal (%ebx,%edx,4),%eax        /* base + bo*4 */
	pushl %eax
	movl LOC2,%eax                 /* = bo (LOC0 shifted by two pushes) */
	incl %eax
	andl $15,%eax
	leal 1088(,%eax,4),%eax
	addl %ebx,%eax                 /* base + 1088 + ((bo+1)&15)*4 */
	jmp .L74
.L50:
/* bo even: windowing reads from base+1088, LOC0 = bo+1 */
	leal 1088(%ecx),%ebx
	leal 1(%ebp),%edx
	movl %edx,LOC0
	pushl %eax
	leal 1092(%ecx,%ebp,4),%eax    /* base + 1088 + (bo+1)*4 */
	pushl %eax
	leal (%ecx,%ebp,4),%eax        /* base + bo*4 */
.L74:
	pushl %eax
	call FUNC(dct64_i386)
	addl $12,%esp
/* Now removed the parameters.
   stack: 0(%esp)=%ebx 4=esi 8=edi 12=ebp 16,20,24,28=local 32=back 36=bandptr 40=channel 44=out 48=buffs 52=bo 56=decwin 60=dithernoise */
	movl LOC0,%edx
	leal 0(,%edx,4),%edx
	/* movl ASM_VALUE(decwin)+64,%eax */
	movl DECWIN,%eax
	addl $64,%eax
	movl %eax,%ecx
	subl %edx,%ecx       /* window = decwin + 64 - 4*LOC0 (bytes) */
	movl $16,%ebp        /* first loop: 16 samples */
.L55:
/* One sample: 16 products window[i]*b[i], even i added, odd i subtracted.
   Loads/multiplies are interleaved with the accumulation of earlier
   products, hence the fxch shuffling. */
	flds (%ecx)
	fmuls (%ebx)
	flds 4(%ecx)
	fmuls 4(%ebx)
	fxch %st(1)
	flds 8(%ecx)
	fmuls 8(%ebx)
	fxch %st(2)
	fsubrp %st,%st(1)
	flds 12(%ecx)
	fmuls 12(%ebx)
	fxch %st(2)
	faddp %st,%st(1)
	flds 16(%ecx)
	fmuls 16(%ebx)
	fxch %st(2)
	fsubrp %st,%st(1)
	flds 20(%ecx)
	fmuls 20(%ebx)
	fxch %st(2)
	faddp %st,%st(1)
	flds 24(%ecx)
	fmuls 24(%ebx)
	fxch %st(2)
	fsubrp %st,%st(1)
	flds 28(%ecx)
	fmuls 28(%ebx)
	fxch %st(2)
	faddp %st,%st(1)
	flds 32(%ecx)
	fmuls 32(%ebx)
	fxch %st(2)
	fsubrp %st,%st(1)
	flds 36(%ecx)
	fmuls 36(%ebx)
	fxch %st(2)
	faddp %st,%st(1)
	flds 40(%ecx)
	fmuls 40(%ebx)
	fxch %st(2)
	fsubrp %st,%st(1)
	flds 44(%ecx)
	fmuls 44(%ebx)
	fxch %st(2)
	faddp %st,%st(1)
	flds 48(%ecx)
	fmuls 48(%ebx)
	fxch %st(2)
	fsubrp %st,%st(1)
	flds 52(%ecx)
	fmuls 52(%ebx)
	fxch %st(2)
	faddp %st,%st(1)
	flds 56(%ecx)
	fmuls 56(%ebx)
	fxch %st(2)
	fsubrp %st,%st(1)
	flds 60(%ecx)
	fmuls 60(%ebx)
	fxch %st(2)
	subl $4,%esp         /* temp slot for fistpl; stack offsets shift by 4 from here */
	faddp %st,%st(1)
	fxch %st(1)
	fsubrp %st,%st(1)

/* step to the next noise value (one float), wrapping inside the table */
	addl $4,DITHERINDEX2
	andl $0x0003fffc,DITHERINDEX2
	movl DITHERNOISE2,%edi
	addl DITHERINDEX2,%edi

	fadds (%edi)

/* fistpl and popl as a unit keep the stack unchanged */
	fistpl (%esp)        /* convert to 32-bit integer using the current FPU rounding mode */
	popl %eax
/* saturate to signed 16 bit */
	cmpl $32767,%eax
	jg 1f
	cmpl $-32768,%eax
	jl 2f
	movw %ax,(%esi)
	jmp 4f
1:	movw $32767,(%esi)
	jmp 3f
2:	movw $-32768,(%esi)
3:
	incl CLIPCOUNT       /* stack is back in the normal layout after popl */
4:
.L54:
	addl $64,%ebx
	subl $-128,%ecx      /* i.e. add 128 */
	addl $4,%esi
	decl %ebp
	jnz .L55
/* Sample between the two loops: only the eight even-indexed products, all added. */
	flds (%ecx)
	fmuls (%ebx)
	flds 8(%ecx)
	fmuls 8(%ebx)
	flds 16(%ecx)
	fmuls 16(%ebx)
	fxch %st(2)
	faddp %st,%st(1)
	flds 24(%ecx)
	fmuls 24(%ebx)
	fxch %st(2)
	faddp %st,%st(1)
	flds 32(%ecx)
	fmuls 32(%ebx)
	fxch %st(2)
	faddp %st,%st(1)
	flds 40(%ecx)
	fmuls 40(%ebx)
	fxch %st(2)
	faddp %st,%st(1)
	flds 48(%ecx)
	fmuls 48(%ebx)
	fxch %st(2)
	faddp %st,%st(1)
	flds 56(%ecx)
	fmuls 56(%ebx)
	fxch %st(2)
	subl $4,%esp         /* temp slot for fistpl; stack offsets shift by 4 from here */
	faddp %st,%st(1)
	fxch %st(1)
	faddp %st,%st(1)

/* step to the next noise value (one float), wrapping inside the table */
	addl $4,DITHERINDEX2
	andl $0x0003fffc,DITHERINDEX2
	movl DITHERNOISE2,%edi
	addl DITHERINDEX2,%edi

	fadds (%edi)
/* fistpl and popl as a unit keep the stack unchanged */
	fistpl (%esp)
	popl %eax
/* saturate to signed 16 bit */
	cmpl $32767,%eax
	jg 1f
	cmpl $-32768,%eax
	jl 2f
	movw %ax,(%esi)
	jmp 4f
1:	movw $32767,(%esi)
	jmp 3f
2:	movw $-32768,(%esi)
3:
	incl CLIPCOUNT       /* stack is back in the normal layout after popl */
4:
.L62:
/* Second loop setup: step the dct data back by one row and move the
   window pointer to window - 128 + 8*LOC0 (bytes). */
	addl $-64,%ebx
	addl $4,%esi
	movl LOC0,%edx
	leal -128(%ecx,%edx,8),%ecx
	movl $15,%ebp        /* second loop: 15 samples */
.L68:
/* One sample: window read backwards (-4 .. -60, then 0), every product
   subtracted; the first one is negated up front with fchs. */
	flds -4(%ecx)
	fchs
	fmuls (%ebx)
	flds -8(%ecx)
	fmuls 4(%ebx)
	fxch %st(1)
	flds -12(%ecx)
	fmuls 8(%ebx)
	fxch %st(2)
	fsubrp %st,%st(1)
	flds -16(%ecx)
	fmuls 12(%ebx)
	fxch %st(2)
	fsubrp %st,%st(1)
	flds -20(%ecx)
	fmuls 16(%ebx)
	fxch %st(2)
	fsubrp %st,%st(1)
	flds -24(%ecx)
	fmuls 20(%ebx)
	fxch %st(2)
	fsubrp %st,%st(1)
	flds -28(%ecx)
	fmuls 24(%ebx)
	fxch %st(2)
	fsubrp %st,%st(1)
	flds -32(%ecx)
	fmuls 28(%ebx)
	fxch %st(2)
	fsubrp %st,%st(1)
	flds -36(%ecx)
	fmuls 32(%ebx)
	fxch %st(2)
	fsubrp %st,%st(1)
	flds -40(%ecx)
	fmuls 36(%ebx)
	fxch %st(2)
	fsubrp %st,%st(1)
	flds -44(%ecx)
	fmuls 40(%ebx)
	fxch %st(2)
	fsubrp %st,%st(1)
	flds -48(%ecx)
	fmuls 44(%ebx)
	fxch %st(2)
	fsubrp %st,%st(1)
	flds -52(%ecx)
	fmuls 48(%ebx)
	fxch %st(2)
	fsubrp %st,%st(1)
	flds -56(%ecx)
	fmuls 52(%ebx)
	fxch %st(2)
	fsubrp %st,%st(1)
	flds -60(%ecx)
	fmuls 56(%ebx)
	fxch %st(2)
	fsubrp %st,%st(1)
	flds (%ecx)
	fmuls 60(%ebx)
	fxch %st(2)
	subl $4,%esp         /* temp slot for fistpl; stack offsets shift by 4 from here */
	fsubrp %st,%st(1)
	fxch %st(1)
	fsubrp %st,%st(1)

/* step to the next noise value (one float), wrapping inside the table */
	addl $4,DITHERINDEX2
	andl $0x0003fffc,DITHERINDEX2
	movl DITHERNOISE2,%edi
	addl DITHERINDEX2,%edi

	fadds (%edi)
/* fistpl and popl as a unit keep the stack unchanged */
	fistpl (%esp)
	popl %eax
/* saturate to signed 16 bit */
	cmpl $32767,%eax
	jg 1f
	cmpl $-32768,%eax
	jl 2f
	movw %ax,(%esi)
	jmp 4f
1:	movw $32767,(%esi)
	jmp 3f
2:	movw $-32768,(%esi)
3:
	incl CLIPCOUNT       /* stack is back in the normal layout after popl */
4:
.L67:
	addl $-64,%ebx
	addl $-128,%ecx
	addl $4,%esi
	decl %ebp
	jnz .L68
/* return the number of clipped samples (must be read before the locals are dropped) */
	movl CLIPCOUNT,%eax
/* save ditherindex */
	movl BO,%ebx
	movl DITHERINDEX,%esi
	movl %esi,4(%ebx)
/* stack: 0=ebx 4=esi 8=edi 12=ebp 16,20,24,28=local 32=back 36=bandptr 40=channel 44=out 48=buffs 52=bo */
	popl %ebx
	popl %esi
	popl %edi
	popl %ebp
	addl $16,%esp
/* The stack must be now: 0=back 4=bandptr 8=channel 12=out 16=buffs 20=bo */
	ret

/*
	Position-independent builds on Apple targets only: a 5-byte entry in
	the __IMPORT,__jump_table section bound to dct64_i386 via
	.indirect_symbol. The five hlt bytes are just the placeholder contents
	of that entry; the section is flagged self_modifying_code.
	NOTE(review): presumably FUNC(dct64_i386) from mangle.h resolves to
	L_dct64_i386 in this configuration and the loader patches the entry
	into a jump -- confirm against mangle.h.
*/
#if defined(PIC) && defined(__APPLE__)
	.section __IMPORT,__jump_table,symbol_stubs,self_modifying_code+pure_instructions,5
L_dct64_i386:
	.indirect_symbol ASM_NAME(dct64_i386)
	hlt ; hlt ; hlt ; hlt ; hlt
#endif

/* NONEXEC_STACK is not defined in this file; presumably a mangle.h macro
   that marks the object as not needing an executable stack. */
NONEXEC_STACK