/*
	tabinit_mmx: make_decode_tables_mmx

	copyright ?-2006 by the mpg123 project - free software under the terms of the LGPL 2.1
	see COPYING and AUTHORS files in distribution or http://mpg123.org
	initially written by the mysterious higway (apparently)

	See synth_mmx.S about license history.
*/

#include "mangle.h"

.data
	ALIGN32
/*
	costab_mmxsse: exported table of 31 32-bit words.

	Nothing in this file reads the table; it is only defined and exported
	here. The values restart from a small number after 16, 8, 4 and 2
	entries, which suggests five sub-tables (16 + 8 + 4 + 2 + 1) -- confirm
	against the code that consumes it. The words are presumably IEEE-754
	single-precision bit patterns (e.g. 1056974725 = 0x3F002785, roughly
	0.5006) -- TODO confirm with the consumer.
*/
.globl ASM_NAME(costab_mmxsse)
ASM_NAME(costab_mmxsse):
	.long 1056974725
	.long 1057056395
	.long 1057223771
	.long 1057485416
	.long 1057855544
	.long 1058356026
	.long 1059019886
	.long 1059897405
	.long 1061067246
	.long 1062657950
	.long 1064892987
	.long 1066774581
	.long 1069414683
	.long 1073984175
	.long 1079645762
	.long 1092815430
	.long 1057005197
	.long 1057342072
	.long 1058087743
	.long 1059427869
	.long 1061799040
	.long 1065862217
	.long 1071413542
	.long 1084439708
	.long 1057128951
	.long 1058664893
	.long 1063675095
	.long 1076102863
	.long 1057655764
	.long 1067924853
	.long 1060439283
	ALIGN32
/*
	intwinbase: 257 signed 16-bit entries (32 rows of 8, plus one), read by
	make_decode_tables_mmx_asm below.

	Entries from byte offset 444 (index 222, the value -26209) to the end
	are stored with 60000 subtracted: their real values exceed the signed
	16-bit range, and make_decode_tables_mmx_asm adds 60000 back after the
	sign-extending load whenever its read pointer is at or beyond
	intwinbase+444.
*/
intwinbase:
	.short      0,    -1,    -1,    -1,    -1,    -1,    -1,    -2
	.short     -2,    -2,    -2,    -3,    -3,    -4,    -4,    -5
	.short     -5,    -6,    -7,    -7,    -8,    -9,   -10,   -11
	.short    -13,   -14,   -16,   -17,   -19,   -21,   -24,   -26
	.short    -29,   -31,   -35,   -38,   -41,   -45,   -49,   -53
	.short    -58,   -63,   -68,   -73,   -79,   -85,   -91,   -97
	.short   -104,  -111,  -117,  -125,  -132,  -139,  -147,  -154
	.short   -161,  -169,  -176,  -183,  -190,  -196,  -202,  -208
	.short   -213,  -218,  -222,  -225,  -227,  -228,  -228,  -227
	.short   -224,  -221,  -215,  -208,  -200,  -189,  -177,  -163
	.short   -146,  -127,  -106,   -83,   -57,   -29,     2,    36
	.short     72,   111,   153,   197,   244,   294,   347,   401
	.short    459,   519,   581,   645,   711,   779,   848,   919
	.short    991,  1064,  1137,  1210,  1283,  1356,  1428,  1498
	.short   1567,  1634,  1698,  1759,  1817,  1870,  1919,  1962
	.short   2001,  2032,  2057,  2075,  2085,  2087,  2080,  2063
	.short   2037,  2000,  1952,  1893,  1822,  1739,  1644,  1535
	.short   1414,  1280,  1131,   970,   794,   605,   402,   185
	.short    -45,  -288,  -545,  -814, -1095, -1388, -1692, -2006
	.short  -2330, -2663, -3004, -3351, -3705, -4063, -4425, -4788
	.short  -5153, -5517, -5879, -6237, -6589, -6935, -7271, -7597
	.short  -7910, -8209, -8491, -8755, -8998, -9219, -9416, -9585
	.short  -9727, -9838, -9916, -9959, -9966, -9935, -9863, -9750
	.short  -9592, -9389, -9139, -8840, -8492, -8092, -7640, -7134
	.short  -6574, -5959, -5288, -4561, -3776, -2935, -2037, -1082
	.short    -70,   998,  2122,  3300,  4533,  5818,  7154,  8540
	.short   9975, 11455, 12980, 14548, 16155, 17799, 19478, 21189
	.short  22929, 24694, 26482, 28289, 30112, 31947,-26209,-24360
	.short -22511,-20664,-18824,-16994,-15179,-13383,-11610, -9863
	.short  -8147, -6466, -4822, -3222, -1667,  -162,  1289,  2684
	.short   4019,  5290,  6494,  7629,  8692,  9679, 10590, 11420
	.short  12169, 12835, 13415, 13908, 14313, 14630, 14856, 14992
	.short  15038

/*
	intwindiv: divisor for the float pass of make_decode_tables_mmx_asm.
	That function copies this word into its stack frame and divides each
	intwinbase entry by it with fdivs, so it is consumed as a 32-bit float.
*/
intwindiv:
	.long 0x47800000			/* 65536.0 (2^16) as an IEEE-754 single */
.text
	ALIGN32
/* void make_decode_tables_mmx_asm(long scaleval, float* decwin_mmx, float *decwins); */
/*
	Fills two window tables from intwinbase, scaled by scaleval.

	Syntax/target: GNU as, AT&T syntax, 32-bit x86, preprocessed by cpp.
	Arguments are read from the caller's stack (see the defines below) and
	the function ends in a plain ret, so the caller removes them. Nothing
	is returned in a register.

	In:    8(%ebp)  scaleval    32-bit integer scale factor
	       12(%ebp) decwin_mmx  written as 32-bit floats (pass 1)
	       16(%ebp) decwins     written as 16-bit words (pass 2), although
	                            the prototype above declares it float*
	Saved: %ebp, %edi, %esi, %ebx are pushed on entry and restored on exit.
	Clobb: %eax, %ecx, %edx, flags. The x87 stack is used one slot deep and
	       is back at its entry depth after every store (fildl ... fstps).
	Note:  the scaleval argument slot on the caller's stack is negated in
	       place several times and is not restored to its entry value.

	Register roles inside both loops:
	       %edi   read pointer into intwinbase
	       %ebx   iteration counter (pass 2 borrows it briefly for decwins)
	       %ecx   output index idx
	       %esi   constant 32: idx stride, and bit mask for the counter
	       (%esp) intwinbase step in bytes, +2 at first, negated to -2 once
	              the counter reaches 256

	Each pass runs 512 iterations: %edi walks up from intwinbase[0] to
	intwinbase[256] and back down, and the loop ends when %edi is equal to
	intwinbase again. idx grows by 32 per iteration and additionally by
	-1023 after every 32nd iteration; an entry is only stored while
	idx < 528 (unsigned). The sign of scaleval is flipped after every 64th
	iteration.
*/
.globl ASM_NAME(make_decode_tables_mmx_asm)
ASM_NAME(make_decode_tables_mmx_asm):
	pushl %ebp
	mov %esp,%ebp
	sub $12,%esp			/* three 4-byte locals, see defines below */
	pushl %edi
	pushl %esi
	pushl %ebx

/* GET_GOT and LOCAL_VAR come from mangle.h, which is not visible here;
   presumably they provide position-independent access to the local data
   symbols -- confirm there. The table address and the divisor are cached
   in the frame so the loops can reach them through %ebp only. */
	GET_GOT
	lea LOCAL_VAR(intwinbase),%edi
	mov LOCAL_VAR(intwindiv),%ecx
	mov %edi,-4(%ebp)
	lea 444(%edi),%eax		/* first biased entry (index 222) */
	mov %eax,-8(%ebp)
	mov %ecx,-12(%ebp)

/* stack: -12(%ebp)=*intwindiv, -8(%ebp)=intwinbase+444, -4(%ebp)=intwinbase, ... ,
          4(%ebp)=back, 8(%ebp)=scaleval, 12(%ebp)=decwin_mmx, 16(%ebp)=decwins */
#define INTWINDIV -12(%ebp)
#define INTWINBASE_PLUS_444 -8(%ebp)
#define INTWINBASE -4(%ebp)
#define SCALEVAL 8(%ebp)
#define DECWIN_MMX 12(%ebp)
#define DECWINS 16(%ebp)

/* ---- pass 1: float table decwin_mmx ---- */
	xorl %ecx,%ecx			/* idx = 0 */
	xorl %ebx,%ebx			/* iteration counter = 0 */
	movl $32,%esi

	negl SCALEVAL			/* pass 1 starts with -scaleval */
	pushl $2			/* intwinbase step; lives at (%esp) between iterations */
.L00:
	cmpl $528,%ecx
	jnc .L02			/* idx >= 528 (unsigned): nothing to store */
	movswl (%edi),%eax		/* sign-extended 16-bit table entry */
	cmpl INTWINBASE_PLUS_444,%edi
	jc .L01				/* below intwinbase+444: entry is stored unbiased */
	addl $60000,%eax		/* undo the -60000 bias of the tail entries */
.L01:
	pushl %eax			/* fildl needs a memory operand: spill the entry */
	fildl (%esp)
	fdivs INTWINDIV			/* entry / 65536.0 */
	fimull SCALEVAL			/* ... * current (signed) scaleval */
/* %eax still holds the spilled entry but is dead from here on, so it is
   reused as the decwin_mmx base until the popl below. */
	movl DECWIN_MMX,%eax
	fsts    (%eax,%ecx,4)		/* decwin_mmx[idx], value stays on the x87 stack */
	fstps 64(%eax,%ecx,4)		/* decwin_mmx[idx+16], pops the x87 stack */
	popl %eax			/* drop the spill; (%esp) is the step again */
.L02:
	leal -1(%esi),%edx
	andl %ebx,%edx			/* counter & 31 */
	cmpl $31,%edx
	jnz .L03
	addl $-1023,%ecx		/* after every 32nd iteration: idx -= 1023 */
	testl %esi,%ebx
	jz  .L03
	negl SCALEVAL			/* counter & 63 == 63: flip the sign of scaleval */
.L03:
	addl %esi,%ecx			/* idx += 32 */
	addl (%esp),%edi		/* advance the table pointer by the current step */
	incl %ebx
	cmpl INTWINBASE,%edi
	jz .L04				/* back at intwinbase[0]: pass 1 is done */
	cmpl $256,%ebx
	jnz .L00
	negl (%esp)			/* reached intwinbase[256]: walk back down */
	jmp .L00
.L04:
	popl %eax			/* discard the step */

/* ---- pass 2: 16-bit table decwins ----
   %edi is equal to intwinbase again, %esi is still 32, and SCALEVAL has
   been negated an odd number of times so far (once before pass 1, eight
   times inside it), so this pass also starts with -scaleval. */
	xorl %ecx,%ecx
	xorl %ebx,%ebx
	pushl $2 /* paired with popl above */
.L05:
	cmpl $528,%ecx
	jnc .L11			/* idx >= 528 (unsigned): nothing to store */
	movswl (%edi),%eax
	cmpl INTWINBASE_PLUS_444,%edi
	jc .L06
	addl $60000,%eax		/* undo the -60000 bias of the tail entries */
.L06:
	cltd				/* sign-extend %eax into %edx:%eax */
	imull SCALEVAL			/* %edx:%eax = entry * scaleval (signed, 64-bit) */
	shrdl $17,%edx,%eax		/* %eax = low 32 bits of the product shifted right by 17 */
	cmpl $32767,%eax
	movl $1055,%edx			/* mov leaves the flags alone; %edx is used after .L08 */
	jle .L07
	movl $32767,%eax		/* clamp to the upper bound */
	jmp .L08
.L07:
	cmpl $-32767,%eax
	jge .L08
	movl $-32767,%eax		/* clamp to the lower bound (symmetric, not -32768) */
.L08:
/* going to use ebx for decwins, watch the jumps:
   .L11 is reached both from the jnc above (before this pushl) and by
   falling through after the matching popl, so the stack is balanced on
   both paths. */
	pushl %ebx
	movl DECWINS,%ebx
	cmpl $512,%ecx
	jnc .L09			/* idx >= 512: skip the mirrored stores */
	subl %ecx,%edx			/* %edx = 1055 - idx */
	movw %ax,(%ebx,%edx,2)		/* 16-bit store at word index 1055-idx */
	movw %ax,-32(%ebx,%edx,2)	/* ... and 16 words (32 bytes) below it */
.L09:
	testl $1,%ecx
	jnz .L10			/* odd idx keeps the sign */
	negl %eax			/* even idx stores the negated value */
.L10:
	movw %ax,(%ebx,%ecx,2)		/* 16-bit store at word index idx */
	movw %ax,32(%ebx,%ecx,2)	/* ... and 16 words (32 bytes) above it */
	popl %ebx /* that has to match the pushl before */
.L11:
	leal -1(%esi),%edx
	andl %ebx,%edx			/* counter & 31 */
	cmpl $31,%edx
	jnz .L12
	addl $-1023,%ecx		/* after every 32nd iteration: idx -= 1023 */
	testl %esi,%ebx
	jz  .L12
	negl SCALEVAL			/* counter & 63 == 63: flip the sign of scaleval */
.L12:
	addl %esi,%ecx			/* idx += 32 */
	addl (%esp),%edi
	incl %ebx
	cmpl INTWINBASE,%edi
	jz .L13				/* back at intwinbase[0]: pass 2 is done */
	cmpl $256,%ebx
	jnz .L05
	negl (%esp)			/* reached intwinbase[256]: walk back down */
	jmp .L05
.L13:
	popl %eax			/* discard the step */

	popl %ebx
	popl %esi
	popl %edi
	mov %ebp,%esp			/* release the three locals */
	pop %ebp
	ret

NONEXEC_STACK