/*
	synth_sse_float: SSE optimized synth (stereo specific, float output version)

	copyright 1995-2009 by the mpg123 project - free software under the terms of the LGPL 2.1
	see COPYING and AUTHORS files in distribution or http://mpg123.org
	initially written by Taihei Monma
*/

#include "mangle.h"

/*
	Syntax: GAS AT&T, 32-bit x86, run through the C preprocessor.
	Arguments are read from the stack relative to %ebp (8(%ebp) onwards).

	Register roles inside synth_1to1_real_s_sse_asm:
		%ebx  WINDOW   read pointer into the window table
		%edx  B0L      read pointer into the left channel buffer
		%esi  B0R      read pointer into the right channel buffer
		%edi  SAMPLES  write pointer into the interleaved output
		%ecx           loop counter (4 iterations per loop)
		%eax           bo1*4 during setup, then handed to GET_GOT as _EBX_,
		               finally the return value
*/

/* real *window; */
#define WINDOW %ebx
/* real *b0l; */
#define B0L %edx
/* real *b0r; */
#define B0R %esi
/* real *samples; */
#define SAMPLES %edi

/*
	TEMP(n): n-th 16-byte scratch slot (n = 0..7) in the 128-byte stack area
	reserved by the prologue. The +12 skips the three registers pushed after
	the area was reserved on a 16-byte aligned %esp, so every slot is 16-byte
	aligned and may be accessed with movaps.
	Slots 0-2 hold left channel partial sums, slots 4-7 hold right channel
	partial sums; slot 3 is unused because the fourth left partial sum is
	kept in %xmm7.
*/
#define TEMP(n) (12+16*(n))(%esp)

/*
	int synth_1to1_real_s_sse_asm(real *window, real *b0l, real *b0r, real *samples, int bo1);
	return value: number of clipped samples (0)

	Writes 32 stereo frames (64 floats, interleaved left/right) to samples:
	16 frames from the first loop and 16 from the second.

	b0l and b0r are used directly as mulps memory operands, which requires
	them to be 16-byte aligned. window and samples are accessed with
	movups and carry no alignment requirement.
*/

#ifndef __APPLE__
	.section	.rodata
#else
	.data
#endif
	ALIGN32
/*
	Four copies of 939524096 = 0x38000000, the IEEE-754 single precision
	encoding of 2^-15 (1/32768). Every output sample is multiplied by it.
	NOTE(review): presumably this maps the 16-bit sample range to [-1,1) -
	confirm against the C synth.
*/
scale_sse:
	.long   939524096
	.long   939524096
	.long   939524096
	.long   939524096
	.text
	ALIGN16
.globl ASM_NAME(synth_1to1_real_s_sse_asm)
ASM_NAME(synth_1to1_real_s_sse_asm):
	/*
		Prologue: keep the caller's %esp in %ebp, align %esp to 16 bytes,
		reserve the 128-byte TEMP area, then save the callee-saved
		registers below it.
	*/
	pushl		%ebp
	movl		%esp, %ebp
	andl		$-16, %esp
	subl		$128, %esp
	pushl		%ebx
	pushl		%esi
	pushl		%edi

	/* Load the arguments; %eax = bo1 * 4, i.e. bo1 as a byte offset. */
	movl		8(%ebp), WINDOW
	movl		12(%ebp), B0L
	movl		16(%ebp), B0R
	movl		20(%ebp), SAMPLES
	movl		24(%ebp), %eax
	shll		$2, %eax

	/* WINDOW = window + 64 bytes - bo1*4 bytes */
	leal		64(WINDOW), WINDOW
	subl		%eax, WINDOW

	/*
		GET_GOT and LOCAL_VAR are defined in mangle.h (not visible here).
		_EBX_ is redirected to %eax because %ebx is in use as WINDOW.
		NOTE(review): presumably GET_GOT loads the GOT base into _EBX_ on
		PIC builds and LOCAL_VAR addresses scale_sse relative to it -
		verify against mangle.h. %eax is not written again until the
		final xorl.
	*/
	#undef _EBX_
	#define _EBX_ %eax
	GET_GOT

	/*
		First loop: 4 iterations, 4 stereo frames each.
		Per frame: multiply 16 window floats element-wise with 16 floats of
		b0l (and of b0r) and reduce to a 4-lane partial sum. After each
		frame WINDOW advances 128 bytes and B0L/B0R advance 64 bytes.
	*/
	movl		$4, %ecx

	ALIGN16
1:
	/* frame 0: left partial sum -> TEMP(0), right -> TEMP(4) */
	movups		(WINDOW), %xmm0
	movups		16(WINDOW), %xmm1
	movups		32(WINDOW), %xmm2
	movups		48(WINDOW), %xmm3
	movaps		%xmm0, %xmm4
	movaps		%xmm1, %xmm5
	movaps		%xmm2, %xmm6
	movaps		%xmm3, %xmm7
	mulps		0(B0L), %xmm0
	mulps		16(B0L), %xmm1
	mulps		32(B0L), %xmm2
	mulps		48(B0L), %xmm3
	mulps		0(B0R), %xmm4
	mulps		16(B0R), %xmm5
	mulps		32(B0R), %xmm6
	mulps		48(B0R), %xmm7
	addps		%xmm1, %xmm0
	addps		%xmm3, %xmm2
	addps		%xmm5, %xmm4
	addps		%xmm7, %xmm6
	addps		%xmm2, %xmm0
	addps		%xmm6, %xmm4
	movaps		%xmm0, TEMP(0)
	movaps		%xmm4, TEMP(4)

	leal		128(WINDOW), WINDOW
	leal		64(B0L), B0L
	leal		64(B0R), B0R

	/* frame 1: left partial sum -> TEMP(1), right -> TEMP(5) */
	movups		(WINDOW), %xmm0
	movups		16(WINDOW), %xmm1
	movups		32(WINDOW), %xmm2
	movups		48(WINDOW), %xmm3
	movaps		%xmm0, %xmm4
	movaps		%xmm1, %xmm5
	movaps		%xmm2, %xmm6
	movaps		%xmm3, %xmm7
	mulps		0(B0L), %xmm0
	mulps		16(B0L), %xmm1
	mulps		32(B0L), %xmm2
	mulps		48(B0L), %xmm3
	mulps		0(B0R), %xmm4
	mulps		16(B0R), %xmm5
	mulps		32(B0R), %xmm6
	mulps		48(B0R), %xmm7
	addps		%xmm1, %xmm0
	addps		%xmm3, %xmm2
	addps		%xmm5, %xmm4
	addps		%xmm7, %xmm6
	addps		%xmm2, %xmm0
	addps		%xmm6, %xmm4
	movaps		%xmm0, TEMP(1)
	movaps		%xmm4, TEMP(5)

	leal		128(WINDOW), WINDOW
	leal		64(B0L), B0L
	leal		64(B0R), B0R

	/* frame 2: left partial sum -> TEMP(2), right -> TEMP(6) */
	movups		(WINDOW), %xmm0
	movups		16(WINDOW), %xmm1
	movups		32(WINDOW), %xmm2
	movups		48(WINDOW), %xmm3
	movaps		%xmm0, %xmm4
	movaps		%xmm1, %xmm5
	movaps		%xmm2, %xmm6
	movaps		%xmm3, %xmm7
	mulps		0(B0L), %xmm0
	mulps		16(B0L), %xmm1
	mulps		32(B0L), %xmm2
	mulps		48(B0L), %xmm3
	mulps		0(B0R), %xmm4
	mulps		16(B0R), %xmm5
	mulps		32(B0R), %xmm6
	mulps		48(B0R), %xmm7
	addps		%xmm1, %xmm0
	addps		%xmm3, %xmm2
	addps		%xmm5, %xmm4
	addps		%xmm7, %xmm6
	addps		%xmm2, %xmm0
	addps		%xmm6, %xmm4
	movaps		%xmm0, TEMP(2)
	movaps		%xmm4, TEMP(6)

	leal		128(WINDOW), WINDOW
	leal		64(B0L), B0L
	leal		64(B0R), B0R

	/* frame 3: left partial sum stays in %xmm7, right -> TEMP(7) */
	movups		(WINDOW), %xmm0
	movups		16(WINDOW), %xmm1
	movups		32(WINDOW), %xmm2
	movups		48(WINDOW), %xmm3
	movaps		%xmm0, %xmm4
	movaps		%xmm1, %xmm5
	movaps		%xmm2, %xmm6
	movaps		%xmm3, %xmm7
	mulps		0(B0L), %xmm0
	mulps		16(B0L), %xmm1
	mulps		32(B0L), %xmm2
	mulps		48(B0L), %xmm3
	mulps		0(B0R), %xmm4
	mulps		16(B0R), %xmm5
	mulps		32(B0R), %xmm6
	mulps		48(B0R), %xmm7
	addps		%xmm1, %xmm0
	addps		%xmm3, %xmm2
	addps		%xmm5, %xmm4
	addps		%xmm7, %xmm6
	addps		%xmm2, %xmm0
	addps		%xmm6, %xmm4
	movaps		%xmm0, %xmm7
	movaps		%xmm4, TEMP(7)

	leal		128(WINDOW), WINDOW
	leal		64(B0L), B0L
	leal		64(B0R), B0R

	/*
		Left channel: transpose the four partial sums so that each register
		holds one lane of all four frames (%xmm4 = lane 0, %xmm6 = lane 1,
		%xmm0 = lane 2, %xmm1 = lane 3), then combine them as
		lane0 - lane1 + lane2 - lane3. Result (one value per frame) -> %xmm2.
	*/
	movaps		TEMP(0), %xmm4
	movaps		TEMP(1), %xmm5
	movaps		TEMP(2), %xmm6
	movaps		%xmm4, %xmm0
	movaps		%xmm6, %xmm1
	unpcklps	%xmm5, %xmm4
	unpcklps	%xmm7, %xmm6
	unpckhps	%xmm5, %xmm0
	unpckhps	%xmm7, %xmm1
	movaps		%xmm4, %xmm2
	movaps		%xmm0, %xmm3
	movlhps		%xmm6, %xmm4
	movhlps		%xmm2, %xmm6
	movlhps		%xmm1, %xmm0
	movhlps		%xmm3, %xmm1
	subps		%xmm6, %xmm4
	subps		%xmm1, %xmm0
	addps		%xmm4, %xmm0
	movaps		%xmm0, %xmm2

	/*
		Right channel: same transpose and alternating-sign reduction.
		The left result is moved from %xmm2 to %xmm5 before %xmm2 is reused
		as a temporary. Right result -> %xmm0.
	*/
	movaps		TEMP(4), %xmm4
	movaps		TEMP(5), %xmm5
	movaps		TEMP(6), %xmm6
	movaps		TEMP(7), %xmm7
	movaps		%xmm4, %xmm0
	movaps		%xmm6, %xmm1
	unpcklps	%xmm5, %xmm4
	unpcklps	%xmm7, %xmm6
	unpckhps	%xmm5, %xmm0
	unpckhps	%xmm7, %xmm1
	movaps		%xmm2, %xmm5
	movaps		%xmm4, %xmm2
	movaps		%xmm0, %xmm3
	movlhps		%xmm6, %xmm4
	movhlps		%xmm2, %xmm6
	movlhps		%xmm1, %xmm0
	movhlps		%xmm3, %xmm1
	subps		%xmm6, %xmm4
	subps		%xmm1, %xmm0
	addps		%xmm4, %xmm0

	/* Scale both channels, interleave to L0 R0 L1 R1 / L2 R2 L3 R3, store. */
	mulps		LOCAL_VAR(scale_sse), %xmm5
	mulps		LOCAL_VAR(scale_sse), %xmm0
	movaps		%xmm5, %xmm1
	unpcklps	%xmm0, %xmm5
	unpckhps	%xmm0, %xmm1
	movups		%xmm5, (SAMPLES)
	movups		%xmm1, 16(SAMPLES)

	leal		32(SAMPLES), SAMPLES
	decl		%ecx
	jnz		1b

	/*
		Second loop: 4 iterations, 4 stereo frames each.
		Same multiply/accumulate as the first loop, but B0L/B0R now step
		backwards by 64 bytes per frame (WINDOW keeps advancing by 128)
		and the four lanes are combined with plain additions.
	*/
	movl		$4, %ecx

	ALIGN16
1:
	/* frame 0: left partial sum -> TEMP(0), right -> TEMP(4) */
	movups		(WINDOW), %xmm0
	movups		16(WINDOW), %xmm1
	movups		32(WINDOW), %xmm2
	movups		48(WINDOW), %xmm3
	movaps		%xmm0, %xmm4
	movaps		%xmm1, %xmm5
	movaps		%xmm2, %xmm6
	movaps		%xmm3, %xmm7
	mulps		0(B0L), %xmm0
	mulps		16(B0L), %xmm1
	mulps		32(B0L), %xmm2
	mulps		48(B0L), %xmm3
	mulps		0(B0R), %xmm4
	mulps		16(B0R), %xmm5
	mulps		32(B0R), %xmm6
	mulps		48(B0R), %xmm7
	addps		%xmm1, %xmm0
	addps		%xmm3, %xmm2
	addps		%xmm5, %xmm4
	addps		%xmm7, %xmm6
	addps		%xmm2, %xmm0
	addps		%xmm6, %xmm4
	movaps		%xmm0, TEMP(0)
	movaps		%xmm4, TEMP(4)

	leal		128(WINDOW), WINDOW
	leal		-64(B0L), B0L
	leal		-64(B0R), B0R

	/* frame 1: left partial sum -> TEMP(1), right -> TEMP(5) */
	movups		(WINDOW), %xmm0
	movups		16(WINDOW), %xmm1
	movups		32(WINDOW), %xmm2
	movups		48(WINDOW), %xmm3
	movaps		%xmm0, %xmm4
	movaps		%xmm1, %xmm5
	movaps		%xmm2, %xmm6
	movaps		%xmm3, %xmm7
	mulps		0(B0L), %xmm0
	mulps		16(B0L), %xmm1
	mulps		32(B0L), %xmm2
	mulps		48(B0L), %xmm3
	mulps		0(B0R), %xmm4
	mulps		16(B0R), %xmm5
	mulps		32(B0R), %xmm6
	mulps		48(B0R), %xmm7
	addps		%xmm1, %xmm0
	addps		%xmm3, %xmm2
	addps		%xmm5, %xmm4
	addps		%xmm7, %xmm6
	addps		%xmm2, %xmm0
	addps		%xmm6, %xmm4
	movaps		%xmm0, TEMP(1)
	movaps		%xmm4, TEMP(5)

	leal		128(WINDOW), WINDOW
	leal		-64(B0L), B0L
	leal		-64(B0R), B0R

	/* frame 2: left partial sum -> TEMP(2), right -> TEMP(6) */
	movups		(WINDOW), %xmm0
	movups		16(WINDOW), %xmm1
	movups		32(WINDOW), %xmm2
	movups		48(WINDOW), %xmm3
	movaps		%xmm0, %xmm4
	movaps		%xmm1, %xmm5
	movaps		%xmm2, %xmm6
	movaps		%xmm3, %xmm7
	mulps		0(B0L), %xmm0
	mulps		16(B0L), %xmm1
	mulps		32(B0L), %xmm2
	mulps		48(B0L), %xmm3
	mulps		0(B0R), %xmm4
	mulps		16(B0R), %xmm5
	mulps		32(B0R), %xmm6
	mulps		48(B0R), %xmm7
	addps		%xmm1, %xmm0
	addps		%xmm3, %xmm2
	addps		%xmm5, %xmm4
	addps		%xmm7, %xmm6
	addps		%xmm2, %xmm0
	addps		%xmm6, %xmm4
	movaps		%xmm0, TEMP(2)
	movaps		%xmm4, TEMP(6)

	leal		128(WINDOW), WINDOW
	leal		-64(B0L), B0L
	leal		-64(B0R), B0R

	/* frame 3: left partial sum stays in %xmm7, right -> TEMP(7) */
	movups		(WINDOW), %xmm0
	movups		16(WINDOW), %xmm1
	movups		32(WINDOW), %xmm2
	movups		48(WINDOW), %xmm3
	movaps		%xmm0, %xmm4
	movaps		%xmm1, %xmm5
	movaps		%xmm2, %xmm6
	movaps		%xmm3, %xmm7
	mulps		0(B0L), %xmm0
	mulps		16(B0L), %xmm1
	mulps		32(B0L), %xmm2
	mulps		48(B0L), %xmm3
	mulps		0(B0R), %xmm4
	mulps		16(B0R), %xmm5
	mulps		32(B0R), %xmm6
	mulps		48(B0R), %xmm7
	addps		%xmm1, %xmm0
	addps		%xmm3, %xmm2
	addps		%xmm5, %xmm4
	addps		%xmm7, %xmm6
	addps		%xmm2, %xmm0
	addps		%xmm6, %xmm4
	movaps		%xmm0, %xmm7
	movaps		%xmm4, TEMP(7)

	leal		128(WINDOW), WINDOW
	leal		-64(B0L), B0L
	leal		-64(B0R), B0R

	/*
		Left channel: transpose as in the first loop, then sum all four
		lanes (lane0 + lane1 + lane2 + lane3). Result -> %xmm2.
	*/
	movaps		TEMP(0), %xmm4
	movaps		TEMP(1), %xmm5
	movaps		TEMP(2), %xmm6
	movaps		%xmm4, %xmm0
	movaps		%xmm6, %xmm1
	unpcklps	%xmm5, %xmm4
	unpcklps	%xmm7, %xmm6
	unpckhps	%xmm5, %xmm0
	unpckhps	%xmm7, %xmm1
	movaps		%xmm4, %xmm2
	movaps		%xmm0, %xmm3
	movlhps		%xmm6, %xmm4
	movhlps		%xmm2, %xmm6
	movlhps		%xmm1, %xmm0
	movhlps		%xmm3, %xmm1
	addps		%xmm6, %xmm4
	addps		%xmm1, %xmm0
	addps		%xmm4, %xmm0
	movaps		%xmm0, %xmm2

	/* Right channel: same; left result parked in %xmm5, right -> %xmm0. */
	movaps		TEMP(4), %xmm4
	movaps		TEMP(5), %xmm5
	movaps		TEMP(6), %xmm6
	movaps		TEMP(7), %xmm7
	movaps		%xmm4, %xmm0
	movaps		%xmm6, %xmm1
	unpcklps	%xmm5, %xmm4
	unpcklps	%xmm7, %xmm6
	unpckhps	%xmm5, %xmm0
	unpckhps	%xmm7, %xmm1
	movaps		%xmm2, %xmm5
	movaps		%xmm4, %xmm2
	movaps		%xmm0, %xmm3
	movlhps		%xmm6, %xmm4
	movhlps		%xmm2, %xmm6
	movlhps		%xmm1, %xmm0
	movhlps		%xmm3, %xmm1
	addps		%xmm6, %xmm4
	addps		%xmm1, %xmm0
	addps		%xmm4, %xmm0

	/* Scale both channels, interleave to L0 R0 L1 R1 / L2 R2 L3 R3, store. */
	mulps		LOCAL_VAR(scale_sse), %xmm5
	mulps		LOCAL_VAR(scale_sse), %xmm0
	movaps		%xmm5, %xmm1
	unpcklps	%xmm0, %xmm5
	unpckhps	%xmm0, %xmm1
	movups		%xmm5, (SAMPLES)
	movups		%xmm1, 16(SAMPLES)

	leal		32(SAMPLES), SAMPLES
	decl		%ecx
	jnz		1b

	/* Float output is never clipped here: always return 0. */
	xorl		%eax, %eax

	/*
		Epilogue: restore the callee-saved registers from below the TEMP
		area, then drop the aligned frame by reloading %esp from %ebp.
	*/
	popl		%edi
	popl		%esi
	popl		%ebx
	movl		%ebp, %esp
	popl		%ebp

	ret

NONEXEC_STACK