Blame src/libmpg123/dct64_sse_float.S

Packit c32a2d
/*
Packit c32a2d
	dct64_sse_float: SSE optimized dct64 (float output version)
Packit c32a2d
Packit c32a2d
	copyright 1995-2009 by the mpg123 project - free software under the terms of the LGPL 2.1
Packit c32a2d
	see COPYING and AUTHORS files in distribution or http://mpg123.org
Packit c32a2d
	initially written by Taihei Monma
Packit c32a2d
*/
Packit c32a2d
Packit c32a2d
#include "mangle.h"
Packit c32a2d
Packit c32a2d
/* ARG(n): n-th 4-byte stack argument, addressed from %ebp. The 8 skips the
   saved %ebp (pushed in the prologue) and the return address. */
#define ARG(n) (8+n*4)(%ebp)
Packit c32a2d
/* TEMP(n): n-th 16-byte slot of the scratch area reserved on the stack.
   The 4 skips the %ebx that is pushed after the area has been reserved. */
#define TEMP(n) (4+n*16)(%esp)
Packit c32a2d
/* TEMP_BYTE(n): the same scratch area, addressed by byte offset n. */
#define TEMP_BYTE(n) (4+n)(%esp)
Packit c32a2d
Packit c32a2d
/*
Packit c32a2d
	void dct64_real_sse(real *out0, real *out1, real *samples);
Packit c32a2d
*/
Packit c32a2d
Packit c32a2d
/*
	Constant 16-byte vectors used by dct64_real_sse.
	They go to .rodata, except when __APPLE__ is defined, where .data is used.
	ALIGN16 is a macro defined outside this file (presumably 16-byte
	alignment, as the name says - confirm in mangle.h).
*/
#ifndef __APPLE__
Packit c32a2d
	.section	.rodata
Packit c32a2d
#else
Packit c32a2d
	.data
Packit c32a2d
#endif
Packit c32a2d
	ALIGN16
Packit c32a2d
/* pnpn: sign-bit pattern (-2147483648 == 0x80000000) in lanes 1 and 3;
   xorps with it flips the sign of those two lanes. */
pnpn:
Packit c32a2d
	.long	0
Packit c32a2d
	.long	-2147483648
Packit c32a2d
	.long	0
Packit c32a2d
	.long	-2147483648
Packit c32a2d
	ALIGN16
Packit c32a2d
/* mask: all bits set in lanes 0-2, zero in lane 3;
   andps with it clears lane 3 and keeps the rest. */
mask:
Packit c32a2d
	.long	-1
Packit c32a2d
	.long	-1
Packit c32a2d
	.long	-1
Packit c32a2d
	.long	0
Packit c32a2d
	
Packit c32a2d
	.text
Packit c32a2d
	ALIGN16
Packit c32a2d
/*
	dct64_real_sse(out0, out1, samples) - 32-bit x86, arguments on the stack.

	Reads 32 floats from samples (byte offsets 0..127), runs five butterfly
	stages whose differences are scaled by entries of costab_mmxsse (table
	byte offsets 0..63, 64..95, 96..111, 112..119 and 120), does a final
	round of additions and then stores single floats to
	  out0 at byte offsets 0, 64, ..., 1024 and
	  out1 at byte offsets 0, 64, ..., 960.

	Scratch: %eax, %ecx, %xmm0-%xmm7 and a 16-byte aligned 128-byte stack
	area (TEMP(0)..TEMP(7)). %ebx and %ebp are saved and restored.
*/
.globl ASM_NAME(dct64_real_sse)
Packit c32a2d
ASM_NAME(dct64_real_sse):
Packit c32a2d
	pushl		%ebp
Packit c32a2d
	movl		%esp, %ebp
Packit c32a2d
	
Packit c32a2d
	andl		$-16, %esp /* align the stack at 16 bytes */
Packit c32a2d
	subl		$128, %esp /* reserve space for temporary storage (TEMP(0)..TEMP(7)) */
Packit c32a2d
	/* %ebx is pushed below the scratch area; TEMP()/TEMP_BYTE() skip it */
	pushl		%ebx
Packit c32a2d
Packit c32a2d
	/* GET_GOT is a macro defined outside this file; presumably it sets up
	   %ebx for position-independent data access - confirm in mangle.h */
	GET_GOT
Packit c32a2d
	
Packit c32a2d
	/* %eax = samples */
	movl		ARG(2), %eax
Packit c32a2d
	
Packit c32a2d
	/* Stage 1, i = 0..7: sums s[i]+s[31-i] go to TEMP(0)/TEMP(1),
	   differences s[i]-s[31-i] stay in xmm7/xmm6.
	   shufps $0x1b reverses the four lanes of a register.
	   MOVUAPS is a macro defined outside this file (presumably an aligned
	   or unaligned 16-byte load depending on the build - confirm). */
	MOVUAPS 	(%eax), %xmm7
Packit c32a2d
	MOVUAPS 	16(%eax), %xmm6
Packit c32a2d
	MOVUAPS 	112(%eax), %xmm0
Packit c32a2d
	MOVUAPS 	96(%eax), %xmm1
Packit c32a2d
	shufps 		$0x1b, %xmm0, %xmm0
Packit c32a2d
	shufps 		$0x1b, %xmm1, %xmm1
Packit c32a2d
	movaps 		%xmm7, %xmm4
Packit c32a2d
	movaps		%xmm6, %xmm5
Packit c32a2d
	addps 		%xmm0, %xmm4
Packit c32a2d
	addps 		%xmm1, %xmm5
Packit c32a2d
	subps 		%xmm0, %xmm7
Packit c32a2d
	subps 		%xmm1, %xmm6
Packit c32a2d
	movaps		%xmm4, TEMP(0)
Packit c32a2d
	movaps		%xmm5, TEMP(1)
Packit c32a2d
	
Packit c32a2d
	/* Stage 1, i = 8..15: sums in xmm2/xmm3, differences in xmm5/xmm4 */
	MOVUAPS 	32(%eax), %xmm2
Packit c32a2d
	MOVUAPS 	48(%eax), %xmm3
Packit c32a2d
	MOVUAPS 	80(%eax), %xmm0
Packit c32a2d
	MOVUAPS 	64(%eax), %xmm1
Packit c32a2d
	shufps 		$0x1b, %xmm0, %xmm0
Packit c32a2d
	shufps 		$0x1b, %xmm1, %xmm1
Packit c32a2d
	movaps 		%xmm2, %xmm5
Packit c32a2d
	movaps		%xmm3, %xmm4
Packit c32a2d
	addps 		%xmm0, %xmm2
Packit c32a2d
	addps 		%xmm1, %xmm3
Packit c32a2d
	subps 		%xmm0, %xmm5
Packit c32a2d
	subps 		%xmm1, %xmm4
Packit c32a2d
	
Packit c32a2d
	/* From here on %eax = address of costab_mmxsse (samples pointer is
	   no longer needed). The four stage-1 difference vectors are scaled
	   by the first 16 table entries. */
#if defined(PIC) && defined(__APPLE__)
Packit c32a2d
	mov			GLOBAL_VAR_PTR(costab_mmxsse), %eax
Packit c32a2d
#else
Packit c32a2d
	lea			GLOBAL_VAR(costab_mmxsse), %eax
Packit c32a2d
#endif
Packit c32a2d
	mulps		(%eax), %xmm7
Packit c32a2d
	mulps		16(%eax), %xmm6
Packit c32a2d
	mulps		32(%eax), %xmm5
Packit c32a2d
	mulps		48(%eax), %xmm4
Packit c32a2d
	
Packit c32a2d
	/* Stage 2: butterflies between 4-lane groups, one side lane-reversed.
	   Sums of the "sum" half end up in TEMP(0)/TEMP(1), sums of the
	   "difference" half in xmm4/xmm5; the differences (xmm0, xmm1, xmm6,
	   xmm7) are scaled by the table entries at byte offsets 64 and 80. */
	shufps		$0x1b, %xmm2, %xmm2
Packit c32a2d
	shufps		$0x1b, %xmm3, %xmm3
Packit c32a2d
	shufps		$0x1b, %xmm4, %xmm4
Packit c32a2d
	shufps		$0x1b, %xmm5, %xmm5
Packit c32a2d
	movaps		TEMP(0), %xmm0
Packit c32a2d
	movaps		TEMP(1), %xmm1
Packit c32a2d
	subps		%xmm3, %xmm0
Packit c32a2d
	subps		%xmm2, %xmm1
Packit c32a2d
	addps		TEMP(0), %xmm3
Packit c32a2d
	addps		TEMP(1), %xmm2
Packit c32a2d
	movaps		%xmm3, TEMP(0)
Packit c32a2d
	movaps		%xmm2, TEMP(1)
Packit c32a2d
	movaps		%xmm6, %xmm2
Packit c32a2d
	movaps		%xmm7, %xmm3
Packit c32a2d
	subps		%xmm5, %xmm6
Packit c32a2d
	subps		%xmm4, %xmm7
Packit c32a2d
	addps		%xmm3, %xmm4
Packit c32a2d
	addps		%xmm2, %xmm5
Packit c32a2d
	mulps		64(%eax), %xmm0
Packit c32a2d
	mulps		80(%eax), %xmm1
Packit c32a2d
	mulps		80(%eax), %xmm6
Packit c32a2d
	mulps		64(%eax), %xmm7
Packit c32a2d
	
Packit c32a2d
	/* Stage 3: each pair of vectors is combined into one sum and one
	   difference vector (one side lane-reversed); the differences are
	   scaled by the four table entries at byte offset 96.
	   Afterwards: TEMP(0)/TEMP(2) and xmm4/xmm6 hold sums,
	   TEMP(1)/TEMP(3) and xmm5/xmm7 hold scaled differences. */
	movaps		TEMP(0), %xmm2
Packit c32a2d
	movaps		TEMP(1), %xmm3
Packit c32a2d
	shufps		$0x1b, %xmm3, %xmm3
Packit c32a2d
	shufps		$0x1b, %xmm5, %xmm5
Packit c32a2d
	shufps		$0x1b, %xmm1, %xmm1
Packit c32a2d
	shufps		$0x1b, %xmm6, %xmm6
Packit c32a2d
	/* TEMP(1) has been loaded into xmm3 already; reuse the slot to keep
	   a copy of xmm0 for the addition below */
	movaps		%xmm0, TEMP(1)
Packit c32a2d
	subps		%xmm3, %xmm2
Packit c32a2d
	subps		%xmm1, %xmm0
Packit c32a2d
	addps		TEMP(0), %xmm3
Packit c32a2d
	addps		TEMP(1), %xmm1
Packit c32a2d
	movaps		%xmm3, TEMP(0)
Packit c32a2d
	movaps		%xmm1, TEMP(2)
Packit c32a2d
	movaps		%xmm5, %xmm1
Packit c32a2d
	movaps		%xmm4, %xmm5
Packit c32a2d
	movaps		%xmm7, %xmm3
Packit c32a2d
	subps		%xmm1, %xmm5
Packit c32a2d
	subps		%xmm6, %xmm7
Packit c32a2d
	addps		%xmm1, %xmm4
Packit c32a2d
	addps		%xmm3, %xmm6
Packit c32a2d
	mulps		96(%eax), %xmm2
Packit c32a2d
	mulps		96(%eax), %xmm0
Packit c32a2d
	mulps		96(%eax), %xmm5
Packit c32a2d
	mulps		96(%eax), %xmm7
Packit c32a2d
	movaps		%xmm2, TEMP(1)
Packit c32a2d
	movaps		%xmm0, TEMP(3)
Packit c32a2d
	
Packit c32a2d
	/* Stage 4, register half (xmm4..xmm7):
	   shufps $0x44 gathers the low 64-bit halves of two vectors, $0xbb
	   gathers the high halves with their two lanes swapped. The
	   differences are scaled by the two table entries at byte offset 112,
	   duplicated into both halves by movlhps and kept in TEMP(4) for the
	   second half. $0x14/$0xbe then re-interleave sums and differences. */
	movaps		%xmm4, %xmm2
Packit c32a2d
	movaps		%xmm5, %xmm3
Packit c32a2d
	shufps		$0x44, %xmm6, %xmm2
Packit c32a2d
	shufps		$0xbb, %xmm7, %xmm5
Packit c32a2d
	shufps		$0xbb, %xmm6, %xmm4
Packit c32a2d
	shufps		$0x44, %xmm7, %xmm3
Packit c32a2d
	movaps		%xmm2, %xmm6
Packit c32a2d
	movaps		%xmm3, %xmm7
Packit c32a2d
	subps		%xmm4, %xmm2
Packit c32a2d
	subps		%xmm5, %xmm3
Packit c32a2d
	addps		%xmm6, %xmm4
Packit c32a2d
	addps		%xmm7, %xmm5
Packit c32a2d
	movaps		112(%eax), %xmm0
Packit c32a2d
	movlhps		%xmm0, %xmm0
Packit c32a2d
	mulps		%xmm0, %xmm2
Packit c32a2d
	mulps		%xmm0, %xmm3
Packit c32a2d
	movaps		%xmm0, TEMP(4)
Packit c32a2d
	movaps		%xmm4, %xmm6
Packit c32a2d
	movaps		%xmm5, %xmm7
Packit c32a2d
	shufps		$0x14, %xmm2, %xmm4
Packit c32a2d
	shufps		$0xbe, %xmm2, %xmm6
Packit c32a2d
	shufps		$0x14, %xmm3, %xmm5
Packit c32a2d
	shufps		$0xbe, %xmm3, %xmm7
Packit c32a2d
	/* xmm4 and xmm6 stay in registers; xmm5 and xmm7 are spilled */
	movaps		%xmm5, TEMP(5)
Packit c32a2d
	movaps		%xmm7, TEMP(7)
Packit c32a2d
	
Packit c32a2d
	/* Stage 4, memory half: the same sequence applied to TEMP(0)..TEMP(3),
	   using the coefficient vector saved in TEMP(4) */
	movaps		TEMP(0), %xmm0
Packit c32a2d
	movaps		TEMP(1), %xmm1
Packit c32a2d
	movaps		%xmm0, %xmm2
Packit c32a2d
	movaps		%xmm1, %xmm3
Packit c32a2d
	shufps		$0x44, TEMP(2), %xmm2
Packit c32a2d
	shufps		$0xbb, TEMP(3), %xmm1
Packit c32a2d
	shufps		$0xbb, TEMP(2), %xmm0
Packit c32a2d
	shufps		$0x44, TEMP(3), %xmm3
Packit c32a2d
	movaps		%xmm2, %xmm5
Packit c32a2d
	movaps		%xmm3, %xmm7
Packit c32a2d
	subps		%xmm0, %xmm2
Packit c32a2d
	subps		%xmm1, %xmm3
Packit c32a2d
	addps		%xmm5, %xmm0
Packit c32a2d
	addps		%xmm7, %xmm1
Packit c32a2d
	mulps		TEMP(4), %xmm2
Packit c32a2d
	mulps		TEMP(4), %xmm3
Packit c32a2d
	movaps		%xmm0, %xmm5
Packit c32a2d
	movaps		%xmm1, %xmm7
Packit c32a2d
	shufps		$0x14, %xmm2, %xmm0
Packit c32a2d
	shufps		$0xbe, %xmm2, %xmm5
Packit c32a2d
	shufps		$0x14, %xmm3, %xmm1
Packit c32a2d
	shufps		$0xbe, %xmm3, %xmm7
Packit c32a2d
	
Packit c32a2d
	movaps		%xmm0, TEMP(0)
Packit c32a2d
	movaps		%xmm1, TEMP(1)
Packit c32a2d
	movaps		%xmm5, TEMP(2)
Packit c32a2d
	movaps		%xmm7, TEMP(3)
Packit c32a2d
	
Packit c32a2d
	/* Stage 5 coefficient: the single table entry at byte offset 120,
	   broadcast to all four lanes, with the sign of lanes 1 and 3 flipped
	   by pnpn */
	movss		120(%eax), %xmm5
Packit c32a2d
	shufps		$0x00, %xmm5, %xmm5
Packit c32a2d
	xorps		LOCAL_VAR(pnpn), %xmm5
Packit c32a2d
	
Packit c32a2d
	/* Stage 5, first half (xmm4, xmm6, TEMP(5), TEMP(7)):
	   two rounds of unpcklps/unpckhps regroup the lanes so that the
	   values to be combined sit in matching lanes; then sum/difference,
	   scale the differences by xmm5, and interleave sums with scaled
	   differences again. */
	movaps		%xmm4, %xmm0
Packit c32a2d
	movaps		%xmm6, %xmm1
Packit c32a2d
	unpcklps	TEMP(5), %xmm4
Packit c32a2d
	unpckhps	TEMP(5), %xmm0
Packit c32a2d
	unpcklps	TEMP(7), %xmm6
Packit c32a2d
	unpckhps	TEMP(7), %xmm1
Packit c32a2d
	movaps		%xmm4, %xmm2
Packit c32a2d
	movaps		%xmm6, %xmm3
Packit c32a2d
	unpcklps	%xmm0, %xmm4
Packit c32a2d
	unpckhps	%xmm0, %xmm2
Packit c32a2d
	unpcklps	%xmm1, %xmm6
Packit c32a2d
	unpckhps	%xmm1, %xmm3
Packit c32a2d
	movaps		%xmm4, %xmm0
Packit c32a2d
	movaps		%xmm6, %xmm1
Packit c32a2d
	subps		%xmm2, %xmm0
Packit c32a2d
	subps		%xmm3, %xmm1
Packit c32a2d
	addps		%xmm2, %xmm4
Packit c32a2d
	addps		%xmm3, %xmm6
Packit c32a2d
	mulps		%xmm5, %xmm0
Packit c32a2d
	mulps		%xmm5, %xmm1
Packit c32a2d
	/* park the coefficient in TEMP(5) for the second half; xmm5 is
	   needed as a data register now */
	movaps		%xmm5, TEMP(5)
Packit c32a2d
	movaps		%xmm4, %xmm5
Packit c32a2d
	movaps		%xmm6, %xmm7
Packit c32a2d
	unpcklps	%xmm0, %xmm4
Packit c32a2d
	unpckhps	%xmm0, %xmm5
Packit c32a2d
	unpcklps	%xmm1, %xmm6
Packit c32a2d
	unpckhps	%xmm1, %xmm7
Packit c32a2d
	
Packit c32a2d
	/* spill two first-half results (xmm4, xmm6) to free the registers;
	   xmm5 and xmm7 are stored after the second half */
	movaps		TEMP(0), %xmm0
Packit c32a2d
	movaps		TEMP(2), %xmm2
Packit c32a2d
	movaps		%xmm4, TEMP(4)
Packit c32a2d
	movaps		%xmm6, TEMP(6)
Packit c32a2d
	
Packit c32a2d
	/* Stage 5, second half: same sequence on TEMP(0)..TEMP(3), with the
	   coefficient read from TEMP(5) */
	movaps		%xmm0, %xmm4
Packit c32a2d
	movaps		%xmm2, %xmm6
Packit c32a2d
	unpcklps	TEMP(1), %xmm0
Packit c32a2d
	unpckhps	TEMP(1), %xmm4
Packit c32a2d
	unpcklps	TEMP(3), %xmm2
Packit c32a2d
	unpckhps	TEMP(3), %xmm6
Packit c32a2d
	movaps		%xmm0, %xmm1
Packit c32a2d
	movaps		%xmm2, %xmm3
Packit c32a2d
	unpcklps	%xmm4, %xmm0
Packit c32a2d
	unpckhps	%xmm4, %xmm1
Packit c32a2d
	unpcklps	%xmm6, %xmm2
Packit c32a2d
	unpckhps	%xmm6, %xmm3
Packit c32a2d
	movaps		%xmm0, %xmm4
Packit c32a2d
	movaps		%xmm2, %xmm6
Packit c32a2d
	subps		%xmm1, %xmm4
Packit c32a2d
	subps		%xmm3, %xmm6
Packit c32a2d
	addps		%xmm1, %xmm0
Packit c32a2d
	addps		%xmm3, %xmm2
Packit c32a2d
	mulps		TEMP(5), %xmm4
Packit c32a2d
	mulps		TEMP(5), %xmm6
Packit c32a2d
	movaps		%xmm0, %xmm1
Packit c32a2d
	movaps		%xmm2, %xmm3
Packit c32a2d
	unpcklps	%xmm4, %xmm0
Packit c32a2d
	unpckhps	%xmm4, %xmm1
Packit c32a2d
	unpcklps	%xmm6, %xmm2
Packit c32a2d
	unpckhps	%xmm6, %xmm3
Packit c32a2d
	
Packit c32a2d
	/* all eight result vectors are now in TEMP(0)..TEMP(7)
	   (TEMP(5) no longer holds the coefficient after this) */
	movaps		%xmm0, TEMP(0)
Packit c32a2d
	movaps		%xmm1, TEMP(1)
Packit c32a2d
	movaps		%xmm2, TEMP(2)
Packit c32a2d
	movaps		%xmm3, TEMP(3)
Packit c32a2d
	movaps		%xmm5, TEMP(5)
Packit c32a2d
	movaps		%xmm7, TEMP(7)
Packit c32a2d
	
Packit c32a2d
	/* Final additions, step 1 (scalar): in TEMP(0)..TEMP(3),
	   lane 2 += lane 3 (byte offsets 8/12 within each 16-byte slot) */
	movss		TEMP_BYTE(12), %xmm0
Packit c32a2d
	movss		TEMP_BYTE(28), %xmm1
Packit c32a2d
	movss		TEMP_BYTE(44), %xmm2
Packit c32a2d
	movss		TEMP_BYTE(60), %xmm3
Packit c32a2d
	addss		TEMP_BYTE(8), %xmm0
Packit c32a2d
	addss		TEMP_BYTE(24), %xmm1
Packit c32a2d
	addss		TEMP_BYTE(40), %xmm2
Packit c32a2d
	addss		TEMP_BYTE(56), %xmm3
Packit c32a2d
	movss		%xmm0, TEMP_BYTE(8)
Packit c32a2d
	movss		%xmm1, TEMP_BYTE(24)
Packit c32a2d
	movss		%xmm2, TEMP_BYTE(40)
Packit c32a2d
	movss		%xmm3, TEMP_BYTE(56)
Packit c32a2d
	/* ... and the same for TEMP(4)..TEMP(7) */
	movss		TEMP_BYTE(76), %xmm0
Packit c32a2d
	movss		TEMP_BYTE(92), %xmm1
Packit c32a2d
	movss		TEMP_BYTE(108), %xmm2
Packit c32a2d
	movss		TEMP_BYTE(124), %xmm3
Packit c32a2d
	addss		TEMP_BYTE(72), %xmm0
Packit c32a2d
	addss		TEMP_BYTE(88), %xmm1
Packit c32a2d
	addss		TEMP_BYTE(104), %xmm2
Packit c32a2d
	addss		TEMP_BYTE(120), %xmm3
Packit c32a2d
	movss		%xmm0, TEMP_BYTE(72)
Packit c32a2d
	movss		%xmm1, TEMP_BYTE(88)
Packit c32a2d
	movss		%xmm2, TEMP_BYTE(104)
Packit c32a2d
	movss		%xmm3, TEMP_BYTE(120)
Packit c32a2d
	
Packit c32a2d
	/* Step 2: for v = TEMP(1), TEMP(3), TEMP(5), TEMP(7) (in xmm1, xmm3,
	   xmm5, xmm7): v += (v[2], v[3], v[1], 0).
	   shufps $0x1e produces (v[2], v[3], v[1], v[0]); mask clears lane 3. */
	movaps		TEMP_BYTE(16), %xmm1
Packit c32a2d
	movaps		TEMP_BYTE(48), %xmm3
Packit c32a2d
	movaps		TEMP_BYTE(80), %xmm5
Packit c32a2d
	movaps		TEMP_BYTE(112), %xmm7
Packit c32a2d
	movaps		%xmm1, %xmm0
Packit c32a2d
	movaps		%xmm3, %xmm2
Packit c32a2d
	movaps		%xmm5, %xmm4
Packit c32a2d
	movaps		%xmm7, %xmm6
Packit c32a2d
	shufps		$0x1e, %xmm0, %xmm0
Packit c32a2d
	shufps		$0x1e, %xmm2, %xmm2
Packit c32a2d
	shufps		$0x1e, %xmm4, %xmm4
Packit c32a2d
	shufps		$0x1e, %xmm6, %xmm6
Packit c32a2d
	/* %eax = address of mask from here on (the cosine table is no longer
	   needed); later mask accesses go through %eax, which keeps working
	   after %ebx is reloaded with out1 below */
	lea			LOCAL_VAR(mask), %eax
Packit c32a2d
	andps		(%eax), %xmm0
Packit c32a2d
	andps		(%eax), %xmm2
Packit c32a2d
	andps		(%eax), %xmm4
Packit c32a2d
	andps		(%eax), %xmm6
Packit c32a2d
	addps		%xmm0, %xmm1
Packit c32a2d
	addps		%xmm2, %xmm3
Packit c32a2d
	addps		%xmm4, %xmm5
Packit c32a2d
	addps		%xmm6, %xmm7
Packit c32a2d
	
Packit c32a2d
	/* Step 3: fold TEMP(2) into xmm2/xmm3 and TEMP(6) into xmm6/xmm7.
	   The unshuffled vector is added to the old xmm3 (resp. xmm7) to form
	   xmm2 (resp. xmm6); its shuffled+masked copy is then added to
	   xmm3 (resp. xmm7). */
	movaps		TEMP_BYTE(32), %xmm2
Packit c32a2d
	movaps		TEMP_BYTE(96), %xmm6
Packit c32a2d
	movaps		%xmm2, %xmm0
Packit c32a2d
	movaps		%xmm6, %xmm4
Packit c32a2d
	shufps		$0x1e, %xmm0, %xmm0
Packit c32a2d
	shufps		$0x1e, %xmm4, %xmm4
Packit c32a2d
	andps		(%eax), %xmm0
Packit c32a2d
	andps		(%eax), %xmm4
Packit c32a2d
	addps		%xmm3, %xmm2
Packit c32a2d
	addps		%xmm0, %xmm3
Packit c32a2d
	addps		%xmm7, %xmm6
Packit c32a2d
	addps		%xmm4, %xmm7
Packit c32a2d
	
Packit c32a2d
	/* xmm0 = TEMP(0), xmm4 = TEMP(4) */
	movaps		TEMP_BYTE(0), %xmm0
Packit c32a2d
	movaps		TEMP_BYTE(64), %xmm4
Packit c32a2d
	
Packit c32a2d
	/* %ecx = out0, %ebx = out1 */
	movl		ARG(0), %ecx
Packit c32a2d
	movl		ARG(1), %ebx
Packit c32a2d
	
Packit c32a2d
	/* Output, part 1: scatter the four lanes of xmm0, xmm2, xmm1, xmm3.
	   movss always stores lane 0, so between stores the registers are
	   rotated: shufps $0xe1 swaps lanes 0 and 1, movhlps copies the high
	   half into the low half. */
	/* lane 0 -> out0 */
	movss		%xmm0, 1024(%ecx)
Packit c32a2d
	movss		%xmm2, 896(%ecx)
Packit c32a2d
	movss		%xmm1, 768(%ecx)
Packit c32a2d
	movss		%xmm3, 640(%ecx)
Packit c32a2d
	
Packit c32a2d
	/* lane 1 -> out1 (for xmm0 the same value also goes to out0 offset 0) */
	shufps		$0xe1, %xmm0, %xmm0
Packit c32a2d
	shufps		$0xe1, %xmm2, %xmm2
Packit c32a2d
	shufps		$0xe1, %xmm1, %xmm1
Packit c32a2d
	shufps		$0xe1, %xmm3, %xmm3
Packit c32a2d
	movss		%xmm0, (%ecx)
Packit c32a2d
	movss		%xmm0, (%ebx)
Packit c32a2d
	movss		%xmm2, 128(%ebx)
Packit c32a2d
	movss		%xmm1, 256(%ebx)
Packit c32a2d
	movss		%xmm3, 384(%ebx)
Packit c32a2d
	
Packit c32a2d
	/* lane 2 -> out0 */
	movhlps		%xmm0, %xmm0
Packit c32a2d
	movhlps		%xmm2, %xmm2
Packit c32a2d
	movhlps		%xmm1, %xmm1
Packit c32a2d
	movhlps		%xmm3, %xmm3
Packit c32a2d
	movss		%xmm0, 512(%ecx)
Packit c32a2d
	movss		%xmm2, 384(%ecx)
Packit c32a2d
	movss		%xmm1, 256(%ecx)
Packit c32a2d
	movss		%xmm3, 128(%ecx)
Packit c32a2d
	
Packit c32a2d
	/* lane 3 -> out1 */
	shufps		$0xe1, %xmm0, %xmm0
Packit c32a2d
	shufps		$0xe1, %xmm2, %xmm2
Packit c32a2d
	shufps		$0xe1, %xmm1, %xmm1
Packit c32a2d
	shufps		$0xe1, %xmm3, %xmm3
Packit c32a2d
	movss		%xmm0, 512(%ebx)
Packit c32a2d
	movss		%xmm2, 640(%ebx)
Packit c32a2d
	movss		%xmm1, 768(%ebx)
Packit c32a2d
	movss		%xmm3, 896(%ebx)
Packit c32a2d
	
Packit c32a2d
	/* Step 4: combine xmm4 (= TEMP(4)) with xmm5..xmm7.
	   xmm0 = shuffled+masked copy of the original xmm4,
	   xmm1 = copy of xmm5 before it is updated. */
	movaps		%xmm4, %xmm0
Packit c32a2d
	shufps		$0x1e, %xmm0, %xmm0
Packit c32a2d
	movaps		%xmm5, %xmm1
Packit c32a2d
	andps		(%eax), %xmm0
Packit c32a2d
	
Packit c32a2d
	addps		%xmm6, %xmm4
Packit c32a2d
	addps		%xmm7, %xmm5
Packit c32a2d
	addps		%xmm1, %xmm6
Packit c32a2d
	addps		%xmm0, %xmm7
Packit c32a2d
	
Packit c32a2d
	/* Output, part 2: scatter the four lanes of xmm4, xmm6, xmm5, xmm7
	   to the remaining (odd multiples of 64 bytes) slots.
	   lane 0 -> out0; high halves are copied to xmm0..xmm3 so that
	   lane 2 -> out0 as well. */
	movss		%xmm4, 960(%ecx)
Packit c32a2d
	movss		%xmm6, 832(%ecx)
Packit c32a2d
	movss		%xmm5, 704(%ecx)
Packit c32a2d
	movss		%xmm7, 576(%ecx)
Packit c32a2d
	movhlps		%xmm4, %xmm0
Packit c32a2d
	movhlps		%xmm6, %xmm1
Packit c32a2d
	movhlps		%xmm5, %xmm2
Packit c32a2d
	movhlps		%xmm7, %xmm3
Packit c32a2d
	movss		%xmm0, 448(%ecx)
Packit c32a2d
	movss		%xmm1, 320(%ecx)
Packit c32a2d
	movss		%xmm2, 192(%ecx)
Packit c32a2d
	movss		%xmm3, 64(%ecx)
Packit c32a2d
	
Packit c32a2d
	/* lane 1 -> out1 */
	shufps		$0xe1, %xmm4, %xmm4
Packit c32a2d
	shufps		$0xe1, %xmm6, %xmm6
Packit c32a2d
	shufps		$0xe1, %xmm5, %xmm5
Packit c32a2d
	shufps		$0xe1, %xmm7, %xmm7
Packit c32a2d
	movss		%xmm4, 64(%ebx)
Packit c32a2d
	movss		%xmm6, 192(%ebx)
Packit c32a2d
	movss		%xmm5, 320(%ebx)
Packit c32a2d
	movss		%xmm7, 448(%ebx)
Packit c32a2d
	
Packit c32a2d
	/* lane 3 (via the high-half copies in xmm0..xmm3) -> out1 */
	shufps		$0xe1, %xmm0, %xmm0
Packit c32a2d
	shufps		$0xe1, %xmm1, %xmm1
Packit c32a2d
	shufps		$0xe1, %xmm2, %xmm2
Packit c32a2d
	shufps		$0xe1, %xmm3, %xmm3
Packit c32a2d
	movss		%xmm0, 576(%ebx)
Packit c32a2d
	movss		%xmm1, 704(%ebx)
Packit c32a2d
	movss		%xmm2, 832(%ebx)
Packit c32a2d
	movss		%xmm3, 960(%ebx)
Packit c32a2d
Packit c32a2d
	/* Epilogue: restore %ebx, drop the aligned scratch area by resetting
	   %esp from %ebp, restore %ebp */
	popl		%ebx
Packit c32a2d
	movl		%ebp, %esp
Packit c32a2d
	popl		%ebp
Packit c32a2d
	ret
Packit c32a2d
Packit c32a2d
/*
	PIC builds with __APPLE__ defined only: a non-lazy symbol pointer slot
	for costab_mmxsse. The function loads the table address with
	GLOBAL_VAR_PTR(costab_mmxsse) in this configuration; presumably that
	macro resolves to this slot - confirm in mangle.h.
*/
#if defined(PIC) && defined(__APPLE__)
Packit c32a2d
	.section	__IMPORT,__pointers,non_lazy_symbol_pointers
Packit c32a2d
L_costab_mmxsse:
Packit c32a2d
	.indirect_symbol	ASM_NAME(costab_mmxsse)
Packit c32a2d
	.long	0
Packit c32a2d
#endif
Packit c32a2d
Packit c32a2d
/* NONEXEC_STACK is a macro defined outside this file; the name suggests it
   marks the object as not needing an executable stack - confirm. */
NONEXEC_STACK