/*
	dct64_x86_64: SSE optimized dct64 for x86-64

	copyright 1995-2009 by the mpg123 project - free software under the terms of the LGPL 2.1
	see COPYING and AUTHORS files in distribution or http://mpg123.org
	initially written by Taihei Monma
*/

#include "mangle.h"
/*
	Argument register names for
		void dct64_x86_64(short *out0, short *out1, real *samples);

	ARG0 = out0, ARG1 = out1, ARG2 = samples.

	Microsoft x64 ABI (IS_MSABI): the first argument arrives in %rcx, but
	the routine reuses %rcx as the cosine-table pointer and %ecx as scratch,
	so its first instruction copies %rcx into %r9 and ARG0 names %r9.
	The second and third arguments stay in %rdx and %r8.

	System V AMD64 ABI (everything else): arguments stay in their
	incoming registers %rdi, %rsi, %rdx.
*/
#ifdef IS_MSABI
/* short *out0 (copied from %rcx at function entry) */
#define ARG0 %r9
/* short *out1 */
#define ARG1 %rdx
/* real *samples */
#define ARG2 %r8
#else
/* short *out0 */
#define ARG0 %rdi
/* short *out1 */
#define ARG1 %rsi
/* real *samples */
#define ARG2 %rdx
#endif

/*
	void dct64_x86_64(short *out0, short *out1, real *samples);
*/

/*
	costab_x86_64: 32 constants consumed by dct64_x86_64.

	Each .long is the raw bit pattern of an IEEE-754 single-precision
	float; the routine loads them four at a time with movaps and uses
	them as mulps operands. For example the first entry,
	1056974725 = 0x3F002785, is about 0.500603, and entry 30,
	1060439283, is about 0.707107.
	NOTE(review): the values look like the usual 0.5/cos(...) DCT
	factors - confirm against the C reference table before relying on it.

	Layout as read by the routine (byte offset from the label):
	    0..63    entries  0-15   first butterfly stage
	   64..95    entries 16-23   second stage
	   96..111   entries 24-27   third stage
	  112..127   entries 28-31   entries 28/29 feed the fourth stage,
	                             entry 30 the fifth; entry 31 (zero) is
	                             loaded but never used in arithmetic.

	The table sits in .rodata, or in .data when __APPLE__ is defined.
	The movaps loads need at least 16-byte alignment; ALIGN32 comes from
	mangle.h and presumably provides 32-byte alignment.
*/
#ifndef __APPLE__
	.section	.rodata
#else
	.data
#endif
	ALIGN32
ASM_NAME(costab_x86_64):
	/* entries 0-15: first stage */
	.long 1056974725
	.long 1057056395
	.long 1057223771
	.long 1057485416
	.long 1057855544
	.long 1058356026
	.long 1059019886
	.long 1059897405
	.long 1061067246
	.long 1062657950
	.long 1064892987
	.long 1066774581
	.long 1069414683
	.long 1073984175
	.long 1079645762
	.long 1092815430
	/* entries 16-23: second stage */
	.long 1057005197
	.long 1057342072
	.long 1058087743
	.long 1059427869
	.long 1061799040
	.long 1065862217
	.long 1071413542
	.long 1084439708
	/* entries 24-27: third stage */
	.long 1057128951
	.long 1058664893
	.long 1063675095
	.long 1076102863
	/* entries 28-29: fourth stage */
	.long 1057655764
	.long 1067924853
	/* entry 30: fifth stage */
	.long 1060439283
	/* entry 31: padding to a full 16-byte vector, never used */
	.long 0
/*
	void dct64_x86_64(short *out0, short *out1, real *samples);

	In:   ARG0 = out0, ARG1 = out1, ARG2 = samples (see the ARGn macros).
	      samples: 32 packed single-precision values (128 bytes) are read,
	      loaded with the MOVUAPS macro from mangle.h.
	      NOTE(review): whether MOVUAPS tolerates unaligned input depends
	      on how mangle.h defines it - confirm before passing unaligned data.
	Out:  16-bit results stored with a stride of 32 bytes (16 shorts):
	      out0 at byte offsets 0, 32, ..., 512 (17 stores),
	      out1 at byte offsets 0, 32, ..., 480 (16 stores).
	      Values are converted with cvtps2dq (rounding per MXCSR) and
	      saturated to signed 16 bit by packssdw.
	Uses: %rax, %rcx, %xmm0-%xmm5, %xmm8-%xmm15, flags.
	      With IS_MSABI, %r9 is overwritten and %xmm6-%xmm15 are saved on
	      the stack and restored before returning.
	Leaf function: no calls are made.
*/
	.text
	ALIGN16
.globl ASM_NAME(dct64_x86_64)
ASM_NAME(dct64_x86_64):
#ifdef IS_MSABI /* should save xmm6-15 */
	/* %rcx is reused below, so keep out0 in %r9 (= ARG0). */
	movq		%rcx, ARG0
	/*
		160 bytes for ten xmm registers + 8 bytes so that %rsp, which is
		8 mod 16 at entry, becomes 16-byte aligned for the movaps spills.
		xmm6/xmm7 are saved although the code below never touches them.
	*/
	subq		$168, %rsp /* stack alignment + 10 xmm registers */
	movaps		%xmm6, (%rsp)
	movaps		%xmm7, 16(%rsp)
	movaps		%xmm8, 32(%rsp)
	movaps		%xmm9, 48(%rsp)
	movaps		%xmm10, 64(%rsp)
	movaps		%xmm11, 80(%rsp)
	movaps		%xmm12, 96(%rsp)
	movaps		%xmm13, 112(%rsp)
	movaps		%xmm14, 128(%rsp)
	movaps		%xmm15, 144(%rsp)
#endif

	/* %rcx = address of the constant table (RIP-relative). */
	leaq		ASM_NAME(costab_x86_64)(%rip), %rcx

	/*
		Stage 1, first half: for i = 0..7 form s[i] + s[31-i] (xmm8, xmm9)
		and s[i] - s[31-i] (xmm15, xmm14). shufps $0x1b reverses the four
		elements of a vector, which lines s[31-i] up with s[i].
	*/
	MOVUAPS		(ARG2), %xmm15
	MOVUAPS		16(ARG2), %xmm14
	MOVUAPS		112(ARG2), %xmm0
	MOVUAPS		96(ARG2), %xmm1
	shufps		$0x1b, %xmm0, %xmm0
	shufps		$0x1b, %xmm1, %xmm1
	movaps		%xmm15, %xmm8
	movaps		%xmm14, %xmm9
	addps		%xmm0, %xmm8
	addps		%xmm1, %xmm9
	subps		%xmm0, %xmm15
	subps		%xmm1, %xmm14

	/* Stage 1, second half: same for i = 8..15 (sums xmm10/xmm11, differences xmm13/xmm12). */
	MOVUAPS		32(ARG2), %xmm13
	MOVUAPS		48(ARG2), %xmm12
	MOVUAPS		80(ARG2), %xmm0
	MOVUAPS		64(ARG2), %xmm1
	shufps		$0x1b, %xmm0, %xmm0
	shufps		$0x1b, %xmm1, %xmm1
	movaps		%xmm13, %xmm10
	movaps		%xmm12, %xmm11
	addps		%xmm0, %xmm10
	addps		%xmm1, %xmm11
	subps		%xmm0, %xmm13
	subps		%xmm1, %xmm12

	/* Scale the 16 differences by table entries 0-15. */
	movaps		(%rcx), %xmm0
	movaps		16(%rcx), %xmm1
	movaps		32(%rcx), %xmm2
	movaps		48(%rcx), %xmm3
	mulps		%xmm0, %xmm15
	mulps		%xmm1, %xmm14
	mulps		%xmm2, %xmm13
	mulps		%xmm3, %xmm12

	/*
		Stage 2: sum/difference between each vector and the element-reversed
		partner vector; the differences are scaled by table entries 16-23.
	*/
	movaps		64(%rcx), %xmm0
	movaps		80(%rcx), %xmm1

	pshufd		$0x1b, %xmm11, %xmm2
	pshufd		$0x1b, %xmm10, %xmm3
	shufps		$0x1b, %xmm13, %xmm13
	shufps		$0x1b, %xmm12, %xmm12
	movaps		%xmm8, %xmm11
	movaps		%xmm9, %xmm10
	movaps		%xmm14, %xmm4
	movaps		%xmm15, %xmm5
	subps		%xmm2, %xmm11
	subps		%xmm3, %xmm10
	subps		%xmm13, %xmm14
	subps		%xmm12, %xmm15
	addps		%xmm2, %xmm8
	addps		%xmm3, %xmm9
	addps		%xmm5, %xmm12
	addps		%xmm4, %xmm13
	mulps		%xmm0, %xmm11
	mulps		%xmm1, %xmm10
	mulps		%xmm1, %xmm14
	mulps		%xmm0, %xmm15

	/* Stage 3: same pattern one level down; all differences use table entries 24-27. */
	movaps		96(%rcx), %xmm0

	pshufd		$0x1b, %xmm9, %xmm1
	pshufd		$0x1b, %xmm13, %xmm2
	shufps		$0x1b, %xmm10, %xmm10
	shufps		$0x1b, %xmm14, %xmm14
	movaps		%xmm8, %xmm9
	movaps		%xmm12, %xmm13
	movaps		%xmm11, %xmm3
	movaps		%xmm15, %xmm4
	subps		%xmm1, %xmm9
	subps		%xmm2, %xmm13
	subps		%xmm10, %xmm11
	subps		%xmm14, %xmm15
	addps		%xmm1, %xmm8
	addps		%xmm2, %xmm12
	addps		%xmm3, %xmm10
	addps		%xmm4, %xmm14
	mulps		%xmm0, %xmm9
	mulps		%xmm0, %xmm13
	mulps		%xmm0, %xmm11
	mulps		%xmm0, %xmm15

	/*
		Stage 4: xmm0 = table entries 28-31 (kept for stage 5),
		xmm1 = {e28, e29, e28, e29} after movlhps duplicates the low half.
		The butterflies now work inside vectors, so the halves are
		regrouped with shufps before and after the add/sub/mul.
	*/
	movaps		112(%rcx), %xmm0
	movaps		%xmm0, %xmm1
	movlhps		%xmm1, %xmm1

	/* xmm8..xmm11 */
	movaps		%xmm8, %xmm2
	movaps		%xmm9, %xmm3
	shufps		$0x44, %xmm10, %xmm2
	shufps		$0xbb, %xmm11, %xmm9
	shufps		$0xbb, %xmm10, %xmm8
	shufps		$0x44, %xmm11, %xmm3
	movaps		%xmm2, %xmm4
	movaps		%xmm3, %xmm5
	subps		%xmm8, %xmm2
	subps		%xmm9, %xmm3
	addps		%xmm4, %xmm8
	addps		%xmm5, %xmm9
	mulps		%xmm1, %xmm2
	mulps		%xmm1, %xmm3
	movaps		%xmm8, %xmm10
	movaps		%xmm9, %xmm11
	shufps		$0x14, %xmm2, %xmm8
	shufps		$0xbe, %xmm2, %xmm10
	shufps		$0x14, %xmm3, %xmm9
	shufps		$0xbe, %xmm3, %xmm11

	/* xmm12..xmm15, identical sequence */
	movaps		%xmm12, %xmm2
	movaps		%xmm13, %xmm3
	shufps		$0x44, %xmm14, %xmm2
	shufps		$0xbb, %xmm15, %xmm13
	shufps		$0xbb, %xmm14, %xmm12
	shufps		$0x44, %xmm15, %xmm3
	movaps		%xmm2, %xmm4
	movaps		%xmm3, %xmm5
	subps		%xmm12, %xmm2
	subps		%xmm13, %xmm3
	addps		%xmm4, %xmm12
	addps		%xmm5, %xmm13
	mulps		%xmm1, %xmm2
	mulps		%xmm1, %xmm3
	movaps		%xmm12, %xmm14
	movaps		%xmm13, %xmm15
	shufps		$0x14, %xmm2, %xmm12
	shufps		$0xbe, %xmm2, %xmm14
	shufps		$0x14, %xmm3, %xmm13
	shufps		$0xbe, %xmm3, %xmm15

	/*
		Stage 5 multiplier: broadcast table entry 30 (call it c), then flip
		the sign of elements 1 and 3. The mask is built without a memory
		constant: all-ones -> 0x80000000 per dword (pslld 31) -> only the
		upper dword of each qword (psllq 32). Result: xmm0 = {c, -c, c, -c}.
	*/
	shufps		$0xaa, %xmm0, %xmm0
	pcmpeqd		%xmm1, %xmm1
	pslld		$31, %xmm1
	psllq		$32, %xmm1
	xorps		%xmm1, %xmm0

	/* Stage 5 on xmm8..xmm11: interleave with unpck*, add/sub, scale differences, interleave back. */
	movaps		%xmm8, %xmm1
	movaps		%xmm10, %xmm2
	unpcklps	%xmm9, %xmm8
	unpckhps	%xmm9, %xmm1
	unpcklps	%xmm11, %xmm10
	unpckhps	%xmm11, %xmm2
	movaps		%xmm8, %xmm3
	movaps		%xmm10, %xmm4
	unpcklps	%xmm1, %xmm8
	unpckhps	%xmm1, %xmm3
	unpcklps	%xmm2, %xmm10
	unpckhps	%xmm2, %xmm4
	movaps		%xmm8, %xmm1
	movaps		%xmm10, %xmm2
	subps		%xmm3, %xmm1
	subps		%xmm4, %xmm2
	addps		%xmm3, %xmm8
	addps		%xmm4, %xmm10
	mulps		%xmm0, %xmm1
	mulps		%xmm0, %xmm2
	movaps		%xmm8, %xmm9
	movaps		%xmm10, %xmm11
	unpcklps	%xmm1, %xmm8
	unpckhps	%xmm1, %xmm9
	unpcklps	%xmm2, %xmm10
	unpckhps	%xmm2, %xmm11

	/* Stage 5 on xmm12..xmm15, identical sequence. */
	movaps		%xmm12, %xmm1
	movaps		%xmm14, %xmm2
	unpcklps	%xmm13, %xmm12
	unpckhps	%xmm13, %xmm1
	unpcklps	%xmm15, %xmm14
	unpckhps	%xmm15, %xmm2
	movaps		%xmm12, %xmm3
	movaps		%xmm14, %xmm4
	unpcklps	%xmm1, %xmm12
	unpckhps	%xmm1, %xmm3
	unpcklps	%xmm2, %xmm14
	unpckhps	%xmm2, %xmm4
	movaps		%xmm12, %xmm1
	movaps		%xmm14, %xmm2
	subps		%xmm3, %xmm1
	subps		%xmm4, %xmm2
	addps		%xmm3, %xmm12
	addps		%xmm4, %xmm14
	mulps		%xmm0, %xmm1
	mulps		%xmm0, %xmm2
	movaps		%xmm12, %xmm13
	movaps		%xmm14, %xmm15
	unpcklps	%xmm1, %xmm12
	unpckhps	%xmm1, %xmm13
	unpcklps	%xmm2, %xmm14
	unpckhps	%xmm2, %xmm15

	/*
		Output summation, step 1: in each of xmm8..xmm15 add element 3 into
		element 2. The helper vector is built as {0, 0, x2, x3} (shufpd with
		a zeroed destination) and shifted to {0, 0, x3, 0} (psrlq 32).
	*/
	xorps		%xmm0, %xmm0
	xorps		%xmm1, %xmm1
	shufpd		$0x2, %xmm8, %xmm0
	shufpd		$0x2, %xmm9, %xmm1
	psrlq		$32, %xmm0
	psrlq		$32, %xmm1
	addps		%xmm0, %xmm8
	addps		%xmm1, %xmm9

	xorps		%xmm0, %xmm0
	xorps		%xmm1, %xmm1
	shufpd		$0x2, %xmm10, %xmm0
	shufpd		$0x2, %xmm11, %xmm1
	psrlq		$32, %xmm0
	psrlq		$32, %xmm1
	addps		%xmm0, %xmm10
	addps		%xmm1, %xmm11

	xorps		%xmm0, %xmm0
	xorps		%xmm1, %xmm1
	shufpd		$0x2, %xmm12, %xmm0
	shufpd		$0x2, %xmm13, %xmm1
	psrlq		$32, %xmm0
	psrlq		$32, %xmm1
	addps		%xmm0, %xmm12
	addps		%xmm1, %xmm13

	xorps		%xmm0, %xmm0
	xorps		%xmm1, %xmm1
	shufpd		$0x2, %xmm14, %xmm0
	shufpd		$0x2, %xmm15, %xmm1
	psrlq		$32, %xmm0
	psrlq		$32, %xmm1
	addps		%xmm0, %xmm14
	addps		%xmm1, %xmm15

	/*
		Step 2: to each odd-numbered register add the permuted copy
		{x2, x3, x1, 0} of itself (pshufd $0x78 followed by a 4-byte
		right shift of the whole register).
	*/
	pshufd		$0x78, %xmm9, %xmm0
	pshufd		$0x78, %xmm11, %xmm1
	pshufd		$0x78, %xmm13, %xmm2
	pshufd		$0x78, %xmm15, %xmm3
	psrldq		$4, %xmm0
	psrldq		$4, %xmm1
	psrldq		$4, %xmm2
	psrldq		$4, %xmm3
	addps		%xmm0, %xmm9
	addps		%xmm1, %xmm11
	addps		%xmm2, %xmm13
	addps		%xmm3, %xmm15

	/*
		Step 3: xmm10 += xmm11 and xmm14 += xmm15, while xmm11 and xmm15
		receive the {x2, x3, x1, 0} permutation of the old xmm10 / xmm14
		(captured in xmm0/xmm1 before those registers are modified).
	*/
	pshufd		$0x78, %xmm10, %xmm0
	pshufd		$0x78, %xmm14, %xmm1
	psrldq		$4, %xmm0
	psrldq		$4, %xmm1
	addps		%xmm11, %xmm10
	addps		%xmm15, %xmm14
	addps		%xmm0, %xmm11
	addps		%xmm1, %xmm15

	/*
		Convert xmm8..xmm11 to 32-bit integers and pack with signed
		saturation: xmm8 = 8 words from old xmm8|xmm10, xmm9 from xmm9|xmm11.
	*/
	cvtps2dq	%xmm8, %xmm8
	cvtps2dq	%xmm9, %xmm9
	cvtps2dq	%xmm10, %xmm10
	cvtps2dq	%xmm11, %xmm11
	packssdw	%xmm10, %xmm8
	packssdw	%xmm11, %xmm9

	/*
		Scatter the 16 words. Each movd fetches two adjacent words into
		%eax/%ecx: the low word is stored first, then shrl $16 exposes the
		high word. Words 0/1 of xmm8 and xmm9:
	*/
	movd		%xmm8, %eax
	movd		%xmm9, %ecx
	movw		%ax, 512(ARG0)
	movw		%cx, 384(ARG0)
	shrl		$16, %eax
	shrl		$16, %ecx
	/* The same value goes to the first slot of both output buffers. */
	movw		%ax, (ARG0)
	movw		%ax, (ARG1)
	movw		%cx, 128(ARG1)

	/* Words 4/5 (movhlps brings the upper 64 bits down). */
	movhlps		%xmm8, %xmm0
	movhlps		%xmm9, %xmm1
	movd		%xmm0, %eax
	movd		%xmm1, %ecx
	movw		%ax, 448(ARG0)
	movw		%cx, 320(ARG0)
	shrl		$16, %eax
	shrl		$16, %ecx
	movw		%ax, 64(ARG1)
	movw		%cx, 192(ARG1)

	/* Words 2/3 (pshuflw $0xee moves words 2,3 into positions 0,1). */
	pshuflw		$0xee, %xmm8, %xmm2
	pshuflw		$0xee, %xmm9, %xmm3
	movd		%xmm2, %eax
	movd		%xmm3, %ecx
	movw		%ax, 256(ARG0)
	movw		%cx, 128(ARG0)
	shrl		$16, %eax
	shrl		$16, %ecx
	movw		%ax, 256(ARG1)
	movw		%cx, 384(ARG1)

	/* Words 6/7 (same shuffle applied to the upper halves held in xmm0/xmm1). */
	pshuflw		$0xee, %xmm0, %xmm0
	pshuflw		$0xee, %xmm1, %xmm1
	movd		%xmm0, %eax
	movd		%xmm1, %ecx
	movw		%ax, 192(ARG0)
	movw		%cx, 64(ARG0)
	shrl		$16, %eax
	shrl		$16, %ecx
	movw		%ax, 320(ARG1)
	movw		%cx, 448(ARG1)

	/*
		Final summation for xmm12..xmm15, using copies taken before any of
		them is modified: xmm12 += xmm14, xmm13 += xmm15, xmm14 += xmm13,
		xmm15 += {x2, x3, x1, 0} of xmm12 (shufps $0x1e, then a byte shift
		left and right by 4 to clear the top element).
	*/
	movaps		%xmm12, %xmm0
	movaps		%xmm13, %xmm1
	movaps		%xmm14, %xmm2
	movaps		%xmm15, %xmm3
	shufps		$0x1e, %xmm0, %xmm0
	pslldq		$4, %xmm0
	psrldq		$4, %xmm0
	addps		%xmm2, %xmm12
	addps		%xmm3, %xmm13
	addps		%xmm1, %xmm14
	addps		%xmm0, %xmm15

	/* Convert and pack: xmm12 = words from xmm12|xmm13, xmm14 = words from xmm14|xmm15. */
	cvtps2dq	%xmm12, %xmm12
	cvtps2dq	%xmm13, %xmm13
	cvtps2dq	%xmm14, %xmm14
	cvtps2dq	%xmm15, %xmm15
	packssdw	%xmm13, %xmm12
	packssdw	%xmm15, %xmm14

	/* Scatter the remaining 16 words, same low-word / high-word scheme as above. */
	/* Words 0/1 */
	movd		%xmm12, %eax
	movd		%xmm14, %ecx
	movw		%ax, 480(ARG0)
	movw		%cx, 416(ARG0)
	shrl		$16, %eax
	shrl		$16, %ecx
	movw		%ax, 32(ARG1)
	movw		%cx, 96(ARG1)

	/* Words 2/3 */
	pshuflw		$0xee, %xmm12, %xmm0
	pshuflw		$0xee, %xmm14, %xmm1
	movd		%xmm0, %eax
	movd		%xmm1, %ecx
	movw		%ax, 224(ARG0)
	movw		%cx, 160(ARG0)
	shrl		$16, %eax
	shrl		$16, %ecx
	movw		%ax, 288(ARG1)
	movw		%cx, 352(ARG1)

	/* Words 4/5 */
	movhlps		%xmm12, %xmm0
	movhlps		%xmm14, %xmm1
	movd		%xmm0, %eax
	movd		%xmm1, %ecx
	movw		%ax, 352(ARG0)
	movw		%cx, 288(ARG0)
	shrl		$16, %eax
	shrl		$16, %ecx
	movw		%ax, 160(ARG1)
	movw		%cx, 224(ARG1)

	/* Words 6/7 */
	pshuflw		$0xee, %xmm0, %xmm0
	pshuflw		$0xee, %xmm1, %xmm1
	movd		%xmm0, %eax
	movd		%xmm1, %ecx
	movw		%ax, 96(ARG0)
	movw		%cx, 32(ARG0)
	shrl		$16, %eax
	shrl		$16, %ecx
	movw		%ax, 416(ARG1)
	movw		%cx, 480(ARG1)

#ifdef IS_MSABI
	/* Restore the xmm registers saved in the prologue and release the stack area. */
	movaps		(%rsp), %xmm6
	movaps		16(%rsp), %xmm7
	movaps		32(%rsp), %xmm8
	movaps		48(%rsp), %xmm9
	movaps		64(%rsp), %xmm10
	movaps		80(%rsp), %xmm11
	movaps		96(%rsp), %xmm12
	movaps		112(%rsp), %xmm13
	movaps		128(%rsp), %xmm14
	movaps		144(%rsp), %xmm15
	addq		$168, %rsp
#endif
	ret

NONEXEC_STACK