|
Packit |
c32a2d |
/*
	dct64_sse_float: SSE optimized dct64 (float output version)

	copyright 1995-2009 by the mpg123 project - free software under the terms of the LGPL 2.1
	see COPYING and AUTHORS files in distribution or http://mpg123.org
	initially written by Taihei Monma
*/
|
|
Packit |
c32a2d |
|
|
Packit |
c32a2d |
#include "mangle.h"
|
|
Packit |
c32a2d |
|
|
Packit |
c32a2d |
/*
	Stack addressing helpers for dct64_real_sse.

	ARG(n):       n-th 4-byte stack argument, addressed from %ebp.
	              The 8 skips the saved %ebp and the return address.
	TEMP(n):      n-th 16-byte scratch slot on the stack.
	              The 4 skips the %ebx pushed after the scratch area was reserved.
	TEMP_BYTE(n): same scratch area addressed by byte offset,
	              so TEMP(k) and TEMP_BYTE(16*k) name the same address.
*/
#define ARG(n) (8+n*4)(%ebp)
#define TEMP(n) (4+n*16)(%esp)
#define TEMP_BYTE(n) (4+n)(%esp)
|
|
Packit |
c32a2d |
|
|
Packit |
c32a2d |
/*
	void dct64_real_sse(real *out0, real *out1, real *samples);
*/
|
|
Packit |
c32a2d |
|
|
Packit |
c32a2d |
/* Read-only constants: .rodata everywhere except Apple targets, which use .data. */
#ifndef __APPLE__
	.section	.rodata
#else
	.data
#endif
	ALIGN16
/*
	pnpn: XOR mask with only the sign bit (0x80000000) set in 32-bit lanes 1 and 3.
	XORing a vector of floats with it negates lanes 1 and 3 and leaves lanes 0 and 2 alone.
*/
pnpn:
	.long	0
	.long	-2147483648
	.long	0
	.long	-2147483648
|
|
Packit |
c32a2d |
/*
	mask: AND mask that keeps 32-bit lanes 0..2 and clears lane 3.
*/
	ALIGN16
mask:
	.long	-1
	.long	-1
	.long	-1
	.long	0
|
|
Packit |
c32a2d |
|
|
Packit |
c32a2d |
/*
	void dct64_real_sse(real *out0, real *out1, real *samples);

	32-bit x86, arguments on the stack (read through ARG(n) relative to %ebp):
		ARG(0) = out0, ARG(1) = out1, ARG(2) = samples

	Reads 32 single-precision values from samples (byte offsets 0..127) and
	writes single floats to out0 and out1 at byte offsets that are multiples
	of 64 (a stride of 16 floats).

	Registers: %eax, %ecx and %xmm0-%xmm7 are overwritten; %ebx and %ebp are
	saved and restored.
	Stack: 128 bytes of 16-byte aligned scratch space, addressed as TEMP(0..7)
	or, by byte offset, as TEMP_BYTE(0..127).
	The coefficient table costab_mmxsse is read with aligned memory operands at
	offsets that are multiples of 16, so its base must be 16-byte aligned.
*/
	.text
	ALIGN16
.globl ASM_NAME(dct64_real_sse)
ASM_NAME(dct64_real_sse):
	pushl	%ebp
	movl	%esp, %ebp

	andl	$-16, %esp	/* align the stack at 16 bytes */
	subl	$128, %esp	/* reserve space for temporary storage */
	pushl	%ebx		/* after this push, 4(%esp) is the aligned scratch base */

	/* GET_GOT comes from mangle.h; presumably it sets up %ebx for PIC data access (confirm there). */
	GET_GOT

	movl	ARG(2), %eax	/* %eax = samples */

	/*
		Stage 1, samples 0..7 against 31..24.
		shufps $0x1b reverses the four lanes, so xmm0/xmm1 hold the mirrored
		upper samples. Sums go to TEMP(0)/TEMP(1), differences stay in xmm7/xmm6.
		MOVUAPS is a mangle.h macro (aligned or unaligned load; confirm there).
	*/
	MOVUAPS	(%eax), %xmm7
	MOVUAPS	16(%eax), %xmm6
	MOVUAPS	112(%eax), %xmm0
	MOVUAPS	96(%eax), %xmm1
	shufps	$0x1b, %xmm0, %xmm0
	shufps	$0x1b, %xmm1, %xmm1
	movaps	%xmm7, %xmm4
	movaps	%xmm6, %xmm5
	addps	%xmm0, %xmm4
	addps	%xmm1, %xmm5
	subps	%xmm0, %xmm7
	subps	%xmm1, %xmm6
	movaps	%xmm4, TEMP(0)
	movaps	%xmm5, TEMP(1)

	/* Stage 1, samples 8..15 against 23..16: sums in xmm2/xmm3, differences in xmm5/xmm4. */
	MOVUAPS	32(%eax), %xmm2
	MOVUAPS	48(%eax), %xmm3
	MOVUAPS	80(%eax), %xmm0
	MOVUAPS	64(%eax), %xmm1
	shufps	$0x1b, %xmm0, %xmm0
	shufps	$0x1b, %xmm1, %xmm1
	movaps	%xmm2, %xmm5
	movaps	%xmm3, %xmm4
	addps	%xmm0, %xmm2
	addps	%xmm1, %xmm3
	subps	%xmm0, %xmm5
	subps	%xmm1, %xmm4

	/* samples is no longer needed: %eax now points at the coefficient table. */
#if defined(PIC) && defined(__APPLE__)
	mov	GLOBAL_VAR_PTR(costab_mmxsse), %eax
#else
	lea	GLOBAL_VAR(costab_mmxsse), %eax
#endif
	/* Scale the 16 stage-1 differences by table entries 0..15. */
	mulps	(%eax), %xmm7
	mulps	16(%eax), %xmm6
	mulps	32(%eax), %xmm5
	mulps	48(%eax), %xmm4

	/*
		Stage 2: sums and differences between the two halves of the stage-1
		results (one half lane-reversed first). Differences are scaled by the
		table entries at byte offsets 64 and 80.
	*/
	shufps	$0x1b, %xmm2, %xmm2
	shufps	$0x1b, %xmm3, %xmm3
	shufps	$0x1b, %xmm4, %xmm4
	shufps	$0x1b, %xmm5, %xmm5
	movaps	TEMP(0), %xmm0
	movaps	TEMP(1), %xmm1
	subps	%xmm3, %xmm0
	subps	%xmm2, %xmm1
	addps	TEMP(0), %xmm3
	addps	TEMP(1), %xmm2
	movaps	%xmm3, TEMP(0)
	movaps	%xmm2, TEMP(1)
	movaps	%xmm6, %xmm2
	movaps	%xmm7, %xmm3
	subps	%xmm5, %xmm6
	subps	%xmm4, %xmm7
	addps	%xmm3, %xmm4
	addps	%xmm2, %xmm5
	mulps	64(%eax), %xmm0
	mulps	80(%eax), %xmm1
	mulps	80(%eax), %xmm6
	mulps	64(%eax), %xmm7

	/*
		Stage 3: same sum/difference pattern on pairs of vectors; all four
		differences are scaled by the table entries at byte offset 96.
		TEMP(1) is used as a short-lived copy of xmm0 along the way.
	*/
	movaps	TEMP(0), %xmm2
	movaps	TEMP(1), %xmm3
	shufps	$0x1b, %xmm3, %xmm3
	shufps	$0x1b, %xmm5, %xmm5
	shufps	$0x1b, %xmm1, %xmm1
	shufps	$0x1b, %xmm6, %xmm6
	movaps	%xmm0, TEMP(1)
	subps	%xmm3, %xmm2
	subps	%xmm1, %xmm0
	addps	TEMP(0), %xmm3
	addps	TEMP(1), %xmm1
	movaps	%xmm3, TEMP(0)
	movaps	%xmm1, TEMP(2)
	movaps	%xmm5, %xmm1
	movaps	%xmm4, %xmm5
	movaps	%xmm7, %xmm3
	subps	%xmm1, %xmm5
	subps	%xmm6, %xmm7
	addps	%xmm1, %xmm4
	addps	%xmm3, %xmm6
	mulps	96(%eax), %xmm2
	mulps	96(%eax), %xmm0
	mulps	96(%eax), %xmm5
	mulps	96(%eax), %xmm7
	movaps	%xmm2, TEMP(1)
	movaps	%xmm0, TEMP(3)

	/*
		Stage 4 on the register-resident half (xmm4..xmm7).
		shufps $0x44 / $0xbb regroup the low and high lane pairs of two vectors
		so the sum/difference works on pairs within each vector.
		The coefficient is the low pair of the table entries at byte offset 112,
		duplicated into the high pair by movlhps and kept in TEMP(4) for reuse.
	*/
	movaps	%xmm4, %xmm2
	movaps	%xmm5, %xmm3
	shufps	$0x44, %xmm6, %xmm2
	shufps	$0xbb, %xmm7, %xmm5
	shufps	$0xbb, %xmm6, %xmm4
	shufps	$0x44, %xmm7, %xmm3
	movaps	%xmm2, %xmm6
	movaps	%xmm3, %xmm7
	subps	%xmm4, %xmm2
	subps	%xmm5, %xmm3
	addps	%xmm6, %xmm4
	addps	%xmm7, %xmm5
	movaps	112(%eax), %xmm0
	movlhps	%xmm0, %xmm0
	mulps	%xmm0, %xmm2
	mulps	%xmm0, %xmm3
	movaps	%xmm0, TEMP(4)
	movaps	%xmm4, %xmm6
	movaps	%xmm5, %xmm7
	shufps	$0x14, %xmm2, %xmm4
	shufps	$0xbe, %xmm2, %xmm6
	shufps	$0x14, %xmm3, %xmm5
	shufps	$0xbe, %xmm3, %xmm7
	movaps	%xmm5, TEMP(5)
	movaps	%xmm7, TEMP(7)

	/* Stage 4 on the half parked in TEMP(0..3), using the coefficient saved in TEMP(4). */
	movaps	TEMP(0), %xmm0
	movaps	TEMP(1), %xmm1
	movaps	%xmm0, %xmm2
	movaps	%xmm1, %xmm3
	shufps	$0x44, TEMP(2), %xmm2
	shufps	$0xbb, TEMP(3), %xmm1
	shufps	$0xbb, TEMP(2), %xmm0
	shufps	$0x44, TEMP(3), %xmm3
	movaps	%xmm2, %xmm5
	movaps	%xmm3, %xmm7
	subps	%xmm0, %xmm2
	subps	%xmm1, %xmm3
	addps	%xmm5, %xmm0
	addps	%xmm7, %xmm1
	mulps	TEMP(4), %xmm2
	mulps	TEMP(4), %xmm3
	movaps	%xmm0, %xmm5
	movaps	%xmm1, %xmm7
	shufps	$0x14, %xmm2, %xmm0
	shufps	$0xbe, %xmm2, %xmm5
	shufps	$0x14, %xmm3, %xmm1
	shufps	$0xbe, %xmm3, %xmm7

	movaps	%xmm0, TEMP(0)
	movaps	%xmm1, TEMP(1)
	movaps	%xmm5, TEMP(2)
	movaps	%xmm7, TEMP(3)

	/*
		Stage 5 coefficient: the single float at table byte offset 120 is
		broadcast to all lanes, then XORed with pnpn, which negates lanes 1 and 3.
	*/
	movss	120(%eax), %xmm5
	shufps	$0x00, %xmm5, %xmm5
	xorps	LOCAL_VAR(pnpn), %xmm5

	/*
		Stage 5 on xmm4/xmm6 with TEMP(5)/TEMP(7): unpck interleaves regroup the
		lanes, then sum/difference, differences scaled by the coefficient above.
		The coefficient is then saved in TEMP(5) (the old contents were already
		consumed by the unpck instructions).
	*/
	movaps	%xmm4, %xmm0
	movaps	%xmm6, %xmm1
	unpcklps	TEMP(5), %xmm4
	unpckhps	TEMP(5), %xmm0
	unpcklps	TEMP(7), %xmm6
	unpckhps	TEMP(7), %xmm1
	movaps	%xmm4, %xmm2
	movaps	%xmm6, %xmm3
	unpcklps	%xmm0, %xmm4
	unpckhps	%xmm0, %xmm2
	unpcklps	%xmm1, %xmm6
	unpckhps	%xmm1, %xmm3
	movaps	%xmm4, %xmm0
	movaps	%xmm6, %xmm1
	subps	%xmm2, %xmm0
	subps	%xmm3, %xmm1
	addps	%xmm2, %xmm4
	addps	%xmm3, %xmm6
	mulps	%xmm5, %xmm0
	mulps	%xmm5, %xmm1
	movaps	%xmm5, TEMP(5)
	movaps	%xmm4, %xmm5
	movaps	%xmm6, %xmm7
	unpcklps	%xmm0, %xmm4
	unpckhps	%xmm0, %xmm5
	unpcklps	%xmm1, %xmm6
	unpckhps	%xmm1, %xmm7

	/* Park two of the results; xmm5 and xmm7 stay in registers until TEMP(5) is free. */
	movaps	TEMP(0), %xmm0
	movaps	TEMP(2), %xmm2
	movaps	%xmm4, TEMP(4)
	movaps	%xmm6, TEMP(6)

	/* Stage 5 on the half in TEMP(0..3), coefficient read back from TEMP(5). */
	movaps	%xmm0, %xmm4
	movaps	%xmm2, %xmm6
	unpcklps	TEMP(1), %xmm0
	unpckhps	TEMP(1), %xmm4
	unpcklps	TEMP(3), %xmm2
	unpckhps	TEMP(3), %xmm6
	movaps	%xmm0, %xmm1
	movaps	%xmm2, %xmm3
	unpcklps	%xmm4, %xmm0
	unpckhps	%xmm4, %xmm1
	unpcklps	%xmm6, %xmm2
	unpckhps	%xmm6, %xmm3
	movaps	%xmm0, %xmm4
	movaps	%xmm2, %xmm6
	subps	%xmm1, %xmm4
	subps	%xmm3, %xmm6
	addps	%xmm1, %xmm0
	addps	%xmm3, %xmm2
	mulps	TEMP(5), %xmm4
	mulps	TEMP(5), %xmm6
	movaps	%xmm0, %xmm1
	movaps	%xmm2, %xmm3
	unpcklps	%xmm4, %xmm0
	unpckhps	%xmm4, %xmm1
	unpcklps	%xmm6, %xmm2
	unpckhps	%xmm6, %xmm3

	/* All eight result vectors now live in TEMP(0..7). */
	movaps	%xmm0, TEMP(0)
	movaps	%xmm1, TEMP(1)
	movaps	%xmm2, TEMP(2)
	movaps	%xmm3, TEMP(3)
	movaps	%xmm5, TEMP(5)
	movaps	%xmm7, TEMP(7)

	/* Scalar fix-up: in each of TEMP(0..3), lane 2 += lane 3. */
	movss	TEMP_BYTE(12), %xmm0
	movss	TEMP_BYTE(28), %xmm1
	movss	TEMP_BYTE(44), %xmm2
	movss	TEMP_BYTE(60), %xmm3
	addss	TEMP_BYTE(8), %xmm0
	addss	TEMP_BYTE(24), %xmm1
	addss	TEMP_BYTE(40), %xmm2
	addss	TEMP_BYTE(56), %xmm3
	movss	%xmm0, TEMP_BYTE(8)
	movss	%xmm1, TEMP_BYTE(24)
	movss	%xmm2, TEMP_BYTE(40)
	movss	%xmm3, TEMP_BYTE(56)
	/* Same fix-up for TEMP(4..7). */
	movss	TEMP_BYTE(76), %xmm0
	movss	TEMP_BYTE(92), %xmm1
	movss	TEMP_BYTE(108), %xmm2
	movss	TEMP_BYTE(124), %xmm3
	addss	TEMP_BYTE(72), %xmm0
	addss	TEMP_BYTE(88), %xmm1
	addss	TEMP_BYTE(104), %xmm2
	addss	TEMP_BYTE(120), %xmm3
	movss	%xmm0, TEMP_BYTE(72)
	movss	%xmm1, TEMP_BYTE(88)
	movss	%xmm2, TEMP_BYTE(104)
	movss	%xmm3, TEMP_BYTE(120)

	/*
		Load TEMP(1), TEMP(3), TEMP(5), TEMP(7) (byte offsets 16, 48, 80, 112).
		For each, add a lane-permuted copy of itself (shufps $0x1e) with lane 3
		cleared by the mask constant.
		%eax is repointed from the coefficient table to mask here, so the mask
		stays reachable after %ebx is overwritten below.
	*/
	movaps	TEMP_BYTE(16), %xmm1
	movaps	TEMP_BYTE(48), %xmm3
	movaps	TEMP_BYTE(80), %xmm5
	movaps	TEMP_BYTE(112), %xmm7
	movaps	%xmm1, %xmm0
	movaps	%xmm3, %xmm2
	movaps	%xmm5, %xmm4
	movaps	%xmm7, %xmm6
	shufps	$0x1e, %xmm0, %xmm0
	shufps	$0x1e, %xmm2, %xmm2
	shufps	$0x1e, %xmm4, %xmm4
	shufps	$0x1e, %xmm6, %xmm6
	lea	LOCAL_VAR(mask), %eax
	andps	(%eax), %xmm0
	andps	(%eax), %xmm2
	andps	(%eax), %xmm4
	andps	(%eax), %xmm6
	addps	%xmm0, %xmm1
	addps	%xmm2, %xmm3
	addps	%xmm4, %xmm5
	addps	%xmm6, %xmm7

	/* Combine TEMP(2) and TEMP(6) (byte offsets 32, 96) with xmm3 and xmm7. */
	movaps	TEMP_BYTE(32), %xmm2
	movaps	TEMP_BYTE(96), %xmm6
	movaps	%xmm2, %xmm0
	movaps	%xmm6, %xmm4
	shufps	$0x1e, %xmm0, %xmm0
	shufps	$0x1e, %xmm4, %xmm4
	andps	(%eax), %xmm0
	andps	(%eax), %xmm4
	addps	%xmm3, %xmm2
	addps	%xmm0, %xmm3
	addps	%xmm7, %xmm6
	addps	%xmm4, %xmm7

	/* TEMP(0) and TEMP(4) (byte offsets 0, 64). */
	movaps	TEMP_BYTE(0), %xmm0
	movaps	TEMP_BYTE(64), %xmm4

	movl	ARG(0), %ecx	/* %ecx = out0 */
	movl	ARG(1), %ebx	/* %ebx = out1; no %ebx-relative data access after this point */

	/*
		Scatter xmm0..xmm3 one lane at a time.
		Between store groups, shufps $0xe1 swaps lanes 0 and 1 and movhlps
		copies lanes 2,3 down to lanes 0,1, so movss can reach every lane.
	*/
	movss	%xmm0, 1024(%ecx)
	movss	%xmm2, 896(%ecx)
	movss	%xmm1, 768(%ecx)
	movss	%xmm3, 640(%ecx)

	shufps	$0xe1, %xmm0, %xmm0
	shufps	$0xe1, %xmm2, %xmm2
	shufps	$0xe1, %xmm1, %xmm1
	shufps	$0xe1, %xmm3, %xmm3
	movss	%xmm0, (%ecx)	/* the same value goes to out0 and out1 at offset 0 */
	movss	%xmm0, (%ebx)
	movss	%xmm2, 128(%ebx)
	movss	%xmm1, 256(%ebx)
	movss	%xmm3, 384(%ebx)

	movhlps	%xmm0, %xmm0
	movhlps	%xmm2, %xmm2
	movhlps	%xmm1, %xmm1
	movhlps	%xmm3, %xmm3
	movss	%xmm0, 512(%ecx)
	movss	%xmm2, 384(%ecx)
	movss	%xmm1, 256(%ecx)
	movss	%xmm3, 128(%ecx)

	shufps	$0xe1, %xmm0, %xmm0
	shufps	$0xe1, %xmm2, %xmm2
	shufps	$0xe1, %xmm1, %xmm1
	shufps	$0xe1, %xmm3, %xmm3
	movss	%xmm0, 512(%ebx)
	movss	%xmm2, 640(%ebx)
	movss	%xmm1, 768(%ebx)
	movss	%xmm3, 896(%ebx)

	/* Final combination of xmm4..xmm7; xmm0 is a permuted, masked copy of xmm4. */
	movaps	%xmm4, %xmm0
	shufps	$0x1e, %xmm0, %xmm0
	movaps	%xmm5, %xmm1
	andps	(%eax), %xmm0

	addps	%xmm6, %xmm4
	addps	%xmm7, %xmm5
	addps	%xmm1, %xmm6
	addps	%xmm0, %xmm7

	/* Scatter xmm4..xmm7: lane 0 directly, lane 2 via movhlps into xmm0..xmm3. */
	movss	%xmm4, 960(%ecx)
	movss	%xmm6, 832(%ecx)
	movss	%xmm5, 704(%ecx)
	movss	%xmm7, 576(%ecx)
	movhlps	%xmm4, %xmm0
	movhlps	%xmm6, %xmm1
	movhlps	%xmm5, %xmm2
	movhlps	%xmm7, %xmm3
	movss	%xmm0, 448(%ecx)
	movss	%xmm1, 320(%ecx)
	movss	%xmm2, 192(%ecx)
	movss	%xmm3, 64(%ecx)

	/* Lane 1 of xmm4..xmm7 goes to out1. */
	shufps	$0xe1, %xmm4, %xmm4
	shufps	$0xe1, %xmm6, %xmm6
	shufps	$0xe1, %xmm5, %xmm5
	shufps	$0xe1, %xmm7, %xmm7
	movss	%xmm4, 64(%ebx)
	movss	%xmm6, 192(%ebx)
	movss	%xmm5, 320(%ebx)
	movss	%xmm7, 448(%ebx)

	/* Lane 3 of the original xmm4..xmm7 (now lane 1 of xmm0..xmm3) goes to out1. */
	shufps	$0xe1, %xmm0, %xmm0
	shufps	$0xe1, %xmm1, %xmm1
	shufps	$0xe1, %xmm2, %xmm2
	shufps	$0xe1, %xmm3, %xmm3
	movss	%xmm0, 576(%ebx)
	movss	%xmm1, 704(%ebx)
	movss	%xmm2, 832(%ebx)
	movss	%xmm3, 960(%ebx)

	/* Restore %ebx, then drop the aligned scratch area by reloading %esp from %ebp. */
	popl	%ebx
	movl	%ebp, %esp
	popl	%ebp
	ret
|
|
Packit |
c32a2d |
|
|
Packit |
c32a2d |
/*
	Apple PIC builds only: a non-lazy symbol pointer slot for costab_mmxsse.
	The function loads the table address with GLOBAL_VAR_PTR(costab_mmxsse)
	in this configuration; presumably that macro (mangle.h) reads this slot.
*/
#if defined(PIC) && defined(__APPLE__)
	.section	__IMPORT,__pointers,non_lazy_symbol_pointers
L_costab_mmxsse:
	.indirect_symbol	ASM_NAME(costab_mmxsse)
	.long	0
#endif

/* NONEXEC_STACK comes from mangle.h; presumably it marks the object as not needing an executable stack. */
NONEXEC_STACK
|