|
Packit |
577717 |
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>
|
|
Packit |
577717 |
/* Number of times main() repeats each add/multiply call in its loops. */
#define NUMBER 100
|
|
Packit |
577717 |
|
|
Packit |
577717 |
/*
 * Packed single-precision add: cc[0..3] = aa[0..3] + bb[0..3].
 *
 * Issues one ADDPS on four floats, moving data with MOVAPS, so aa, bb and
 * cc must each point to 16-byte aligned storage holding four floats.
 *
 * The function is `static inline` so that a definition exists even when
 * the compiler does not inline the call (a plain C99 `inline` definition
 * provides no out-of-line symbol and then fails to link).
 *
 * The "memory" clobber tells the compiler that the asm reads aa/bb and
 * writes cc through the pointers; the operands themselves only describe
 * the pointer values, not the memory behind them.
 */
static inline void
inline_packed_sse_add( float *aa, float *bb, float *cc )
{
	__asm__ __volatile__( "movaps (%0), %%xmm0;"
						  "movaps (%1), %%xmm1;"
						  "addps %%xmm0, %%xmm1;"
						  "movaps %%xmm1, (%2);"
						  : /* no register outputs */
						  : "r"( aa ), "r"( bb ), "r"( cc )
						  : "%xmm0", "%xmm1", "memory" );
}
|
|
Packit |
577717 |
/*
 * Packed single-precision multiply: cc[0..3] = aa[0..3] * bb[0..3].
 *
 * Issues one MULPS on four floats, moving data with MOVAPS, so aa, bb and
 * cc must each point to 16-byte aligned storage holding four floats.
 *
 * `static inline` guarantees a definition exists when the call is not
 * inlined (a plain C99 `inline` definition emits no out-of-line symbol).
 * The "memory" clobber records that the asm reads and writes memory
 * through the pointer operands.
 */
static inline void
inline_packed_sse_mul( float *aa, float *bb, float *cc )
{
	__asm__ __volatile__( "movaps (%0), %%xmm0;"
						  "movaps (%1), %%xmm1;"
						  "mulps %%xmm0, %%xmm1;"
						  "movaps %%xmm1, (%2);"
						  : /* no register outputs */
						  : "r"( aa ), "r"( bb ), "r"( cc )
						  : "%xmm0", "%xmm1", "memory" );
}
|
|
Packit |
577717 |
/*
 * Packed double-precision add: cc[0..1] = aa[0..1] + bb[0..1].
 *
 * Issues one ADDPD on two doubles, moving data with MOVAPD, so aa, bb and
 * cc must each point to 16-byte aligned storage holding two doubles.
 *
 * `static inline` guarantees a definition exists when the call is not
 * inlined (a plain C99 `inline` definition emits no out-of-line symbol).
 * The "memory" clobber records that the asm reads and writes memory
 * through the pointer operands.
 */
static inline void
inline_packed_sse2_add( double *aa, double *bb, double *cc )
{
	__asm__ __volatile__( "movapd (%0), %%xmm0;"
						  "movapd (%1), %%xmm1;"
						  "addpd %%xmm0, %%xmm1;"
						  "movapd %%xmm1, (%2);"
						  : /* no register outputs */
						  : "r"( aa ), "r"( bb ), "r"( cc )
						  : "%xmm0", "%xmm1", "memory" );
}
|
|
Packit |
577717 |
/*
 * Packed double-precision multiply: cc[0..1] = aa[0..1] * bb[0..1].
 *
 * Issues one MULPD on two doubles, moving data with MOVAPD, so aa, bb and
 * cc must each point to 16-byte aligned storage holding two doubles.
 *
 * `static inline` guarantees a definition exists when the call is not
 * inlined (a plain C99 `inline` definition emits no out-of-line symbol).
 * The "memory" clobber records that the asm reads and writes memory
 * through the pointer operands.
 */
static inline void
inline_packed_sse2_mul( double *aa, double *bb, double *cc )
{
	__asm__ __volatile__( "movapd (%0), %%xmm0;"
						  "movapd (%1), %%xmm1;"
						  "mulpd %%xmm0, %%xmm1;"
						  "movapd %%xmm1, (%2);"
						  : /* no register outputs */
						  : "r"( aa ), "r"( bb ), "r"( cc )
						  : "%xmm0", "%xmm1", "memory" );
}
|
|
Packit |
577717 |
/*
 * Scalar single-precision add: *cc = *aa + *bb.
 *
 * Issues one ADDSS; only the first float behind each pointer is read or
 * written (MOVSS transfers a single 32-bit element).
 *
 * `static inline` guarantees a definition exists when the call is not
 * inlined (a plain C99 `inline` definition emits no out-of-line symbol).
 * The "memory" clobber records that the asm reads and writes memory
 * through the pointer operands.
 */
static inline void
inline_unpacked_sse_add( float *aa, float *bb, float *cc )
{
	__asm__ __volatile__( "movss (%0), %%xmm0;"
						  "movss (%1), %%xmm1;"
						  "addss %%xmm0, %%xmm1;"
						  "movss %%xmm1, (%2);"
						  : /* no register outputs */
						  : "r"( aa ), "r"( bb ), "r"( cc )
						  : "%xmm0", "%xmm1", "memory" );
}
|
|
Packit |
577717 |
/*
 * Scalar single-precision multiply: *cc = *aa * *bb.
 *
 * Issues one MULSS; only the first float behind each pointer is read or
 * written (MOVSS transfers a single 32-bit element).
 *
 * `static inline` guarantees a definition exists when the call is not
 * inlined (a plain C99 `inline` definition emits no out-of-line symbol).
 * The "memory" clobber records that the asm reads and writes memory
 * through the pointer operands.
 */
static inline void
inline_unpacked_sse_mul( float *aa, float *bb, float *cc )
{
	__asm__ __volatile__( "movss (%0), %%xmm0;"
						  "movss (%1), %%xmm1;"
						  "mulss %%xmm0, %%xmm1;"
						  "movss %%xmm1, (%2);"
						  : /* no register outputs */
						  : "r"( aa ), "r"( bb ), "r"( cc )
						  : "%xmm0", "%xmm1", "memory" );
}
|
|
Packit |
577717 |
/*
 * Scalar double-precision add: *cc = *aa + *bb.
 *
 * Issues one ADDSD; only the first double behind each pointer is read or
 * written (MOVSD transfers a single 64-bit element).
 *
 * `static inline` guarantees a definition exists when the call is not
 * inlined (a plain C99 `inline` definition emits no out-of-line symbol).
 * The "memory" clobber records that the asm reads and writes memory
 * through the pointer operands.
 */
static inline void
inline_unpacked_sse2_add( double *aa, double *bb, double *cc )
{
	__asm__ __volatile__( "movsd (%0), %%xmm0;"
						  "movsd (%1), %%xmm1;"
						  "addsd %%xmm0, %%xmm1;"
						  "movsd %%xmm1, (%2);"
						  : /* no register outputs */
						  : "r"( aa ), "r"( bb ), "r"( cc )
						  : "%xmm0", "%xmm1", "memory" );
}
|
|
Packit |
577717 |
/*
 * Scalar double-precision multiply: *cc = *aa * *bb.
 *
 * Issues one MULSD; only the first double behind each pointer is read or
 * written (MOVSD transfers a single 64-bit element).
 *
 * `static inline` guarantees a definition exists when the call is not
 * inlined (a plain C99 `inline` definition emits no out-of-line symbol).
 * The "memory" clobber records that the asm reads and writes memory
 * through the pointer operands.
 */
static inline void
inline_unpacked_sse2_mul( double *aa, double *bb, double *cc )
{
	__asm__ __volatile__( "movsd (%0), %%xmm0;"
						  "movsd (%1), %%xmm1;"
						  "mulsd %%xmm0, %%xmm1;"
						  "movsd %%xmm1, (%2);"
						  : /* no register outputs */
						  : "r"( aa ), "r"( bb ), "r"( cc )
						  : "%xmm0", "%xmm1", "memory" );
}
|
|
Packit |
577717 |
|
|
Packit |
577717 |
int
|
|
Packit |
577717 |
main( int argc, char **argv )
|
|
Packit |
577717 |
{
|
|
Packit |
577717 |
int i, packed = 0, sse = 0;
|
|
Packit |
577717 |
float a[4] = { 1.0, 2.0, 3.0, 4.0 };
|
|
Packit |
577717 |
float b[4] = { 2.0, 3.0, 4.0, 5.0 };
|
|
Packit |
577717 |
float c[4] = { 0.0, 0.0, 0.0, 0.0 };
|
|
Packit |
577717 |
double d[4] = { 1.0, 2.0, 3.0, 4.0 };
|
|
Packit |
577717 |
double e[4] = { 2.0, 3.0, 4.0, 5.0 };
|
|
Packit |
577717 |
double f[4] = { 0.0, 0.0, 0.0, 0.0 };
|
|
Packit |
577717 |
|
|
Packit |
577717 |
if ( argc != 3 ) {
|
|
Packit |
577717 |
bail:
|
|
Packit |
577717 |
printf( "Usage %s: <packed|unpacked> <sse|sse2>\n", argv[0] );
|
|
Packit |
577717 |
exit( 1 );
|
|
Packit |
577717 |
}
|
|
Packit |
577717 |
if ( strcasecmp( argv[1], "packed" ) == 0 )
|
|
Packit |
577717 |
packed = 1;
|
|
Packit |
577717 |
else if ( strcasecmp( argv[1], "unpacked" ) == 0 )
|
|
Packit |
577717 |
packed = 0;
|
|
Packit |
577717 |
else
|
|
Packit |
577717 |
goto bail;
|
|
Packit |
577717 |
if ( strcasecmp( argv[2], "sse" ) == 0 )
|
|
Packit |
577717 |
sse = 1;
|
|
Packit |
577717 |
else if ( strcasecmp( argv[2], "sse2" ) == 0 )
|
|
Packit |
577717 |
sse = 0;
|
|
Packit |
577717 |
else
|
|
Packit |
577717 |
goto bail;
|
|
Packit |
577717 |
|
|
Packit |
577717 |
#if 0
|
|
Packit |
577717 |
if ( ( sse ) &&
|
|
Packit |
577717 |
( system( "cat /proc/cpuinfo | grep sse > /dev/null" ) != 0 ) ) {
|
|
Packit |
577717 |
printf( "This processor does not have SSE.\n" );
|
|
Packit |
577717 |
exit( 1 );
|
|
Packit |
577717 |
}
|
|
Packit |
577717 |
if ( ( sse == 0 ) &&
|
|
Packit |
577717 |
( system( "cat /proc/cpuinfo | grep sse2 > /dev/null" ) != 0 ) ) {
|
|
Packit |
577717 |
printf( "This processor does not have SSE2.\n" );
|
|
Packit |
577717 |
exit( 1 );
|
|
Packit |
577717 |
}
|
|
Packit |
577717 |
#endif
|
|
Packit |
577717 |
|
|
Packit |
577717 |
printf( "Vector 1: %f %f %f %f\n", a[0], a[1], a[2], a[3] );
|
|
Packit |
577717 |
printf( "Vector 2: %f %f %f %f\n\n", b[0], b[1], b[2], b[3] );
|
|
Packit |
577717 |
|
|
Packit |
577717 |
if ( ( packed == 0 ) && ( sse == 1 ) ) {
|
|
Packit |
577717 |
for ( i = 0; i < NUMBER; i++ ) {
|
|
Packit |
577717 |
inline_unpacked_sse_add( &a[0], &b[0], &c[0] );
|
|
Packit |
577717 |
}
|
|
Packit |
577717 |
printf( "%d SSE Unpacked Adds: Result %f\n", NUMBER, c[0] );
|
|
Packit |
577717 |
|
|
Packit |
577717 |
for ( i = 0; i < NUMBER; i++ ) {
|
|
Packit |
577717 |
inline_unpacked_sse_mul( &a[0], &b[0], &c[0] );
|
|
Packit |
577717 |
}
|
|
Packit |
577717 |
printf( "%d SSE Unpacked Muls: Result %f\n", NUMBER, c[0] );
|
|
Packit |
577717 |
}
|
|
Packit |
577717 |
if ( ( packed == 1 ) && ( sse == 1 ) ) {
|
|
Packit |
577717 |
for ( i = 0; i < NUMBER; i++ ) {
|
|
Packit |
577717 |
inline_packed_sse_add( a, b, c );
|
|
Packit |
577717 |
}
|
|
Packit |
577717 |
printf( "%d SSE Packed Adds: Result %f %f %f %f\n", NUMBER, c[0], c[1],
|
|
Packit |
577717 |
c[2], c[3] );
|
|
Packit |
577717 |
for ( i = 0; i < NUMBER; i++ ) {
|
|
Packit |
577717 |
inline_packed_sse_mul( a, b, c );
|
|
Packit |
577717 |
}
|
|
Packit |
577717 |
printf( "%d SSE Packed Muls: Result %f %f %f %f\n", NUMBER, c[0], c[1],
|
|
Packit |
577717 |
c[2], c[3] );
|
|
Packit |
577717 |
}
|
|
Packit |
577717 |
|
|
Packit |
577717 |
if ( ( packed == 0 ) && ( sse == 0 ) ) {
|
|
Packit |
577717 |
for ( i = 0; i < NUMBER; i++ ) {
|
|
Packit |
577717 |
inline_unpacked_sse2_add( &d[0], &e[0], &f[0] );
|
|
Packit |
577717 |
}
|
|
Packit |
577717 |
printf( "%d SSE2 Unpacked Adds: Result %f\n", NUMBER, c[0] );
|
|
Packit |
577717 |
|
|
Packit |
577717 |
for ( i = 0; i < NUMBER; i++ ) {
|
|
Packit |
577717 |
inline_unpacked_sse2_mul( &d[0], &e[0], &f[0] );
|
|
Packit |
577717 |
}
|
|
Packit |
577717 |
printf( "%d SSE2 Unpacked Muls: Result %f\n", NUMBER, c[0] );
|
|
Packit |
577717 |
}
|
|
Packit |
577717 |
if ( ( packed == 1 ) && ( sse == 0 ) ) {
|
|
Packit |
577717 |
for ( i = 0; i < NUMBER; i++ ) {
|
|
Packit |
577717 |
inline_packed_sse2_add( &d[0], &e[0], &f[0] );
|
|
Packit |
577717 |
}
|
|
Packit |
577717 |
printf( "%d SSE2 Packed Adds: Result %f\n", NUMBER, c[0] );
|
|
Packit |
577717 |
|
|
Packit |
577717 |
for ( i = 0; i < NUMBER; i++ ) {
|
|
Packit |
577717 |
inline_packed_sse2_mul( &d[0], &e[0], &f[0] );
|
|
Packit |
577717 |
}
|
|
Packit |
577717 |
printf( "%d SSE2 Packed Muls: Result %f\n", NUMBER, c[0] );
|
|
Packit |
577717 |
}
|
|
Packit |
577717 |
|
|
Packit |
577717 |
|
|
Packit |
577717 |
exit( 0 );
|
|
Packit |
577717 |
}
|