///////////////////////////////////////////////////////////////////////////
//
// Copyright (c) 2009-2014 DreamWorks Animation LLC. 
//
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
// *       Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// *       Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
// *       Neither the name of DreamWorks Animation nor the names of
// its contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
///////////////////////////////////////////////////////////////////////////
Packit 0d464f
Packit 0d464f
#include <half.h>
Packit 0d464f
#include <math.h>
Packit 0d464f
#include <string.h>
Packit 0d464f
#include <ImfDwaCompressorSimd.h>
Packit 0d464f
#include <ImfSystemSpecific.h>
Packit 0d464f
#include <ImathRandom.h>
Packit 0d464f
#include <iostream>
Packit 0d464f
#include <assert.h>
Packit 0d464f
Packit 0d464f
using namespace OPENEXR_IMF_NAMESPACE;
Packit 0d464f
using namespace IMATH_NAMESPACE;
Packit 0d464f
using namespace std;
Packit 0d464f
Packit 0d464f
Packit 0d464f
namespace
Packit 0d464f
{
Packit 0d464f
Packit 0d464f
void
Packit 0d464f
dumpBuffer (const SimdAlignedBuffer64f &buffer)
Packit 0d464f
{
Packit 0d464f
    for (int i=0; i<8; ++i) 
Packit 0d464f
    {
Packit 0d464f
        for (int j=0; j<8; ++j) 
Packit 0d464f
        {
Packit 0d464f
            cout << buffer._buffer[i*8+j] << "  ";
Packit 0d464f
        }
Packit 0d464f
        cout << endl;
Packit 0d464f
    }
Packit 0d464f
}
Packit 0d464f
Packit 0d464f
void
Packit 0d464f
compareBuffer (const SimdAlignedBuffer64f &src,
Packit 0d464f
               const SimdAlignedBuffer64f &dst,
Packit 0d464f
               const float                 errThresh)
Packit 0d464f
{
Packit 0d464f
    for (int i=0; i<64; ++i) 
Packit 0d464f
    {
Packit 0d464f
        double diff   = fabs(src._buffer[i] - dst._buffer[i]);
Packit 0d464f
Packit 0d464f
        if (diff > errThresh) 
Packit 0d464f
        {
Packit 0d464f
            cout << scientific;
Packit 0d464f
            cout << "Error exceeded threshold on element "  << i << endl;
Packit 0d464f
            cout << " diff: " << diff << endl;
Packit 0d464f
            cout << "Goal (src): " << scientific << endl;
Packit 0d464f
            dumpBuffer(src);
Packit 0d464f
            cout << "Test (dst): " << endl;
Packit 0d464f
            dumpBuffer(dst);
Packit 0d464f
Packit 0d464f
            assert(false);
Packit 0d464f
        }
Packit 0d464f
    }
Packit 0d464f
}
Packit 0d464f
Packit 0d464f
void
Packit 0d464f
compareBufferRelative (const SimdAlignedBuffer64f &src,
Packit 0d464f
                       const SimdAlignedBuffer64f &dst,
Packit 0d464f
                       const float                 relErrThresh,
Packit 0d464f
                       const float                 absErrThresh)
Packit 0d464f
{
Packit 0d464f
    for (int i=0; i<64; ++i)
Packit 0d464f
    {
Packit 0d464f
        double diff    = fabs(src._buffer[i] - dst._buffer[i]);
Packit 0d464f
        double relDiff = diff / fabs(src._buffer[i]);
Packit 0d464f
Packit 0d464f
        if (relDiff > relErrThresh && diff > absErrThresh)
Packit 0d464f
        {
Packit 0d464f
            cout << scientific;
Packit 0d464f
            cout << "Error exceeded threshold on element "  << i << endl;
Packit 0d464f
            cout << " diff: " << diff << " relErr: " << fixed << 100.0*relDiff << " %" << endl;
Packit 0d464f
            cout << "Goal (src): " << scientific << endl;
Packit 0d464f
            dumpBuffer(src);
Packit 0d464f
            cout << "Test (dst): " << endl;
Packit 0d464f
            dumpBuffer(dst);
Packit 0d464f
Packit 0d464f
            assert(false);
Packit 0d464f
        }
Packit 0d464f
    }
Packit 0d464f
}
Packit 0d464f
Packit 0d464f
// 
Packit 0d464f
// Test that we can round trip CSC data with reasonable precision
Packit 0d464f
//
Packit 0d464f
void
Packit 0d464f
testCsc()
Packit 0d464f
{
Packit 0d464f
    const int            numIter = 1000000;
Packit 0d464f
    Rand48               rand48(0);
Packit 0d464f
    SimdAlignedBuffer64f orig[3];
Packit 0d464f
    SimdAlignedBuffer64f test[3];
Packit 0d464f
Packit 0d464f
    cout << "   Color Space Conversion Round Trip " << endl;
Packit 0d464f
    cout << "      csc709Forward64() - 64 x csc709Inverse()" << endl;
Packit 0d464f
    for (int iter=0; iter
Packit 0d464f
    {   
Packit 0d464f
        for (int i=0; i<64; ++i)
Packit 0d464f
        {
Packit 0d464f
            test[0]._buffer[i] = orig[0]._buffer[i] = rand48.nextf();
Packit 0d464f
            test[1]._buffer[i] = orig[1]._buffer[i] = rand48.nextf();
Packit 0d464f
            test[2]._buffer[i] = orig[2]._buffer[i] = rand48.nextf();
Packit 0d464f
        }
Packit 0d464f
        
Packit 0d464f
        csc709Forward64(test[0]._buffer, test[1]._buffer, test[2]._buffer);
Packit 0d464f
        for (int i=0; i<64; ++i)
Packit 0d464f
        {
Packit 0d464f
            csc709Inverse(test[0]._buffer[i], test[1]._buffer[i], test[2]._buffer[i]);
Packit 0d464f
        }
Packit 0d464f
Packit 0d464f
        compareBuffer(orig[0], test[0], 1e-3);
Packit 0d464f
        compareBuffer(orig[1], test[1], 1e-3);
Packit 0d464f
        compareBuffer(orig[2], test[2], 1e-3);
Packit 0d464f
Packit 0d464f
    } // iter
Packit 0d464f
Packit 0d464f
    cout << "      csc709Forward64() - csc709Inverse64()" << endl;
Packit 0d464f
    for (int iter=0; iter
Packit 0d464f
    {    
Packit 0d464f
        for (int i=0; i<64; ++i)
Packit 0d464f
        {
Packit 0d464f
            test[0]._buffer[i] = orig[0]._buffer[i] = rand48.nextf();
Packit 0d464f
            test[1]._buffer[i] = orig[1]._buffer[i] = rand48.nextf();
Packit 0d464f
            test[2]._buffer[i] = orig[2]._buffer[i] = rand48.nextf();
Packit 0d464f
        }
Packit 0d464f
        
Packit 0d464f
        csc709Forward64(test[0]._buffer, test[1]._buffer, test[2]._buffer);
Packit 0d464f
        csc709Inverse64(test[0]._buffer, test[1]._buffer, test[2]._buffer);
Packit 0d464f
Packit 0d464f
        compareBuffer(orig[0], test[0], 1e-3);
Packit 0d464f
        compareBuffer(orig[1], test[1], 1e-3);
Packit 0d464f
        compareBuffer(orig[2], test[2], 1e-3);
Packit 0d464f
Packit 0d464f
    } // iter
Packit 0d464f
}
Packit 0d464f
Packit 0d464f
//
Packit 0d464f
// Test interleaving two byte arrays
Packit 0d464f
//
Packit 0d464f
void
Packit 0d464f
testInterleave()
Packit 0d464f
{
Packit 0d464f
    const int bufferLen = 100000;
Packit 0d464f
    const int numIter   = 10000;
Packit 0d464f
    Rand48    rand48(0);
Packit 0d464f
    char     *srcA    = new char[bufferLen];
Packit 0d464f
    char     *srcB    = new char[bufferLen];
Packit 0d464f
    char     *dst     = new char[2*bufferLen];
Packit 0d464f
    char     *test    = new char[2*bufferLen];
Packit 0d464f
    
Packit 0d464f
    cout << "   Byte Interleaving " << endl;
Packit 0d464f
Packit 0d464f
    for (int i=0; i
Packit 0d464f
    {
Packit 0d464f
        srcA[i]    = (char)rand48.nextf(0.0, 255.0);
Packit 0d464f
        srcB[i]    = (char)rand48.nextf(0.0, 255.0);
Packit 0d464f
        dst[2*i]   = srcA[i];
Packit 0d464f
        dst[2*i+1] = srcB[i];
Packit 0d464f
    }
Packit 0d464f
Packit 0d464f
    for (int iter=0; iter
Packit 0d464f
    {
Packit 0d464f
        memset(test, 0, 2*bufferLen);
Packit 0d464f
Packit 0d464f
        int offset = (int)rand48.nextf(0.0, bufferLen/2);
Packit 0d464f
        int len    = (int)rand48.nextf(1.0, bufferLen - 1 - offset);
Packit 0d464f
Packit 0d464f
        interleaveByte2( test+2*offset, srcA+offset, srcB+offset, len);
Packit 0d464f
        for (int i=0; i
Packit 0d464f
            assert( test[2*offset + 2*i]     == dst[2*offset + 2*i]);
Packit 0d464f
            assert( test[2*offset + 2*i + 1] == dst[2*offset + 2*i + 1]);
Packit 0d464f
        }
Packit 0d464f
    }
Packit 0d464f
Packit 0d464f
    delete[] srcA;
Packit 0d464f
    delete[] srcB;
Packit 0d464f
    delete[] dst;
Packit 0d464f
    delete[] test;
Packit 0d464f
}
Packit 0d464f
Packit 0d464f
//
Packit 0d464f
// Test that we can route trip DCT data with reasonable precision
Packit 0d464f
//
Packit 0d464f
void
Packit 0d464f
testDct()
Packit 0d464f
{
Packit 0d464f
    const int            numIter = 1000000;
Packit 0d464f
    Rand48               rand48(0);
Packit 0d464f
    SimdAlignedBuffer64f orig;
Packit 0d464f
    SimdAlignedBuffer64f test;
Packit 0d464f
   
Packit 0d464f
    cout << "   DCT Round Trip " << endl;
Packit 0d464f
    for (int iter=0; iter
Packit 0d464f
    {
Packit 0d464f
        for (int i=0; i<64; ++i) 
Packit 0d464f
        {
Packit 0d464f
            orig._buffer[i] = test._buffer[i] = rand48.nextf();
Packit 0d464f
        }
Packit 0d464f
Packit 0d464f
        dctForward8x8(test._buffer);
Packit 0d464f
        dctInverse8x8_scalar<0>(test._buffer);
Packit 0d464f
Packit 0d464f
        compareBufferRelative(orig, test, .02, 1e-3);
Packit 0d464f
    } 
Packit 0d464f
Packit 0d464f
    cout << "      Inverse, DC Only" << endl;
Packit 0d464f
    for (int iter=0; iter
Packit 0d464f
    {
Packit 0d464f
        orig._buffer[0] = test._buffer[0] = rand48.nextf();
Packit 0d464f
        for (int i=1; i<64; ++i) 
Packit 0d464f
        {
Packit 0d464f
            orig._buffer[i] = test._buffer[i] = 0;
Packit 0d464f
        }
Packit 0d464f
Packit 0d464f
        dctInverse8x8_scalar<0>(orig._buffer);
Packit 0d464f
        dctInverse8x8DcOnly(test._buffer);
Packit 0d464f
Packit 0d464f
        compareBufferRelative(orig, test, .01, 1e-6);
Packit 0d464f
    } 
Packit 0d464f
Packit 0d464f
Packit 0d464f
#define INVERSE_DCT_SCALAR_TEST_N(_func, _n, _desc)                \
Packit 0d464f
    cout << "         " << _desc << endl;                          \
Packit 0d464f
    for (int iter=0; iter
Packit 0d464f
    {                                                              \
Packit 0d464f
        for (int i=0; i<64; ++i)                                   \
Packit 0d464f
        {                                                          \
Packit 0d464f
            if (i < 8*(8-_n))                                      \
Packit 0d464f
            {                                                      \
Packit 0d464f
               orig._buffer[i] = test._buffer[i] = rand48.nextf(); \
Packit 0d464f
            } else {                                               \
Packit 0d464f
               orig._buffer[i] = test._buffer[i] = 0;              \
Packit 0d464f
            }                                                      \
Packit 0d464f
        }                                                          \
Packit 0d464f
        dctInverse8x8_scalar<0>(orig._buffer);                     \
Packit 0d464f
        _func<_n>(test._buffer);                                   \
Packit 0d464f
        compareBufferRelative(orig, test, .01, 1e-6);              \
Packit 0d464f
    }
Packit 0d464f
Packit 0d464f
    cout << "      Inverse, Scalar: " << endl;
Packit 0d464f
    INVERSE_DCT_SCALAR_TEST_N(dctInverse8x8_scalar, 0, "8x8")
Packit 0d464f
    INVERSE_DCT_SCALAR_TEST_N(dctInverse8x8_scalar, 1, "7x8")
Packit 0d464f
    INVERSE_DCT_SCALAR_TEST_N(dctInverse8x8_scalar, 2, "6x8")
Packit 0d464f
    INVERSE_DCT_SCALAR_TEST_N(dctInverse8x8_scalar, 3, "5x8")
Packit 0d464f
    INVERSE_DCT_SCALAR_TEST_N(dctInverse8x8_scalar, 4, "4x8")
Packit 0d464f
    INVERSE_DCT_SCALAR_TEST_N(dctInverse8x8_scalar, 5, "3x8")
Packit 0d464f
    INVERSE_DCT_SCALAR_TEST_N(dctInverse8x8_scalar, 6, "2x8")
Packit 0d464f
    INVERSE_DCT_SCALAR_TEST_N(dctInverse8x8_scalar, 7, "1x8")
Packit 0d464f
Packit 0d464f
    CpuId cpuid;
Packit 0d464f
    if (cpuid.sse2) 
Packit 0d464f
    {
Packit 0d464f
        cout << "      Inverse, SSE2: " << endl;
Packit 0d464f
        INVERSE_DCT_SCALAR_TEST_N(dctInverse8x8_sse2, 0, "8x8")
Packit 0d464f
        INVERSE_DCT_SCALAR_TEST_N(dctInverse8x8_sse2, 1, "7x8")
Packit 0d464f
        INVERSE_DCT_SCALAR_TEST_N(dctInverse8x8_sse2, 2, "6x8")
Packit 0d464f
        INVERSE_DCT_SCALAR_TEST_N(dctInverse8x8_sse2, 3, "5x8")
Packit 0d464f
        INVERSE_DCT_SCALAR_TEST_N(dctInverse8x8_sse2, 4, "4x8")
Packit 0d464f
        INVERSE_DCT_SCALAR_TEST_N(dctInverse8x8_sse2, 5, "3x8")
Packit 0d464f
        INVERSE_DCT_SCALAR_TEST_N(dctInverse8x8_sse2, 6, "2x8")
Packit 0d464f
        INVERSE_DCT_SCALAR_TEST_N(dctInverse8x8_sse2, 7, "1x8")
Packit 0d464f
    }
Packit 0d464f
Packit 0d464f
    if (cpuid.avx) 
Packit 0d464f
    {
Packit 0d464f
        cout << "      Inverse, AVX: " << endl;
Packit 0d464f
        INVERSE_DCT_SCALAR_TEST_N(dctInverse8x8_avx, 0, "8x8")
Packit 0d464f
        INVERSE_DCT_SCALAR_TEST_N(dctInverse8x8_avx, 1, "7x8")
Packit 0d464f
        INVERSE_DCT_SCALAR_TEST_N(dctInverse8x8_avx, 2, "6x8")
Packit 0d464f
        INVERSE_DCT_SCALAR_TEST_N(dctInverse8x8_avx, 3, "5x8")
Packit 0d464f
        INVERSE_DCT_SCALAR_TEST_N(dctInverse8x8_avx, 4, "4x8")
Packit 0d464f
        INVERSE_DCT_SCALAR_TEST_N(dctInverse8x8_avx, 5, "3x8")
Packit 0d464f
        INVERSE_DCT_SCALAR_TEST_N(dctInverse8x8_avx, 6, "2x8")
Packit 0d464f
        INVERSE_DCT_SCALAR_TEST_N(dctInverse8x8_avx, 7, "1x8")
Packit 0d464f
    }
Packit 0d464f
}
Packit 0d464f
Packit 0d464f
//
Packit 0d464f
// Test FLOAT -> HALF conversion, mostly for F16C enabled processors
Packit 0d464f
//
Packit 0d464f
void
Packit 0d464f
testFloatToHalf()
Packit 0d464f
{
Packit 0d464f
    cout << "   FLOAT -> HALF conversion" << endl;
Packit 0d464f
Packit 0d464f
    const int             numIter = 1000000;
Packit 0d464f
    Rand48                rand48(0);
Packit 0d464f
    SimdAlignedBuffer64f  src;
Packit 0d464f
    SimdAlignedBuffer64us dst;
Packit 0d464f
Packit 0d464f
    cout << "      convertFloatToHalf64_scalar()" << endl;
Packit 0d464f
    for (int iter=0; iter
Packit 0d464f
    {
Packit 0d464f
        for (int i=0; i<64; ++i)
Packit 0d464f
        {
Packit 0d464f
            if (i < 32)
Packit 0d464f
            {
Packit 0d464f
                src._buffer[i] = (float)140000*(rand48.nextf()-.5);
Packit 0d464f
            } else
Packit 0d464f
            {
Packit 0d464f
                src._buffer[i] = (float)(rand48.nextf()-.5);
Packit 0d464f
            }
Packit 0d464f
            dst._buffer[i] = 0;
Packit 0d464f
        }
Packit 0d464f
Packit 0d464f
        convertFloatToHalf64_scalar(dst._buffer, src._buffer);
Packit 0d464f
Packit 0d464f
        for (int i=0; i<64; ++i)
Packit 0d464f
        {
Packit 0d464f
            half value = (half)src._buffer[i];
Packit 0d464f
            if (value.bits() != dst._buffer[i])
Packit 0d464f
            {
Packit 0d464f
                cout << src._buffer[i] << " -> " << dst._buffer[i] 
Packit 0d464f
                                 << " expected " << value.bits() << endl;
Packit 0d464f
                assert(false);
Packit 0d464f
            }
Packit 0d464f
        }
Packit 0d464f
    }
Packit 0d464f
Packit 0d464f
Packit 0d464f
    CpuId cpuid;
Packit 0d464f
    if (cpuid.avx && cpuid.f16c)
Packit 0d464f
    {
Packit 0d464f
        cout << "      convertFloatToHalf64_f16c()" << endl;
Packit 0d464f
        for (int iter=0; iter
Packit 0d464f
        {
Packit 0d464f
            for (int i=0; i<64; ++i)
Packit 0d464f
            {
Packit 0d464f
                if (i < 32)
Packit 0d464f
                {
Packit 0d464f
                    src._buffer[i] = (float)140000*(rand48.nextf()-.5);
Packit 0d464f
                } 
Packit 0d464f
                else
Packit 0d464f
                {
Packit 0d464f
                    src._buffer[i] = (float)(rand48.nextf()-.5);
Packit 0d464f
                }
Packit 0d464f
                dst._buffer[i] = 0;
Packit 0d464f
            }
Packit 0d464f
Packit 0d464f
            convertFloatToHalf64_f16c(dst._buffer, src._buffer);
Packit 0d464f
Packit 0d464f
            for (int i=0; i<64; ++i)
Packit 0d464f
            {
Packit 0d464f
                half value = (half)src._buffer[i];
Packit 0d464f
                if (value.bits() != dst._buffer[i])
Packit 0d464f
                {
Packit 0d464f
                    cout << src._buffer[i] << " -> " << dst._buffer[i] 
Packit 0d464f
                                     << " expected " << value.bits() << endl;
Packit 0d464f
                    assert(false);
Packit 0d464f
                }
Packit 0d464f
            }
Packit 0d464f
        }
Packit 0d464f
    }
Packit 0d464f
}
Packit 0d464f
Packit 0d464f
//
Packit 0d464f
// Test ZigZag reordering + HALF -> FLOAT conversion
Packit 0d464f
//
Packit 0d464f
void
Packit 0d464f
testFromHalfZigZag()
Packit 0d464f
{
Packit 0d464f
    SimdAlignedBuffer64us src;
Packit 0d464f
    SimdAlignedBuffer64f  dst;
Packit 0d464f
Packit 0d464f
    cout << "   ZigZag re-ordering with HALF -> FLOAT conversion" << endl;
Packit 0d464f
Packit 0d464f
    // First off, simple check to see that the reordering is working
Packit 0d464f
    // This pattern, when converted, should give 0.0 - 63.0 as floats
Packit 0d464f
    // in order.
Packit 0d464f
    unsigned short pattern[] = {
Packit 0d464f
        0x0000, 0x3c00, 0x4800, 0x4c00, 0x4880, 0x4000, 0x4200, 0x4900,
Packit 0d464f
        0x4c40, 0x4e00, 0x5000, 0x4e40, 0x4c80, 0x4980, 0x4400, 0x4500,
Packit 0d464f
        0x4a00, 0x4cc0, 0x4e80, 0x5020, 0x5100, 0x5200, 0x5120, 0x5040,
Packit 0d464f
        0x4ec0, 0x4d00, 0x4a80, 0x4600, 0x4700, 0x4b00, 0x4d40, 0x4f00,
Packit 0d464f
        0x5060, 0x5140, 0x5220, 0x5300, 0x5320, 0x5240, 0x5160, 0x5080,
Packit 0d464f
        0x4f40, 0x4d80, 0x4b80, 0x4dc0, 0x4f80, 0x50a0, 0x5180, 0x5260,
Packit 0d464f
        0x5340, 0x5360, 0x5280, 0x51a0, 0x50c0, 0x4fc0, 0x50e0, 0x51c0,
Packit 0d464f
        0x52a0, 0x5380, 0x53a0, 0x52c0, 0x51e0, 0x52e0, 0x53c0, 0x53e0
Packit 0d464f
    };
Packit 0d464f
Packit 0d464f
    cout << "      fromHalfZigZag_scaler()" << endl;
Packit 0d464f
    for (int i=0; i<64; ++i)
Packit 0d464f
    {
Packit 0d464f
        src._buffer[i] = pattern[i];
Packit 0d464f
    }
Packit 0d464f
    fromHalfZigZag_scalar(src._buffer, dst._buffer);
Packit 0d464f
    for (int i=0; i<64; ++i)
Packit 0d464f
    {
Packit 0d464f
        if ( fabsf(dst._buffer[i] - (float)i) > 1e-5 )
Packit 0d464f
        {
Packit 0d464f
            cout << "At index " << i << ": ";
Packit 0d464f
            cout << "expecting " << (float)i << "; got " << dst._buffer[i] << endl;
Packit 0d464f
            assert(false);
Packit 0d464f
        }
Packit 0d464f
    }
Packit 0d464f
       
Packit 0d464f
    // Then compare the two implementations, if supported
Packit 0d464f
    CpuId cpuid;
Packit 0d464f
    if (cpuid.avx && cpuid.f16c)
Packit 0d464f
    {
Packit 0d464f
        const int             numIter = 1000000;
Packit 0d464f
        Rand48                rand48(0);
Packit 0d464f
        half                  h;
Packit 0d464f
        SimdAlignedBuffer64f  dstF16c;
Packit 0d464f
Packit 0d464f
        cout << "      fromHalfZigZag_f16c()" << endl;
Packit 0d464f
Packit 0d464f
        for (int iter=0; iter
Packit 0d464f
        {
Packit 0d464f
            for (int i=0; i<64; ++i)
Packit 0d464f
            {
Packit 0d464f
                if (i < 32)
Packit 0d464f
                {
Packit 0d464f
                    h = (half)(140000.*(rand48.nextf() - .5));
Packit 0d464f
                }
Packit 0d464f
                else 
Packit 0d464f
                {
Packit 0d464f
                    h = (half)(rand48.nextf() - .5);
Packit 0d464f
                }
Packit 0d464f
                src._buffer[i] = h.bits();
Packit 0d464f
            }
Packit 0d464f
Packit 0d464f
            fromHalfZigZag_scalar(src._buffer, dst._buffer);
Packit 0d464f
            fromHalfZigZag_f16c(src._buffer, dstF16c._buffer);
Packit 0d464f
Packit 0d464f
            for (int i=0; i<64; ++i)
Packit 0d464f
            {
Packit 0d464f
                if ( fabsf(dst._buffer[i] - dstF16c._buffer[i]) > 1e-5 )
Packit 0d464f
                {
Packit 0d464f
                    cout << "At index " << i << ": ";
Packit 0d464f
                    cout << "expecting " << dst._buffer[i] << "; got "
Packit 0d464f
                         << dstF16c._buffer[i] << endl;
Packit 0d464f
                    assert(false);
Packit 0d464f
                }
Packit 0d464f
            }
Packit 0d464f
        } // iter
Packit 0d464f
    } // f16c
Packit 0d464f
}
Packit 0d464f
Packit 0d464f
Packit 0d464f
} // namespace
Packit 0d464f
Packit 0d464f
void 
Packit 0d464f
testDwaCompressorSimd (const string&)
Packit 0d464f
{
Packit 0d464f
    cout << "SIMD helper functions for DwaCompressor:" << endl;
Packit 0d464f
Packit 0d464f
    try
Packit 0d464f
    {
Packit 0d464f
    
Packit 0d464f
        testCsc();
Packit 0d464f
        testInterleave();
Packit 0d464f
        testFloatToHalf();
Packit 0d464f
        testFromHalfZigZag();
Packit 0d464f
Packit 0d464f
        testDct();
Packit 0d464f
Packit 0d464f
    }
Packit 0d464f
    catch (const exception &e)
Packit 0d464f
    {
Packit 0d464f
        cout << "unexpected exception: " << e.what() << endl;
Packit 0d464f
        assert (false);
Packit 0d464f
    }
Packit 0d464f
    catch (...)
Packit 0d464f
    {
Packit 0d464f
        cout << "unexpected exception" << endl;
Packit 0d464f
        assert (false);
Packit 0d464f
    }
Packit 0d464f
Packit 0d464f
    cout << "ok\n" << endl;
Packit 0d464f
}