/////////////////////////////////////////////////////////////////////////// // // Copyright (c) 2009-2014 DreamWorks Animation LLC. // // All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: // * Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // * Redistributions in binary form must reproduce the above // copyright notice, this list of conditions and the following disclaimer // in the documentation and/or other materials provided with the // distribution. // * Neither the name of DreamWorks Animation nor the names of // its contributors may be used to endorse or promote products derived // from this software without specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
//
///////////////////////////////////////////////////////////////////////////

//
// NOTE(review): the header names of the eight #include directives below
// are missing from this copy of the file (it looks as if everything
// between a '<' and the following '>' was stripped).  The same damage
// shows up inside several functions further down; each affected spot
// carries its own NOTE(review).  Restore the missing text from a pristine
// copy before building.
//
#include
#include
#include
#include
#include
#include
#include
#include

using namespace OPENEXR_IMF_NAMESPACE;
using namespace IMATH_NAMESPACE;
using namespace std;

namespace
{

//
// Print the 64 entries of buffer._buffer to cout as 8 rows of 8 values.
// Each value is followed by a single space and each row ends with endl.
// The stream's current float formatting is used unchanged.
//
void
dumpBuffer (const SimdAlignedBuffer64f &buffer)
{
    for (int i=0; i<8; ++i)
    {
        for (int j=0; j<8; ++j)
        {
            cout << buffer._buffer[i*8+j] << " ";
        }
        cout << endl;
    }
}

//
// Element-wise absolute comparison of two 64-entry buffers.
//
//   src        reference ("goal") values
//   dst        values under test
//   errThresh  largest tolerated |src[i] - dst[i]|
//
// On the first element whose absolute difference exceeds errThresh, the
// index and difference are printed, both buffers are dumped in scientific
// notation, and assert(false) is hit.  If assert is compiled out (NDEBUG)
// the loop simply continues and reports any further offending elements.
//
void
compareBuffer (const SimdAlignedBuffer64f &src,
               const SimdAlignedBuffer64f &dst,
               const float                 errThresh)
{
    for (int i=0; i<64; ++i)
    {
        double diff = fabs(src._buffer[i] - dst._buffer[i]);

        if (diff > errThresh)
        {
            cout << scientific;
            cout << "Error exceeded threshold on element " << i << endl;
            cout << " diff: " << diff << endl;
            cout << "Goal (src): " << scientific << endl;
            dumpBuffer(src);
            cout << "Test (dst): " << endl;
            dumpBuffer(dst);

            assert(false);
        }
    }
}

//
// Element-wise comparison of two 64-entry buffers using a combined
// relative / absolute criterion.
//
//   src           reference ("goal") values
//   dst           values under test
//   relErrThresh  largest tolerated |src[i] - dst[i]| / |src[i]|
//   absErrThresh  largest tolerated |src[i] - dst[i]|
//
// An element only counts as a failure when BOTH thresholds are exceeded,
// so small absolute errors on small-magnitude values are accepted.  On
// failure the diagnostics are printed, both buffers are dumped and
// assert(false) is hit.
//
void
compareBufferRelative (const SimdAlignedBuffer64f &src,
                       const SimdAlignedBuffer64f &dst,
                       const float                 relErrThresh,
                       const float                 absErrThresh)
{
    for (int i=0; i<64; ++i)
    {
        double diff    = fabs(src._buffer[i] - dst._buffer[i]);

        // No guard against src._buffer[i] == 0: the quotient is then
        // inf or NaN (assuming IEEE arithmetic), and the absolute
        // threshold in the condition below decides the outcome.
        double relDiff = diff / fabs(src._buffer[i]);

        if (relDiff > relErrThresh && diff > absErrThresh)
        {
            cout << scientific;
            cout << "Error exceeded threshold on element " << i << endl;

            // The percentage is printed in fixed notation; scientific is
            // switched back on before the buffers are dumped.
            cout << " diff: " << diff << " relErr: " << fixed << 100.0*relDiff << " %" << endl;
            cout << "Goal (src): " << scientific << endl;
            dumpBuffer(src);
            cout << "Test (dst): " << endl;
            dumpBuffer(dst);

            assert(false);
        }
    }
}

//
// Test that we can round trip CSC data with reasonable precision
//
// NOTE(review): only the set-up of this function is intact.  The first
// for-statement below is truncated right after "iter", and the text that
// follows it uses orig / test as single buffers rather than the 3-element
// arrays declared here, so it presumably belongs to a later function
// (likely testDct(), which the entry point calls).  The rest of testCsc(),
// the whole of testInterleave() and the head of that later function are
// not present in this copy -- restore them from a pristine copy.
//
void
testCsc()
{
    const int numIter = 1000000;
    Rand48 rand48(0);
    SimdAlignedBuffer64f orig[3];
    SimdAlignedBuffer64f test[3];

    cout << " Color Space Conversion Round Trip " << endl;
    cout << " csc709Forward64() - 64 x csc709Inverse()" << endl;

    // NOTE(review): damaged statement, text missing after "iter".
    for (int iter=0; iter(test._buffer);
        compareBufferRelative(orig, test, .02, 1e-3);
    }

    cout << " Inverse, DC Only" << endl;

    // NOTE(review): damaged statement, text missing after "iter".
    for (int iter=0; iter(orig._buffer);
        dctInverse8x8DcOnly(test._buffer);
        compareBufferRelative(orig, test, .01, 1e-6);
    }

    //
    // Helper macro: prints _desc, then runs a comparison loop that calls
    // _func<_n> on test._buffer and checks the result against orig with
    // compareBufferRelative (1% relative, 1e-6 absolute).
    //
    // NOTE(review): the loop header and the start of the loop body inside
    // the macro are damaged in the same way as the loops above (text
    // missing after "iter").  How orig is produced is therefore not
    // visible here.
    //
#define INVERSE_DCT_SCALAR_TEST_N(_func, _n, _desc)                   \
    cout << " " << _desc << endl;                                     \
    for (int iter=0; iter(orig._buffer);                              \
        _func<_n>(test._buffer);                                      \
        compareBufferRelative(orig, test, .01, 1e-6);                 \
    }

    // The scalar implementation is always exercised, once for each
    // template argument 0..7 (labelled "8x8" down to "1x8").
    cout << " Inverse, Scalar: " << endl;
    INVERSE_DCT_SCALAR_TEST_N(dctInverse8x8_scalar, 0, "8x8")
    INVERSE_DCT_SCALAR_TEST_N(dctInverse8x8_scalar, 1, "7x8")
    INVERSE_DCT_SCALAR_TEST_N(dctInverse8x8_scalar, 2, "6x8")
    INVERSE_DCT_SCALAR_TEST_N(dctInverse8x8_scalar, 3, "5x8")
    INVERSE_DCT_SCALAR_TEST_N(dctInverse8x8_scalar, 4, "4x8")
    INVERSE_DCT_SCALAR_TEST_N(dctInverse8x8_scalar, 5, "3x8")
    INVERSE_DCT_SCALAR_TEST_N(dctInverse8x8_scalar, 6, "2x8")
    INVERSE_DCT_SCALAR_TEST_N(dctInverse8x8_scalar, 7, "1x8")

    // The SIMD variants are only run when the corresponding CpuId flag
    // is set (CpuId is declared elsewhere; presumably it reflects the
    // features of the CPU the test is running on -- TODO confirm).
    CpuId cpuid;
    if (cpuid.sse2)
    {
        cout << " Inverse, SSE2: " << endl;
        INVERSE_DCT_SCALAR_TEST_N(dctInverse8x8_sse2, 0, "8x8")
        INVERSE_DCT_SCALAR_TEST_N(dctInverse8x8_sse2, 1, "7x8")
        INVERSE_DCT_SCALAR_TEST_N(dctInverse8x8_sse2, 2, "6x8")
        INVERSE_DCT_SCALAR_TEST_N(dctInverse8x8_sse2, 3, "5x8")
        INVERSE_DCT_SCALAR_TEST_N(dctInverse8x8_sse2, 4, "4x8")
        INVERSE_DCT_SCALAR_TEST_N(dctInverse8x8_sse2, 5, "3x8")
        INVERSE_DCT_SCALAR_TEST_N(dctInverse8x8_sse2, 6, "2x8")
        INVERSE_DCT_SCALAR_TEST_N(dctInverse8x8_sse2, 7, "1x8")
    }

    if (cpuid.avx)
    {
        cout << " Inverse, AVX: " << endl;
        INVERSE_DCT_SCALAR_TEST_N(dctInverse8x8_avx, 0, "8x8")
        INVERSE_DCT_SCALAR_TEST_N(dctInverse8x8_avx, 1, "7x8")
        INVERSE_DCT_SCALAR_TEST_N(dctInverse8x8_avx, 2, "6x8")
        INVERSE_DCT_SCALAR_TEST_N(dctInverse8x8_avx, 3, "5x8")
        INVERSE_DCT_SCALAR_TEST_N(dctInverse8x8_avx, 4, "4x8")
        INVERSE_DCT_SCALAR_TEST_N(dctInverse8x8_avx, 5, "3x8")
        INVERSE_DCT_SCALAR_TEST_N(dctInverse8x8_avx, 6, "2x8")
        INVERSE_DCT_SCALAR_TEST_N(dctInverse8x8_avx, 7, "1x8")
    }
}

//
// Test FLOAT -> HALF conversion, mostly for F16C enabled processors
//
// Two passes are visible: one labelled convertFloatToHalf64_scalar() that
// always runs, and one labelled convertFloatToHalf64_f16c() that runs only
// when CpuId reports both avx and f16c.  Each pass reports a mismatch
// between dst._buffer[i] and value.bits() and then hits assert(false).
//
// NOTE(review): in both passes the loop header, the filling of src, the
// call to the conversion routine and the comparison itself are missing
// (text lost after "iter"); only the tail of the diagnostic output
// survived.  Restore from a pristine copy.
//
void
testFloatToHalf()
{
    cout << " FLOAT -> HALF conversion" << endl;

    const int numIter = 1000000;
    Rand48 rand48(0);
    SimdAlignedBuffer64f src;
    SimdAlignedBuffer64us dst;

    cout << " convertFloatToHalf64_scalar()" << endl;
    // NOTE(review): damaged statement, text missing after "iter".
    for (int iter=0; iter " << dst._buffer[i] << " expected " << value.bits() << endl;
                assert(false);
            }
        }
    }

    CpuId cpuid;
    if (cpuid.avx && cpuid.f16c)
    {
        cout << " convertFloatToHalf64_f16c()" << endl;
        // NOTE(review): damaged statement, text missing after "iter".
        for (int iter=0; iter " << dst._buffer[i] << " expected " << value.bits() << endl;
                    assert(false);
                }
            }
        }
    }
}

//
// Test ZigZag reordering + HALF -> FLOAT conversion
//
// Part one feeds a fixed 64-entry pattern through fromHalfZigZag_scalar()
// and requires output element i to equal (float)i within 1e-5.  Part two
// runs only when CpuId reports avx and f16c and compares a second result
// buffer (dstF16c) against the scalar result.
//
void
testFromHalfZigZag()
{
    SimdAlignedBuffer64us src;
    SimdAlignedBuffer64f dst;

    cout << " ZigZag re-ordering with HALF -> FLOAT conversion" << endl;

    // First off, simple check to see that the reordering is working
    // This pattern, when converted, should give 0.0 - 63.0 as floats
    // in order.
    unsigned short pattern[] = {
        0x0000, 0x3c00, 0x4800, 0x4c00, 0x4880, 0x4000, 0x4200, 0x4900,
        0x4c40, 0x4e00, 0x5000, 0x4e40, 0x4c80, 0x4980, 0x4400, 0x4500,
        0x4a00, 0x4cc0, 0x4e80, 0x5020, 0x5100, 0x5200, 0x5120, 0x5040,
        0x4ec0, 0x4d00, 0x4a80, 0x4600, 0x4700, 0x4b00, 0x4d40, 0x4f00,
        0x5060, 0x5140, 0x5220, 0x5300, 0x5320, 0x5240, 0x5160, 0x5080,
        0x4f40, 0x4d80, 0x4b80, 0x4dc0, 0x4f80, 0x50a0, 0x5180, 0x5260,
        0x5340, 0x5360, 0x5280, 0x51a0, 0x50c0, 0x4fc0, 0x50e0, 0x51c0,
        0x52a0, 0x5380, 0x53a0, 0x52c0, 0x51e0, 0x52e0, 0x53c0, 0x53e0
    };

    // NOTE(review): the label below reads "_scaler" while the function
    // actually called is fromHalfZigZag_scalar -- looks like a typo in
    // the progress message.
    cout << " fromHalfZigZag_scaler()" << endl;
    for (int i=0; i<64; ++i)
    {
        src._buffer[i] = pattern[i];
    }

    fromHalfZigZag_scalar(src._buffer, dst._buffer);

    // Output slot i must hold the value i.
    for (int i=0; i<64; ++i)
    {
        if ( fabsf(dst._buffer[i] - (float)i) > 1e-5 )
        {
            cout << "At index " << i << ": ";
            cout << "expecting " << (float)i << "; got " << dst._buffer[i] << endl;
            assert(false);
        }
    }

    // Then compare the two implementations, if supported
    CpuId cpuid;
    if (cpuid.avx && cpuid.f16c)
    {
        const int numIter = 1000000;
        Rand48 rand48(0);
        half h;
        SimdAlignedBuffer64f dstF16c;

        cout << " fromHalfZigZag_f16c()" << endl;

        // NOTE(review): damaged statement, text missing after "iter".
        // The loop header, the generation of src, the calls that fill dst
        // and dstF16c, and the left-hand side of the "> 1e-5" comparison
        // are not present in this copy.
        for (int iter=0; iter 1e-5 ) {
                    cout << "At index " << i << ": ";
                    cout << "expecting " << dst._buffer[i] << "; got " << dstF16c._buffer[i] << endl;
                    assert(false);
                }
            }
        } // iter
    } // f16c
}

} // namespace

//
// Entry point for the DwaCompressor SIMD helper tests.
//
// Prints a banner, then runs testCsc(), testInterleave(),
// testFloatToHalf(), testFromHalfZigZag() and testDct() in that order.
// Any exception escaping a test is reported on cout and followed by
// assert(false).  On completion "ok" is printed.  The string argument is
// unnamed and unused.
//
// NOTE(review): intact definitions of testInterleave() and testDct() are
// not visible in this copy of the file (see the note on testCsc()).
//
void
testDwaCompressorSimd (const string&)
{
    cout << "SIMD helper functions for DwaCompressor:" << endl;

    try
    {
        testCsc();
        testInterleave();
        testFloatToHalf();
        testFromHalfZigZag();
        testDct();
    }
    catch (const exception &e)
    {
        cout << "unexpected exception: " << e.what() << endl;
        assert (false);
    }
    catch (...)
    {
        cout << "unexpected exception" << endl;
        assert (false);
    }

    cout << "ok\n" << endl;
}