Blame IlmImf/ImfOptimizedPixelReading.h

Packit Service 6754ca
///////////////////////////////////////////////////////////////////////////
Packit Service 6754ca
//
Packit Service 6754ca
// Copyright (c) 2012, Autodesk, Inc.
Packit Service 6754ca
// 
Packit Service 6754ca
// All rights reserved.
Packit Service 6754ca
//
Packit Service 6754ca
// Implementation of IIF-specific file format and speed optimizations 
Packit Service 6754ca
// provided by Innobec Technologies inc on behalf of Autodesk.
Packit Service 6754ca
// 
Packit Service 6754ca
// Redistribution and use in source and binary forms, with or without
Packit Service 6754ca
// modification, are permitted provided that the following conditions are
Packit Service 6754ca
// met:
Packit Service 6754ca
// *       Redistributions of source code must retain the above copyright
Packit Service 6754ca
// notice, this list of conditions and the following disclaimer.
Packit Service 6754ca
// *       Redistributions in binary form must reproduce the above
Packit Service 6754ca
// copyright notice, this list of conditions and the following disclaimer
Packit Service 6754ca
// in the documentation and/or other materials provided with the
Packit Service 6754ca
// distribution.
Packit Service 6754ca
// *       Neither the name of Industrial Light & Magic nor the names of
Packit Service 6754ca
// its contributors may be used to endorse or promote products derived
Packit Service 6754ca
// from this software without specific prior written permission. 
Packit Service 6754ca
// 
Packit Service 6754ca
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
Packit Service 6754ca
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
Packit Service 6754ca
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
Packit Service 6754ca
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
Packit Service 6754ca
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
Packit Service 6754ca
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
Packit Service 6754ca
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
Packit Service 6754ca
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
Packit Service 6754ca
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
Packit Service 6754ca
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
Packit Service 6754ca
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
Packit Service 6754ca
//
Packit Service 6754ca
///////////////////////////////////////////////////////////////////////////
Packit Service 6754ca
Packit Service 6754ca
#pragma once
Packit Service 6754ca
Packit Service 6754ca
#ifndef INCLUDED_IMF_OPTIMIZED_PIXEL_READING_H
Packit Service 6754ca
#define INCLUDED_IMF_OPTIMIZED_PIXEL_READING_H
Packit Service 6754ca
Packit Service 6754ca
#include "ImfSimd.h"
Packit Service 6754ca
#include "ImfSystemSpecific.h"
Packit Service 6754ca
#include <iostream>
Packit Service 6754ca
#include "ImfChannelList.h"
Packit Service 6754ca
#include "ImfFrameBuffer.h"
Packit Service 6754ca
#include "ImfStringVectorAttribute.h"
Packit Service 6754ca
Packit Service 6754ca
OPENEXR_IMF_INTERNAL_NAMESPACE_HEADER_ENTER
Packit Service 6754ca
Packit Service 6754ca
//
// Small value type holding the two settings used to decide whether the
// optimized pixel-reading path applies.
// NOTE(review): the exact meaning of each field is defined by the code
// that fills this object in, which is outside this header -- confirm there.
//
class OptimizationMode
{
public:

    // Whether the optimized path may be used; starts out disabled.
    bool _optimizable;

    // Vertical sampling value.  Defaults to 1 so the member never holds an
    // indeterminate value (presumably 1 means "every scanline" -- confirm
    // against the code that assigns it).
    int _ySampling;

    OptimizationMode() : _optimizable(false), _ySampling(1) {}
};
Packit Service 6754ca
Packit Service 6754ca
Packit Service 6754ca
#if IMF_HAVE_SSE2
Packit Service 6754ca
Packit Service 6754ca
Packit Service 6754ca
//------------------------------------------------------------------------
Packit Service 6754ca
// Test for SSE pointer alignment
Packit Service 6754ca
//------------------------------------------------------------------------
Packit Service 6754ca
EXR_FORCEINLINE
bool
isPointerSSEAligned (const void* EXR_RESTRICT pPointer)
{
    //
    // Returns true when pPointer sits on a 16-byte boundary, i.e. its low
    // four address bits are all zero.
    //
    // The address is converted to size_t rather than unsigned long:
    // unsigned long is only 32 bits wide on LLP64 platforms (64-bit
    // Windows), where casting a 64-bit pointer to it is a narrowing
    // conversion that compilers reject or warn about.
    //
    return (reinterpret_cast<size_t> (pPointer) & 15) == 0;
}
Packit Service 6754ca
Packit Service 6754ca
//------------------------------------------------------------------------
Packit Service 6754ca
// Load SSE from address into register
Packit Service 6754ca
//------------------------------------------------------------------------
Packit Service 6754ca
template<bool IS_ALIGNED>
Packit Service 6754ca
EXR_FORCEINLINE
Packit Service 6754ca
__m128i loadSSE (__m128i*& loadAddress)
Packit Service 6754ca
{
Packit Service 6754ca
    // throw exception :: this is not accepted
Packit Service 6754ca
    return _mm_loadu_si128 (loadAddress);
Packit Service 6754ca
}
Packit Service 6754ca
Packit Service 6754ca
template<>
Packit Service 6754ca
EXR_FORCEINLINE
Packit Service 6754ca
__m128i loadSSE<false> (__m128i*& loadAddress)
Packit Service 6754ca
{
Packit Service 6754ca
    return _mm_loadu_si128 (loadAddress);
Packit Service 6754ca
}
Packit Service 6754ca
Packit Service 6754ca
template<>
Packit Service 6754ca
EXR_FORCEINLINE
Packit Service 6754ca
__m128i loadSSE<true> (__m128i*& loadAddress)
Packit Service 6754ca
{
Packit Service 6754ca
    return _mm_load_si128 (loadAddress);
Packit Service 6754ca
}
Packit Service 6754ca
Packit Service 6754ca
//------------------------------------------------------------------------
Packit Service 6754ca
// Store SSE from register into address
Packit Service 6754ca
//------------------------------------------------------------------------
Packit Service 6754ca
template<bool IS_ALIGNED>
Packit Service 6754ca
EXR_FORCEINLINE
Packit Service 6754ca
void storeSSE (__m128i*& storeAddress, __m128i& dataToStore)
Packit Service 6754ca
{
Packit Service 6754ca
Packit Service 6754ca
}
Packit Service 6754ca
Packit Service 6754ca
template<>
Packit Service 6754ca
EXR_FORCEINLINE
Packit Service 6754ca
void
Packit Service 6754ca
storeSSE<false> (__m128i*& storeAddress, __m128i& dataToStore)
Packit Service 6754ca
{
Packit Service 6754ca
    _mm_storeu_si128 (storeAddress, dataToStore);
Packit Service 6754ca
}
Packit Service 6754ca
Packit Service 6754ca
template<>
Packit Service 6754ca
EXR_FORCEINLINE
Packit Service 6754ca
void
Packit Service 6754ca
storeSSE<true> (__m128i*& storeAddress, __m128i& dataToStore)
Packit Service 6754ca
{
Packit Service 6754ca
    _mm_stream_si128 (storeAddress, dataToStore);
Packit Service 6754ca
}
Packit Service 6754ca
Packit Service 6754ca
Packit Service 6754ca
Packit Service 6754ca
//------------------------------------------------------------------------
Packit Service 6754ca
//
Packit Service 6754ca
// Write to RGBA
Packit Service 6754ca
//
Packit Service 6754ca
//------------------------------------------------------------------------
Packit Service 6754ca
Packit Service 6754ca
//
Packit Service 6754ca
// Using SSE intrinsics
Packit Service 6754ca
//
Packit Service 6754ca
template<bool READ_PTR_ALIGNED, bool WRITE_PTR_ALIGNED>
Packit Service 6754ca
EXR_FORCEINLINE 
Packit Service 6754ca
void writeToRGBASSETemplate 
Packit Service 6754ca
    (__m128i*& readPtrSSERed,
Packit Service 6754ca
     __m128i*& readPtrSSEGreen,
Packit Service 6754ca
     __m128i*& readPtrSSEBlue,
Packit Service 6754ca
     __m128i*& readPtrSSEAlpha,
Packit Service 6754ca
     __m128i*& writePtrSSE,
Packit Service 6754ca
     const size_t& lPixelsToCopySSE)
Packit Service 6754ca
{
Packit Service 6754ca
    for (size_t i = 0; i < lPixelsToCopySSE; ++i)
Packit Service 6754ca
    {
Packit Service 6754ca
        __m128i redRegister   = loadSSE<READ_PTR_ALIGNED> (readPtrSSERed);
Packit Service 6754ca
        __m128i greenRegister = loadSSE<READ_PTR_ALIGNED> (readPtrSSEGreen);
Packit Service 6754ca
        __m128i blueRegister  = loadSSE<READ_PTR_ALIGNED> (readPtrSSEBlue);
Packit Service 6754ca
        __m128i alphaRegister = loadSSE<READ_PTR_ALIGNED> (readPtrSSEAlpha);
Packit Service 6754ca
Packit Service 6754ca
        __m128i redGreenRegister  = _mm_unpacklo_epi16 (redRegister,
Packit Service 6754ca
                                                        greenRegister);
Packit Service 6754ca
        __m128i blueAlphaRegister = _mm_unpacklo_epi16 (blueRegister,
Packit Service 6754ca
                                                        alphaRegister);
Packit Service 6754ca
Packit Service 6754ca
        __m128i pixel12Register   = _mm_unpacklo_epi32 (redGreenRegister,
Packit Service 6754ca
                                                        blueAlphaRegister);
Packit Service 6754ca
        __m128i pixel34Register   = _mm_unpackhi_epi32 (redGreenRegister,
Packit Service 6754ca
                                                        blueAlphaRegister);
Packit Service 6754ca
Packit Service 6754ca
        storeSSE<WRITE_PTR_ALIGNED> (writePtrSSE, pixel12Register);
Packit Service 6754ca
        ++writePtrSSE;
Packit Service 6754ca
Packit Service 6754ca
        storeSSE<WRITE_PTR_ALIGNED> (writePtrSSE, pixel34Register);
Packit Service 6754ca
        ++writePtrSSE;
Packit Service 6754ca
Packit Service 6754ca
        redGreenRegister  = _mm_unpackhi_epi16 (redRegister, greenRegister);
Packit Service 6754ca
        blueAlphaRegister = _mm_unpackhi_epi16 (blueRegister, alphaRegister);
Packit Service 6754ca
Packit Service 6754ca
        pixel12Register   = _mm_unpacklo_epi32 (redGreenRegister,
Packit Service 6754ca
                                                blueAlphaRegister);
Packit Service 6754ca
        pixel34Register   = _mm_unpackhi_epi32 (redGreenRegister,
Packit Service 6754ca
                                                blueAlphaRegister);
Packit Service 6754ca
Packit Service 6754ca
        storeSSE<WRITE_PTR_ALIGNED> (writePtrSSE, pixel12Register);
Packit Service 6754ca
        ++writePtrSSE;
Packit Service 6754ca
        
Packit Service 6754ca
        storeSSE<WRITE_PTR_ALIGNED> (writePtrSSE, pixel34Register);
Packit Service 6754ca
        ++writePtrSSE;
Packit Service 6754ca
Packit Service 6754ca
        ++readPtrSSEAlpha;
Packit Service 6754ca
        ++readPtrSSEBlue;
Packit Service 6754ca
        ++readPtrSSEGreen;
Packit Service 6754ca
        ++readPtrSSERed;
Packit Service 6754ca
    }
Packit Service 6754ca
}
Packit Service 6754ca
Packit Service 6754ca
//
Packit Service 6754ca
// Not using SSE intrinsics.  This is still faster than the alternative
Packit Service 6754ca
// because we have multiple read pointers and therefore we are able to
Packit Service 6754ca
// take advantage of data locality for write operations.
Packit Service 6754ca
//
Packit Service 6754ca
EXR_FORCEINLINE 
void writeToRGBANormal (unsigned short*& readPtrRed,
                        unsigned short*& readPtrGreen,
                        unsigned short*& readPtrBlue,
                        unsigned short*& readPtrAlpha,
                        unsigned short*& writePtr,
                        const size_t& lPixelsToCopy)
{
    //
    // Scalar interleave: for each of lPixelsToCopy pixels, copy one sample
    // from every channel into four consecutive output slots (R, G, B, A).
    // All pointers are left just past the last sample touched.
    //
    for (size_t pixel = 0; pixel != lPixelsToCopy; ++pixel)
    {
        writePtr[0] = *readPtrRed;
        writePtr[1] = *readPtrGreen;
        writePtr[2] = *readPtrBlue;
        writePtr[3] = *readPtrAlpha;

        writePtr += 4;

        ++readPtrRed;
        ++readPtrGreen;
        ++readPtrBlue;
        ++readPtrAlpha;
    }
}
Packit Service 6754ca
Packit Service 6754ca
//
Packit Service 6754ca
// Determine which (template) version to use by checking whether pointers
Packit Service 6754ca
// are aligned
Packit Service 6754ca
//
Packit Service 6754ca
EXR_FORCEINLINE 
void optimizedWriteToRGBA (unsigned short*& readPtrRed,
                           unsigned short*& readPtrGreen,
                           unsigned short*& readPtrBlue,
                           unsigned short*& readPtrAlpha,
                           unsigned short*& writePtr,
                           const size_t& pixelsToCopySSE,
                           const size_t& pixelsToCopyNormal)
{
    //
    // Interleaves four channels into R G B A output.
    //
    //   pixelsToCopySSE    - number of SSE iterations; each one consumes
    //                        one 128-bit register (8 samples) per channel.
    //   pixelsToCopyNormal - number of pixels copied one by one afterwards.
    //
    // All pointers are advanced past the data that was read / written.
    //
    const bool readPtrAreAligned = isPointerSSEAligned (readPtrRed)   &&
                                   isPointerSSEAligned (readPtrGreen) &&
                                   isPointerSSEAligned (readPtrBlue)  &&
                                   isPointerSSEAligned (readPtrAlpha);

    const bool writePtrIsAligned = isPointerSSEAligned (writePtr);

    //
    // Convert the pointer VALUES to __m128i* and copy the advanced values
    // back afterwards.  Casting the pointer objects themselves with
    // (__m128i*&) would access an unsigned short* object through an lvalue
    // of type __m128i*, which breaks the strict-aliasing rules.
    //
    __m128i* redSSE   = reinterpret_cast<__m128i*> (readPtrRed);
    __m128i* greenSSE = reinterpret_cast<__m128i*> (readPtrGreen);
    __m128i* blueSSE  = reinterpret_cast<__m128i*> (readPtrBlue);
    __m128i* alphaSSE = reinterpret_cast<__m128i*> (readPtrAlpha);
    __m128i* writeSSE = reinterpret_cast<__m128i*> (writePtr);

    // Pick the instantiation matching the run-time alignment.
    if (readPtrAreAligned)
    {
        if (writePtrIsAligned)
        {
            writeToRGBASSETemplate<true, true> (redSSE, greenSSE, blueSSE,
                                                alphaSSE, writeSSE,
                                                pixelsToCopySSE);
        }
        else
        {
            writeToRGBASSETemplate<true, false> (redSSE, greenSSE, blueSSE,
                                                 alphaSSE, writeSSE,
                                                 pixelsToCopySSE);
        }
    }
    else
    {
        if (writePtrIsAligned)
        {
            writeToRGBASSETemplate<false, true> (redSSE, greenSSE, blueSSE,
                                                 alphaSSE, writeSSE,
                                                 pixelsToCopySSE);
        }
        else
        {
            writeToRGBASSETemplate<false, false> (redSSE, greenSSE, blueSSE,
                                                  alphaSSE, writeSSE,
                                                  pixelsToCopySSE);
        }
    }

    readPtrRed   = reinterpret_cast<unsigned short*> (redSSE);
    readPtrGreen = reinterpret_cast<unsigned short*> (greenSSE);
    readPtrBlue  = reinterpret_cast<unsigned short*> (blueSSE);
    readPtrAlpha = reinterpret_cast<unsigned short*> (alphaSSE);
    writePtr     = reinterpret_cast<unsigned short*> (writeSSE);

    // Remaining pixels that did not fill a whole SSE iteration.
    writeToRGBANormal (readPtrRed, readPtrGreen, readPtrBlue, readPtrAlpha,
                       writePtr, pixelsToCopyNormal);
}
Packit Service 6754ca
Packit Service 6754ca
Packit Service 6754ca
Packit Service 6754ca
//------------------------------------------------------------------------
Packit Service 6754ca
//
Packit Service 6754ca
// Write to RGBA Fill A
Packit Service 6754ca
//
Packit Service 6754ca
//------------------------------------------------------------------------
Packit Service 6754ca
Packit Service 6754ca
//
Packit Service 6754ca
// Using SSE intrinsics
Packit Service 6754ca
//
Packit Service 6754ca
template<bool READ_PTR_ALIGNED, bool WRITE_PTR_ALIGNED>
Packit Service 6754ca
EXR_FORCEINLINE 
Packit Service 6754ca
void
Packit Service 6754ca
writeToRGBAFillASSETemplate (__m128i*& readPtrSSERed,
Packit Service 6754ca
                             __m128i*& readPtrSSEGreen,
Packit Service 6754ca
                             __m128i*& readPtrSSEBlue,
Packit Service 6754ca
                             const unsigned short& alphaFillValue,
Packit Service 6754ca
                             __m128i*& writePtrSSE,
Packit Service 6754ca
                             const size_t& pixelsToCopySSE)
Packit Service 6754ca
{
Packit Service 6754ca
    const __m128i dummyAlphaRegister = _mm_set_epi16 (alphaFillValue,
Packit Service 6754ca
                                                      alphaFillValue,
Packit Service 6754ca
                                                      alphaFillValue,
Packit Service 6754ca
                                                      alphaFillValue,
Packit Service 6754ca
                                                      alphaFillValue,
Packit Service 6754ca
                                                      alphaFillValue,
Packit Service 6754ca
                                                      alphaFillValue,
Packit Service 6754ca
                                                      alphaFillValue);
Packit Service 6754ca
Packit Service 6754ca
    for (size_t pixelCounter = 0; pixelCounter < pixelsToCopySSE; ++pixelCounter)
Packit Service 6754ca
    {
Packit Service 6754ca
        __m128i redRegister   = loadSSE<READ_PTR_ALIGNED> (readPtrSSERed);
Packit Service 6754ca
        __m128i greenRegister = loadSSE<READ_PTR_ALIGNED> (readPtrSSEGreen);
Packit Service 6754ca
        __m128i blueRegister  = loadSSE<READ_PTR_ALIGNED> (readPtrSSEBlue);
Packit Service 6754ca
Packit Service 6754ca
        __m128i redGreenRegister  = _mm_unpacklo_epi16 (redRegister,
Packit Service 6754ca
                                                        greenRegister);
Packit Service 6754ca
        __m128i blueAlphaRegister = _mm_unpacklo_epi16 (blueRegister,
Packit Service 6754ca
                                                        dummyAlphaRegister);
Packit Service 6754ca
Packit Service 6754ca
        __m128i pixel12Register   = _mm_unpacklo_epi32 (redGreenRegister,
Packit Service 6754ca
                                                        blueAlphaRegister);
Packit Service 6754ca
        __m128i pixel34Register   = _mm_unpackhi_epi32 (redGreenRegister,
Packit Service 6754ca
                                                        blueAlphaRegister);
Packit Service 6754ca
Packit Service 6754ca
        storeSSE<WRITE_PTR_ALIGNED> (writePtrSSE, pixel12Register);
Packit Service 6754ca
        ++writePtrSSE;
Packit Service 6754ca
Packit Service 6754ca
        storeSSE<WRITE_PTR_ALIGNED> (writePtrSSE, pixel34Register);
Packit Service 6754ca
        ++writePtrSSE;
Packit Service 6754ca
Packit Service 6754ca
        redGreenRegister  = _mm_unpackhi_epi16 (redRegister,
Packit Service 6754ca
                                                greenRegister);
Packit Service 6754ca
        blueAlphaRegister = _mm_unpackhi_epi16 (blueRegister,
Packit Service 6754ca
                                                dummyAlphaRegister);
Packit Service 6754ca
Packit Service 6754ca
        pixel12Register   = _mm_unpacklo_epi32 (redGreenRegister,
Packit Service 6754ca
                                                blueAlphaRegister);
Packit Service 6754ca
        pixel34Register   = _mm_unpackhi_epi32 (redGreenRegister,
Packit Service 6754ca
                                                blueAlphaRegister);
Packit Service 6754ca
Packit Service 6754ca
        storeSSE<WRITE_PTR_ALIGNED> (writePtrSSE, pixel12Register);
Packit Service 6754ca
        ++writePtrSSE;
Packit Service 6754ca
Packit Service 6754ca
        storeSSE<WRITE_PTR_ALIGNED> (writePtrSSE, pixel34Register);
Packit Service 6754ca
        ++writePtrSSE;
Packit Service 6754ca
Packit Service 6754ca
        ++readPtrSSEBlue;
Packit Service 6754ca
        ++readPtrSSEGreen;
Packit Service 6754ca
        ++readPtrSSERed;
Packit Service 6754ca
    }
Packit Service 6754ca
}
Packit Service 6754ca
Packit Service 6754ca
//
Packit Service 6754ca
// Not using SSE intrinsics.  This is still faster than the alternative
Packit Service 6754ca
// because we have multiple read pointers and therefore we are able to
Packit Service 6754ca
// take advantage of data locality for write operations.
Packit Service 6754ca
//
Packit Service 6754ca
EXR_FORCEINLINE
void
writeToRGBAFillANormal (unsigned short*& readPtrRed,
                        unsigned short*& readPtrGreen,
                        unsigned short*& readPtrBlue,
                        const unsigned short& alphaFillValue,
                        unsigned short*& writePtr,
                        const size_t& pixelsToCopy)
{
    //
    // Scalar interleave: for each of pixelsToCopy pixels, write the red,
    // green and blue samples followed by alphaFillValue in the fourth
    // slot.  All pointers are left just past the last sample touched.
    //
    for (size_t pixel = 0; pixel != pixelsToCopy; ++pixel)
    {
        writePtr[0] = *readPtrRed;
        writePtr[1] = *readPtrGreen;
        writePtr[2] = *readPtrBlue;
        writePtr[3] = alphaFillValue;

        writePtr += 4;

        ++readPtrRed;
        ++readPtrGreen;
        ++readPtrBlue;
    }
}
Packit Service 6754ca
Packit Service 6754ca
//
Packit Service 6754ca
// Determine which (template) version to use by checking whether pointers
Packit Service 6754ca
// are aligned.
Packit Service 6754ca
//
Packit Service 6754ca
EXR_FORCEINLINE 
void
optimizedWriteToRGBAFillA (unsigned short*& readPtrRed,
                           unsigned short*& readPtrGreen,
                           unsigned short*& readPtrBlue,
                           const unsigned short& alphaFillValue,
                           unsigned short*& writePtr,
                           const size_t& pixelsToCopySSE,
                           const size_t& pixelsToCopyNormal)
{
    //
    // Interleaves three channels into R G B A output, using
    // alphaFillValue for every alpha sample.
    //
    //   pixelsToCopySSE    - number of SSE iterations; each one consumes
    //                        one 128-bit register (8 samples) per channel.
    //   pixelsToCopyNormal - number of pixels copied one by one afterwards.
    //
    // All pointers are advanced past the data that was read / written.
    //
    const bool readPtrAreAligned = isPointerSSEAligned (readPtrRed)   &&
                                   isPointerSSEAligned (readPtrGreen) &&
                                   isPointerSSEAligned (readPtrBlue);

    const bool writePtrIsAligned = isPointerSSEAligned (writePtr);

    //
    // Convert the pointer VALUES to __m128i* and copy the advanced values
    // back afterwards.  Casting the pointer objects themselves with
    // (__m128i*&) would access an unsigned short* object through an lvalue
    // of type __m128i*, which breaks the strict-aliasing rules.
    //
    __m128i* redSSE   = reinterpret_cast<__m128i*> (readPtrRed);
    __m128i* greenSSE = reinterpret_cast<__m128i*> (readPtrGreen);
    __m128i* blueSSE  = reinterpret_cast<__m128i*> (readPtrBlue);
    __m128i* writeSSE = reinterpret_cast<__m128i*> (writePtr);

    // Pick the instantiation matching the run-time alignment.
    if (readPtrAreAligned)
    {
        if (writePtrIsAligned)
        {
            writeToRGBAFillASSETemplate<true, true> (redSSE, greenSSE,
                                                     blueSSE, alphaFillValue,
                                                     writeSSE,
                                                     pixelsToCopySSE);
        }
        else
        {
            writeToRGBAFillASSETemplate<true, false> (redSSE, greenSSE,
                                                      blueSSE, alphaFillValue,
                                                      writeSSE,
                                                      pixelsToCopySSE);
        }
    }
    else
    {
        if (writePtrIsAligned)
        {
            writeToRGBAFillASSETemplate<false, true> (redSSE, greenSSE,
                                                      blueSSE, alphaFillValue,
                                                      writeSSE,
                                                      pixelsToCopySSE);
        }
        else
        {
            writeToRGBAFillASSETemplate<false, false> (redSSE, greenSSE,
                                                       blueSSE, alphaFillValue,
                                                       writeSSE,
                                                       pixelsToCopySSE);
        }
    }

    readPtrRed   = reinterpret_cast<unsigned short*> (redSSE);
    readPtrGreen = reinterpret_cast<unsigned short*> (greenSSE);
    readPtrBlue  = reinterpret_cast<unsigned short*> (blueSSE);
    writePtr     = reinterpret_cast<unsigned short*> (writeSSE);

    // Remaining pixels that did not fill a whole SSE iteration.
    writeToRGBAFillANormal (readPtrRed,
                            readPtrGreen, readPtrBlue, alphaFillValue,
                            writePtr, pixelsToCopyNormal);
}
Packit Service 6754ca
Packit Service 6754ca
Packit Service 6754ca
Packit Service 6754ca
//------------------------------------------------------------------------
Packit Service 6754ca
//
Packit Service 6754ca
// Write to RGB
Packit Service 6754ca
//
Packit Service 6754ca
//------------------------------------------------------------------------
Packit Service 6754ca
Packit Service 6754ca
//
Packit Service 6754ca
// Using SSE intrinsics
Packit Service 6754ca
//
Packit Service 6754ca
template<bool READ_PTR_ALIGNED, bool WRITE_PTR_ALIGNED>
Packit Service 6754ca
EXR_FORCEINLINE 
Packit Service 6754ca
void
Packit Service 6754ca
writeToRGBSSETemplate (__m128i*& readPtrSSERed,
Packit Service 6754ca
                       __m128i*& readPtrSSEGreen,
Packit Service 6754ca
                       __m128i*& readPtrSSEBlue,
Packit Service 6754ca
                       __m128i*& writePtrSSE,
Packit Service 6754ca
                       const size_t& pixelsToCopySSE)
Packit Service 6754ca
{
Packit Service 6754ca
Packit Service 6754ca
    for (size_t pixelCounter = 0; pixelCounter < pixelsToCopySSE; ++pixelCounter)
Packit Service 6754ca
    {
Packit Service 6754ca
        //
Packit Service 6754ca
        // Need to shuffle and unpack pointers to obtain my first register
Packit Service 6754ca
        // We must save 8 pixels at a time, so we must have the following three registers at the end:
Packit Service 6754ca
        // 1) R1 G1 B1 R2 G2 B2 R3 G3
Packit Service 6754ca
        // 2) B3 R4 G4 B4 R5 G5 B5 R6
Packit Service 6754ca
        // 3) G6 B6 R7 G7 B7 R8 G8 B8
Packit Service 6754ca
        //
Packit Service 6754ca
        __m128i redRegister = loadSSE<READ_PTR_ALIGNED> (readPtrSSERed);
Packit Service 6754ca
        __m128i greenRegister = loadSSE<READ_PTR_ALIGNED> (readPtrSSEGreen);
Packit Service 6754ca
        __m128i blueRegister = loadSSE<READ_PTR_ALIGNED> (readPtrSSEBlue);
Packit Service 6754ca
Packit Service 6754ca
        //
Packit Service 6754ca
        // First register: R1 G1 B1 R2 G2 B2 R3 G3
Packit Service 6754ca
        // Construct 2 registers and then unpack them to obtain our final result:
Packit Service 6754ca
        //
Packit Service 6754ca
        __m128i redGreenRegister  = _mm_unpacklo_epi16 (redRegister,
Packit Service 6754ca
                                                        greenRegister);
Packit Service 6754ca
        __m128i redBlueRegister   = _mm_unpacklo_epi16 (redRegister,
Packit Service 6754ca
                                                        blueRegister);
Packit Service 6754ca
        __m128i greenBlueRegister = _mm_unpacklo_epi16 (greenRegister,
Packit Service 6754ca
                                                        blueRegister);
Packit Service 6754ca
Packit Service 6754ca
        // Left Part (R1 G1 B1 R2)
Packit Service 6754ca
        __m128i quarterRight = _mm_shufflelo_epi16 (redBlueRegister,
Packit Service 6754ca
                                                    _MM_SHUFFLE(3,0,2,1));
Packit Service 6754ca
        __m128i halfLeft     = _mm_unpacklo_epi32 (redGreenRegister,
Packit Service 6754ca
                                                   quarterRight);
Packit Service 6754ca
Packit Service 6754ca
        // Right Part (G2 B2 R3 G3)
Packit Service 6754ca
        __m128i quarterLeft  = _mm_shuffle_epi32 (greenBlueRegister,
Packit Service 6754ca
                                                 _MM_SHUFFLE(3,2,0,1));
Packit Service 6754ca
        quarterRight         = _mm_shuffle_epi32 (redGreenRegister,
Packit Service 6754ca
                                                 _MM_SHUFFLE(3,0,1,2));
Packit Service 6754ca
        __m128i halfRight    = _mm_unpacklo_epi32 (quarterLeft, quarterRight);
Packit Service 6754ca
Packit Service 6754ca
        __m128i fullRegister = _mm_unpacklo_epi64 (halfLeft, halfRight);
Packit Service 6754ca
        storeSSE<WRITE_PTR_ALIGNED> (writePtrSSE, fullRegister);
Packit Service 6754ca
        ++writePtrSSE;
Packit Service 6754ca
Packit Service 6754ca
        //
Packit Service 6754ca
        // Second register: B3 R4 G4 B4 R5 G5 B5 R6
Packit Service 6754ca
        //
Packit Service 6754ca
Packit Service 6754ca
        // Left Part (B3, R4, G4, B4)
Packit Service 6754ca
        quarterLeft  = _mm_shufflehi_epi16 (redBlueRegister,
Packit Service 6754ca
                                            _MM_SHUFFLE(0, 3, 2, 1));
Packit Service 6754ca
        quarterRight = _mm_shufflehi_epi16 (greenBlueRegister,
Packit Service 6754ca
                                            _MM_SHUFFLE(1, 0, 3, 2));
Packit Service 6754ca
        halfLeft     = _mm_unpackhi_epi32 (quarterLeft, quarterRight);
Packit Service 6754ca
Packit Service 6754ca
        // Update the registers
Packit Service 6754ca
        redGreenRegister  = _mm_unpackhi_epi16 (redRegister, greenRegister);
Packit Service 6754ca
        redBlueRegister   = _mm_unpackhi_epi16 (redRegister, blueRegister);
Packit Service 6754ca
        greenBlueRegister = _mm_unpackhi_epi16 (greenRegister, blueRegister);
Packit Service 6754ca
Packit Service 6754ca
        // Right Part (R5 G5 B5 R6)
Packit Service 6754ca
        quarterRight = _mm_shufflelo_epi16 (redBlueRegister,
Packit Service 6754ca
                                            _MM_SHUFFLE(3,0,2,1));
Packit Service 6754ca
        halfRight    = _mm_unpacklo_epi32 (redGreenRegister, quarterRight);
Packit Service 6754ca
Packit Service 6754ca
        fullRegister = _mm_unpacklo_epi64 (halfLeft, halfRight);
Packit Service 6754ca
        storeSSE<WRITE_PTR_ALIGNED> (writePtrSSE, fullRegister);
Packit Service 6754ca
        ++writePtrSSE;
Packit Service 6754ca
Packit Service 6754ca
        //
Packit Service 6754ca
        // Third register: G6 B6 R7 G7 B7 R8 G8 B8
Packit Service 6754ca
        //
Packit Service 6754ca
Packit Service 6754ca
        // Left part (G6 B6 R7 G7)
Packit Service 6754ca
        quarterLeft  = _mm_shuffle_epi32 (greenBlueRegister,
Packit Service 6754ca
                                          _MM_SHUFFLE(3,2,0,1));
Packit Service 6754ca
        quarterRight = _mm_shuffle_epi32 (redGreenRegister,
Packit Service 6754ca
                                          _MM_SHUFFLE(3,0,1,2));
Packit Service 6754ca
        halfLeft     = _mm_unpacklo_epi32 (quarterLeft, quarterRight);
Packit Service 6754ca
Packit Service 6754ca
        // Right part (B7 R8 G8 B8)
Packit Service 6754ca
        quarterLeft  = _mm_shufflehi_epi16 (redBlueRegister,
Packit Service 6754ca
                                            _MM_SHUFFLE(0, 3, 2, 1));
Packit Service 6754ca
        quarterRight = _mm_shufflehi_epi16 (greenBlueRegister,
Packit Service 6754ca
                                            _MM_SHUFFLE(1, 0, 3, 2));
Packit Service 6754ca
        halfRight    = _mm_unpackhi_epi32 (quarterLeft, quarterRight);
Packit Service 6754ca
Packit Service 6754ca
        fullRegister = _mm_unpacklo_epi64 (halfLeft, halfRight);
Packit Service 6754ca
        storeSSE<WRITE_PTR_ALIGNED> (writePtrSSE, fullRegister);
Packit Service 6754ca
        ++writePtrSSE;
Packit Service 6754ca
Packit Service 6754ca
        //
Packit Service 6754ca
        // Increment read pointers
Packit Service 6754ca
        //
Packit Service 6754ca
        ++readPtrSSEBlue;
Packit Service 6754ca
        ++readPtrSSEGreen;
Packit Service 6754ca
        ++readPtrSSERed;
Packit Service 6754ca
    }
Packit Service 6754ca
}
Packit Service 6754ca
Packit Service 6754ca
//
Packit Service 6754ca
// Not using SSE intrinsics.  This is still faster than the alternative
Packit Service 6754ca
// because we have multiple read pointers and therefore we are able to
Packit Service 6754ca
// take advantage of data locality for write operations.
Packit Service 6754ca
//
Packit Service 6754ca
EXR_FORCEINLINE 
Packit Service 6754ca
void
Packit Service 6754ca
writeToRGBNormal (unsigned short*& readPtrRed,
Packit Service 6754ca
                  unsigned short*& readPtrGreen,
Packit Service 6754ca
                  unsigned short*& readPtrBlue,
Packit Service 6754ca
                  unsigned short*& writePtr,
Packit Service 6754ca
                  const size_t& pixelsToCopy)
Packit Service 6754ca
{
Packit Service 6754ca
    for (size_t i = 0; i < pixelsToCopy; ++i)
Packit Service 6754ca
    {
Packit Service 6754ca
        *(writePtr++) = *(readPtrRed++);
Packit Service 6754ca
        *(writePtr++) = *(readPtrGreen++);
Packit Service 6754ca
        *(writePtr++) = *(readPtrBlue++);
Packit Service 6754ca
    }
Packit Service 6754ca
}
Packit Service 6754ca
Packit Service 6754ca
//
Packit Service 6754ca
// Determine which (template) version to use by checking whether pointers
Packit Service 6754ca
// are aligned
Packit Service 6754ca
//
Packit Service 6754ca
EXR_FORCEINLINE 
Packit Service 6754ca
void optimizedWriteToRGB (unsigned short*& readPtrRed,
Packit Service 6754ca
                          unsigned short*& readPtrGreen,
Packit Service 6754ca
                          unsigned short*& readPtrBlue,
Packit Service 6754ca
                          unsigned short*& writePtr,
Packit Service 6754ca
                          const size_t& pixelsToCopySSE,
Packit Service 6754ca
                          const size_t& pixelsToCopyNormal)
Packit Service 6754ca
{
Packit Service 6754ca
    bool readPtrAreAligned = true;
Packit Service 6754ca
Packit Service 6754ca
    readPtrAreAligned &= isPointerSSEAligned(readPtrRed);
Packit Service 6754ca
    readPtrAreAligned &= isPointerSSEAligned(readPtrGreen);
Packit Service 6754ca
    readPtrAreAligned &= isPointerSSEAligned(readPtrBlue);
Packit Service 6754ca
Packit Service 6754ca
    bool writePtrIsAligned = isPointerSSEAligned(writePtr);
Packit Service 6754ca
Packit Service 6754ca
    if (!readPtrAreAligned && !writePtrIsAligned)
Packit Service 6754ca
    {
Packit Service 6754ca
        writeToRGBSSETemplate<false, false> ((__m128i*&)readPtrRed,
Packit Service 6754ca
                                             (__m128i*&)readPtrGreen,
Packit Service 6754ca
                                             (__m128i*&)readPtrBlue,
Packit Service 6754ca
                                             (__m128i*&)writePtr,
Packit Service 6754ca
                                             pixelsToCopySSE);
Packit Service 6754ca
    }
Packit Service 6754ca
    else if (!readPtrAreAligned && writePtrIsAligned)
Packit Service 6754ca
    {
Packit Service 6754ca
        writeToRGBSSETemplate<false, true> ((__m128i*&)readPtrRed,
Packit Service 6754ca
                                            (__m128i*&)readPtrGreen,
Packit Service 6754ca
                                            (__m128i*&)readPtrBlue,
Packit Service 6754ca
                                            (__m128i*&)writePtr,
Packit Service 6754ca
                                            pixelsToCopySSE);
Packit Service 6754ca
    }
Packit Service 6754ca
    else if (readPtrAreAligned && !writePtrIsAligned)
Packit Service 6754ca
    {
Packit Service 6754ca
        writeToRGBSSETemplate<true, false> ((__m128i*&)readPtrRed,
Packit Service 6754ca
                                            (__m128i*&)readPtrGreen,
Packit Service 6754ca
                                            (__m128i*&)readPtrBlue,
Packit Service 6754ca
                                            (__m128i*&)writePtr,
Packit Service 6754ca
                                            pixelsToCopySSE);
Packit Service 6754ca
    }
Packit Service 6754ca
    else if (readPtrAreAligned && writePtrIsAligned)
Packit Service 6754ca
    {
Packit Service 6754ca
        writeToRGBSSETemplate<true, true> ((__m128i*&)readPtrRed,
Packit Service 6754ca
                                           (__m128i*&)readPtrGreen,
Packit Service 6754ca
                                           (__m128i*&)readPtrBlue,
Packit Service 6754ca
                                           (__m128i*&)writePtr,
Packit Service 6754ca
                                           pixelsToCopySSE);
Packit Service 6754ca
    }
Packit Service 6754ca
Packit Service 6754ca
Packit Service 6754ca
    writeToRGBNormal (readPtrRed, readPtrGreen, readPtrBlue,
Packit Service 6754ca
                      writePtr, pixelsToCopyNormal);
Packit Service 6754ca
}
Packit Service 6754ca
Packit Service 6754ca
Packit Service 6754ca
Packit Service 6754ca
Packit Service 6754ca
#else // ! defined IMF_HAVE_SSE2
Packit Service 6754ca
Packit Service 6754ca
#endif // defined IMF_HAVE_SSE2
Packit Service 6754ca
Packit Service 6754ca
Packit Service 6754ca
OPENEXR_IMF_INTERNAL_NAMESPACE_HEADER_EXIT
Packit Service 6754ca
Packit Service 6754ca
#endif