Blame gst/deinterlace/tvtime/tomsmocomp/SearchLoopBottom.inc

Packit 1f69a5
// -*- c++ -*-       
Packit 1f69a5
Packit 1f69a5
// Version for non-SSE2
Packit 1f69a5
Packit 1f69a5
#ifndef IS_C
Packit 1f69a5
Packit 1f69a5
#ifdef SKIP_SEARCH
Packit 1f69a5
            "movq    %%mm6, %%mm0\n\t"            // search skipped: output (mm0) is just the result of our weird bob (mm6)
Packit 1f69a5
#else
Packit 1f69a5
Packit 1f69a5
Packit 1f69a5
            // JA 9/Dec/2002
Packit 1f69a5
            // failed experiment
Packit 1f69a5
            // but leave in placeholder for me to play about
Packit 1f69a5
#ifdef DONT_USE_STRANGE_BOB
Packit 1f69a5
            // Use the best weave if diffs less than 10 as that
Packit 1f69a5
            // means the image is still or moving cleanly
Packit 1f69a5
            // if there is motion we will clip which will catch anything
Packit 1f69a5
            "psubusb "_FOURS", %%mm7\n\t"          // sets bits to zero if weave diff < 4
Packit 1f69a5
            "pxor    %%mm0, %%mm0\n\t"
Packit 1f69a5
            "pcmpeqb %%mm0, %%mm7\n\t"            // all ff where weave better, else 00
Packit 1f69a5
            "pcmpeqb %%mm7, %%mm0\n\t"            // all ff where bob better, else 00
Packit 1f69a5
            "pand    %%mm6, %%mm0\n\t"            // use bob for these pixel values
Packit 1f69a5
            "pand    %%mm5, %%mm7\n\t"            // use weave for these
Packit 1f69a5
            "por     %%mm7, %%mm0\n\t"            // combine both
Packit 1f69a5
#else
Packit 1f69a5
            // Use the better of bob or weave.
            // Per byte: build a mask from the biased weave diff in mm7, then blend
            // the bob value (mm6) and the weave value (mm5) into mm0 with it.
            // NOTE(review): mm4/mm5/mm6/mm7 are loaded outside this fragment; their
            // roles here are taken from the existing comments -- confirm.
Packit 1f69a5
            //      pminub  mm4, TENS           // the most we care about
Packit 1f69a5
            V_PMINUB ("%%mm4", _TENS, "%%mm0")   // the most we care about
Packit 1f69a5
            
Packit 1f69a5
            "psubusb %%mm4, %%mm7\n\t"            // forgive that much from weave est? (unsigned saturating subtract)
Packit 1f69a5
            "psubusb "_FOURS", %%mm7\n\t"       // bias it a bit toward weave
Packit 1f69a5
            "pxor    %%mm0, %%mm0\n\t"            // mm0 = 0, the reference for both compares
Packit 1f69a5
            "pcmpeqb %%mm0, %%mm7\n\t"            // all ff where weave better (diff saturated to 0), else 00
Packit 1f69a5
            "pcmpeqb %%mm7, %%mm0\n\t"            // all ff where bob better (inverse of the mm7 mask), else 00
Packit 1f69a5
            "pand    %%mm6, %%mm0\n\t"            // use bob for these pixel values
Packit 1f69a5
            "pand    %%mm5, %%mm7\n\t"            // use weave for these
Packit 1f69a5
            "por     %%mm7, %%mm0\n\t"            // combine both
Packit 1f69a5
#endif
Packit 1f69a5
            
Packit 1f69a5
            
Packit 1f69a5
                //      pminub  mm0, Max_Vals       // but clip to catch the stray error
Packit 1f69a5
                V_PMINUB ("%%mm0", _Max_Vals, "%%mm1") // but clip to catch the stray error
Packit 1f69a5
                //      pmaxub  mm0, Min_Vals
Packit 1f69a5
                V_PMAXUB ("%%mm0", _Min_Vals)
Packit 1f69a5
                
Packit 1f69a5
#endif
Packit 1f69a5
Packit 1f69a5
Packit 1f69a5
            MOVX"     "_pDest", %%"XAX"\n\t"
Packit 1f69a5
                
Packit 1f69a5
#ifdef USE_VERTICAL_FILTER
Packit 1f69a5
            // Vertical filter: average the result in mm0 with the quadword at
            // (XBX) and, from a saved copy, with the quadword at (XBX, XCX);
            // each average is written out with V_MOVNTQ.
            "movq    %%mm0, %%mm1\n\t"            // keep a copy for the second average
Packit 1f69a5
            //      pavgb   mm0, qword ptr["XBX"]
Packit 1f69a5
            V_PAVGB ("%%mm0", "(%%"XBX")", "%%mm2", _ShiftMask)
Packit 1f69a5
            //      movntq  qword ptr["XAX"+"XDX"], mm0
Packit 1f69a5
            // register names need a doubled %% inside an extended-asm template
            V_MOVNTQ ("(%%"XAX", %%"XDX")", "%%mm0")
Packit 1f69a5
            //      pavgb   mm1, qword ptr["XBX"+"XCX"]
Packit 1f69a5
            V_PAVGB ("%%mm1", "(%%"XBX", %%"XCX")", "%%mm2", _ShiftMask)
Packit 1f69a5
	    //FIXME: XDX or XAX!!
	    // NOTE(review): the add adjusts XBX while both stores address
	    // (XAX, XDX), so the second store targets the same address as the
	    // first; the add probably belongs on the destination register --
	    // left unchanged, confirm. It is also a hard-coded 64-bit "addq",
	    // unlike the MOVX/LEAX/CMPX width macros used elsewhere.
Packit 1f69a5
            "addq   "_dst_pitchw", %%"XBX"\n\t"   // terminate the instruction so the next one starts on its own line
Packit 1f69a5
            //      movntq  qword ptr["XAX"+"XDX"], mm1
Packit 1f69a5
            V_MOVNTQ ("(%%"XAX", %%"XDX")", "%%mm1")
Packit 1f69a5
#else
Packit 1f69a5
                
Packit 1f69a5
            //      movntq  qword ptr["XAX"+"XDX"], mm0
Packit 1f69a5
                V_MOVNTQ ("(%%"XAX", %%"XDX")", "%%mm0")
Packit 1f69a5
#endif
Packit 1f69a5
                
Packit 1f69a5
           LEAX"    8(%%"XDX"), %%"XDX"\n\t"       // bump offset pointer
Packit 1f69a5
           CMPX"    "_Last8", %%"XDX"\n\t"       // done with line?
Packit 1f69a5
           "jb      1b\n\t"                    // y
Packit 1f69a5
Packit 1f69a5
           MOVX" "_oldbx", %%"XBX"\n\t"
Packit 1f69a5
Packit 1f69a5
        : /* no outputs */
Packit 1f69a5
Packit 1f69a5
        : "m"(pBob),
Packit 1f69a5
          "m"(src_pitch2),
Packit 1f69a5
          "m"(ShiftMask),
Packit 1f69a5
          "m"(pDest),
Packit 1f69a5
          "m"(dst_pitchw),
Packit 1f69a5
          "m"(Last8),
Packit 1f69a5
          "m"(pSrc),
Packit 1f69a5
          "m"(pSrcP),
Packit 1f69a5
          "m"(pBobP),
Packit 1f69a5
          "m"(DiffThres),
Packit 1f69a5
          "m"(Min_Vals),
Packit 1f69a5
          "m"(Max_Vals),
Packit 1f69a5
          "m"(FOURS),
Packit 1f69a5
          "m"(TENS),
Packit 1f69a5
          "m"(ONES),
Packit 1f69a5
          "m"(UVMask),
Packit 1f69a5
          "m"(Max_Mov),
Packit 1f69a5
          "m"(YMask),
Packit 1f69a5
          "m"(oldbx)
Packit 1f69a5
Packit 1f69a5
        : XAX, XCX, XDX, XSI, XDI,
Packit 1f69a5
          "st", "st(1)", "st(2)", "st(3)", "st(4)", "st(5)", "st(6)", "st(7)",
Packit 1f69a5
#ifdef __MMX__
Packit 1f69a5
          "mm0", "mm1", "mm2", "mm3", "mm4", "mm5", "mm6", "mm7",
Packit 1f69a5
#endif
Packit 1f69a5
          "memory", "cc"
Packit 1f69a5
        );
Packit 1f69a5
Packit 1f69a5
        // adjust for next line
Packit 1f69a5
        pSrc  += src_pitch2;
Packit 1f69a5
        pSrcP += src_pitch2;
Packit 1f69a5
        pDest += dst_pitch2;
Packit 1f69a5
        pBob  += src_pitch2;
Packit 1f69a5
        pBobP += src_pitch2;
Packit 1f69a5
    }
Packit 1f69a5
    
Packit 1f69a5
    return 0;
Packit 1f69a5
#else
Packit 1f69a5
#ifdef SKIP_SEARCH
Packit 1f69a5
            out[0] = best[0];            // just use the results of our weird bob
Packit 1f69a5
	    out[1] = best[1];
Packit 1f69a5
#else
Packit 1f69a5
            // Forgive up to 10 of each diff and bias by a further 4 toward
            // weave: weave[] is used wherever the adjusted diff goes negative,
            // best[] otherwise.
            // NOTE(review): the "< 0" tests below only work if diff[] has a
            // signed type -- confirm against its declaration.
            diff[0] = diff[0] - MIN (diff[0], 10) - 4;
Packit 1f69a5
	    // MIN takes (value, cap), exactly as for diff[0] above
	    diff[1] = diff[1] - MIN (diff[1], 10) - 4;
Packit 1f69a5
	    if (diff[0] < 0)
Packit 1f69a5
	      out[0] = weave[0];
Packit 1f69a5
	    else
Packit 1f69a5
	      out[0] = best[0];
Packit 1f69a5
	    
Packit 1f69a5
	    if (diff[1] < 0)
Packit 1f69a5
	      out[1] = weave[1];
Packit 1f69a5
	    else
Packit 1f69a5
	      out[1] = best[1];
Packit 1f69a5
Packit 1f69a5
Packit 1f69a5
	    // but clip to catch the stray error
	    out[0] = CLAMP (out[0], MinVals[0], MaxVals[0]);
Packit 1f69a5
	    out[1] = CLAMP (out[1], MinVals[1], MaxVals[1]);
Packit 1f69a5
#endif
Packit 1f69a5
Packit 1f69a5
#ifdef USE_VERTICAL_FILTER
Packit 1f69a5
            pDest[x] = (out[0] + pBob[0]) / 2;
Packit 1f69a5
	    pDest[x + dst_pitchw] = (pBob[src_pitch2] + out[0]) / 2;
Packit 1f69a5
            pDest[x + 1] = (out[1] + pBob[1]) / 2;
Packit 1f69a5
	    pDest[x + 1 + dst_pitchw] = (pBob[src_pitch2 + 1] + out[1]) / 2;
Packit 1f69a5
#else
Packit 1f69a5
            pDest[x] = out[0];
Packit 1f69a5
	    pDest[x+1] = out[1];
Packit 1f69a5
#endif
Packit 1f69a5
            pBob += 2;
Packit 1f69a5
            pBobP += 2;
Packit 1f69a5
            pSrc += 2;
Packit 1f69a5
            pSrcP += 2;
Packit 1f69a5
	}
Packit 1f69a5
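        // adjust for next line: the loop above advanced pSrc, pSrcP, pBob and
        // pBobP by 2 per iteration, so rebuild them (and pDest) from their base
        // pointers for row y+1 instead of incrementing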
Packit 1f69a5
        pSrc  = src_pitch2 * (y+1) + pWeaveSrc;
Packit 1f69a5
        pSrcP = src_pitch2 * (y+1) + pWeaveSrcP;
Packit 1f69a5
        pDest = dst_pitch2 * (y+1) + pWeaveDest + dst_pitch2;
Packit 1f69a5
Packit 1f69a5
Packit 1f69a5
	if (TopFirst)
Packit 1f69a5
	{
Packit 1f69a5
		pBob = pCopySrc + src_pitch2;
Packit 1f69a5
		pBobP = pCopySrcP + src_pitch2;
Packit 1f69a5
	}
Packit 1f69a5
	else
Packit 1f69a5
	{
Packit 1f69a5
		pBob =  pCopySrc;
Packit 1f69a5
		pBobP =  pCopySrcP;
Packit 1f69a5
	}
Packit 1f69a5
Packit 1f69a5
        pBob  += src_pitch2 * (y+1);
Packit 1f69a5
        pBobP += src_pitch2 * (y+1);
Packit 1f69a5
    }
Packit 1f69a5
    
Packit 1f69a5
    return 0;
Packit 1f69a5
Packit 1f69a5
#endif