|
Packit |
a4058c |
/*
|
|
Packit |
a4058c |
* Copyright (C) 2000 Red Hat, Inc
|
|
Packit |
a4058c |
*
|
|
Packit |
a4058c |
* This library is free software; you can redistribute it and/or
|
|
Packit |
a4058c |
* modify it under the terms of the GNU Lesser General Public
|
|
Packit |
a4058c |
* License as published by the Free Software Foundation; either
|
|
Packit |
a4058c |
* version 2 of the License, or (at your option) any later version.
|
|
Packit |
a4058c |
*
|
|
Packit |
a4058c |
* This library is distributed in the hope that it will be useful,
|
|
Packit |
a4058c |
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
Packit |
a4058c |
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
Packit |
a4058c |
* Lesser General Public License for more details.
|
|
Packit |
a4058c |
*
|
|
Packit |
a4058c |
* You should have received a copy of the GNU Lesser General Public
|
|
Packit |
a4058c |
* License along with this library; if not, see <http://www.gnu.org/licenses/>.
|
|
Packit |
a4058c |
*/
|
|
Packit |
a4058c |
.file "composite_line_22_4a4_mmx.S"
|
|
Packit |
a4058c |
.version "01.01"
|
|
Packit |
a4058c |
gcc2_compiled.:
|
|
Packit |
a4058c |
.text
|
|
Packit |
a4058c |
.align 16 /* align the function entry that follows */
|
|
Packit |
a4058c |
|
|
Packit |
a4058c |
#if !defined(__MINGW32__) && !defined(__CYGWIN__) && !defined(__INTERIX)
/* This branch exports the entry point as _pixops_composite_line_22_4a4_mmx, marks it as a function symbol and emits the GNU-stack note. */
|
|
Packit |
a4058c |
|
|
Packit |
a4058c |
/* Magic indicating no need for an executable stack (an empty .note.GNU-stack section; not emitted for powerpc64 or ia64) */
|
|
Packit |
a4058c |
#if !defined __powerpc64__ && !defined __ia64__
|
|
Packit |
a4058c |
.section .note.GNU-stack; .previous
|
|
Packit |
a4058c |
#endif /* !__powerpc64__ && !__ia64__ */
|
|
Packit |
a4058c |
|
|
Packit |
a4058c |
.globl _pixops_composite_line_22_4a4_mmx
|
|
Packit |
a4058c |
.type _pixops_composite_line_22_4a4_mmx,@function
|
|
Packit |
a4058c |
_pixops_composite_line_22_4a4_mmx:
|
|
Packit |
a4058c |
|
|
Packit |
a4058c |
#else
/* MinGW / Cygwin / Interix: same entry point, exported with one more leading underscore and without the .type directive or the stack note. */
|
|
Packit |
a4058c |
|
|
Packit |
a4058c |
.globl __pixops_composite_line_22_4a4_mmx
|
|
Packit |
a4058c |
__pixops_composite_line_22_4a4_mmx:
|
|
Packit |
a4058c |
|
|
Packit |
a4058c |
#endif /* !__MINGW32__ && !__CYGWIN__ && !__INTERIX */
|
|
Packit |
a4058c |
/*
|
|
Packit |
a4058c |
* Arguments (on the stack, addressed relative to %ebp after the prologue)
|
|
Packit |
a4058c |
*
|
|
Packit |
a4058c |
* weights: 8(%ebp) base of the weight table; .loop reads four 8-byte vectors at byte offset ((x >> 12) & 0xf) * 32
|
|
Packit |
a4058c |
* p: 12(%ebp) %esi pointer to the 4-byte pixel that is read and rewritten each iteration; its final value is returned in %eax
|
|
Packit |
a4058c |
* q1: 16(%ebp) first array of 4-byte source pixels, indexed by x_scaled (presumably one source row - confirm against caller)
|
|
Packit |
a4058c |
* q2: 20(%ebp) second array of 4-byte source pixels, indexed by x_scaled (presumably the adjacent source row - confirm against caller)
|
|
Packit |
a4058c |
* xstep: 24(%ebp) added to x after every output pixel
|
|
Packit |
a4058c |
* p_end: 28(%ebp) the loop ends when p reaches this address
|
|
Packit |
a4058c |
* xinit: 32(%ebp) initial value of x; x >> 16 is the source pixel index
|
|
Packit |
a4058c |
*
|
|
Packit |
a4058c |
*/
|
|
Packit |
a4058c |
/*
|
|
Packit |
a4058c |
* Function call entry
|
|
Packit |
a4058c |
*/
|
|
Packit |
a4058c |
pushl %ebp
|
|
Packit |
a4058c |
movl %esp,%ebp
|
|
Packit |
a4058c |
subl $28,%esp /* room for locals; only -24(%ebp) is referenced below */
|
|
Packit |
a4058c |
pushl %edi /* %edi, %esi, %ebx are saved here and popped in reverse order at .out */
|
|
Packit |
a4058c |
pushl %esi
|
|
Packit |
a4058c |
pushl %ebx
|
|
Packit |
a4058c |
/* Locals:
|
|
Packit |
a4058c |
* int x %ebx
|
|
Packit |
a4058c |
* int x_scaled -24(%ebp)
|
|
Packit |
a4058c |
*/
|
|
Packit |
a4058c |
|
|
Packit |
a4058c |
/*
|
|
Packit |
a4058c |
* Setup
|
|
Packit |
a4058c |
*/
|
|
Packit |
a4058c |
/* Initialize variables */
|
|
Packit |
a4058c |
movl 32(%ebp),%ebx /* x = xinit */
|
|
Packit |
a4058c |
movl 32(%ebp),%edx
|
|
Packit |
a4058c |
sarl $16,%edx /* %edx = xinit >> 16 */
|
|
Packit |
a4058c |
movl 12(%ebp),%esi /* %esi = p */
|
|
Packit |
a4058c |
|
|
Packit |
a4058c |
movl %edx,-24(%ebp) /* x_scaled = xinit >> 16 */
|
|
Packit |
a4058c |
|
|
Packit |
a4058c |
cmpl 28(%ebp),%esi
|
|
Packit |
a4058c |
jnb .out /* nothing to do when p >= p_end (unsigned compare) */
|
|
Packit |
a4058c |
|
|
Packit |
a4058c |
/* Load initial values into %mm1, %mm3 */
/* Each 4-byte pixel is unpacked to four 16-bit words and multiplied word-wise by {a, a, a, 0xff}, where a is the pixel's top byte (presumably its alpha channel); the products are shifted right by 8 further down. */
|
|
Packit |
a4058c |
shll $2, %edx /* byte offset = x_scaled * 4 */
|
|
Packit |
a4058c |
|
|
Packit |
a4058c |
pxor %mm4, %mm4 /* %mm4 = 0, supplies the high bytes for punpcklbw */
|
|
Packit |
a4058c |
|
|
Packit |
a4058c |
movl 16(%ebp),%edi /* %edi = q1 */
|
|
Packit |
a4058c |
movl (%edi, %edx), %eax /* pixel as an integer, to extract its top byte */
|
|
Packit |
a4058c |
movd (%edi, %edx), %mm5 /* the same pixel into an MMX register */
|
|
Packit |
a4058c |
punpcklbw %mm4, %mm5 /* bytes -> 16-bit words */
|
|
Packit |
a4058c |
shrl $24, %eax /* a = top byte of the pixel */
|
|
Packit |
a4058c |
movl $0x010101, %ecx
|
|
Packit |
a4058c |
mull %ecx /* %eax = a replicated into the low three bytes; %edx is overwritten */
|
|
Packit |
a4058c |
orl $0xff000000, %eax /* top byte = 0xff */
|
|
Packit |
a4058c |
movd %eax, %mm1
|
|
Packit |
a4058c |
punpcklbw %mm4, %mm1 /* %mm1 = {a, a, a, 0xff} as words */
|
|
Packit |
a4058c |
pmullw %mm5,%mm1 /* low 16 bits of each word product */
|
|
Packit |
a4058c |
|
|
Packit |
a4058c |
movl -24(%ebp),%edx /* reload x_scaled: mull overwrote %edx */
|
|
Packit |
a4058c |
shll $2, %edx
|
|
Packit |
a4058c |
|
|
Packit |
a4058c |
movl 20(%ebp),%edi /* %edi = q2; same steps as above, result in %mm3 */
|
|
Packit |
a4058c |
movl (%edi, %edx), %eax
|
|
Packit |
a4058c |
movd (%edi, %edx), %mm5
|
|
Packit |
a4058c |
punpcklbw %mm4, %mm5
|
|
Packit |
a4058c |
shrl $24, %eax
|
|
Packit |
a4058c |
movl $0x010101, %ecx
|
|
Packit |
a4058c |
mull %ecx
|
|
Packit |
a4058c |
orl $0xff000000, %eax
|
|
Packit |
a4058c |
movd %eax, %mm3
|
|
Packit |
a4058c |
punpcklbw %mm4, %mm3
|
|
Packit |
a4058c |
pmullw %mm5,%mm3
|
|
Packit |
a4058c |
|
|
Packit |
a4058c |
psrlw $8,%mm1 /* scale both products back down by 256 */
|
|
Packit |
a4058c |
psrlw $8,%mm3
|
|
Packit |
a4058c |
|
|
Packit |
a4058c |
addl $65536,%ebx /* x += 1 << 16, i.e. x >> 16 now names the next source pixel */
|
|
Packit |
a4058c |
movl %ebx,%edx
|
|
Packit |
a4058c |
sarl $16,%edx /* %edx = x >> 16; .newx stores it as the new x_scaled */
|
|
Packit |
a4058c |
|
|
Packit |
a4058c |
jmp .newx /* .newx moves %mm1/%mm3 into %mm0/%mm2 and loads the next pair */
|
|
Packit |
a4058c |
.p2align 4,,7
|
|
Packit |
a4058c |
.loop:
/*
 * One output pixel per pass.
 * On entry: %ebx = x, %esi = p, %edi = weights (set at the end of .newx),
 * %mm0/%mm1 = previous/current q1 pixel and %mm2/%mm3 = previous/current
 * q2 pixel, each held as four 16-bit words.
 */
|
|
Packit |
a4058c |
/* int x_index = (x & 0xf000) >> 12, then scaled by 32 bytes per weight entry (hence the combined shift by 7) */
|
|
Packit |
a4058c |
movl %ebx,%eax
|
|
Packit |
a4058c |
andl $0xf000,%eax
|
|
Packit |
a4058c |
shrl $7,%eax /* (>> 12) << 5 */
|
|
Packit |
a4058c |
|
|
Packit |
a4058c |
movq (%edi,%eax),%mm4 /* four weight vectors at +0, +8, +16, +24 of the entry */
|
|
Packit |
a4058c |
pmullw %mm0,%mm4
|
|
Packit |
a4058c |
movq 8(%edi,%eax),%mm5
|
|
Packit |
a4058c |
pmullw %mm1,%mm5
|
|
Packit |
a4058c |
movq 16(%edi,%eax),%mm6
|
|
Packit |
a4058c |
movq 24(%edi,%eax),%mm7
|
|
Packit |
a4058c |
pmullw %mm2,%mm6
|
|
Packit |
a4058c |
pmullw %mm3,%mm7
|
|
Packit |
a4058c |
paddw %mm4, %mm5
|
|
Packit |
a4058c |
paddw %mm6, %mm7
|
|
Packit |
a4058c |
paddw %mm5, %mm7 /* %mm7 = sum of the four weighted pixels */
|
|
Packit |
a4058c |
|
|
Packit |
a4058c |
movl $0xffff,%ecx
|
|
Packit |
a4058c |
movd %ecx,%mm4
|
|
Packit |
a4058c |
psllq $48,%mm4 /* %mm4 = 0xffff in the top word only */
|
|
Packit |
a4058c |
movq %mm4,%mm6 /* %mm6 keeps a copy of that mask */
|
|
Packit |
a4058c |
psubw %mm7,%mm4
|
|
Packit |
a4058c |
pand %mm6,%mm4 /* top word = 0xffff - top word of %mm7; other words 0 */
|
|
Packit |
a4058c |
|
|
Packit |
a4058c |
movq %mm4,%mm5
|
|
Packit |
a4058c |
psrlq $16,%mm4
|
|
Packit |
a4058c |
por %mm4,%mm5
|
|
Packit |
a4058c |
psrlq $32,%mm5
|
|
Packit |
a4058c |
por %mm4,%mm5 /* %mm5 = that value in the three low words, top word 0 */
|
|
Packit |
a4058c |
|
|
Packit |
a4058c |
psrlw $8,%mm5 /* keep only its high byte in each word */
|
|
Packit |
a4058c |
|
|
Packit |
a4058c |
movd (%esi),%mm7 /* load the pixel currently at p; this replaces the weighted sum in %mm7 */
|
|
Packit |
a4058c |
pxor %mm4,%mm4 /* %mm4 = 0 again; .newx relies on this */
|
|
Packit |
a4058c |
punpcklbw %mm4, %mm7 /* bytes -> 16-bit words */
|
|
Packit |
a4058c |
|
|
Packit |
a4058c |
pmullw %mm7,%mm5 /* pixel at p times the complement computed above */
|
|
Packit |
a4058c |
|
|
Packit |
a4058c |
/* x += x_step; */
|
|
Packit |
a4058c |
addl 24(%ebp),%ebx
|
|
Packit |
a4058c |
/* candidate x_scaled = x >> 16 (kept in %edx; compared with the stored x_scaled below) */
|
|
Packit |
a4058c |
movl %ebx,%edx
|
|
Packit |
a4058c |
sarl $16,%edx
|
|
Packit |
a4058c |
|
|
Packit |
a4058c |
/* NOTE(review): %mm6 still holds only the 0xffff top-word mask and %mm7 was reloaded from (%esi) above, so as written the weighted source sum does not reach the stored pixel - confirm against the C reference implementation. */
paddw %mm5,%mm6
|
|
Packit |
a4058c |
|
|
Packit |
a4058c |
psrlw $8,%mm6
|
|
Packit |
a4058c |
packuswb %mm6, %mm6 /* words -> bytes with unsigned saturation */
|
|
Packit |
a4058c |
movd %mm6,(%esi) /* store the 4-byte result at p */
|
|
Packit |
a4058c |
|
|
Packit |
a4058c |
addl $4, %esi /* p += 4 */
|
|
Packit |
a4058c |
|
|
Packit |
a4058c |
cmpl %esi,28(%ebp)
|
|
Packit |
a4058c |
je .out /* exits only when p == p_end exactly (the entry check uses jnb instead) */
|
|
Packit |
a4058c |
|
|
Packit |
a4058c |
cmpl %edx,-24(%ebp)
|
|
Packit |
a4058c |
je .loop /* same source pixel pair: reuse %mm0-%mm3; otherwise fall through to load new ones */
|
|
Packit |
a4058c |
|
|
Packit |
a4058c |
.newx:
/*
 * Reached when x >> 16 (in %edx) differs from the stored x_scaled, and once
 * from setup. Stores the new x_scaled, shifts the current pixel pair into
 * %mm0/%mm2, loads the pixels at the new index from q1 and q2 into
 * %mm1/%mm3, then reloads %edi with the weights pointer for .loop.
 */
|
|
Packit |
a4058c |
movl %edx,-24(%ebp) /* x_scaled = %edx */
|
|
Packit |
a4058c |
/*
|
|
Packit |
a4058c |
* Load the two new values into %mm1, %mm3, move old values into %mm0, %mm2
|
|
Packit |
a4058c |
*/
|
|
Packit |
a4058c |
movq %mm1, %mm0
|
|
Packit |
a4058c |
movq %mm3, %mm2
|
|
Packit |
a4058c |
|
|
Packit |
a4058c |
shll $2, %edx /* byte offset = x_scaled * 4 */
|
|
Packit |
a4058c |
|
|
Packit |
a4058c |
/* # %mm4 will always be already clear here (zeroed by the pxor in setup and by the pxor in .loop) */
|
|
Packit |
a4058c |
/* # pxor %mm4, %mm4 */
|
|
Packit |
a4058c |
|
|
Packit |
a4058c |
movl 16(%ebp),%edi /* %edi = q1 */
|
|
Packit |
a4058c |
movl (%edi, %edx), %eax /* pixel as an integer, to extract its top byte */
|
|
Packit |
a4058c |
movd (%edi, %edx), %mm5 /* the same pixel into an MMX register */
|
|
Packit |
a4058c |
punpcklbw %mm4, %mm5 /* bytes -> 16-bit words */
|
|
Packit |
a4058c |
shrl $24, %eax /* a = top byte of the pixel */
|
|
Packit |
a4058c |
movl $0x010101, %ecx
|
|
Packit |
a4058c |
mull %ecx /* %eax = a replicated into the low three bytes */
|
|
Packit |
a4058c |
/*
|
|
Packit |
a4058c |
* mull destroyed %edx, need to reconstitute
|
|
Packit |
a4058c |
*/
|
|
Packit |
a4058c |
movl -24(%ebp),%edx
|
|
Packit |
a4058c |
shll $2, %edx
|
|
Packit |
a4058c |
|
|
Packit |
a4058c |
orl $0xff000000, %eax /* top byte = 0xff */
|
|
Packit |
a4058c |
movd %eax, %mm1
|
|
Packit |
a4058c |
punpcklbw %mm4, %mm1 /* %mm1 = {a, a, a, 0xff} as words */
|
|
Packit |
a4058c |
pmullw %mm5,%mm1 /* low 16 bits of each word product */
|
|
Packit |
a4058c |
|
|
Packit |
a4058c |
movl 20(%ebp),%edi /* %edi = q2; same steps as above, result in %mm3 */
|
|
Packit |
a4058c |
movl (%edi, %edx), %eax
|
|
Packit |
a4058c |
movd (%edi, %edx), %mm5
|
|
Packit |
a4058c |
punpcklbw %mm4, %mm5
|
|
Packit |
a4058c |
shrl $24, %eax
|
|
Packit |
a4058c |
movl $0x010101, %ecx
|
|
Packit |
a4058c |
mull %ecx /* %edx is overwritten again; it is not read again before .loop recomputes it */
|
|
Packit |
a4058c |
orl $0xff000000, %eax
|
|
Packit |
a4058c |
movd %eax, %mm3
|
|
Packit |
a4058c |
punpcklbw %mm4, %mm3
|
|
Packit |
a4058c |
pmullw %mm5,%mm3
|
|
Packit |
a4058c |
|
|
Packit |
a4058c |
psrlw $8,%mm1 /* scale both products back down by 256 */
|
|
Packit |
a4058c |
psrlw $8,%mm3
|
|
Packit |
a4058c |
|
|
Packit |
a4058c |
movl 8(%ebp),%edi /* %edi = weights, the base register .loop indexes from */
|
|
Packit |
a4058c |
|
|
Packit |
a4058c |
jmp .loop
|
|
Packit |
a4058c |
|
|
Packit |
a4058c |
.out:
/* Common exit: return the current p, leave MMX state, undo the prologue. */
|
|
Packit |
a4058c |
movl %esi,%eax /* return value = current p */
|
|
Packit |
a4058c |
emms /* clear MMX state so later x87 floating-point code works */
|
|
Packit |
a4058c |
leal -40(%ebp),%esp /* 28 bytes of locals + three 4-byte pushes below %ebp */
|
|
Packit |
a4058c |
popl %ebx /* pops mirror the pushl %edi / %esi / %ebx sequence at entry */
|
|
Packit |
a4058c |
popl %esi
|
|
Packit |
a4058c |
popl %edi
|
|
Packit |
a4058c |
movl %ebp,%esp
|
|
Packit |
a4058c |
popl %ebp
|
|
Packit |
a4058c |
ret
|