Blame gdk-pixbuf/pixops/composite_line_22_4a4_mmx.S

/*
 * Copyright (C) 2000 Red Hat, Inc
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */
	.file	"composite_line_22_4a4_mmx.S"
	.version	"01.01"
gcc2_compiled.:
.text
	.align 16

#if !defined(__MINGW32__) && !defined(__CYGWIN__) && !defined(__INTERIX)

/* Magic indicating no need for an executable stack */
#if !defined __powerpc64__ && !defined __ia64__
.section .note.GNU-stack;  .previous
#endif

.globl _pixops_composite_line_22_4a4_mmx
	.type	 _pixops_composite_line_22_4a4_mmx,@function
_pixops_composite_line_22_4a4_mmx:

#else

.globl __pixops_composite_line_22_4a4_mmx
__pixops_composite_line_22_4a4_mmx:

#endif
/*
 * Composite one line of 4-byte source pixels over 4-byte destination
 * pixels, blending a 2x2 source neighbourhood per destination pixel.
 *
 * Syntax / target: GNU as, AT&T operand order, 32-bit x86 with MMX.
 * All arguments are read from the stack relative to %ebp.
 *
 * Arguments
 *
 * weights:	 8(%ebp)	table of 32-byte entries (four quadwords
 *				of 16-bit weights each); the entry is
 *				selected by bits 12..15 of x
 * p:	        12(%ebp)	%esi	destination pointer, advanced by 4
 * q1:	        16(%ebp)	first source row
 * q2:	        20(%ebp)	second source row
 * xstep:       24(%ebp)	added to x after every destination pixel
 * p_end:       28(%ebp)	end of the destination line
 * xinit:       32(%ebp)	initial x; x >> 16 is the source pixel index
 *
 * Returns (%eax): the value of p when the loop stopped.
 *
 * Per destination pixel:
 *   - each source pixel has its three low bytes multiplied by its own
 *     top byte (the alpha) and shifted right by 8; the alpha word
 *     itself is multiplied by 0xff and shifted right by 8
 *   - the four source pixels (previous/current column of q1 and q2)
 *     are multiplied by the four weight quadwords and summed
 *   - the destination colour words are scaled by
 *     (0xffff - summed alpha) >> 8 and added to the summed source
 *   - the result is shifted right by 8 and packed; the top byte of
 *     the stored pixel is always 0xff
 *
 * NOTE(review): the 16-bit sums only stay in range if the weights of
 * one entry add up to at most 256 -- confirm against the caller that
 * builds the table.
 *
 * NOTE(review): the setup and .newx read source index (x >> 16) and the
 * one after it, so both rows must be readable one pixel beyond the last
 * index used -- confirm against the caller.
 *
 * MMX register roles inside the loop:
 *   %mm0 / %mm1	previous / current premultiplied pixel of q1
 *   %mm2 / %mm3	previous / current premultiplied pixel of q2
 *   %mm4		zero whenever control reaches .newx
 */
/*
 * Function call entry
 */
	pushl %ebp
	movl %esp,%ebp
	subl $28,%esp
	pushl %edi
	pushl %esi
	pushl %ebx
/* Locals:
 * int x                      %ebx
 * int x_scaled             -24(%ebp)	source index currently in %mm1/%mm3
 */

/*
 * Setup
 */
/* Initialize variables */
	movl 32(%ebp),%ebx
	movl 32(%ebp),%edx
	sarl $16,%edx
	movl 12(%ebp),%esi

	movl %edx,-24(%ebp)

/* Nothing to do when p >= p_end (unsigned compare) */
	cmpl 28(%ebp),%esi
	jnb  .out

/* Load initial values into %mm1, %mm3 */
	shll $2, %edx			/* byte offset of pixel x_scaled */

	pxor %mm4, %mm4			/* zero, used for byte->word unpack */

/* q1[x_scaled]: premultiply by alpha into %mm1 */
	movl 16(%ebp),%edi
	movl (%edi, %edx), %eax
	movd (%edi, %edx), %mm5
	punpcklbw %mm4, %mm5
	shrl $24, %eax			/* alpha = top byte of the pixel */
	movl $0x010101, %ecx
	mull %ecx			/* replicate alpha into the 3 low bytes */
	orl  $0xff000000, %eax		/* multiplier 0xff for the alpha word */
	movd %eax, %mm1
	punpcklbw %mm4, %mm1
	pmullw %mm5,%mm1

/* mull destroyed %edx, rebuild the byte offset */
	movl -24(%ebp),%edx
	shll $2, %edx

/* q2[x_scaled]: premultiply by alpha into %mm3 */
	movl 20(%ebp),%edi
	movl (%edi, %edx), %eax
	movd (%edi, %edx), %mm5
	punpcklbw %mm4, %mm5
	shrl $24, %eax
	movl $0x010101, %ecx
	mull %ecx
	orl  $0xff000000, %eax
	movd %eax, %mm3
	punpcklbw %mm4, %mm3
	pmullw %mm5,%mm3

	psrlw $8,%mm1
	psrlw $8,%mm3

/*
 * Step x by one whole source pixel and enter through .newx so that the
 * pixels just loaded become the "previous" pair and the next ones the
 * "current" pair.
 */
	addl $65536,%ebx
	movl %ebx,%edx
	sarl $16,%edx

	jmp .newx
	.p2align 4,,7
.loop:
/*
 * Byte offset of the weight entry: ((x & 0xf000) >> 12) * 32,
 * folded into a single shift by 7.
 */
	movl %ebx,%eax
	andl $0xf000,%eax
	shrl $7,%eax

/* %mm7 = weighted sum of the four premultiplied source pixels */
	movq (%edi,%eax),%mm4
	pmullw %mm0,%mm4
	movq 8(%edi,%eax),%mm5
	pmullw %mm1,%mm5
	movq 16(%edi,%eax),%mm6
	movq 24(%edi,%eax),%mm7
	pmullw %mm2,%mm6
	pmullw %mm3,%mm7
	paddw %mm4, %mm5
	paddw %mm6, %mm7
	paddw %mm5, %mm7

/* %mm4 = 0xffff in the alpha word only; %mm6 keeps a copy as a mask */
	movl $0xffff,%ecx
	movd %ecx,%mm4
	psllq $48,%mm4
	movq %mm4,%mm6
/* %mm4 = (0xffff - summed alpha) in the alpha word, zero elsewhere */
	psubw %mm7,%mm4
	pand %mm6,%mm4
/*
 * Keep the summed source colours: %mm6 = source sums with the alpha
 * word forced to 0xffff.  Without this the source sum in %mm7 is
 * overwritten below and never reaches the output.
 */
	por %mm7,%mm6

/* Broadcast the inverse alpha from the alpha word into the 3 colour words */
	movq %mm4,%mm5
	psrlq $16,%mm4
	por %mm4,%mm5
	psrlq $32,%mm5
	por %mm4,%mm5

	psrlw $8,%mm5

/* %mm7 = destination pixel, unpacked to words (%mm4 is left zero) */
	movd (%esi),%mm7
	pxor %mm4,%mm4
	punpcklbw %mm4, %mm7

/* %mm5 = destination colours * inverse alpha (alpha word stays 0) */
	pmullw %mm7,%mm5

/* x += x_step; */
	addl 24(%ebp),%ebx
/* x_scale = x >> 16; */
	movl %ebx,%edx
	sarl $16,%edx

/* result = source sums + scaled destination, alpha word = 0xffff */
	paddw %mm5,%mm6

	psrlw $8,%mm6
	packuswb %mm6, %mm6
	movd %mm6,(%esi)

	addl $4, %esi

/*
 * Stop once p has reached p_end.  Unsigned "p_end <= p", matching the
 * entry check, so a line length that is not a multiple of 4 cannot
 * step over p_end and run forever.
 */
	cmpl %esi,28(%ebp)
	jbe  .out

/* Same source index as before: reuse the loaded pixels */
	cmpl %edx,-24(%ebp)
	je   .loop

.newx:
	movl %edx,-24(%ebp)
/*
 * Load the two new values into %mm1, %mm3, move old values into %mm0, %mm2
 */
	movq %mm1, %mm0
	movq %mm3, %mm2

	shll $2, %edx

/* %mm4 is already zero here: cleared in the setup and again in .loop */

	movl 16(%ebp),%edi
	movl (%edi, %edx), %eax
	movd (%edi, %edx), %mm5
	punpcklbw %mm4, %mm5
	shrl $24, %eax
	movl $0x010101, %ecx
	mull %ecx
/*
 *	mull destroyed %edx, need to reconstitute
 */
	movl -24(%ebp),%edx
	shll $2, %edx

	orl  $0xff000000, %eax
	movd %eax, %mm1
	punpcklbw %mm4, %mm1
	pmullw %mm5,%mm1

	movl 20(%ebp),%edi
	movl (%edi, %edx), %eax
	movd (%edi, %edx), %mm5
	punpcklbw %mm4, %mm5
	shrl $24, %eax
	movl $0x010101, %ecx
	mull %ecx
	orl  $0xff000000, %eax
	movd %eax, %mm3
	punpcklbw %mm4, %mm3
	pmullw %mm5,%mm3

	psrlw $8,%mm1
	psrlw $8,%mm3

/* .loop expects %edi to hold the weights table again */
	movl 8(%ebp),%edi

	jmp .loop

.out:
/* Return the final destination pointer and leave MMX state */
	movl %esi,%eax
	emms
/* -40 = 28 bytes of locals + 3 saved registers */
	leal -40(%ebp),%esp
	popl %ebx
	popl %esi
	popl %edi
	movl %ebp,%esp
	popl %ebp
	ret