/**
 * \file pcm/pcm_dmix_i386.h
 * \ingroup PCM_Plugins
 * \brief PCM Direct Stream Mixing (dmix) Plugin Interface - I386 assembler code
 * \author Jaroslav Kysela <perex@perex.cz>
 * \date 2003
 */
/*
 *  PCM - Direct Stream Mixing
 *  Copyright (c) 2003 by Jaroslav Kysela <perex@perex.cz>
 *
 *
 *   This library is free software; you can redistribute it and/or modify
 *   it under the terms of the GNU Lesser General Public License as
 *   published by the Free Software Foundation; either version 2.1 of
 *   the License, or (at your option) any later version.
 *
 *   This program is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *   GNU Lesser General Public License for more details.
 *
 *   You should have received a copy of the GNU Lesser General Public
 *   License along with this library; if not, write to the Free Software
 *   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
 *
 */

/*
 *  for plain i386
 */
static void MIX_AREAS_16(unsigned int size,
Packit 4a16fb
			 volatile signed short *dst, signed short *src,
Packit 4a16fb
			 volatile signed int *sum, size_t dst_step,
Packit 4a16fb
			 size_t src_step, size_t sum_step)
Packit 4a16fb
{
Packit 4a16fb
	unsigned int old_ebx;
Packit 4a16fb
Packit 4a16fb
	/*
Packit 4a16fb
	 *  ESI - src
Packit 4a16fb
	 *  EDI - dst
Packit 4a16fb
	 *  EBX - sum
Packit 4a16fb
	 *  ECX - old sample
Packit 4a16fb
	 *  EAX - sample / temporary
Packit 4a16fb
	 *  EDX - temporary
Packit 4a16fb
	 */
Packit 4a16fb
	__asm__ __volatile__ (
Packit 4a16fb
		"\n"
Packit 4a16fb
Packit 4a16fb
		"\tmovl %%ebx, %7\n"	/* ebx is GOT pointer (-fPIC) */
Packit 4a16fb
		/*
Packit 4a16fb
		 *  initialization, load ESI, EDI, EBX registers
Packit 4a16fb
		 */
Packit 4a16fb
		"\tmovl %1, %%edi\n"
Packit 4a16fb
		"\tmovl %2, %%esi\n"
Packit 4a16fb
		"\tmovl %3, %%ebx\n"
Packit 4a16fb
		"\tcmpl $0, %0\n"
Packit 4a16fb
		"\tjnz 2f\n"
Packit 4a16fb
		"\tjmp 7f\n"
Packit 4a16fb
Packit 4a16fb
Packit 4a16fb
		/*
Packit 4a16fb
		 * for (;;)
Packit 4a16fb
		 */
Packit 4a16fb
		"\t.p2align 4,,15\n"
Packit 4a16fb
		"1:"
Packit 4a16fb
		"\tadd %4, %%edi\n"
Packit 4a16fb
		"\tadd %5, %%esi\n"
Packit 4a16fb
		"\tadd %6, %%ebx\n"
Packit 4a16fb
Packit 4a16fb
		/*
Packit 4a16fb
		 *   sample = *src;
Packit 4a16fb
		 *   sum_sample = *sum;
Packit 4a16fb
		 *   if (cmpxchg(*dst, 0, 1) == 0)
Packit 4a16fb
		 *     sample -= sum_sample;
Packit 4a16fb
		 *   xadd(*sum, sample);
Packit 4a16fb
		 */
Packit 4a16fb
Packit 4a16fb
		"2:"
Packit 4a16fb
		"\tmovw $0, %%ax\n"
Packit 4a16fb
		"\tmovw $1, %%cx\n"
Packit 4a16fb
		"\tmovl (%%ebx), %%edx\n"
Packit 4a16fb
		"\t" LOCK_PREFIX "cmpxchgw %%cx, (%%edi)\n"
Packit 4a16fb
		"\tmovswl (%%esi), %%ecx\n"
Packit 4a16fb
		"\tjnz 3f\n"
Packit 4a16fb
		"\t" XSUB " %%edx, %%ecx\n"
Packit 4a16fb
		"3:"
Packit 4a16fb
		"\t" LOCK_PREFIX XADD " %%ecx, (%%ebx)\n"
Packit 4a16fb
Packit 4a16fb
		/*
Packit 4a16fb
		 *   do {
Packit 4a16fb
		 *     sample = old_sample = *sum;
Packit 4a16fb
		 *     saturate(v);
Packit 4a16fb
		 *     *dst = sample;
Packit 4a16fb
		 *   } while (v != *sum);
Packit 4a16fb
		 */
Packit 4a16fb
Packit 4a16fb
		"4:"
Packit 4a16fb
		"\tmovl (%%ebx), %%ecx\n"
Packit 4a16fb
		"\tcmpl $0x7fff,%%ecx\n"
Packit 4a16fb
		"\tjg 5f\n"
Packit 4a16fb
		"\tcmpl $-0x8000,%%ecx\n"
Packit 4a16fb
		"\tjl 6f\n"
Packit 4a16fb
		"\tmovw %%cx, (%%edi)\n"
Packit 4a16fb
		"\tcmpl %%ecx, (%%ebx)\n"
Packit 4a16fb
		"\tjnz 4b\n"
Packit 4a16fb
Packit 4a16fb
		/*
Packit 4a16fb
		 * while (size-- > 0)
Packit 4a16fb
		 */
Packit 4a16fb
		"\tdecl %0\n"
Packit 4a16fb
		"\tjnz 1b\n"
Packit 4a16fb
		"\tjmp 7f\n"
Packit 4a16fb
Packit 4a16fb
		/*
Packit 4a16fb
		 *  sample > 0x7fff
Packit 4a16fb
		 */
Packit 4a16fb
Packit 4a16fb
		"\t.p2align 4,,15\n"
Packit 4a16fb
Packit 4a16fb
		"5:"
Packit 4a16fb
		"\tmovw $0x7fff, (%%edi)\n"
Packit 4a16fb
		"\tcmpl %%ecx,(%%ebx)\n"
Packit 4a16fb
		"\tjnz 4b\n"
Packit 4a16fb
		"\tdecl %0\n"
Packit 4a16fb
		"\tjnz 1b\n"
Packit 4a16fb
		"\tjmp 7f\n"
Packit 4a16fb
Packit 4a16fb
		/*
Packit 4a16fb
		 *  sample < -0x8000
Packit 4a16fb
		 */
Packit 4a16fb
Packit 4a16fb
		"\t.p2align 4,,15\n"
Packit 4a16fb
Packit 4a16fb
		"6:"
Packit 4a16fb
		"\tmovw $-0x8000, (%%edi)\n"
Packit 4a16fb
		"\tcmpl %%ecx, (%%ebx)\n"
Packit 4a16fb
		"\tjnz 4b\n"
Packit 4a16fb
		"\tdecl %0\n"
Packit 4a16fb
		"\tjnz 1b\n"
Packit 4a16fb
		
Packit 4a16fb
		"7:"
Packit 4a16fb
		"\tmovl %7, %%ebx\n"	/* ebx is GOT pointer (-fPIC) */
Packit 4a16fb
Packit 4a16fb
		: /* no output regs */
Packit 4a16fb
		: "m" (size), "m" (dst), "m" (src),
Packit 4a16fb
		  "m" (sum), "m" (dst_step), "m" (src_step),
Packit 4a16fb
		  "m" (sum_step), "m" (old_ebx)
Packit 4a16fb
		: "esi", "edi", "edx", "ecx", "eax"
Packit 4a16fb
	);
Packit 4a16fb
}

/*
 *  MMX optimized
 */
static void MIX_AREAS_16_MMX(unsigned int size,
Packit 4a16fb
			     volatile signed short *dst, signed short *src,
Packit 4a16fb
			     volatile signed int *sum, size_t dst_step,
Packit 4a16fb
			     size_t src_step, size_t sum_step)
Packit 4a16fb
{
Packit 4a16fb
	unsigned int old_ebx;
Packit 4a16fb
Packit 4a16fb
	/*
Packit 4a16fb
	 *  ESI - src
Packit 4a16fb
	 *  EDI - dst
Packit 4a16fb
	 *  EBX - sum
Packit 4a16fb
	 *  ECX - old sample
Packit 4a16fb
	 *  EAX - sample / temporary
Packit 4a16fb
	 *  EDX - temporary
Packit 4a16fb
	 */
Packit 4a16fb
	__asm__ __volatile__ (
Packit 4a16fb
		"\n"
Packit 4a16fb
Packit 4a16fb
		"\tmovl %%ebx, %7\n"	/* ebx is GOT pointer (-fPIC) */
Packit 4a16fb
		/*
Packit 4a16fb
		 *  initialization, load ESI, EDI, EBX registers
Packit 4a16fb
		 */
Packit 4a16fb
		"\tmovl %1, %%edi\n"
Packit 4a16fb
		"\tmovl %2, %%esi\n"
Packit 4a16fb
		"\tmovl %3, %%ebx\n"
Packit 4a16fb
		"\tcmpl $0, %0\n"
Packit 4a16fb
		"\tjnz 2f\n"
Packit 4a16fb
		"\tjmp 5f\n"
Packit 4a16fb
Packit 4a16fb
		"\t.p2align 4,,15\n"
Packit 4a16fb
		"1:"
Packit 4a16fb
		"\tadd %4, %%edi\n"
Packit 4a16fb
		"\tadd %5, %%esi\n"
Packit 4a16fb
		"\tadd %6, %%ebx\n"
Packit 4a16fb
Packit 4a16fb
		"2:"
Packit 4a16fb
		/*
Packit 4a16fb
		 *   sample = *src;
Packit 4a16fb
		 *   sum_sample = *sum;
Packit 4a16fb
		 *   if (cmpxchg(*dst, 0, 1) == 0)
Packit 4a16fb
		 *     sample -= sum_sample;
Packit 4a16fb
		 *   xadd(*sum, sample);
Packit 4a16fb
		 */
Packit 4a16fb
		"\tmovw $0, %%ax\n"
Packit 4a16fb
		"\tmovw $1, %%cx\n"
Packit 4a16fb
		"\tmovl (%%ebx), %%edx\n"
Packit 4a16fb
		"\t" LOCK_PREFIX "cmpxchgw %%cx, (%%edi)\n"
Packit 4a16fb
		"\tmovswl (%%esi), %%ecx\n"
Packit 4a16fb
		"\tjnz 3f\n"
Packit 4a16fb
		"\t" XSUB " %%edx, %%ecx\n"
Packit 4a16fb
		"3:"
Packit 4a16fb
		"\t" LOCK_PREFIX XADD " %%ecx, (%%ebx)\n"
Packit 4a16fb
Packit 4a16fb
		/*
Packit 4a16fb
		 *   do {
Packit 4a16fb
		 *     sample = old_sample = *sum;
Packit 4a16fb
		 *     saturate(v);
Packit 4a16fb
		 *     *dst = sample;
Packit 4a16fb
		 *   } while (v != *sum);
Packit 4a16fb
		 */
Packit 4a16fb
Packit 4a16fb
		"4:"
Packit 4a16fb
		"\tmovl (%%ebx), %%ecx\n"
Packit 4a16fb
		"\tmovd %%ecx, %%mm0\n"
Packit 4a16fb
		"\tpackssdw %%mm1, %%mm0\n"
Packit 4a16fb
		"\tmovd %%mm0, %%eax\n"
Packit 4a16fb
		"\tmovw %%ax, (%%edi)\n"
Packit 4a16fb
		"\tcmpl %%ecx, (%%ebx)\n"
Packit 4a16fb
		"\tjnz 4b\n"
Packit 4a16fb
Packit 4a16fb
		/*
Packit 4a16fb
		 * while (size-- > 0)
Packit 4a16fb
		 */
Packit 4a16fb
		"\tdecl %0\n"
Packit 4a16fb
		"\tjnz 1b\n"
Packit 4a16fb
		"\temms\n"
Packit 4a16fb
                "5:"
Packit 4a16fb
		"\tmovl %7, %%ebx\n"	/* ebx is GOT pointer (-fPIC) */
Packit 4a16fb
Packit 4a16fb
		: /* no output regs */
Packit 4a16fb
		: "m" (size), "m" (dst), "m" (src),
Packit 4a16fb
		  "m" (sum), "m" (dst_step), "m" (src_step),
Packit 4a16fb
		  "m" (sum_step), "m" (old_ebx)
Packit 4a16fb
		: "esi", "edi", "edx", "ecx", "eax"
Packit 4a16fb
	);
Packit 4a16fb
}

/*
 *  for plain i386, 32-bit version (24-bit resolution)
 */
static void MIX_AREAS_32(unsigned int size,
Packit 4a16fb
			 volatile signed int *dst, signed int *src,
Packit 4a16fb
			 volatile signed int *sum, size_t dst_step,
Packit 4a16fb
			 size_t src_step, size_t sum_step)
Packit 4a16fb
{
Packit 4a16fb
	unsigned int old_ebx;
Packit 4a16fb
Packit 4a16fb
	/*
Packit 4a16fb
	 *  ESI - src
Packit 4a16fb
	 *  EDI - dst
Packit 4a16fb
	 *  EBX - sum
Packit 4a16fb
	 *  ECX - old sample
Packit 4a16fb
	 *  EAX - sample / temporary
Packit 4a16fb
	 *  EDX - temporary
Packit 4a16fb
	 */
Packit 4a16fb
	__asm__ __volatile__ (
Packit 4a16fb
		"\n"
Packit 4a16fb
Packit 4a16fb
		"\tmovl %%ebx, %7\n"	/* ebx is GOT pointer (-fPIC) */
Packit 4a16fb
		/*
Packit 4a16fb
		 *  initialization, load ESI, EDI, EBX registers
Packit 4a16fb
		 */
Packit 4a16fb
		"\tmovl %1, %%edi\n"
Packit 4a16fb
		"\tmovl %2, %%esi\n"
Packit 4a16fb
		"\tmovl %3, %%ebx\n"
Packit 4a16fb
		"\tcmpl $0, %0\n"
Packit 4a16fb
		"\tjnz 1f\n"
Packit 4a16fb
		"\tjmp 6f\n"
Packit 4a16fb
Packit 4a16fb
		"\t.p2align 4,,15\n"
Packit 4a16fb
Packit 4a16fb
		"1:"
Packit 4a16fb
Packit 4a16fb
		/*
Packit 4a16fb
		 *   sample = *src;
Packit 4a16fb
		 *   sum_sample = *sum;
Packit 4a16fb
		 *   if (cmpxchg(*dst, 0, 1) == 0)
Packit 4a16fb
		 *     sample -= sum_sample;
Packit 4a16fb
		 *   xadd(*sum, sample);
Packit 4a16fb
		 */
Packit 4a16fb
		"\tmovl $0, %%eax\n"
Packit 4a16fb
		"\tmovl $1, %%ecx\n"
Packit 4a16fb
		"\tmovl (%%ebx), %%edx\n"
Packit 4a16fb
		"\t" LOCK_PREFIX "cmpxchgl %%ecx, (%%edi)\n"
Packit 4a16fb
		"\tjnz 2f\n"
Packit 4a16fb
		"\tmovl (%%esi), %%ecx\n"
Packit 4a16fb
		/* sample >>= 8 */
Packit 4a16fb
		"\tsarl $8, %%ecx\n"
Packit 4a16fb
		"\t" XSUB " %%edx, %%ecx\n"
Packit 4a16fb
		"\tjmp 21f\n"
Packit 4a16fb
		"2:"
Packit 4a16fb
		"\tmovl (%%esi), %%ecx\n"
Packit 4a16fb
		/* sample >>= 8 */
Packit 4a16fb
		"\tsarl $8, %%ecx\n"
Packit 4a16fb
		"21:"
Packit 4a16fb
		"\t" LOCK_PREFIX XADD " %%ecx, (%%ebx)\n"
Packit 4a16fb
Packit 4a16fb
		/*
Packit 4a16fb
		 *   do {
Packit 4a16fb
		 *     sample = old_sample = *sum;
Packit 4a16fb
		 *     saturate(v);
Packit 4a16fb
		 *     *dst = sample;
Packit 4a16fb
		 *   } while (v != *sum);
Packit 4a16fb
		 */
Packit 4a16fb
Packit 4a16fb
		"3:"
Packit 4a16fb
		"\tmovl (%%ebx), %%ecx\n"
Packit 4a16fb
		/*
Packit 4a16fb
		 *  if (sample > 0x7fff00)
Packit 4a16fb
		 */
Packit 4a16fb
		"\tmovl $0x7fffff, %%eax\n"
Packit 4a16fb
		"\tcmpl %%eax, %%ecx\n"
Packit 4a16fb
		"\tjg 4f\n"
Packit 4a16fb
		/*
Packit 4a16fb
		 *  if (sample < -0x800000)
Packit 4a16fb
		 */
Packit 4a16fb
		"\tmovl $-0x800000, %%eax\n"
Packit 4a16fb
		"\tcmpl %%eax, %%ecx\n"
Packit 4a16fb
		"\tjl 4f\n"
Packit 4a16fb
		"\tmovl %%ecx, %%eax\n"
Packit 4a16fb
		"4:"
Packit 4a16fb
		/*
Packit 4a16fb
		 *  sample <<= 8;
Packit 4a16fb
		 */
Packit 4a16fb
		"\tsall $8, %%eax\n"
Packit 4a16fb
		"\tmovl %%eax, (%%edi)\n"
Packit 4a16fb
		"\tcmpl %%ecx, (%%ebx)\n"
Packit 4a16fb
		"\tjnz 3b\n"
Packit 4a16fb
Packit 4a16fb
		/*
Packit 4a16fb
		 * while (size-- > 0)
Packit 4a16fb
		 */
Packit 4a16fb
		"\tdecl %0\n"
Packit 4a16fb
		"\tjz 6f\n"
Packit 4a16fb
		"\tadd %4, %%edi\n"
Packit 4a16fb
		"\tadd %5, %%esi\n"
Packit 4a16fb
		"\tadd %6, %%ebx\n"
Packit 4a16fb
		"\tjmp 1b\n"
Packit 4a16fb
		
Packit 4a16fb
		"6:"
Packit 4a16fb
		"\tmovl %7, %%ebx\n"	/* ebx is GOT pointer (-fPIC) */
Packit 4a16fb
Packit 4a16fb
		: /* no output regs */
Packit 4a16fb
		: "m" (size), "m" (dst), "m" (src),
Packit 4a16fb
		  "m" (sum), "m" (dst_step), "m" (src_step),
Packit 4a16fb
		  "m" (sum_step), "m" (old_ebx)
Packit 4a16fb
		: "esi", "edi", "edx", "ecx", "eax"
Packit 4a16fb
	);
Packit 4a16fb
}

/*
 * 24-bit version for plain i386
 */
static void MIX_AREAS_24(unsigned int size,
Packit 4a16fb
			 volatile unsigned char *dst, unsigned char *src,
Packit 4a16fb
			 volatile signed int *sum, size_t dst_step,
Packit 4a16fb
			 size_t src_step, size_t sum_step)
Packit 4a16fb
{
Packit 4a16fb
	unsigned int old_ebx;
Packit 4a16fb
Packit 4a16fb
	/*
Packit 4a16fb
	 *  ESI - src
Packit 4a16fb
	 *  EDI - dst
Packit 4a16fb
	 *  EBX - sum
Packit 4a16fb
	 *  ECX - old sample
Packit 4a16fb
	 *  EAX - sample / temporary
Packit 4a16fb
	 *  EDX - temporary
Packit 4a16fb
	 */
Packit 4a16fb
	__asm__ __volatile__ (
Packit 4a16fb
		"\n"
Packit 4a16fb
Packit 4a16fb
		"\tmovl %%ebx, %7\n"	/* ebx is GOT pointer (-fPIC) */
Packit 4a16fb
		/*
Packit 4a16fb
		 *  initialization, load ESI, EDI, EBX registers
Packit 4a16fb
		 */
Packit 4a16fb
		"\tmovl %1, %%edi\n"
Packit 4a16fb
		"\tmovl %2, %%esi\n"
Packit 4a16fb
		"\tmovl %3, %%ebx\n"
Packit 4a16fb
		"\tcmpl $0, %0\n"
Packit 4a16fb
		"\tjnz 1f\n"
Packit 4a16fb
		"\tjmp 6f\n"
Packit 4a16fb
Packit 4a16fb
		"\t.p2align 4,,15\n"
Packit 4a16fb
Packit 4a16fb
		"1:"
Packit 4a16fb
Packit 4a16fb
		/*
Packit 4a16fb
		 *   sample = *src;
Packit 4a16fb
		 *   sum_sample = *sum;
Packit 4a16fb
		 *   if (test_and_set_bit(0, dst) == 0)
Packit 4a16fb
		 *     sample -= sum_sample;
Packit 4a16fb
		 *   *sum += sample;
Packit 4a16fb
		 */
Packit 4a16fb
		"\tmovsbl 2(%%esi), %%eax\n"
Packit 4a16fb
		"\tmovzwl (%%esi), %%ecx\n"
Packit 4a16fb
		"\tmovl (%%ebx), %%edx\n"
Packit 4a16fb
		"\tsall $16, %%eax\n"
Packit 4a16fb
		"\torl %%eax, %%ecx\n"
Packit 4a16fb
		"\t" LOCK_PREFIX "btsw $0, (%%edi)\n"
Packit 4a16fb
		"\tjc 2f\n"
Packit 4a16fb
		"\t" XSUB " %%edx, %%ecx\n"
Packit 4a16fb
		"2:"
Packit 4a16fb
		"\t" LOCK_PREFIX XADD " %%ecx, (%%ebx)\n"
Packit 4a16fb
Packit 4a16fb
		/*
Packit 4a16fb
		 *   do {
Packit 4a16fb
		 *     sample = old_sample = *sum;
Packit 4a16fb
		 *     saturate(sample);
Packit 4a16fb
		 *     *dst = sample | 1;
Packit 4a16fb
		 *   } while (old_sample != *sum);
Packit 4a16fb
		 */
Packit 4a16fb
Packit 4a16fb
		"3:"
Packit 4a16fb
		"\tmovl (%%ebx), %%ecx\n"
Packit 4a16fb
		/*
Packit 4a16fb
		 *  if (sample > 0x7fffff)
Packit 4a16fb
		 */
Packit 4a16fb
		"\tmovl $0x7fffff, %%eax\n"
Packit 4a16fb
		"\tcmpl %%eax, %%ecx\n"
Packit 4a16fb
		"\tjg 4f\n"
Packit 4a16fb
		/*
Packit 4a16fb
		 *  if (sample < -0x7fffff)
Packit 4a16fb
		 */
Packit 4a16fb
		"\tmovl $-0x7fffff, %%eax\n"
Packit 4a16fb
		"\tcmpl %%eax, %%ecx\n"
Packit 4a16fb
		"\tjl 4f\n"
Packit 4a16fb
		"\tmovl %%ecx, %%eax\n"
Packit 4a16fb
		"\torl $1, %%eax\n"
Packit 4a16fb
		"4:"
Packit 4a16fb
		"\tmovw %%ax, (%%edi)\n"
Packit 4a16fb
		"\tshrl $16, %%eax\n"
Packit 4a16fb
		"\tmovb %%al, 2(%%edi)\n"
Packit 4a16fb
		"\tcmpl %%ecx, (%%ebx)\n"
Packit 4a16fb
		"\tjnz 3b\n"
Packit 4a16fb
Packit 4a16fb
		/*
Packit 4a16fb
		 * while (size-- > 0)
Packit 4a16fb
		 */
Packit 4a16fb
		"\tdecl %0\n"
Packit 4a16fb
		"\tjz 6f\n"
Packit 4a16fb
		"\tadd %4, %%edi\n"
Packit 4a16fb
		"\tadd %5, %%esi\n"
Packit 4a16fb
		"\tadd %6, %%ebx\n"
Packit 4a16fb
		"\tjmp 1b\n"
Packit 4a16fb
		
Packit 4a16fb
		"6:"
Packit 4a16fb
		"\tmovl %7, %%ebx\n"	/* ebx is GOT pointer (-fPIC) */
Packit 4a16fb
Packit 4a16fb
		: /* no output regs */
Packit 4a16fb
		: "m" (size), "m" (dst), "m" (src),
Packit 4a16fb
		  "m" (sum), "m" (dst_step), "m" (src_step),
Packit 4a16fb
		  "m" (sum_step), "m" (old_ebx)
Packit 4a16fb
		: "esi", "edi", "edx", "ecx", "eax"
Packit 4a16fb
	);
Packit 4a16fb
}

/*
 * 24-bit version for Pentium Pro/II
 */
static void MIX_AREAS_24_CMOV(unsigned int size,
Packit 4a16fb
			      volatile unsigned char *dst, unsigned char *src,
Packit 4a16fb
			      volatile signed int *sum, size_t dst_step,
Packit 4a16fb
			      size_t src_step, size_t sum_step)
Packit 4a16fb
{
Packit 4a16fb
	unsigned int old_ebx;
Packit 4a16fb
Packit 4a16fb
	/*
Packit 4a16fb
	 *  ESI - src
Packit 4a16fb
	 *  EDI - dst
Packit 4a16fb
	 *  EBX - sum
Packit 4a16fb
	 *  ECX - old sample
Packit 4a16fb
	 *  EAX - sample / temporary
Packit 4a16fb
	 *  EDX - temporary
Packit 4a16fb
	 */
Packit 4a16fb
	__asm__ __volatile__ (
Packit 4a16fb
		"\n"
Packit 4a16fb
Packit 4a16fb
		"\tmovl %%ebx, %7\n"	/* ebx is GOT pointer (-fPIC) */
Packit 4a16fb
		/*
Packit 4a16fb
		 *  initialization, load ESI, EDI, EBX registers
Packit 4a16fb
		 */
Packit 4a16fb
		"\tmovl %1, %%edi\n"
Packit 4a16fb
		"\tmovl %2, %%esi\n"
Packit 4a16fb
		"\tmovl %3, %%ebx\n"
Packit 4a16fb
		"\tcmpl $0, %0\n"
Packit 4a16fb
		"\tjz 6f\n"
Packit 4a16fb
Packit 4a16fb
		"\t.p2align 4,,15\n"
Packit 4a16fb
Packit 4a16fb
		"1:"
Packit 4a16fb
Packit 4a16fb
		/*
Packit 4a16fb
		 *   sample = *src;
Packit 4a16fb
		 *   sum_sample = *sum;
Packit 4a16fb
		 *   if (test_and_set_bit(0, dst) == 0)
Packit 4a16fb
		 *     sample -= sum_sample;
Packit 4a16fb
		 *   *sum += sample;
Packit 4a16fb
		 */
Packit 4a16fb
		"\tmovsbl 2(%%esi), %%eax\n"
Packit 4a16fb
		"\tmovzwl (%%esi), %%ecx\n"
Packit 4a16fb
		"\tmovl (%%ebx), %%edx\n"
Packit 4a16fb
		"\tsall $16, %%eax\n"
Packit 4a16fb
		"\t" LOCK_PREFIX "btsw $0, (%%edi)\n"
Packit 4a16fb
		"\tleal (%%ecx,%%eax,1), %%ecx\n"
Packit 4a16fb
		"\tjc 2f\n"
Packit 4a16fb
		"\t" XSUB " %%edx, %%ecx\n"
Packit 4a16fb
		"2:"
Packit 4a16fb
		"\t" LOCK_PREFIX XADD " %%ecx, (%%ebx)\n"
Packit 4a16fb
Packit 4a16fb
		/*
Packit 4a16fb
		 *   do {
Packit 4a16fb
		 *     sample = old_sample = *sum;
Packit 4a16fb
		 *     saturate(sample);
Packit 4a16fb
		 *     *dst = sample | 1;
Packit 4a16fb
		 *   } while (old_sample != *sum);
Packit 4a16fb
		 */
Packit 4a16fb
Packit 4a16fb
		"3:"
Packit 4a16fb
		"\tmovl (%%ebx), %%ecx\n"
Packit 4a16fb
Packit 4a16fb
		"\tmovl $0x7fffff, %%eax\n"
Packit 4a16fb
		"\tmovl $-0x7fffff, %%edx\n"
Packit 4a16fb
		"\tcmpl %%eax, %%ecx\n"
Packit 4a16fb
		"\tcmovng %%ecx, %%eax\n"
Packit 4a16fb
		"\tcmpl %%edx, %%ecx\n"
Packit 4a16fb
		"\tcmovl %%edx, %%eax\n"
Packit 4a16fb
Packit 4a16fb
		"\torl $1, %%eax\n"
Packit 4a16fb
		"\tmovw %%ax, (%%edi)\n"
Packit 4a16fb
		"\tshrl $16, %%eax\n"
Packit 4a16fb
		"\tmovb %%al, 2(%%edi)\n"
Packit 4a16fb
Packit 4a16fb
		"\tcmpl %%ecx, (%%ebx)\n"
Packit 4a16fb
		"\tjnz 3b\n"
Packit 4a16fb
Packit 4a16fb
		/*
Packit 4a16fb
		 * while (size-- > 0)
Packit 4a16fb
		 */
Packit 4a16fb
		"\tadd %4, %%edi\n"
Packit 4a16fb
		"\tadd %5, %%esi\n"
Packit 4a16fb
		"\tadd %6, %%ebx\n"
Packit 4a16fb
		"\tdecl %0\n"
Packit 4a16fb
		"\tjnz 1b\n"
Packit 4a16fb
		
Packit 4a16fb
		"6:"
Packit 4a16fb
		"\tmovl %7, %%ebx\n"	/* ebx is GOT pointer (-fPIC) */
Packit 4a16fb
Packit 4a16fb
		: /* no output regs */
Packit 4a16fb
		: "m" (size), "m" (dst), "m" (src),
Packit 4a16fb
		  "m" (sum), "m" (dst_step), "m" (src_step),
Packit 4a16fb
		  "m" (sum_step), "m" (old_ebx)
Packit 4a16fb
		: "esi", "edi", "edx", "ecx", "eax"
Packit 4a16fb
	);
Packit 4a16fb
}