/**
 * \file pcm/pcm_dmix_i386.h
 * \ingroup PCM_Plugins
 * \brief PCM Direct Stream Mixing (dmix) Plugin Interface - I386 assembler code
 * \author Jaroslav Kysela <perex@perex.cz>
 * \date 2003
 */
/*
 * PCM - Direct Stream Mixing
 * Copyright (c) 2003 by Jaroslav Kysela <perex@perex.cz>
 *
 *
 * This library is free software; you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as
 * published by the Free Software Foundation; either version 2.1 of
 * the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 *
 */

/*
 * for plain i386
 */
static void MIX_AREAS_16(unsigned int size,
|
|
Packit |
4a16fb |
volatile signed short *dst, signed short *src,
|
|
Packit |
4a16fb |
volatile signed int *sum, size_t dst_step,
|
|
Packit |
4a16fb |
size_t src_step, size_t sum_step)
|
|
Packit |
4a16fb |
{
|
|
Packit |
4a16fb |
unsigned int old_ebx;
|
|
Packit |
4a16fb |
|
|
Packit |
4a16fb |
/*
|
|
Packit |
4a16fb |
* ESI - src
|
|
Packit |
4a16fb |
* EDI - dst
|
|
Packit |
4a16fb |
* EBX - sum
|
|
Packit |
4a16fb |
* ECX - old sample
|
|
Packit |
4a16fb |
* EAX - sample / temporary
|
|
Packit |
4a16fb |
* EDX - temporary
|
|
Packit |
4a16fb |
*/
|
|
Packit |
4a16fb |
__asm__ __volatile__ (
|
|
Packit |
4a16fb |
"\n"
|
|
Packit |
4a16fb |
|
|
Packit |
4a16fb |
"\tmovl %%ebx, %7\n" /* ebx is GOT pointer (-fPIC) */
|
|
Packit |
4a16fb |
/*
|
|
Packit |
4a16fb |
* initialization, load ESI, EDI, EBX registers
|
|
Packit |
4a16fb |
*/
|
|
Packit |
4a16fb |
"\tmovl %1, %%edi\n"
|
|
Packit |
4a16fb |
"\tmovl %2, %%esi\n"
|
|
Packit |
4a16fb |
"\tmovl %3, %%ebx\n"
|
|
Packit |
4a16fb |
"\tcmpl $0, %0\n"
|
|
Packit |
4a16fb |
"\tjnz 2f\n"
|
|
Packit |
4a16fb |
"\tjmp 7f\n"
|
|
Packit |
4a16fb |
|
|
Packit |
4a16fb |
|
|
Packit |
4a16fb |
/*
|
|
Packit |
4a16fb |
* for (;;)
|
|
Packit |
4a16fb |
*/
|
|
Packit |
4a16fb |
"\t.p2align 4,,15\n"
|
|
Packit |
4a16fb |
"1:"
|
|
Packit |
4a16fb |
"\tadd %4, %%edi\n"
|
|
Packit |
4a16fb |
"\tadd %5, %%esi\n"
|
|
Packit |
4a16fb |
"\tadd %6, %%ebx\n"
|
|
Packit |
4a16fb |
|
|
Packit |
4a16fb |
/*
|
|
Packit |
4a16fb |
* sample = *src;
|
|
Packit |
4a16fb |
* sum_sample = *sum;
|
|
Packit |
4a16fb |
* if (cmpxchg(*dst, 0, 1) == 0)
|
|
Packit |
4a16fb |
* sample -= sum_sample;
|
|
Packit |
4a16fb |
* xadd(*sum, sample);
|
|
Packit |
4a16fb |
*/
|
|
Packit |
4a16fb |
|
|
Packit |
4a16fb |
"2:"
|
|
Packit |
4a16fb |
"\tmovw $0, %%ax\n"
|
|
Packit |
4a16fb |
"\tmovw $1, %%cx\n"
|
|
Packit |
4a16fb |
"\tmovl (%%ebx), %%edx\n"
|
|
Packit |
4a16fb |
"\t" LOCK_PREFIX "cmpxchgw %%cx, (%%edi)\n"
|
|
Packit |
4a16fb |
"\tmovswl (%%esi), %%ecx\n"
|
|
Packit |
4a16fb |
"\tjnz 3f\n"
|
|
Packit |
4a16fb |
"\t" XSUB " %%edx, %%ecx\n"
|
|
Packit |
4a16fb |
"3:"
|
|
Packit |
4a16fb |
"\t" LOCK_PREFIX XADD " %%ecx, (%%ebx)\n"
|
|
Packit |
4a16fb |
|
|
Packit |
4a16fb |
/*
|
|
Packit |
4a16fb |
* do {
|
|
Packit |
4a16fb |
* sample = old_sample = *sum;
|
|
Packit |
4a16fb |
* saturate(v);
|
|
Packit |
4a16fb |
* *dst = sample;
|
|
Packit |
4a16fb |
* } while (v != *sum);
|
|
Packit |
4a16fb |
*/
|
|
Packit |
4a16fb |
|
|
Packit |
4a16fb |
"4:"
|
|
Packit |
4a16fb |
"\tmovl (%%ebx), %%ecx\n"
|
|
Packit |
4a16fb |
"\tcmpl $0x7fff,%%ecx\n"
|
|
Packit |
4a16fb |
"\tjg 5f\n"
|
|
Packit |
4a16fb |
"\tcmpl $-0x8000,%%ecx\n"
|
|
Packit |
4a16fb |
"\tjl 6f\n"
|
|
Packit |
4a16fb |
"\tmovw %%cx, (%%edi)\n"
|
|
Packit |
4a16fb |
"\tcmpl %%ecx, (%%ebx)\n"
|
|
Packit |
4a16fb |
"\tjnz 4b\n"
|
|
Packit |
4a16fb |
|
|
Packit |
4a16fb |
/*
|
|
Packit |
4a16fb |
* while (size-- > 0)
|
|
Packit |
4a16fb |
*/
|
|
Packit |
4a16fb |
"\tdecl %0\n"
|
|
Packit |
4a16fb |
"\tjnz 1b\n"
|
|
Packit |
4a16fb |
"\tjmp 7f\n"
|
|
Packit |
4a16fb |
|
|
Packit |
4a16fb |
/*
|
|
Packit |
4a16fb |
* sample > 0x7fff
|
|
Packit |
4a16fb |
*/
|
|
Packit |
4a16fb |
|
|
Packit |
4a16fb |
"\t.p2align 4,,15\n"
|
|
Packit |
4a16fb |
|
|
Packit |
4a16fb |
"5:"
|
|
Packit |
4a16fb |
"\tmovw $0x7fff, (%%edi)\n"
|
|
Packit |
4a16fb |
"\tcmpl %%ecx,(%%ebx)\n"
|
|
Packit |
4a16fb |
"\tjnz 4b\n"
|
|
Packit |
4a16fb |
"\tdecl %0\n"
|
|
Packit |
4a16fb |
"\tjnz 1b\n"
|
|
Packit |
4a16fb |
"\tjmp 7f\n"
|
|
Packit |
4a16fb |
|
|
Packit |
4a16fb |
/*
|
|
Packit |
4a16fb |
* sample < -0x8000
|
|
Packit |
4a16fb |
*/
|
|
Packit |
4a16fb |
|
|
Packit |
4a16fb |
"\t.p2align 4,,15\n"
|
|
Packit |
4a16fb |
|
|
Packit |
4a16fb |
"6:"
|
|
Packit |
4a16fb |
"\tmovw $-0x8000, (%%edi)\n"
|
|
Packit |
4a16fb |
"\tcmpl %%ecx, (%%ebx)\n"
|
|
Packit |
4a16fb |
"\tjnz 4b\n"
|
|
Packit |
4a16fb |
"\tdecl %0\n"
|
|
Packit |
4a16fb |
"\tjnz 1b\n"
|
|
Packit |
4a16fb |
|
|
Packit |
4a16fb |
"7:"
|
|
Packit |
4a16fb |
"\tmovl %7, %%ebx\n" /* ebx is GOT pointer (-fPIC) */
|
|
Packit |
4a16fb |
|
|
Packit |
4a16fb |
: /* no output regs */
|
|
Packit |
4a16fb |
: "m" (size), "m" (dst), "m" (src),
|
|
Packit |
4a16fb |
"m" (sum), "m" (dst_step), "m" (src_step),
|
|
Packit |
4a16fb |
"m" (sum_step), "m" (old_ebx)
|
|
Packit |
4a16fb |
: "esi", "edi", "edx", "ecx", "eax"
|
|
Packit |
4a16fb |
);
|
|
Packit |
4a16fb |
}

/*
 * MMX optimized
 */
static void MIX_AREAS_16_MMX(unsigned int size,
|
|
Packit |
4a16fb |
volatile signed short *dst, signed short *src,
|
|
Packit |
4a16fb |
volatile signed int *sum, size_t dst_step,
|
|
Packit |
4a16fb |
size_t src_step, size_t sum_step)
|
|
Packit |
4a16fb |
{
|
|
Packit |
4a16fb |
unsigned int old_ebx;
|
|
Packit |
4a16fb |
|
|
Packit |
4a16fb |
/*
|
|
Packit |
4a16fb |
* ESI - src
|
|
Packit |
4a16fb |
* EDI - dst
|
|
Packit |
4a16fb |
* EBX - sum
|
|
Packit |
4a16fb |
* ECX - old sample
|
|
Packit |
4a16fb |
* EAX - sample / temporary
|
|
Packit |
4a16fb |
* EDX - temporary
|
|
Packit |
4a16fb |
*/
|
|
Packit |
4a16fb |
__asm__ __volatile__ (
|
|
Packit |
4a16fb |
"\n"
|
|
Packit |
4a16fb |
|
|
Packit |
4a16fb |
"\tmovl %%ebx, %7\n" /* ebx is GOT pointer (-fPIC) */
|
|
Packit |
4a16fb |
/*
|
|
Packit |
4a16fb |
* initialization, load ESI, EDI, EBX registers
|
|
Packit |
4a16fb |
*/
|
|
Packit |
4a16fb |
"\tmovl %1, %%edi\n"
|
|
Packit |
4a16fb |
"\tmovl %2, %%esi\n"
|
|
Packit |
4a16fb |
"\tmovl %3, %%ebx\n"
|
|
Packit |
4a16fb |
"\tcmpl $0, %0\n"
|
|
Packit |
4a16fb |
"\tjnz 2f\n"
|
|
Packit |
4a16fb |
"\tjmp 5f\n"
|
|
Packit |
4a16fb |
|
|
Packit |
4a16fb |
"\t.p2align 4,,15\n"
|
|
Packit |
4a16fb |
"1:"
|
|
Packit |
4a16fb |
"\tadd %4, %%edi\n"
|
|
Packit |
4a16fb |
"\tadd %5, %%esi\n"
|
|
Packit |
4a16fb |
"\tadd %6, %%ebx\n"
|
|
Packit |
4a16fb |
|
|
Packit |
4a16fb |
"2:"
|
|
Packit |
4a16fb |
/*
|
|
Packit |
4a16fb |
* sample = *src;
|
|
Packit |
4a16fb |
* sum_sample = *sum;
|
|
Packit |
4a16fb |
* if (cmpxchg(*dst, 0, 1) == 0)
|
|
Packit |
4a16fb |
* sample -= sum_sample;
|
|
Packit |
4a16fb |
* xadd(*sum, sample);
|
|
Packit |
4a16fb |
*/
|
|
Packit |
4a16fb |
"\tmovw $0, %%ax\n"
|
|
Packit |
4a16fb |
"\tmovw $1, %%cx\n"
|
|
Packit |
4a16fb |
"\tmovl (%%ebx), %%edx\n"
|
|
Packit |
4a16fb |
"\t" LOCK_PREFIX "cmpxchgw %%cx, (%%edi)\n"
|
|
Packit |
4a16fb |
"\tmovswl (%%esi), %%ecx\n"
|
|
Packit |
4a16fb |
"\tjnz 3f\n"
|
|
Packit |
4a16fb |
"\t" XSUB " %%edx, %%ecx\n"
|
|
Packit |
4a16fb |
"3:"
|
|
Packit |
4a16fb |
"\t" LOCK_PREFIX XADD " %%ecx, (%%ebx)\n"
|
|
Packit |
4a16fb |
|
|
Packit |
4a16fb |
/*
|
|
Packit |
4a16fb |
* do {
|
|
Packit |
4a16fb |
* sample = old_sample = *sum;
|
|
Packit |
4a16fb |
* saturate(v);
|
|
Packit |
4a16fb |
* *dst = sample;
|
|
Packit |
4a16fb |
* } while (v != *sum);
|
|
Packit |
4a16fb |
*/
|
|
Packit |
4a16fb |
|
|
Packit |
4a16fb |
"4:"
|
|
Packit |
4a16fb |
"\tmovl (%%ebx), %%ecx\n"
|
|
Packit |
4a16fb |
"\tmovd %%ecx, %%mm0\n"
|
|
Packit |
4a16fb |
"\tpackssdw %%mm1, %%mm0\n"
|
|
Packit |
4a16fb |
"\tmovd %%mm0, %%eax\n"
|
|
Packit |
4a16fb |
"\tmovw %%ax, (%%edi)\n"
|
|
Packit |
4a16fb |
"\tcmpl %%ecx, (%%ebx)\n"
|
|
Packit |
4a16fb |
"\tjnz 4b\n"
|
|
Packit |
4a16fb |
|
|
Packit |
4a16fb |
/*
|
|
Packit |
4a16fb |
* while (size-- > 0)
|
|
Packit |
4a16fb |
*/
|
|
Packit |
4a16fb |
"\tdecl %0\n"
|
|
Packit |
4a16fb |
"\tjnz 1b\n"
|
|
Packit |
4a16fb |
"\temms\n"
|
|
Packit |
4a16fb |
"5:"
|
|
Packit |
4a16fb |
"\tmovl %7, %%ebx\n" /* ebx is GOT pointer (-fPIC) */
|
|
Packit |
4a16fb |
|
|
Packit |
4a16fb |
: /* no output regs */
|
|
Packit |
4a16fb |
: "m" (size), "m" (dst), "m" (src),
|
|
Packit |
4a16fb |
"m" (sum), "m" (dst_step), "m" (src_step),
|
|
Packit |
4a16fb |
"m" (sum_step), "m" (old_ebx)
|
|
Packit |
4a16fb |
: "esi", "edi", "edx", "ecx", "eax"
|
|
Packit |
4a16fb |
);
|
|
Packit |
4a16fb |
}

/*
 * for plain i386, 32-bit version (24-bit resolution)
 */
static void MIX_AREAS_32(unsigned int size,
|
|
Packit |
4a16fb |
volatile signed int *dst, signed int *src,
|
|
Packit |
4a16fb |
volatile signed int *sum, size_t dst_step,
|
|
Packit |
4a16fb |
size_t src_step, size_t sum_step)
|
|
Packit |
4a16fb |
{
|
|
Packit |
4a16fb |
unsigned int old_ebx;
|
|
Packit |
4a16fb |
|
|
Packit |
4a16fb |
/*
|
|
Packit |
4a16fb |
* ESI - src
|
|
Packit |
4a16fb |
* EDI - dst
|
|
Packit |
4a16fb |
* EBX - sum
|
|
Packit |
4a16fb |
* ECX - old sample
|
|
Packit |
4a16fb |
* EAX - sample / temporary
|
|
Packit |
4a16fb |
* EDX - temporary
|
|
Packit |
4a16fb |
*/
|
|
Packit |
4a16fb |
__asm__ __volatile__ (
|
|
Packit |
4a16fb |
"\n"
|
|
Packit |
4a16fb |
|
|
Packit |
4a16fb |
"\tmovl %%ebx, %7\n" /* ebx is GOT pointer (-fPIC) */
|
|
Packit |
4a16fb |
/*
|
|
Packit |
4a16fb |
* initialization, load ESI, EDI, EBX registers
|
|
Packit |
4a16fb |
*/
|
|
Packit |
4a16fb |
"\tmovl %1, %%edi\n"
|
|
Packit |
4a16fb |
"\tmovl %2, %%esi\n"
|
|
Packit |
4a16fb |
"\tmovl %3, %%ebx\n"
|
|
Packit |
4a16fb |
"\tcmpl $0, %0\n"
|
|
Packit |
4a16fb |
"\tjnz 1f\n"
|
|
Packit |
4a16fb |
"\tjmp 6f\n"
|
|
Packit |
4a16fb |
|
|
Packit |
4a16fb |
"\t.p2align 4,,15\n"
|
|
Packit |
4a16fb |
|
|
Packit |
4a16fb |
"1:"
|
|
Packit |
4a16fb |
|
|
Packit |
4a16fb |
/*
|
|
Packit |
4a16fb |
* sample = *src;
|
|
Packit |
4a16fb |
* sum_sample = *sum;
|
|
Packit |
4a16fb |
* if (cmpxchg(*dst, 0, 1) == 0)
|
|
Packit |
4a16fb |
* sample -= sum_sample;
|
|
Packit |
4a16fb |
* xadd(*sum, sample);
|
|
Packit |
4a16fb |
*/
|
|
Packit |
4a16fb |
"\tmovl $0, %%eax\n"
|
|
Packit |
4a16fb |
"\tmovl $1, %%ecx\n"
|
|
Packit |
4a16fb |
"\tmovl (%%ebx), %%edx\n"
|
|
Packit |
4a16fb |
"\t" LOCK_PREFIX "cmpxchgl %%ecx, (%%edi)\n"
|
|
Packit |
4a16fb |
"\tjnz 2f\n"
|
|
Packit |
4a16fb |
"\tmovl (%%esi), %%ecx\n"
|
|
Packit |
4a16fb |
/* sample >>= 8 */
|
|
Packit |
4a16fb |
"\tsarl $8, %%ecx\n"
|
|
Packit |
4a16fb |
"\t" XSUB " %%edx, %%ecx\n"
|
|
Packit |
4a16fb |
"\tjmp 21f\n"
|
|
Packit |
4a16fb |
"2:"
|
|
Packit |
4a16fb |
"\tmovl (%%esi), %%ecx\n"
|
|
Packit |
4a16fb |
/* sample >>= 8 */
|
|
Packit |
4a16fb |
"\tsarl $8, %%ecx\n"
|
|
Packit |
4a16fb |
"21:"
|
|
Packit |
4a16fb |
"\t" LOCK_PREFIX XADD " %%ecx, (%%ebx)\n"
|
|
Packit |
4a16fb |
|
|
Packit |
4a16fb |
/*
|
|
Packit |
4a16fb |
* do {
|
|
Packit |
4a16fb |
* sample = old_sample = *sum;
|
|
Packit |
4a16fb |
* saturate(v);
|
|
Packit |
4a16fb |
* *dst = sample;
|
|
Packit |
4a16fb |
* } while (v != *sum);
|
|
Packit |
4a16fb |
*/
|
|
Packit |
4a16fb |
|
|
Packit |
4a16fb |
"3:"
|
|
Packit |
4a16fb |
"\tmovl (%%ebx), %%ecx\n"
|
|
Packit |
4a16fb |
/*
|
|
Packit |
4a16fb |
* if (sample > 0x7fff00)
|
|
Packit |
4a16fb |
*/
|
|
Packit |
4a16fb |
"\tmovl $0x7fffff, %%eax\n"
|
|
Packit |
4a16fb |
"\tcmpl %%eax, %%ecx\n"
|
|
Packit |
4a16fb |
"\tjg 4f\n"
|
|
Packit |
4a16fb |
/*
|
|
Packit |
4a16fb |
* if (sample < -0x800000)
|
|
Packit |
4a16fb |
*/
|
|
Packit |
4a16fb |
"\tmovl $-0x800000, %%eax\n"
|
|
Packit |
4a16fb |
"\tcmpl %%eax, %%ecx\n"
|
|
Packit |
4a16fb |
"\tjl 4f\n"
|
|
Packit |
4a16fb |
"\tmovl %%ecx, %%eax\n"
|
|
Packit |
4a16fb |
"4:"
|
|
Packit |
4a16fb |
/*
|
|
Packit |
4a16fb |
* sample <<= 8;
|
|
Packit |
4a16fb |
*/
|
|
Packit |
4a16fb |
"\tsall $8, %%eax\n"
|
|
Packit |
4a16fb |
"\tmovl %%eax, (%%edi)\n"
|
|
Packit |
4a16fb |
"\tcmpl %%ecx, (%%ebx)\n"
|
|
Packit |
4a16fb |
"\tjnz 3b\n"
|
|
Packit |
4a16fb |
|
|
Packit |
4a16fb |
/*
|
|
Packit |
4a16fb |
* while (size-- > 0)
|
|
Packit |
4a16fb |
*/
|
|
Packit |
4a16fb |
"\tdecl %0\n"
|
|
Packit |
4a16fb |
"\tjz 6f\n"
|
|
Packit |
4a16fb |
"\tadd %4, %%edi\n"
|
|
Packit |
4a16fb |
"\tadd %5, %%esi\n"
|
|
Packit |
4a16fb |
"\tadd %6, %%ebx\n"
|
|
Packit |
4a16fb |
"\tjmp 1b\n"
|
|
Packit |
4a16fb |
|
|
Packit |
4a16fb |
"6:"
|
|
Packit |
4a16fb |
"\tmovl %7, %%ebx\n" /* ebx is GOT pointer (-fPIC) */
|
|
Packit |
4a16fb |
|
|
Packit |
4a16fb |
: /* no output regs */
|
|
Packit |
4a16fb |
: "m" (size), "m" (dst), "m" (src),
|
|
Packit |
4a16fb |
"m" (sum), "m" (dst_step), "m" (src_step),
|
|
Packit |
4a16fb |
"m" (sum_step), "m" (old_ebx)
|
|
Packit |
4a16fb |
: "esi", "edi", "edx", "ecx", "eax"
|
|
Packit |
4a16fb |
);
|
|
Packit |
4a16fb |
}

/*
 * 24-bit version for plain i386
 */
static void MIX_AREAS_24(unsigned int size,
|
|
Packit |
4a16fb |
volatile unsigned char *dst, unsigned char *src,
|
|
Packit |
4a16fb |
volatile signed int *sum, size_t dst_step,
|
|
Packit |
4a16fb |
size_t src_step, size_t sum_step)
|
|
Packit |
4a16fb |
{
|
|
Packit |
4a16fb |
unsigned int old_ebx;
|
|
Packit |
4a16fb |
|
|
Packit |
4a16fb |
/*
|
|
Packit |
4a16fb |
* ESI - src
|
|
Packit |
4a16fb |
* EDI - dst
|
|
Packit |
4a16fb |
* EBX - sum
|
|
Packit |
4a16fb |
* ECX - old sample
|
|
Packit |
4a16fb |
* EAX - sample / temporary
|
|
Packit |
4a16fb |
* EDX - temporary
|
|
Packit |
4a16fb |
*/
|
|
Packit |
4a16fb |
__asm__ __volatile__ (
|
|
Packit |
4a16fb |
"\n"
|
|
Packit |
4a16fb |
|
|
Packit |
4a16fb |
"\tmovl %%ebx, %7\n" /* ebx is GOT pointer (-fPIC) */
|
|
Packit |
4a16fb |
/*
|
|
Packit |
4a16fb |
* initialization, load ESI, EDI, EBX registers
|
|
Packit |
4a16fb |
*/
|
|
Packit |
4a16fb |
"\tmovl %1, %%edi\n"
|
|
Packit |
4a16fb |
"\tmovl %2, %%esi\n"
|
|
Packit |
4a16fb |
"\tmovl %3, %%ebx\n"
|
|
Packit |
4a16fb |
"\tcmpl $0, %0\n"
|
|
Packit |
4a16fb |
"\tjnz 1f\n"
|
|
Packit |
4a16fb |
"\tjmp 6f\n"
|
|
Packit |
4a16fb |
|
|
Packit |
4a16fb |
"\t.p2align 4,,15\n"
|
|
Packit |
4a16fb |
|
|
Packit |
4a16fb |
"1:"
|
|
Packit |
4a16fb |
|
|
Packit |
4a16fb |
/*
|
|
Packit |
4a16fb |
* sample = *src;
|
|
Packit |
4a16fb |
* sum_sample = *sum;
|
|
Packit |
4a16fb |
* if (test_and_set_bit(0, dst) == 0)
|
|
Packit |
4a16fb |
* sample -= sum_sample;
|
|
Packit |
4a16fb |
* *sum += sample;
|
|
Packit |
4a16fb |
*/
|
|
Packit |
4a16fb |
"\tmovsbl 2(%%esi), %%eax\n"
|
|
Packit |
4a16fb |
"\tmovzwl (%%esi), %%ecx\n"
|
|
Packit |
4a16fb |
"\tmovl (%%ebx), %%edx\n"
|
|
Packit |
4a16fb |
"\tsall $16, %%eax\n"
|
|
Packit |
4a16fb |
"\torl %%eax, %%ecx\n"
|
|
Packit |
4a16fb |
"\t" LOCK_PREFIX "btsw $0, (%%edi)\n"
|
|
Packit |
4a16fb |
"\tjc 2f\n"
|
|
Packit |
4a16fb |
"\t" XSUB " %%edx, %%ecx\n"
|
|
Packit |
4a16fb |
"2:"
|
|
Packit |
4a16fb |
"\t" LOCK_PREFIX XADD " %%ecx, (%%ebx)\n"
|
|
Packit |
4a16fb |
|
|
Packit |
4a16fb |
/*
|
|
Packit |
4a16fb |
* do {
|
|
Packit |
4a16fb |
* sample = old_sample = *sum;
|
|
Packit |
4a16fb |
* saturate(sample);
|
|
Packit |
4a16fb |
* *dst = sample | 1;
|
|
Packit |
4a16fb |
* } while (old_sample != *sum);
|
|
Packit |
4a16fb |
*/
|
|
Packit |
4a16fb |
|
|
Packit |
4a16fb |
"3:"
|
|
Packit |
4a16fb |
"\tmovl (%%ebx), %%ecx\n"
|
|
Packit |
4a16fb |
/*
|
|
Packit |
4a16fb |
* if (sample > 0x7fffff)
|
|
Packit |
4a16fb |
*/
|
|
Packit |
4a16fb |
"\tmovl $0x7fffff, %%eax\n"
|
|
Packit |
4a16fb |
"\tcmpl %%eax, %%ecx\n"
|
|
Packit |
4a16fb |
"\tjg 4f\n"
|
|
Packit |
4a16fb |
/*
|
|
Packit |
4a16fb |
* if (sample < -0x7fffff)
|
|
Packit |
4a16fb |
*/
|
|
Packit |
4a16fb |
"\tmovl $-0x7fffff, %%eax\n"
|
|
Packit |
4a16fb |
"\tcmpl %%eax, %%ecx\n"
|
|
Packit |
4a16fb |
"\tjl 4f\n"
|
|
Packit |
4a16fb |
"\tmovl %%ecx, %%eax\n"
|
|
Packit |
4a16fb |
"\torl $1, %%eax\n"
|
|
Packit |
4a16fb |
"4:"
|
|
Packit |
4a16fb |
"\tmovw %%ax, (%%edi)\n"
|
|
Packit |
4a16fb |
"\tshrl $16, %%eax\n"
|
|
Packit |
4a16fb |
"\tmovb %%al, 2(%%edi)\n"
|
|
Packit |
4a16fb |
"\tcmpl %%ecx, (%%ebx)\n"
|
|
Packit |
4a16fb |
"\tjnz 3b\n"
|
|
Packit |
4a16fb |
|
|
Packit |
4a16fb |
/*
|
|
Packit |
4a16fb |
* while (size-- > 0)
|
|
Packit |
4a16fb |
*/
|
|
Packit |
4a16fb |
"\tdecl %0\n"
|
|
Packit |
4a16fb |
"\tjz 6f\n"
|
|
Packit |
4a16fb |
"\tadd %4, %%edi\n"
|
|
Packit |
4a16fb |
"\tadd %5, %%esi\n"
|
|
Packit |
4a16fb |
"\tadd %6, %%ebx\n"
|
|
Packit |
4a16fb |
"\tjmp 1b\n"
|
|
Packit |
4a16fb |
|
|
Packit |
4a16fb |
"6:"
|
|
Packit |
4a16fb |
"\tmovl %7, %%ebx\n" /* ebx is GOT pointer (-fPIC) */
|
|
Packit |
4a16fb |
|
|
Packit |
4a16fb |
: /* no output regs */
|
|
Packit |
4a16fb |
: "m" (size), "m" (dst), "m" (src),
|
|
Packit |
4a16fb |
"m" (sum), "m" (dst_step), "m" (src_step),
|
|
Packit |
4a16fb |
"m" (sum_step), "m" (old_ebx)
|
|
Packit |
4a16fb |
: "esi", "edi", "edx", "ecx", "eax"
|
|
Packit |
4a16fb |
);
|
|
Packit |
4a16fb |
}

/*
 * 24-bit version for Pentium Pro/II
 */
static void MIX_AREAS_24_CMOV(unsigned int size,
|
|
Packit |
4a16fb |
volatile unsigned char *dst, unsigned char *src,
|
|
Packit |
4a16fb |
volatile signed int *sum, size_t dst_step,
|
|
Packit |
4a16fb |
size_t src_step, size_t sum_step)
|
|
Packit |
4a16fb |
{
|
|
Packit |
4a16fb |
unsigned int old_ebx;
|
|
Packit |
4a16fb |
|
|
Packit |
4a16fb |
/*
|
|
Packit |
4a16fb |
* ESI - src
|
|
Packit |
4a16fb |
* EDI - dst
|
|
Packit |
4a16fb |
* EBX - sum
|
|
Packit |
4a16fb |
* ECX - old sample
|
|
Packit |
4a16fb |
* EAX - sample / temporary
|
|
Packit |
4a16fb |
* EDX - temporary
|
|
Packit |
4a16fb |
*/
|
|
Packit |
4a16fb |
__asm__ __volatile__ (
|
|
Packit |
4a16fb |
"\n"
|
|
Packit |
4a16fb |
|
|
Packit |
4a16fb |
"\tmovl %%ebx, %7\n" /* ebx is GOT pointer (-fPIC) */
|
|
Packit |
4a16fb |
/*
|
|
Packit |
4a16fb |
* initialization, load ESI, EDI, EBX registers
|
|
Packit |
4a16fb |
*/
|
|
Packit |
4a16fb |
"\tmovl %1, %%edi\n"
|
|
Packit |
4a16fb |
"\tmovl %2, %%esi\n"
|
|
Packit |
4a16fb |
"\tmovl %3, %%ebx\n"
|
|
Packit |
4a16fb |
"\tcmpl $0, %0\n"
|
|
Packit |
4a16fb |
"\tjz 6f\n"
|
|
Packit |
4a16fb |
|
|
Packit |
4a16fb |
"\t.p2align 4,,15\n"
|
|
Packit |
4a16fb |
|
|
Packit |
4a16fb |
"1:"
|
|
Packit |
4a16fb |
|
|
Packit |
4a16fb |
/*
|
|
Packit |
4a16fb |
* sample = *src;
|
|
Packit |
4a16fb |
* sum_sample = *sum;
|
|
Packit |
4a16fb |
* if (test_and_set_bit(0, dst) == 0)
|
|
Packit |
4a16fb |
* sample -= sum_sample;
|
|
Packit |
4a16fb |
* *sum += sample;
|
|
Packit |
4a16fb |
*/
|
|
Packit |
4a16fb |
"\tmovsbl 2(%%esi), %%eax\n"
|
|
Packit |
4a16fb |
"\tmovzwl (%%esi), %%ecx\n"
|
|
Packit |
4a16fb |
"\tmovl (%%ebx), %%edx\n"
|
|
Packit |
4a16fb |
"\tsall $16, %%eax\n"
|
|
Packit |
4a16fb |
"\t" LOCK_PREFIX "btsw $0, (%%edi)\n"
|
|
Packit |
4a16fb |
"\tleal (%%ecx,%%eax,1), %%ecx\n"
|
|
Packit |
4a16fb |
"\tjc 2f\n"
|
|
Packit |
4a16fb |
"\t" XSUB " %%edx, %%ecx\n"
|
|
Packit |
4a16fb |
"2:"
|
|
Packit |
4a16fb |
"\t" LOCK_PREFIX XADD " %%ecx, (%%ebx)\n"
|
|
Packit |
4a16fb |
|
|
Packit |
4a16fb |
/*
|
|
Packit |
4a16fb |
* do {
|
|
Packit |
4a16fb |
* sample = old_sample = *sum;
|
|
Packit |
4a16fb |
* saturate(sample);
|
|
Packit |
4a16fb |
* *dst = sample | 1;
|
|
Packit |
4a16fb |
* } while (old_sample != *sum);
|
|
Packit |
4a16fb |
*/
|
|
Packit |
4a16fb |
|
|
Packit |
4a16fb |
"3:"
|
|
Packit |
4a16fb |
"\tmovl (%%ebx), %%ecx\n"
|
|
Packit |
4a16fb |
|
|
Packit |
4a16fb |
"\tmovl $0x7fffff, %%eax\n"
|
|
Packit |
4a16fb |
"\tmovl $-0x7fffff, %%edx\n"
|
|
Packit |
4a16fb |
"\tcmpl %%eax, %%ecx\n"
|
|
Packit |
4a16fb |
"\tcmovng %%ecx, %%eax\n"
|
|
Packit |
4a16fb |
"\tcmpl %%edx, %%ecx\n"
|
|
Packit |
4a16fb |
"\tcmovl %%edx, %%eax\n"
|
|
Packit |
4a16fb |
|
|
Packit |
4a16fb |
"\torl $1, %%eax\n"
|
|
Packit |
4a16fb |
"\tmovw %%ax, (%%edi)\n"
|
|
Packit |
4a16fb |
"\tshrl $16, %%eax\n"
|
|
Packit |
4a16fb |
"\tmovb %%al, 2(%%edi)\n"
|
|
Packit |
4a16fb |
|
|
Packit |
4a16fb |
"\tcmpl %%ecx, (%%ebx)\n"
|
|
Packit |
4a16fb |
"\tjnz 3b\n"
|
|
Packit |
4a16fb |
|
|
Packit |
4a16fb |
/*
|
|
Packit |
4a16fb |
* while (size-- > 0)
|
|
Packit |
4a16fb |
*/
|
|
Packit |
4a16fb |
"\tadd %4, %%edi\n"
|
|
Packit |
4a16fb |
"\tadd %5, %%esi\n"
|
|
Packit |
4a16fb |
"\tadd %6, %%ebx\n"
|
|
Packit |
4a16fb |
"\tdecl %0\n"
|
|
Packit |
4a16fb |
"\tjnz 1b\n"
|
|
Packit |
4a16fb |
|
|
Packit |
4a16fb |
"6:"
|
|
Packit |
4a16fb |
"\tmovl %7, %%ebx\n" /* ebx is GOT pointer (-fPIC) */
|
|
Packit |
4a16fb |
|
|
Packit |
4a16fb |
: /* no output regs */
|
|
Packit |
4a16fb |
: "m" (size), "m" (dst), "m" (src),
|
|
Packit |
4a16fb |
"m" (sum), "m" (dst_step), "m" (src_step),
|
|
Packit |
4a16fb |
"m" (sum_step), "m" (old_ebx)
|
|
Packit |
4a16fb |
: "esi", "edi", "edx", "ecx", "eax"
|
|
Packit |
4a16fb |
);
|
|
Packit |
4a16fb |
}
|