/* src/libmpg123/synth_arm_accurate.S */

/*
	synth_arm_accurate: ARM optimized synth (ISO compliant 16bit output version)

	copyright 1995-2009 by the mpg123 project - free software under the terms of the LGPL 2.1
	see COPYING and AUTHORS files in distribution or http://mpg123.org
	initially written by Taihei Monma
*/

#include "mangle.h"

/*
	Register aliases used by the code in this file.

	WINDOW, B0 and SAMPLES name r0, r1 and r2, which hold the three
	pointer arguments of synth_1to1_arm_accurate_asm.
	REG_CLIP (r4) is the running count of clipped samples.
	REG_MAX (r14, the link register, reused after it has been pushed)
	holds the upper clipping bound.
*/
#define WINDOW r0
#define B0 r1
#define SAMPLES r2
#define REG_CLIP r4
#define REG_MAX r14

/*
	int synth_1to1_arm_accurate_asm(real *window, real *b0, short *samples, int bo1);
	return value: number of clipped samples

	Register use (32-bit ARM state):
	  r0  WINDOW    window pointer (argument 1); first moved to
	                window + (16 - bo1) words, then only post-incremented
	  r1  B0        b0 pointer (argument 2)
	  r2  SAMPLES   output pointer (argument 3); each store writes one
	                halfword and, except for the last one, advances by
	                4 bytes, so every other 16-bit slot is written
	                (presumably interleaved stereo - confirm with caller)
	  r3            bo1 (argument 4) on entry, afterwards the loop counter
	  r4  REG_CLIP  number of clipped samples, returned in r0
	  r5/r6         window/b0 operand pair for the r8:r7 accumulator
	  r9/r10        window/b0 operand pair for the r12:r11 accumulator
	  r8:r7         64-bit accumulator (lo:hi) of the even-numbered products
	  r12:r11       64-bit accumulator (lo:hi) of the odd-numbered products
	  r14 REG_MAX   upper clip bound 0x3FFF8000

	window and b0 are read as signed 32-bit words (smull/smlal).
	Each operand pair is loaded before the multiply that consumes the
	previously loaded pair, so the load/multiply order below is
	deliberate and must be kept.

	32 samples are produced: 16 by the first loop, 1 by the straight-line
	middle block, 14 by the second loop and 1 by the unrolled tail.
	For every sample, bits 24..55 of both accumulators are combined,
	clamped to [-0x40000000, 0x3FFF8000], shifted right by 15 with
	rounding and stored as a 16-bit value.

	r4-r11 and lr are saved on entry and restored on exit; r12 is used
	without being saved.
*/

	.code 32

	.text
	ALIGN4
	.globl ASM_NAME(synth_1to1_arm_accurate_asm)
#ifdef __ELF__
	.type ASM_NAME(synth_1to1_arm_accurate_asm), %function
#endif
ASM_NAME(synth_1to1_arm_accurate_asm):
	stmfd	sp!, {r4, r5, r6, r7, r8, r9, r10, r11, lr}

	/* WINDOW = window + 16 words - bo1 words */
	add		WINDOW, WINDOW, #64
	sub		WINDOW, WINDOW, r3, lsl #2
	/* clip counter starts at zero (plain move, does not read the old r4) */
	mov		REG_CLIP, #0
	/* REG_MAX = 0x40000000 - 0x8000 = 0x3FFF8000 */
	mov		REG_MAX, #1073741824
	sub		REG_MAX, REG_MAX, #32768

	/* first loop: 16 samples */
	mov		r3, #16

	/* preload element 0 of the first row */
	ldr		r5, [WINDOW], #4
	ldr		r6, [B0], #4
1:
	/*
		One output sample from 16 products: even elements go to r8:r7,
		odd elements to r12:r11.
		Per iteration WINDOW advances 128 bytes (14*4 + 68 + 4) and
		B0 advances 64 bytes (16*4).
	*/
	ldr		r9, [WINDOW], #4
	ldr		r10, [B0], #4
	smull	r8, r7, r5, r6
	ldr		r5, [WINDOW], #4
	ldr		r6, [B0], #4
	smull	r12, r11, r9, r10
	ldr		r9, [WINDOW], #4
	ldr		r10, [B0], #4
	smlal	r8, r7, r5, r6
	ldr		r5, [WINDOW], #4
	ldr		r6, [B0], #4
	smlal	r12, r11, r9, r10
	ldr		r9, [WINDOW], #4
	ldr		r10, [B0], #4
	smlal	r8, r7, r5, r6
	ldr		r5, [WINDOW], #4
	ldr		r6, [B0], #4
	smlal	r12, r11, r9, r10
	ldr		r9, [WINDOW], #4
	ldr		r10, [B0], #4
	smlal	r8, r7, r5, r6
	ldr		r5, [WINDOW], #4
	ldr		r6, [B0], #4
	smlal	r12, r11, r9, r10
	ldr		r9, [WINDOW], #4
	ldr		r10, [B0], #4
	smlal	r8, r7, r5, r6
	ldr		r5, [WINDOW], #4
	ldr		r6, [B0], #4
	smlal	r12, r11, r9, r10
	ldr		r9, [WINDOW], #4
	ldr		r10, [B0], #4
	smlal	r8, r7, r5, r6
	ldr		r5, [WINDOW], #4
	ldr		r6, [B0], #4
	smlal	r12, r11, r9, r10
	ldr		r9, [WINDOW], #4
	ldr		r10, [B0], #4
	smlal	r8, r7, r5, r6
	ldr		r5, [WINDOW], #4
	ldr		r6, [B0], #4
	smlal	r12, r11, r9, r10
	/* element 15: WINDOW then skips 16 extra words to reach the next row */
	ldr		r9, [WINDOW], #68
	ldr		r10, [B0], #4
	smlal	r8, r7, r5, r6
	/* preload element 0 of the next row */
	ldr		r5, [WINDOW], #4
	ldr		r6, [B0], #4
	smlal	r12, r11, r9, r10

	/* take bits 24..55 of each 64-bit sum; sample = even sum - odd sum */
	mov		r8, r8, lsr #24
	mov		r12, r12, lsr #24
	orr		r8, r8, r7, lsl #8
	orr		r12, r12, r11, lsl #8
	sub		r8, r8, r12

	/* clamp to [-0x40000000, 0x3FFF8000], counting each clamped sample */
	cmp		r8, REG_MAX
	movgt	r8, REG_MAX
	addgt	REG_CLIP, REG_CLIP, #1
	cmp		r8, #-1073741824
	movlt	r8, #-1073741824
	addlt	REG_CLIP, REG_CLIP, #1
	/* >> 15 with rounding: movs leaves the last bit shifted out in C, adc adds it */
	movs	r8, r8, asr #15
	adc		r8, r8, #0
	strh	r8, [SAMPLES], #4

	subs	r3, r3, #1
	bne		1b

	/*
		Middle sample: only the even elements 0, 2, ..., 14 (8 products).
		Element 0 is already in r5/r6; skip element 1, then step by 2 words.
	*/
	add		WINDOW, WINDOW, #4
	add		B0, B0, #4

	ldr		r9, [WINDOW], #8
	ldr		r10, [B0], #8
	smull	r8, r7, r5, r6
	ldr		r5, [WINDOW], #8
	ldr		r6, [B0], #8
	smull	r12, r11, r9, r10
	ldr		r9, [WINDOW], #8
	ldr		r10, [B0], #8
	smlal	r8, r7, r5, r6
	ldr		r5, [WINDOW], #8
	ldr		r6, [B0], #8
	smlal	r12, r11, r9, r10
	ldr		r9, [WINDOW], #8
	ldr		r10, [B0], #8
	smlal	r8, r7, r5, r6
	ldr		r5, [WINDOW], #8
	ldr		r6, [B0], #8
	smlal	r12, r11, r9, r10
	/* element 14: WINDOW moves on to the next row, B0 back to the previous row */
	ldr		r9, [WINDOW], #72
	ldr		r10, [B0], #-120
	smlal	r8, r7, r5, r6
	/* preload element 0 for the second loop */
	ldr		r5, [WINDOW], #4
	ldr		r6, [B0], #4
	smlal	r12, r11, r9, r10

	/* from here on the two partial sums are added instead of subtracted */
	/* NOTE(review): presumably this part of the window table is stored with the sign already applied - confirm in the table setup code */
	mov		r8, r8, lsr #24
	mov		r12, r12, lsr #24
	orr		r8, r8, r7, lsl #8
	orr		r12, r12, r11, lsl #8
	add		r8, r8, r12

	/* clamp, count, round and store as above */
	cmp		r8, REG_MAX
	movgt	r8, REG_MAX
	addgt	REG_CLIP, REG_CLIP, #1
	cmp		r8, #-1073741824
	movlt	r8, #-1073741824
	addlt	REG_CLIP, REG_CLIP, #1
	movs	r8, r8, asr #15
	adc		r8, r8, #0
	strh	r8, [SAMPLES], #4

	/* second loop: 14 samples */
	mov		r3, #14

1:
	/*
		16 products per sample.
		Per iteration WINDOW advances 128 bytes while B0 moves back
		64 bytes (14*4 - 124 + 4), i.e. the b0 rows are walked in
		reverse order.
	*/
	ldr		r9, [WINDOW], #4
	ldr		r10, [B0], #4
	smull	r8, r7, r5, r6
	ldr		r5, [WINDOW], #4
	ldr		r6, [B0], #4
	smull	r12, r11, r9, r10
	ldr		r9, [WINDOW], #4
	ldr		r10, [B0], #4
	smlal	r8, r7, r5, r6
	ldr		r5, [WINDOW], #4
	ldr		r6, [B0], #4
	smlal	r12, r11, r9, r10
	ldr		r9, [WINDOW], #4
	ldr		r10, [B0], #4
	smlal	r8, r7, r5, r6
	ldr		r5, [WINDOW], #4
	ldr		r6, [B0], #4
	smlal	r12, r11, r9, r10
	ldr		r9, [WINDOW], #4
	ldr		r10, [B0], #4
	smlal	r8, r7, r5, r6
	ldr		r5, [WINDOW], #4
	ldr		r6, [B0], #4
	smlal	r12, r11, r9, r10
	ldr		r9, [WINDOW], #4
	ldr		r10, [B0], #4
	smlal	r8, r7, r5, r6
	ldr		r5, [WINDOW], #4
	ldr		r6, [B0], #4
	smlal	r12, r11, r9, r10
	ldr		r9, [WINDOW], #4
	ldr		r10, [B0], #4
	smlal	r8, r7, r5, r6
	ldr		r5, [WINDOW], #4
	ldr		r6, [B0], #4
	smlal	r12, r11, r9, r10
	ldr		r9, [WINDOW], #4
	ldr		r10, [B0], #4
	smlal	r8, r7, r5, r6
	ldr		r5, [WINDOW], #4
	ldr		r6, [B0], #4
	smlal	r12, r11, r9, r10
	/* element 15: WINDOW to the next row, B0 to element 0 of the previous row */
	ldr		r9, [WINDOW], #68
	ldr		r10, [B0], #-124
	smlal	r8, r7, r5, r6
	/* preload element 0 of the next row */
	ldr		r5, [WINDOW], #4
	ldr		r6, [B0], #4
	smlal	r12, r11, r9, r10

	mov		r8, r8, lsr #24
	mov		r12, r12, lsr #24
	orr		r8, r8, r7, lsl #8
	orr		r12, r12, r11, lsl #8
	add		r8, r8, r12

	cmp		r8, REG_MAX
	movgt	r8, REG_MAX
	addgt	REG_CLIP, REG_CLIP, #1
	cmp		r8, #-1073741824
	movlt	r8, #-1073741824
	addlt	REG_CLIP, REG_CLIP, #1
	movs	r8, r8, asr #15
	adc		r8, r8, #0
	strh	r8, [SAMPLES], #4

	subs	r3, r3, #1
	bne		1b

	/*
		Last sample: same 16 products as one second-loop iteration, but
		nothing is preloaded for a following row and SAMPLES is not advanced.
	*/
	ldr		r9, [WINDOW], #4
	ldr		r10, [B0], #4
	smull	r8, r7, r5, r6
	ldr		r5, [WINDOW], #4
	ldr		r6, [B0], #4
	smull	r12, r11, r9, r10
	ldr		r9, [WINDOW], #4
	ldr		r10, [B0], #4
	smlal	r8, r7, r5, r6
	ldr		r5, [WINDOW], #4
	ldr		r6, [B0], #4
	smlal	r12, r11, r9, r10
	ldr		r9, [WINDOW], #4
	ldr		r10, [B0], #4
	smlal	r8, r7, r5, r6
	ldr		r5, [WINDOW], #4
	ldr		r6, [B0], #4
	smlal	r12, r11, r9, r10
	ldr		r9, [WINDOW], #4
	ldr		r10, [B0], #4
	smlal	r8, r7, r5, r6
	ldr		r5, [WINDOW], #4
	ldr		r6, [B0], #4
	smlal	r12, r11, r9, r10
	ldr		r9, [WINDOW], #4
	ldr		r10, [B0], #4
	smlal	r8, r7, r5, r6
	ldr		r5, [WINDOW], #4
	ldr		r6, [B0], #4
	smlal	r12, r11, r9, r10
	ldr		r9, [WINDOW], #4
	ldr		r10, [B0], #4
	smlal	r8, r7, r5, r6
	ldr		r5, [WINDOW], #4
	ldr		r6, [B0], #4
	smlal	r12, r11, r9, r10
	ldr		r9, [WINDOW], #4
	ldr		r10, [B0], #4
	smlal	r8, r7, r5, r6
	ldr		r5, [WINDOW], #4
	ldr		r6, [B0], #4
	smlal	r12, r11, r9, r10
	ldr		r9, [WINDOW], #68
	ldr		r10, [B0], #-124
	smlal	r8, r7, r5, r6
	smlal	r12, r11, r9, r10

	mov		r8, r8, lsr #24
	mov		r12, r12, lsr #24
	orr		r8, r8, r7, lsl #8
	orr		r12, r12, r11, lsl #8
	add		r8, r8, r12

	cmp		r8, REG_MAX
	movgt	r8, REG_MAX
	addgt	REG_CLIP, REG_CLIP, #1
	cmp		r8, #-1073741824
	movlt	r8, #-1073741824
	addlt	REG_CLIP, REG_CLIP, #1
	movs	r8, r8, asr #15
	adc		r8, r8, #0
	strh	r8, [SAMPLES]

	/* return the clip count */
	mov		r0, REG_CLIP

	/* restore r4-r11 and return by loading the saved lr into pc */
	ldmfd   sp!, {r4, r5, r6, r7, r8, r9, r10, r11, pc}

/*
	NONEXEC_STACK is not defined in this file.
	NOTE(review): presumably a mangle.h macro that marks this object as not
	requiring an executable stack - confirm in mangle.h.
*/
NONEXEC_STACK