/*
	synth_stereo_neon_accurate: ARM NEON optimized synth (stereo specific, MPEG compliant 16-bit output version)

	copyright 1995-2010 by the mpg123 project - free software under the terms of the LGPL 2.1
	see COPYING and AUTHORS files in distribution or http://mpg123.org
	initially written by Taihei Monma
*/

#include "mangle.h"

/* Register aliases for the four pointer arguments (r0-r3, in prototype order). */
#define WINDOW r0
#define B0L r1
#define B0R r2
#define SAMPLES r3

/*
	int synth_1to1_s_neon_accurate_asm(real *window, real *b0l, real *b0r, real *samples, int bo1);
	return value: number of clipped samples
*/

	/* ARM (not Thumb) encoding; the .fpu directive is skipped for Apple's assembler. */
	.code 32
#ifndef __APPLE__
	.fpu neon
#endif

	.text
	.globl ASM_NAME(synth_1to1_s_neon_accurate_asm)
#ifdef __ELF__
	.type ASM_NAME(synth_1to1_s_neon_accurate_asm), %function
#endif
/*
	synth_1to1_s_neon_accurate_asm

	In:   r0 (WINDOW)  = window  - 32-bit float taps, loaded without alignment hints
	      r1 (B0L)     = b0l     - left channel input, loaded with :128 hints
	                               (must be 16-byte aligned)
	      r2 (B0R)     = b0r     - right channel input, same alignment requirement
	      r3 (SAMPLES) = samples - output pointer; 16 stores of four signed 16-bit
	                               values each (L, R, L, R), 128 bytes in total
	      stack        = bo1     - fifth argument, fetched from the caller's stack
	Out:  r0 = number of output values whose magnitude exceeded the clip threshold
	Saved/restored: r4-r7, q4-q7.  Clobbered: r0-r3, q0-q3, q8-q15, flags.

	Register roles inside the loops:
	      r4  = loop counter (4 iterations per loop, 4 L/R pairs per iteration)
	      r5  = 128, byte step between the two window rows used together
	      r6  = +64 (first loop) / -64 (second loop), byte step between b0 rows
	      r7  = caller-side sp, restored in the epilogue
	      q12/q13 = left/right accumulators for one output pair
	      q14/q15 = left/right accumulators for the following output pair
	      [sp]    = 4 x u32 per-lane clip counters (all NEON registers are in use,
	                so the counters live in a 16-byte aligned stack slot)
*/
ASM_NAME(synth_1to1_s_neon_accurate_asm):
	push		{r4-r7, lr}
	vpush		{q4-q7}
	/* 5 core registers (20 bytes) + 4 q registers (64 bytes) were pushed,
	   so the first stack argument (bo1) now sits at sp + 84. */
	ldr			r4, [sp, #84]
	mov			r7, sp
	/* Reserve the clip-counter slot and align it so the :128 hints below hold. */
	sub			sp, sp, #16
	bic			sp, #0xff
	/* Start the clip counters at zero.  Without this the first load of [sp]
	   in the loop would pick up stale stack contents and the return value
	   would be garbage.  q15 is caller-saved and not yet live here. */
	vmov.i32	q15, #0
	vst1.32		{q15}, [sp, :128]

	/* WINDOW = window + (16 - bo1) floats */
	add			WINDOW, WINDOW, #64
	sub			WINDOW, WINDOW, r4, lsl #2

	mov			r4, #4
	mov			r5, #128
	mov			r6, #64
1:
	/* First loop: b0 rows are walked forwards (r6 = +64). */

	/* Pair A, taps 0-7 of two consecutive rows. */
	vld1.32		{q0,q1}, [WINDOW], r5
	vld1.32		{q2,q3}, [WINDOW]
	vld1.32		{q4,q5}, [B0L, :128], r6
	vld1.32		{q6,q7}, [B0R, :128], r6
	vld1.32		{q8,q9}, [B0L, :128]
	vld1.32		{q10,q11}, [B0R, :128]
	vmul.f32	q12, q0, q4
	vmul.f32	q13, q0, q6
	vmul.f32	q14, q2, q8
	vmul.f32	q15, q2, q10
	vmla.f32	q12, q1, q5
	vmla.f32	q13, q1, q7
	vmla.f32	q14, q3, q9
	vmla.f32	q15, q3, q11
	/* Step back to taps 8-15 of the first row (window: 128-96, b0: 64-32). */
	sub			WINDOW, WINDOW, #96
	sub			B0L, B0L, #32
	sub			B0R, B0R, #32
	vld1.32		{q0,q1}, [WINDOW], r5
	vld1.32		{q2,q3}, [WINDOW]
	vld1.32		{q4,q5}, [B0L, :128], r6
	vld1.32		{q6,q7}, [B0R, :128], r6
	/* Writeback (+32) leaves B0L/B0R at the start of the row after next. */
	vld1.32		{q8,q9}, [B0L, :128]!
	vld1.32		{q10,q11}, [B0R, :128]!
	vmla.f32	q12, q0, q4
	vmla.f32	q13, q0, q6
	vmla.f32	q14, q2, q8
	vmla.f32	q15, q2, q10
	/* WINDOW now points at the start of the row after next as well. */
	add			WINDOW, WINDOW, #96
	vmla.f32	q12, q1, q5
	vmla.f32	q13, q1, q7
	vmla.f32	q14, q3, q9
	vmla.f32	q15, q3, q11

	/* Pair B, taps 0-7: issue the loads before reducing pair A. */
	vld1.32		{q0,q1}, [WINDOW], r5
	vld1.32		{q2,q3}, [WINDOW]
	vld1.32		{q4,q5}, [B0L, :128], r6
	vld1.32		{q6,q7}, [B0R, :128], r6
	vld1.32		{q8,q9}, [B0L, :128]
	vld1.32		{q10,q11}, [B0R, :128]
	/* Horizontal sums of pair A: q12 ends up as {L0, R0, L1, R1}. */
	vpadd.f32	d24, d24, d25
	vpadd.f32	d25, d26, d27
	vpadd.f32	d26, d28, d29
	vpadd.f32	d27, d30, d31
	/* q15 = 0x4b400000 (1.5 * 2^23): adding it leaves the rounded integer
	   value in the low mantissa bits.
	   q14 = 0x46ffffff: the largest float below 32768.0, the clip threshold. */
	vmov.i32	q15, #0x4b000000
	vmvn.i32	q14, #0xb9000000
	vorr.i32	q15, #0x00400000
	vpadd.f32	d24, d24, d25
	vpadd.f32	d25, d26, d27
	/* Per-lane mask of |value| > threshold. */
	vacgt.f32	q14, q12, q14
	vadd.f32	q13, q12, q15
	vld1.32		{q15}, [sp, :128]
	/* Mask -> 0/1, then add to the running per-lane clip counters. */
	vshr.u32	q14, q14, #31
	/* Drop exponent/top bits, then sign-extend and saturate to 16 bits. */
	vshl.i32	q13, q13, #10
	vadd.i32	q14, q14, q15
	vqshrn.s32	d26, q13, #10
	vst1.32		{q14}, [sp, :128]
	vst1.16		{d26}, [SAMPLES]!

	/* Pair B, same sequence as pair A. */
	vmul.f32	q12, q0, q4
	vmul.f32	q13, q0, q6
	vmul.f32	q14, q2, q8
	vmul.f32	q15, q2, q10
	vmla.f32	q12, q1, q5
	vmla.f32	q13, q1, q7
	vmla.f32	q14, q3, q9
	vmla.f32	q15, q3, q11
	sub			WINDOW, WINDOW, #96
	sub			B0L, B0L, #32
	sub			B0R, B0R, #32
	vld1.32		{q0,q1}, [WINDOW], r5
	vld1.32		{q2,q3}, [WINDOW]
	vld1.32		{q4,q5}, [B0L, :128], r6
	vld1.32		{q6,q7}, [B0R, :128], r6
	vld1.32		{q8,q9}, [B0L, :128]!
	vld1.32		{q10,q11}, [B0R, :128]!
	vmla.f32	q12, q0, q4
	vmla.f32	q13, q0, q6
	vmla.f32	q14, q2, q8
	vmla.f32	q15, q2, q10
	add			WINDOW, WINDOW, #96
	vmla.f32	q12, q1, q5
	vmla.f32	q13, q1, q7
	vmla.f32	q14, q3, q9
	vmla.f32	q15, q3, q11
	vpadd.f32	d24, d24, d25
	vpadd.f32	d25, d26, d27
	vpadd.f32	d26, d28, d29
	vpadd.f32	d27, d30, d31
	vmov.i32	q15, #0x4b000000
	vmvn.i32	q14, #0xb9000000
	vorr.i32	q15, #0x00400000
	vpadd.f32	d24, d24, d25
	vpadd.f32	d25, d26, d27
	vacgt.f32	q14, q12, q14
	vadd.f32	q13, q12, q15
	vld1.32		{q15}, [sp, :128]
	vshr.u32	q14, q14, #31
	vshl.i32	q13, q13, #10
	vadd.i32	q14, q14, q15
	vqshrn.s32	d26, q13, #10
	vst1.32		{q14}, [sp, :128]
	vst1.16		{d26}, [SAMPLES]!

	subs		r4, r4, #1
	bne			1b

	/* Second loop: same arithmetic, but b0 rows are walked backwards
	   (r6 = -64), so the taps 8-15 fix-ups change sign and size. */
	mov			r4, #4
	mov			r6, #-64
1:
	/* Pair A, taps 0-7. */
	vld1.32		{q0,q1}, [WINDOW], r5
	vld1.32		{q2,q3}, [WINDOW]
	vld1.32		{q4,q5}, [B0L, :128], r6
	vld1.32		{q6,q7}, [B0R, :128], r6
	vld1.32		{q8,q9}, [B0L, :128]
	vld1.32		{q10,q11}, [B0R, :128]
	vmul.f32	q12, q0, q4
	vmul.f32	q13, q0, q6
	vmul.f32	q14, q2, q8
	vmul.f32	q15, q2, q10
	vmla.f32	q12, q1, q5
	vmla.f32	q13, q1, q7
	vmla.f32	q14, q3, q9
	vmla.f32	q15, q3, q11
	/* Taps 8-15: b0 went back one row (-64), so +96 lands 32 bytes into
	   the row this pair started on. */
	sub			WINDOW, WINDOW, #96
	add			B0L, B0L, #96
	add			B0R, B0R, #96
	vld1.32		{q0,q1}, [WINDOW], r5
	vld1.32		{q2,q3}, [WINDOW]
	vld1.32		{q4,q5}, [B0L, :128], r6
	vld1.32		{q6,q7}, [B0R, :128], r6
	vld1.32		{q8,q9}, [B0L, :128]
	vld1.32		{q10,q11}, [B0R, :128]
	vmla.f32	q12, q0, q4
	vmla.f32	q13, q0, q6
	vmla.f32	q14, q2, q8
	vmla.f32	q15, q2, q10
	/* Window moves on two rows; b0 ends up two rows before this pair's start. */
	add			WINDOW, WINDOW, #96
	sub			B0L, B0L, #96
	sub			B0R, B0R, #96
	vmla.f32	q12, q1, q5
	vmla.f32	q13, q1, q7
	vmla.f32	q14, q3, q9
	vmla.f32	q15, q3, q11

	/* Pair B, taps 0-7: issue the loads before reducing pair A. */
	vld1.32		{q0,q1}, [WINDOW], r5
	vld1.32		{q2,q3}, [WINDOW]
	vld1.32		{q4,q5}, [B0L, :128], r6
	vld1.32		{q6,q7}, [B0R, :128], r6
	vld1.32		{q8,q9}, [B0L, :128]
	vld1.32		{q10,q11}, [B0R, :128]
	/* Reduce, count clipping, round, saturate and store pair A. */
	vpadd.f32	d24, d24, d25
	vpadd.f32	d25, d26, d27
	vpadd.f32	d26, d28, d29
	vpadd.f32	d27, d30, d31
	vmov.i32	q15, #0x4b000000
	vmvn.i32	q14, #0xb9000000
	vorr.i32	q15, #0x00400000
	vpadd.f32	d24, d24, d25
	vpadd.f32	d25, d26, d27
	vacgt.f32	q14, q12, q14
	vadd.f32	q13, q12, q15
	vld1.32		{q15}, [sp, :128]
	vshr.u32	q14, q14, #31
	vshl.i32	q13, q13, #10
	vadd.i32	q14, q14, q15
	vqshrn.s32	d26, q13, #10
	vst1.32		{q14}, [sp, :128]
	vst1.16		{d26}, [SAMPLES]!

	/* Pair B, same sequence as pair A. */
	vmul.f32	q12, q0, q4
	vmul.f32	q13, q0, q6
	vmul.f32	q14, q2, q8
	vmul.f32	q15, q2, q10
	vmla.f32	q12, q1, q5
	vmla.f32	q13, q1, q7
	vmla.f32	q14, q3, q9
	vmla.f32	q15, q3, q11
	sub			WINDOW, WINDOW, #96
	add			B0L, B0L, #96
	add			B0R, B0R, #96
	vld1.32		{q0,q1}, [WINDOW], r5
	vld1.32		{q2,q3}, [WINDOW]
	vld1.32		{q4,q5}, [B0L, :128], r6
	vld1.32		{q6,q7}, [B0R, :128], r6
	vld1.32		{q8,q9}, [B0L, :128]
	vld1.32		{q10,q11}, [B0R, :128]
	vmla.f32	q12, q0, q4
	vmla.f32	q13, q0, q6
	vmla.f32	q14, q2, q8
	vmla.f32	q15, q2, q10
	add			WINDOW, WINDOW, #96
	sub			B0L, B0L, #96
	sub			B0R, B0R, #96
	vmla.f32	q12, q1, q5
	vmla.f32	q13, q1, q7
	vmla.f32	q14, q3, q9
	vmla.f32	q15, q3, q11
	vpadd.f32	d24, d24, d25
	vpadd.f32	d25, d26, d27
	vpadd.f32	d26, d28, d29
	vpadd.f32	d27, d30, d31
	vmov.i32	q15, #0x4b000000
	vmvn.i32	q14, #0xb9000000
	vorr.i32	q15, #0x00400000
	vpadd.f32	d24, d24, d25
	vpadd.f32	d25, d26, d27
	vacgt.f32	q14, q12, q14
	vadd.f32	q13, q12, q15
	vld1.32		{q15}, [sp, :128]
	vshr.u32	q14, q14, #31
	vshl.i32	q13, q13, #10
	vadd.i32	q14, q14, q15
	vqshrn.s32	d26, q13, #10
	vst1.32		{q14}, [sp, :128]
	vst1.16		{d26}, [SAMPLES]!

	subs		r4, r4, #1
	bne			1b

	/* Return value: sum of the four per-lane clip counters. */
	vld1.32		{q0}, [sp, :128]
	vpadd.i32	d0, d0, d1
	vpadd.i32	d0, d0, d0
	vmov.32		r0, d0[0]

	/* Drop the aligned scratch slot and restore the callee-saved registers. */
	mov			sp, r7
	vpop		{q4-q7}
	pop			{r4-r7, pc}

/* NONEXEC_STACK is a macro from mangle.h (not visible in this file); presumably
   it marks the object as not requiring an executable stack - confirm there. */
NONEXEC_STACK