; libmp3lame/i386/fftfpu.nas
;
; back port from GOGO-no-coda 2.24b by Takehiro TOMINAGA

; GOGO-no-coda
;	Copyright (C) 1999 shigeo
;	special thanks to URURI

%include "nasm.h"

; Twiddle-factor tables, defined outside this file.  Both routines below
; index them as float arrays: [table + k*4] with k = 2, 4, 6, ...
	externdef costab_fft
	externdef sintab_fft

	segment_data
	align 32
; Single-precision constants.  Only D_1_41421, D_1_0 and D_0_0 are
; referenced by the code in this file; the others are kept unchanged.
D_1_41421	dd	1.41421356	; multiplier applied to gi[k2] / gi[k3]
D_1_0	dd	1.0		; initial c1
D_0_5	dd	0.5
D_0_25	dd	0.25
D_0_0005	dd	0.0005
D_0_0	dd	0.0		; initial s1

	segment_code

;void fht_FPU(float *fz, int n);
;
; In-place transform of the float array fz using plain x87 code.
; The routine runs a sequence of passes; each pass
;   1. (.do2)  combines fi[0],fi[k1],fi[k2],fi[k3] and the matching gi[]
;              entries with add/sub butterflies (gi[k2], gi[k3] are first
;              scaled by D_1_41421),
;   2. (.for / .do3) for i = 1 .. kx-1 rotates (c1,s1) by (t_c,t_s), derives
;              the double-angle pair (c2,s2) and applies both rotations to
;              the fi/gi groups,
; then multiplies k1, k3 and kx by 4 for the next pass.
;
; Arguments (declared through the nasm.h proc/arg macros):
;   fz  pointer to the float data, modified in place
;   n   compared directly against k1*fsize (== k4 numerically) to end the
;       pass loop, i.e. it is treated as a count of floats
;
; Register roles (r0..r6 are register aliases supplied by nasm.h):
;   r0 = fi (byte pointer)      r1 = gi (byte pointer)
;   r2 = k3*fsize               r3 = k1*fsize
;   r4 = kx*fsize               r5 = i*fsize
;   r6 = fn, end-of-data pointer used to terminate .do2 / .do3
;
; The x87 stack is left at the same depth it had on entry.
proc	fht_FPU

%$fz	arg	4
%$n	arg	4

%$k	local	4

%$f0	local	4
%$f1	local	4
%$f2	local	4
%$f3	local	4

%$g0	local	4
%$g1	local	4
%$g2	local	4
%$g3	local	4

%$s1	local	4
%$c1	local	4
%$s2	local	4
%$c2	local	4

%$t_s	local	4
%$t_c	local	4
	alloc

	pushd	ebp, ebx, esi, edi

; The three part labels are kept for reference; no code sits between them.
fht_FPU_1st_part:

fht_FPU_2nd_part:

fht_FPU_3rd_part:

.do_init:
	mov	r3, 16		;k1*fsize = 4*fsize = k4
	mov	r4, 8		;kx = k1/2
	mov	r2, 48		;k3*fsize
	mov	dword [sp(%$k)], 2	;k = 2
	mov	r0, [sp(%$fz)]	;fi
	; r6 is the loop bound of .do2/.do3 and was previously never loaded.
	; NOTE(review): fn is taken as fz + n floats, matching the
	; "cmp r3, [n]" pass-termination test below - confirm against callers.
	mov	r6, [sp(%$n)]
	lea	r6, [r0+r6*4]		;fn = fz + n
	lea	r1, [r0+8]		;gi = fi + kx

.do:
.do2:
	;f
	fld	dword [r0]
	fsub	dword [r0+r3]		;f1 = fi[0] - fi[k1]

	fld	dword [r0]
	fadd	dword [r0+r3]		;f0 = fi[0] + fi[k1]

	fld	dword [r0+r3*2]
	fsub	dword [r0+r2]		;f3 = fi[k2] - fi[k3]

	fld	dword [r0+r3*2]
	fadd	dword [r0+r2]		;f2 f3 f0 f1

	fld	st2			;f0 f2 f3 f0 f1
	fadd	st0, st1
	fstp	dword [r0]		;fi[0]

	fld	st3			;f1 f2 f3 f0 f1
	fadd	st0, st2
	fstp	dword [r0+r3]		;fi[k1]

	fsubr	st0, st2		;f0-f2 f3 f0 f1
	fstp	dword [r0+r3*2]		;fi[k2]

	fsubr	st0, st2		;f1-f3 f0 f1
	fstp	dword [r0+r2]		;fi[k3]
	fcompp				;drop f0 f1 (compare result unused)

	;g
	fld	dword [r1]
	fsub	dword [r1+r3]		;g1 = gi[0] - gi[k1]

	fld	dword [r1]
	fadd	dword [r1+r3]		;g0 = gi[0] + gi[k1]

	fld	dword [D_1_41421]
	fmul	dword [r1+r2]		;g3 = 1.41421356 * gi[k3]

	fld	dword [D_1_41421]
	fmul	dword [r1+r3*2]		;g2 g3 g0 g1

	fld	st2			;g0 g2 g3 g0 g1
	fadd	st0, st1
	fstp	dword [r1]		;gi[0]

	fld	st3			;g1 g2 g3 g0 g1
	fadd	st0, st2
	fstp	dword [r1+r3]		;gi[k1]

	fsubr	st0, st2		;g0-g2 g3 g0 g1
	fstp	dword [r1+r3*2]		;gi[k2]

	fsubr	st0, st2		;g1-g3 g0 g1
	fstp	dword [r1+r2]		;gi[k3]
	fcompp				;drop g0 g1 (compare result unused)

	lea	r0, [r0+r3*4]		;fi += k4
	lea	r1, [r1+r3*4]		;gi += k4
	cmp	r0, r6
	jb	.do2			;while (fi < fn)


	; Load this pass's rotation step and reset (c1, s1) to (1, 0).
	mov	r0, [sp(%$k)]
	fld	dword [costab_fft +r0*4]
	fstp	dword [sp(%$t_c)]
	fld	dword [sintab_fft +r0*4]
	fstp	dword [sp(%$t_s)]
	fld	dword [D_1_0]
	fstp	dword [sp(%$c1)]
	fld	dword [D_0_0]
	fstp	dword [sp(%$s1)]

.for_init:
	mov	r5, 4		;i = 1*fsize

.for:
	; (c1, s1) <- (c1*t_c - s1*t_s, c1*t_s + s1*t_c)
	fld	dword [sp(%$c1)]
	fmul	dword [sp(%$t_c)]
	fld	dword [sp(%$s1)]
	fmul	dword [sp(%$t_s)]
	fsubp	st1, st0		;c1

	fld	dword [sp(%$c1)]
	fmul	dword [sp(%$t_s)]
	fld	dword [sp(%$s1)]
	fmul	dword [sp(%$t_c)]
	faddp	st1, st0		;s1 c1
	
	; c2 = c1*c1 - s1*s1
	fld	st1
	fmul	st0, st0		;c1c1 s1 c1
	fld	st1
	fmul	st0, st0		;s1s1 c1c1 s1 c1
	fsubp	st1, st0		;c2 s1 c1
	fstp	dword [sp(%$c2)]	;s1 c1

	; s2 = 2*c1*s1
	fld	st1			;c1 s1 c1
	fmul	st0, st1		;c1s1 s1 c1
	fadd	st0, st0		;s2 s1 c1
	fstp	dword [sp(%$s2)]	;s1 c1

	fstp	dword [sp(%$s1)]	;c1
	fstp	dword [sp(%$c1)]	;
	
	mov	r0, [sp(%$fz)]
	add	r0, r5		;r0 = fi
	mov	r1, [sp(%$fz)]
	add	r1, r3
	sub	r1, r5		;r1 = gi

.do3:
	fld	dword [sp(%$s2)]
	fmul	dword [r0+r3]
	fld	dword [sp(%$c2)]
	fmul	dword [r1+r3]
	fsubp	st1, st0		;b = s2*fi[k1] - c2*gi[k1]

	fld	dword [sp(%$c2)]
	fmul	dword [r0+r3]
	fld	dword [sp(%$s2)]
	fmul	dword [r1+r3]
	faddp	st1, st0		;a = c2*fi[k1] + s2*gi[k1]  b

	fld	dword [r0]
	fsub	st0, st1		;f1 a b
	fstp	dword [sp(%$f1)]	;a b

	fadd	dword [r0]		;f0 b
	fstp	dword [sp(%$f0)]	;b

	fld	dword [r1]
	fsub	st0, st1		;g1 b
	fstp	dword [sp(%$g1)]	;b

	fadd	dword [r1]		;g0
	fstp	dword [sp(%$g0)]	;


	fld	dword [sp(%$s2)]
	fmul	dword [r0+r2]
	fld	dword [sp(%$c2)]
	fmul	dword [r1+r2]
	fsubp	st1, st0		;b = s2*fi[k3] - c2*gi[k3]

	fld	dword [sp(%$c2)]
	fmul	dword [r0+r2]
	fld	dword [sp(%$s2)]
	fmul	dword [r1+r2]
	faddp	st1, st0		;a = c2*fi[k3] + s2*gi[k3]  b

	fld	dword [r0+r3*2]
	fsub	st0, st1		;f3 a b
	fstp	dword [sp(%$f3)]	;a b

	fadd	dword [r0+r3*2]	;f2 b
	fstp	dword [sp(%$f2)]	;b

	fld	dword [r1+r3*2]
	fsub	st0, st1		;g3 b
	fstp	dword [sp(%$g3)]	;b

	fadd	dword [r1+r3*2]	;g2
	fstp	dword [sp(%$g2)]	;


	fld	dword [sp(%$s1)]
	fmul	dword [sp(%$f2)]
	fld	dword [sp(%$c1)]
	fmul	dword [sp(%$g3)]
	fsubp	st1, st0		;b = s1*f2 - c1*g3
	
	fld	dword [sp(%$c1)]
	fmul	dword [sp(%$f2)]
	fld	dword [sp(%$s1)]
	fmul	dword [sp(%$g3)]
	faddp	st1, st0		;a = c1*f2 + s1*g3  b

	fld	dword [sp(%$f0)]
	fsub	st0, st1		;fi[k2] a b
	fstp	dword [r0+r3*2]

	fadd	dword [sp(%$f0)]	;fi[0] b
	fstp	dword [r0]

	fld	dword [sp(%$g1)]
	fsub	st0, st1		;gi[k3] b
	fstp	dword [r1+r2]

	fadd	dword [sp(%$g1)]	;gi[k1]
	fstp	dword [r1+r3]


	fld	dword [sp(%$c1)]
	fmul	dword [sp(%$g2)]
	fld	dword [sp(%$s1)]
	fmul	dword [sp(%$f3)]
	fsubp	st1, st0		;b = c1*g2 - s1*f3
	
	fld	dword [sp(%$s1)]
	fmul	dword [sp(%$g2)]
	fld	dword [sp(%$c1)]
	fmul	dword [sp(%$f3)]
	faddp	st1, st0		;a = s1*g2 + c1*f3  b

	fld	dword [sp(%$g0)]
	fsub	st0, st1		;gi[k2] a b
	fstp	dword [r1+r3*2]

	fadd	dword [sp(%$g0)]	;gi[0] b
	fstp	dword [r1]

	fld	dword [sp(%$f1)]
	fsub	st0, st1		;fi[k3] b
	fstp	dword [r0+r2]

	fadd	dword [sp(%$f1)]	;fi[k1]
	fstp	dword [r0+r3]


	lea	r0, [r0+r3*4]		;fi += k4
	lea	r1, [r1+r3*4]		;gi += k4
	cmp	r0, r6
	jb near	.do3			;while (fi < fn)

	add	r5, 4			;i++
	cmp	r5, r4
	jb near	.for			;while (i < kx)

	cmp	r3, [sp(%$n)]		;r3 = k1*fsize, numerically k4
	jae	.exit			;stop once k4 >= n

	add	dword [sp(%$k)], 2	;k  += 2;
	lea	r3, [r3*4]		;k1 *= 4
	lea	r2, [r2*4]		;k3 *= 4
	lea	r4, [r4*4]		;kx *= 4
	mov	r0, [sp(%$fz)]	;fi
	lea	r1, [r0+r4]		;gi = fi + kx
	jmp	.do

.exit:
	popd	ebp, ebx, esi, edi
endproc

;*************************************************************

;void fht_FPU_FXCH(float *fz, int n);
;
; Same in-place transform and same pass structure as fht_FPU, but the
; independent multiplies/adds are issued back to back and reordered on the
; x87 stack with fxch instead of being completed one result at a time.
;
; Arguments (declared through the nasm.h proc/arg macros):
;   fz  pointer to the float data, modified in place
;   n   compared directly against k1*fsize (== k4 numerically) to end the
;       pass loop, i.e. it is treated as a count of floats
;
; Register roles (r0..r6 are register aliases supplied by nasm.h):
;   r0 = fi (byte pointer)      r1 = gi (byte pointer)
;   r2 = k3*fsize               r3 = k1*fsize
;   r4 = kx*fsize               r5 = i*fsize
;   r6 = fn, end-of-data pointer used to terminate .do2 / .do3
;
; Up to six x87 stack slots are in use at once (first butterfly); the
; stack is left at the same depth it had on entry.
proc	fht_FPU_FXCH

%$fz	arg	4
%$n	arg	4

%$k	local	4

%$f0	local	4
%$f1	local	4
%$f2	local	4
%$f3	local	4

%$g0	local	4
%$g1	local	4
%$g2	local	4
%$g3	local	4

%$s1	local	4
%$c1	local	4
%$s2	local	4
%$c2	local	4

%$t_s	local	4
%$t_c	local	4
	alloc

	pushd	ebp, ebx, esi, edi

; The three part labels are kept for reference; no code sits between them.
fht_FPU_FXCH_1st_part:

fht_FPU_FXCH_2nd_part:

fht_FPU_FXCH_3rd_part:

.do_init:
	mov	r3, 16		;k1*fsize = 4*fsize = k4
	mov	r4, 8		;kx = k1/2
	mov	r2, 48		;k3*fsize
	mov	dword [sp(%$k)], 2	;k = 2
	mov	r0, [sp(%$fz)]	;fi
	; r6 is the loop bound of .do2/.do3 and was previously never loaded.
	; NOTE(review): fn is taken as fz + n floats, matching the
	; "cmp r3, [n]" pass-termination test below - confirm against callers.
	mov	r6, [sp(%$n)]
	lea	r6, [r0+r6*4]		;fn = fz + n
	lea	r1, [r0+8]		;gi = fi + kx

.do:
.do2:
	;f
	fld	dword [r0]
	fsub	dword [r0+r3]		;f1 = fi[0] - fi[k1]
	fld	dword [r0]
	fadd	dword [r0+r3]		;f0 = fi[0] + fi[k1]

	fld	dword [r0+r3*2]
	fsub	dword [r0+r2]		;f3 = fi[k2] - fi[k3]
	fld	dword [r0+r3*2]
	fadd	dword [r0+r2]		;f2 f3 f0 f1

	fld	st3			;f1 f2 f3 f0 f1
	fld	st3			;f0 f1 f2 f3 f0 f1
	fxch	st5			;f1 f1 f2 f3 f0 f0
	fadd	st0, st3		;f1+f3 f1 f2 f3 f0 f0
	fxch	st4			;f0 f1 f2 f3 f1+f3 f0
	fadd	st0, st2		;f0+f2 f1 f2 f3 f1+f3 f0
	fxch	st3			;f3 f1 f2 f0+f2 f1+f3 f0
	fsubp	st1, st0		;f1-f3 f2 f0+f2 f1+f3 f0
	fxch	st1			;f2 f1-f3 f0+f2 f1+f3 f0
	fsubp	st4, st0		;f1-f3 f0+f2 f1+f3 f0-f2
	fxch	st2			;f1+f3 f0+f2 f1-f3 f0-f2

	fstp	dword [r0+r3]		;fi[k1]
	fstp	dword [r0]		;fi[0]
	fstp	dword [r0+r2]		;fi[k3]
	fstp	dword [r0+r3*2]		;fi[k2]

	;g
	fld	dword [r1]
	fsub	dword [r1+r3]		;g1 = gi[0] - gi[k1]
	fld	dword [r1]
	fadd	dword [r1+r3]		;g0 = gi[0] + gi[k1]

	fld	dword [D_1_41421]
	fmul	dword [r1+r2]		;g3 = 1.41421356 * gi[k3]
	fld	dword [D_1_41421]
	fmul	dword [r1+r3*2]		;g2 g3 g0 g1

	; Same fxch schedule as the f butterfly above.
	fld	st3
	fld	st3
	fxch	st5
	fadd	st0, st3
	fxch	st4
	fadd	st0, st2
	fxch	st3
	fsubp	st1, st0
	fxch	st1
	fsubp	st4, st0
	fxch	st2			;g1+g3 g0+g2 g1-g3 g0-g2

	fstp	dword [r1+r3]		;gi[k1]
	fstp	dword [r1]		;gi[0]
	fstp	dword [r1+r2]		;gi[k3]
	fstp	dword [r1+r3*2]		;gi[k2]

	lea	r0, [r0+r3*4]		;fi += k4
	lea	r1, [r1+r3*4]		;gi += k4
	cmp	r0, r6
	jb	.do2			;while (fi < fn)


	; Load this pass's rotation step and reset (c1, s1) to (1, 0).
	mov	r0, [sp(%$k)]
	fld	dword [costab_fft +r0*4]
	fld	dword [sintab_fft +r0*4]
	fld	dword [D_1_0]
	fld	dword [D_0_0]		;0 1 sin cos
	fxch	st3			;cos 1 sin 0
	fstp	dword [sp(%$t_c)]
	fxch	st1			;sin 1 0
	fstp	dword [sp(%$t_s)]
	fstp	dword [sp(%$c1)]	;c1 = 1
	fstp	dword [sp(%$s1)]	;s1 = 0

.for_init:
	mov	r5, 4		;i = 1*fsize

.for:
	; (c1, s1) <- (c1*t_c - s1*t_s, c1*t_s + s1*t_c)
	fld	dword [sp(%$c1)]
	fmul	dword [sp(%$t_c)]
	fld	dword [sp(%$s1)]
	fmul	dword [sp(%$t_s)]

	fld	dword [sp(%$c1)]
	fmul	dword [sp(%$t_s)]
	fld	dword [sp(%$s1)]
	fmul	dword [sp(%$t_c)]
	fxch	st2
	fsubp	st3, st0		;c1
	faddp	st1, st0		;s1 c1
	
	fld	st1
	fxch	st2
	fmul	st0, st0		;c1c1 s1 c1
	fld	st1
	fxch	st2
	fmul	st0, st0		;s1s1 c1c1 s1 c1

	fxch	st3
	fst	dword [sp(%$c1)]	;c1
	fxch	st2
	fst	dword [sp(%$s1)]	;s1 c1c1 c1 s1s1

	fmulp	st2, st0		;c1c1 c1s1 s1s1
	fsubrp	st2, st0		;c1s1 c2   (c2 = c1c1 - s1s1)
	fadd	st0, st0		;s2 c2
	fxch	st1
	fstp	dword [sp(%$c2)]
	fstp	dword [sp(%$s2)]

	mov	r0, [sp(%$fz)]
	mov	r1, [sp(%$fz)]
	add	r0, r5		;r0 = fi
	add	r1, r3
	sub	r1, r5		;r1 = gi

.do3:
	fld	dword [sp(%$s2)]
	fmul	dword [r0+r3]
	fld	dword [sp(%$c2)]
	fmul	dword [r1+r3]

	fld	dword [sp(%$c2)]
	fmul	dword [r0+r3]
	fld	dword [sp(%$s2)]
	fmul	dword [r1+r3]
	fxch	st2
	fsubp	st3, st0		;b = s2*fi[k1] - c2*gi[k1]
	faddp	st1, st0		;a = c2*fi[k1] + s2*gi[k1]  b

	fld	dword [r1]
	fsub	st0, st2		;g1 a b
	fxch	st2
	fadd	dword [r1]		;g0 a g1

	fld	dword [r0]
	fsub	st0, st2		;f1 g0 a g1
	fxch	st2
	fadd	dword [r0]		;f0 g0 f1 g1

	fxch	st3
	fstp	dword [sp(%$g1)]
	fstp	dword [sp(%$g0)]
	fstp	dword [sp(%$f1)]
	fstp	dword [sp(%$f0)]


	fld	dword [sp(%$s2)]
	fmul	dword [r0+r2]
	fld	dword [sp(%$c2)]
	fmul	dword [r1+r2]

	fld	dword [sp(%$c2)]
	fmul	dword [r0+r2]
	fld	dword [sp(%$s2)]
	fmul	dword [r1+r2]
	fxch	st2
	fsubp	st3, st0		;b = s2*fi[k3] - c2*gi[k3]
	faddp	st1, st0		;a = c2*fi[k3] + s2*gi[k3]  b


	fld	dword [r1+r3*2]
	fsub	st0, st2		;g3 a b
	fxch	st2
	fadd	dword [r1+r3*2]	;g2 a g3

	fld	dword [r0+r3*2]
	fsub	st0, st2		;f3 g2 a g3
	fxch	st2
	fadd	dword [r0+r3*2]	;f2 g2 f3 g3

	fxch	st3
	fstp	dword [sp(%$g3)]
	fstp	dword [sp(%$g2)]
	fstp	dword [sp(%$f3)]
	fstp	dword [sp(%$f2)]


	fld	dword [sp(%$s1)]
	fmul	dword [sp(%$f2)]
	fld	dword [sp(%$c1)]
	fmul	dword [sp(%$g3)]
	
	fld	dword [sp(%$c1)]
	fmul	dword [sp(%$f2)]
	fld	dword [sp(%$s1)]
	fmul	dword [sp(%$g3)]
	fxch	st2
	fsubp	st3, st0		;b = s1*f2 - c1*g3
	faddp	st1, st0		;a = c1*f2 + s1*g3  b

	fld	dword [sp(%$g1)]
	fsub	st0, st2		;gi[k3] a b
	fxch	st2
	fadd	dword [sp(%$g1)]	;gi[k1] a gi[k3]

	fld	dword [sp(%$f0)]
	fsub	st0, st2		;fi[k2] gi[k1] a gi[k3]
	fxch	st2
	fadd	dword [sp(%$f0)]	;fi[0] gi[k1] fi[k2] gi[k3]

	fxch	st3
	fstp	dword [r1+r2]
	fstp	dword [r1+r3]
	fstp	dword [r0+r3*2]
	fstp	dword [r0]


	fld	dword [sp(%$c1)]
	fmul	dword [sp(%$g2)]
	fld	dword [sp(%$s1)]
	fmul	dword [sp(%$f3)]
	
	fld	dword [sp(%$s1)]
	fmul	dword [sp(%$g2)]
	fld	dword [sp(%$c1)]
	fmul	dword [sp(%$f3)]
	fxch	st2
	fsubp	st3, st0		;b = c1*g2 - s1*f3
	faddp	st1, st0		;a = s1*g2 + c1*f3  b

	fld	dword [sp(%$f1)]
	fsub	st0, st2		;fi[k3] a b
	fxch	st2
	fadd	dword [sp(%$f1)]	;fi[k1] a fi[k3]

	fld	dword [sp(%$g0)]
	fsub	st0, st2		;gi[k2] fi[k1] a fi[k3]
	fxch	st2
	fadd	dword [sp(%$g0)]	;gi[0] fi[k1] gi[k2] fi[k3]

	fxch	st3
	fstp	dword [r0+r2]
	fstp	dword [r0+r3]
	fstp	dword [r1+r3*2]
	fstp	dword [r1]


	lea	r0, [r0+r3*4]		;fi += k4
	lea	r1, [r1+r3*4]		;gi += k4
	cmp	r0, r6
	jb near	.do3			;while (fi < fn)

	add	r5, 4			;i++
	cmp	r5, r4
	jb near	.for			;while (i < kx)

	cmp	r3, [sp(%$n)]		;r3 = k1*fsize, numerically k4
	jae	.exit			;stop once k4 >= n

	add	dword [sp(%$k)], 2	;k  += 2;
	lea	r3, [r3*4]		;k1 *= 4
	lea	r2, [r2*4]		;k3 *= 4
	lea	r4, [r4*4]		;kx *= 4
	mov	r0, [sp(%$fz)]	;fi
	lea	r1, [r0+r4]		;gi = fi + kx
	jmp	.do

.exit:
	popd	ebp, ebx, esi, edi
endproc

	end