/*
Copyright (C) 2008 Rocky Bernstein <rocky@gnu.org>
Copyright (C) 1998 Monty xiphmont@mit.edu
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
/******************************************************************
* FFT implementation from OggSquish, minus cosine transforms,
* minus all but radix 2/4 case
*
* See OggSquish or NetLib for the version that can do other than just
* power-of-two sized vectors.
******************************************************************/
#include <stdlib.h>
#include <math.h>
#include "smallft.h"
static void drfti1(int n, float *wa, int *ifac){
static int ntryh[4] = { 4,2,3,5 };
static float tpi = 6.28318530717958647692528676655900577;
float arg,argh,argld,fi;
int ntry=0,i,j=-1;
int k1, l1, l2, ib;
int ld, ii, ip, is, nq, nr;
int ido, ipm, nfm1;
int nl=n;
int nf=0;
L101:
j++;
if (j < 4)
ntry=ntryh[j];
else
ntry+=2;
L104:
nq=nl/ntry;
nr=nl-ntry*nq;
if (nr!=0) goto L101;
nf++;
ifac[nf+1]=ntry;
nl=nq;
if(ntry!=2)goto L107;
if(nf==1)goto L107;
for (i=1;i<nf;i++){
ib=nf-i+1;
ifac[ib+1]=ifac[ib];
}
ifac[2] = 2;
L107:
if(nl!=1)goto L104;
ifac[0]=n;
ifac[1]=nf;
argh=tpi/n;
is=0;
nfm1=nf-1;
l1=1;
if(nfm1==0)return;
for (k1=0;k1<nfm1;k1++){
ip=ifac[k1+2];
ld=0;
l2=l1*ip;
ido=n/l2;
ipm=ip-1;
for (j=0;j<ipm;j++){
ld+=l1;
i=is;
argld=(float)ld*argh;
fi=0.;
for (ii=2;ii<ido;ii+=2){
fi+=1.;
arg=fi*argld;
wa[i++]=cos(arg);
wa[i++]=sin(arg);
}
is+=ido;
}
l1=l2;
}
}
static void fdrffti(int n, float *wsave, int *ifac){
if (n == 1) return;
drfti1(n, wsave+n, ifac);
}
static void dradf2(int ido,int l1,float *cc,float *ch,float *wa1){
int i,k;
float ti2,tr2;
int t0,t1,t2,t3,t4,t5,t6;
t1=0;
t0=(t2=l1*ido);
t3=ido<<1;
for(k=0;k<l1;k++){
ch[t1<<1]=cc[t1]+cc[t2];
ch[(t1<<1)+t3-1]=cc[t1]-cc[t2];
t1+=ido;
t2+=ido;
}
if(ido<2)return;
if(ido==2)goto L105;
t1=0;
t2=t0;
for(k=0;k<l1;k++){
t3=t2;
t4=(t1<<1)+(ido<<1);
t5=t1;
t6=t1+t1;
for(i=2;i<ido;i+=2){
t3+=2;
t4-=2;
t5+=2;
t6+=2;
tr2=wa1[i-2]*cc[t3-1]+wa1[i-1]*cc[t3];
ti2=wa1[i-2]*cc[t3]-wa1[i-1]*cc[t3-1];
ch[t6]=cc[t5]+ti2;
ch[t4]=ti2-cc[t5];
ch[t6-1]=cc[t5-1]+tr2;
ch[t4-1]=cc[t5-1]-tr2;
}
t1+=ido;
t2+=ido;
}
if(ido%2==1)return;
L105:
t3=(t2=(t1=ido)-1);
t2+=t0;
for(k=0;k<l1;k++){
ch[t1]=-cc[t2];
ch[t1-1]=cc[t3];
t1+=ido<<1;
t2+=ido;
t3+=ido;
}
}
static void dradf4(int ido,int l1,float *cc,float *ch,float *wa1,
float *wa2,float *wa3){
static float hsqt2 = .70710678118654752440084436210485;
int i,k,t0,t1,t2,t3,t4,t5,t6;
float ci2,ci3,ci4,cr2,cr3,cr4,ti1,ti2,ti3,ti4,tr1,tr2,tr3,tr4;
t0=l1*ido;
t1=t0;
t4=t1<<1;
t2=t1+(t1<<1);
t3=0;
for(k=0;k<l1;k++){
tr1=cc[t1]+cc[t2];
tr2=cc[t3]+cc[t4];
ch[t5=t3<<2]=tr1+tr2;
ch[(ido<<2)+t5-1]=tr2-tr1;
ch[(t5+=(ido<<1))-1]=cc[t3]-cc[t4];
ch[t5]=cc[t2]-cc[t1];
t1+=ido;
t2+=ido;
t3+=ido;
t4+=ido;
}
if(ido<2)return;
if(ido==2)goto L105;
t1=0;
for(k=0;k<l1;k++){
t2=t1;
t4=t1<<2;
t5=(t6=ido<<1)+t4;
for(i=2;i<ido;i+=2){
t3=(t2+=2);
t4+=2;
t5-=2;
t3+=t0;
cr2=wa1[i-2]*cc[t3-1]+wa1[i-1]*cc[t3];
ci2=wa1[i-2]*cc[t3]-wa1[i-1]*cc[t3-1];
t3+=t0;
cr3=wa2[i-2]*cc[t3-1]+wa2[i-1]*cc[t3];
ci3=wa2[i-2]*cc[t3]-wa2[i-1]*cc[t3-1];
t3+=t0;
cr4=wa3[i-2]*cc[t3-1]+wa3[i-1]*cc[t3];
ci4=wa3[i-2]*cc[t3]-wa3[i-1]*cc[t3-1];
tr1=cr2+cr4;
tr4=cr4-cr2;
ti1=ci2+ci4;
ti4=ci2-ci4;
ti2=cc[t2]+ci3;
ti3=cc[t2]-ci3;
tr2=cc[t2-1]+cr3;
tr3=cc[t2-1]-cr3;
ch[t4-1]=tr1+tr2;
ch[t4]=ti1+ti2;
ch[t5-1]=tr3-ti4;
ch[t5]=tr4-ti3;
ch[t4+t6-1]=ti4+tr3;
ch[t4+t6]=tr4+ti3;
ch[t5+t6-1]=tr2-tr1;
ch[t5+t6]=ti1-ti2;
}
t1+=ido;
}
if(ido&1)return;
L105:
t2=(t1=t0+ido-1)+(t0<<1);
t3=ido<<2;
t4=ido;
t5=ido<<1;
t6=ido;
for(k=0;k<l1;k++){
ti1=-hsqt2*(cc[t1]+cc[t2]);
tr1=hsqt2*(cc[t1]-cc[t2]);
ch[t4-1]=tr1+cc[t6-1];
ch[t4+t5-1]=cc[t6-1]-tr1;
ch[t4]=ti1-cc[t1+t0];
ch[t4+t5]=ti1+cc[t1+t0];
t1+=ido;
t2+=ido;
t4+=t3;
t6+=ido;
}
}
static void drftf1(int n,float *c,float *ch,float *wa,int *ifac){
int i,k1,l1,l2;
int na,kh,nf;
int ip,iw,ido,ix2,ix3;
nf=ifac[1];
na=1;
l2=n;
iw=n;
for(k1=0;k1<nf;k1++){
kh=nf-k1;
ip=ifac[kh+1];
l1=l2/ip;
ido=n/l2;
iw-=(ip-1)*ido;
na=1-na;
if(ip!=4)goto L102;
ix2=iw+ido;
ix3=ix2+ido;
if(na!=0)
dradf4(ido,l1,ch,c,wa+iw-1,wa+ix2-1,wa+ix3-1);
else
dradf4(ido,l1,c,ch,wa+iw-1,wa+ix2-1,wa+ix3-1);
goto L110;
L102:
if(ip!=2)goto L104;
if(na!=0)goto L103;
dradf2(ido,l1,c,ch,wa+iw-1);
goto L110;
L103:
dradf2(ido,l1,ch,c,wa+iw-1);
goto L110;
L104:
return; /* We're restricted to powers of two. just fail */
L110:
l2=l1;
}
if(na==1)return;
for(i=0;i<n;i++)c[i]=ch[i];
}
static void fdrfftf(int n,float *r,float *wsave,int *ifac){
if(n==1)return;
drftf1(n,r,wsave,wsave+n,ifac);
}
static void dradb2(int ido,int l1,float *cc,float *ch,float *wa1){
int i,k,t0,t1,t2,t3,t4,t5,t6;
float ti2,tr2;
t0=l1*ido;
t1=0;
t2=0;
t3=(ido<<1)-1;
for(k=0;k<l1;k++){
ch[t1]=cc[t2]+cc[t3+t2];
ch[t1+t0]=cc[t2]-cc[t3+t2];
t2=(t1+=ido)<<1;
}
if(ido<2)return;
if(ido==2)goto L105;
t1=0;
t2=0;
for(k=0;k<l1;k++){
t3=t1;
t5=(t4=t2)+(ido<<1);
t6=t0+t1;
for(i=2;i<ido;i+=2){
t3+=2;
t4+=2;
t5-=2;
t6+=2;
ch[t3-1]=cc[t4-1]+cc[t5-1];
tr2=cc[t4-1]-cc[t5-1];
ch[t3]=cc[t4]-cc[t5];
ti2=cc[t4]+cc[t5];
ch[t6-1]=wa1[i-2]*tr2-wa1[i-1]*ti2;
ch[t6]=wa1[i-2]*ti2+wa1[i-1]*tr2;
}
t2=(t1+=ido)<<1;
}
if(ido%2==1)return;
L105:
t1=ido-1;
t2=ido-1;
for(k=0;k<l1;k++){
ch[t1]=cc[t2]+cc[t2];
ch[t1+t0]=-(cc[t2+1]+cc[t2+1]);
t1+=ido;
t2+=ido<<1;
}
}
static void dradb4(int ido,int l1,float *cc,float *ch,float *wa1,
float *wa2,float *wa3){
static float sqrt2=1.4142135623730950488016887242097;
int i,k,t0,t1,t2,t3,t4,t5,t6,t7,t8;
float ci2,ci3,ci4,cr2,cr3,cr4,ti1,ti2,ti3,ti4,tr1,tr2,tr3,tr4;
t0=l1*ido;
t1=0;
t2=ido<<2;
t3=0;
t6=ido<<1;
for(k=0;k<l1;k++){
t4=t3+t6;
t5=t1;
tr3=cc[t4-1]+cc[t4-1];
tr4=cc[t4]+cc[t4];
tr1=cc[t3]-cc[(t4+=t6)-1];
tr2=cc[t3]+cc[t4-1];
ch[t5]=tr2+tr3;
ch[t5+=t0]=tr1-tr4;
ch[t5+=t0]=tr2-tr3;
ch[t5+=t0]=tr1+tr4;
t1+=ido;
t3+=t2;
}
if(ido<2)return;
if(ido==2)goto L105;
t1=0;
for(k=0;k<l1;k++){
t5=(t4=(t3=(t2=t1<<2)+t6))+t6;
t7=t1;
for(i=2;i<ido;i+=2){
t2+=2;
t3+=2;
t4-=2;
t5-=2;
t7+=2;
ti1=cc[t2]+cc[t5];
ti2=cc[t2]-cc[t5];
ti3=cc[t3]-cc[t4];
tr4=cc[t3]+cc[t4];
tr1=cc[t2-1]-cc[t5-1];
tr2=cc[t2-1]+cc[t5-1];
ti4=cc[t3-1]-cc[t4-1];
tr3=cc[t3-1]+cc[t4-1];
ch[t7-1]=tr2+tr3;
cr3=tr2-tr3;
ch[t7]=ti2+ti3;
ci3=ti2-ti3;
cr2=tr1-tr4;
cr4=tr1+tr4;
ci2=ti1+ti4;
ci4=ti1-ti4;
ch[(t8=t7+t0)-1]=wa1[i-2]*cr2-wa1[i-1]*ci2;
ch[t8]=wa1[i-2]*ci2+wa1[i-1]*cr2;
ch[(t8+=t0)-1]=wa2[i-2]*cr3-wa2[i-1]*ci3;
ch[t8]=wa2[i-2]*ci3+wa2[i-1]*cr3;
ch[(t8+=t0)-1]=wa3[i-2]*cr4-wa3[i-1]*ci4;
ch[t8]=wa3[i-2]*ci4+wa3[i-1]*cr4;
}
t1+=ido;
}
if(ido%2 == 1)return;
L105:
t1=ido;
t2=ido<<2;
t3=ido-1;
t4=ido+(ido<<1);
for(k=0;k<l1;k++){
t5=t3;
ti1=cc[t1]+cc[t4];
ti2=cc[t4]-cc[t1];
tr1=cc[t1-1]-cc[t4-1];
tr2=cc[t1-1]+cc[t4-1];
ch[t5]=tr2+tr2;
ch[t5+=t0]=sqrt2*(tr1-ti1);
ch[t5+=t0]=ti2+ti2;
ch[t5+=t0]=-sqrt2*(tr1+ti1);
t3+=ido;
t1+=t2;
t4+=t2;
}
}
static void drftb1(int n, float *c, float *ch, float *wa, int *ifac){
int i,k1,l1,l2;
int na;
int nf,ip,iw,ix2,ix3,ido;
nf=ifac[1];
na=0;
l1=1;
iw=1;
for(k1=0;k1<nf;k1++){
ip=ifac[k1 + 2];
l2=ip*l1;
ido=n/l2;
if(ip!=4)goto L103;
ix2=iw+ido;
ix3=ix2+ido;
if(na!=0)
dradb4(ido,l1,ch,c,wa+iw-1,wa+ix2-1,wa+ix3-1);
else
dradb4(ido,l1,c,ch,wa+iw-1,wa+ix2-1,wa+ix3-1);
na=1-na;
goto L115;
L103:
if(ip!=2)goto L106;
if(na!=0)
dradb2(ido,l1,ch,c,wa+iw-1);
else
dradb2(ido,l1,c,ch,wa+iw-1);
na=1-na;
goto L115;
L106:
return; /* silently fail. we only do powers of two in this version */
L115:
l1=l2;
iw+=(ip-1)*ido;
}
if(na==0)return;
for(i=0;i<n;i++)c[i]=ch[i];
}
static void fdrfftb(int n, float *r, float *wsave, int *ifac){
if (n == 1)return;
drftb1(n, r, wsave, wsave+n, ifac);
}
void fft_forward(int n, float *buf,float *trigcache,int *splitcache){
int flag=0;
if(!trigcache || !splitcache){
trigcache=calloc(3*n,sizeof(float));
splitcache=calloc(32,sizeof(int));
fdrffti(n, trigcache, splitcache);
flag=1;
}
fdrfftf(n, buf, trigcache, splitcache);
if(flag){
free(trigcache);
free(splitcache);
}
}
void fft_backward(int n, float *buf, float *trigcache,int *splitcache){
int i;
int flag=0;
if(!trigcache || !splitcache){
trigcache=calloc(3*n,sizeof(float));
splitcache=calloc(32,sizeof(int));
fdrffti(n, trigcache, splitcache);
flag=1;
}
fdrfftb(n, buf, trigcache, splitcache);
for(i=0;i<n;i++)buf[i]/=n;
if(flag){
free(trigcache);
free(splitcache);
}
}
void fft_i(int n, float **trigcache, int **splitcache){
*trigcache=calloc(3*n,sizeof(float));
*splitcache=calloc(32,sizeof(int));
fdrffti(n, *trigcache, *splitcache);
}