Blame libcelt/mdct.c

Packit 664db3
/* (C) 2008 Jean-Marc Valin, CSIRO
Packit 664db3
*/
Packit 664db3
/*
Packit 664db3
   Redistribution and use in source and binary forms, with or without
Packit 664db3
   modification, are permitted provided that the following conditions
Packit 664db3
   are met:
Packit 664db3
   
Packit 664db3
   - Redistributions of source code must retain the above copyright
Packit 664db3
   notice, this list of conditions and the following disclaimer.
Packit 664db3
   
Packit 664db3
   - Redistributions in binary form must reproduce the above copyright
Packit 664db3
   notice, this list of conditions and the following disclaimer in the
Packit 664db3
   documentation and/or other materials provided with the distribution.
Packit 664db3
   
Packit 664db3
   - Neither the name of the Xiph.org Foundation nor the names of its
Packit 664db3
   contributors may be used to endorse or promote products derived from
Packit 664db3
   this software without specific prior written permission.
Packit 664db3
   
Packit 664db3
   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
Packit 664db3
   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
Packit 664db3
   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
Packit 664db3
   A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR
Packit 664db3
   CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
Packit 664db3
   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
Packit 664db3
   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
Packit 664db3
   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
Packit 664db3
   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
Packit 664db3
   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
Packit 664db3
   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
Packit 664db3
*/
Packit 664db3
Packit 664db3
/* This is a simple MDCT implementation that uses a N/4 complex FFT
Packit 664db3
   to do most of the work. It should be relatively straightforward to
Packit 664db3
   plug in pretty much and FFT here.
Packit 664db3
   
Packit 664db3
   This replaces the Vorbis FFT (and uses the exact same API), which 
Packit 664db3
   was a bit too messy and that was ending up duplicating code 
Packit 664db3
   (might as well use the same FFT everywhere).
Packit 664db3
   
Packit 664db3
   The algorithm is similar to (and inspired from) Fabrice Bellard's
Packit 664db3
   MDCT implementation in FFMPEG, but has differences in signs, ordering
Packit 664db3
   and scaling in many places. 
Packit 664db3
*/
Packit 664db3
Packit 664db3
#ifdef HAVE_CONFIG_H
Packit 664db3
#include "config.h"
Packit 664db3
#endif
Packit 664db3
Packit 664db3
#include "mdct.h"
Packit 664db3
#include "kfft_double.h"
Packit 664db3
#include <math.h>
Packit 664db3
#include "os_support.h"
Packit 664db3
#include "mathops.h"
Packit 664db3
#include "stack_alloc.h"
Packit 664db3
Packit 664db3
#ifndef M_PI
Packit 664db3
#define M_PI 3.141592653
Packit 664db3
#endif
Packit 664db3
Packit 664db3
void mdct_init(mdct_lookup *l,int N)
Packit 664db3
{
Packit 664db3
   int i;
Packit 664db3
   int N2;
Packit 664db3
   l->n = N;
Packit 664db3
   N2 = N>>1;
Packit 664db3
   l->kfft = cpx32_fft_alloc(N>>2);
Packit 664db3
   l->trig = (kiss_twiddle_scalar*)celt_alloc(N2*sizeof(kiss_twiddle_scalar));
Packit 664db3
   /* We have enough points that sine isn't necessary */
Packit 664db3
#if defined(FIXED_POINT)
Packit 664db3
#if defined(DOUBLE_PRECISION) & !defined(MIXED_PRECISION)
Packit 664db3
   for (i=0;i
Packit 664db3
      l->trig[i] = SAMP_MAX*cos(2*M_PI*(i+1./8.)/N);
Packit 664db3
#else
Packit 664db3
   for (i=0;i
Packit 664db3
      l->trig[i] = TRIG_UPSCALE*celt_cos_norm(DIV32(ADD32(SHL32(EXTEND32(i),17),16386),N));
Packit 664db3
#endif
Packit 664db3
#else
Packit 664db3
   for (i=0;i
Packit 664db3
      l->trig[i] = cos(2*M_PI*(i+1./8.)/N);
Packit 664db3
#endif
Packit 664db3
}
Packit 664db3
Packit 664db3
void mdct_clear(mdct_lookup *l)
Packit 664db3
{
Packit 664db3
   cpx32_fft_free(l->kfft);
Packit 664db3
   celt_free(l->trig);
Packit 664db3
}
Packit 664db3
Packit 664db3
void mdct_forward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar * restrict out, const celt_word16_t *window, int overlap)
Packit 664db3
{
Packit 664db3
   int i;
Packit 664db3
   int N, N2, N4;
Packit 664db3
   VARDECL(kiss_fft_scalar, f);
Packit 664db3
   SAVE_STACK;
Packit 664db3
   N = l->n;
Packit 664db3
   N2 = N>>1;
Packit 664db3
   N4 = N>>2;
Packit 664db3
   ALLOC(f, N2, kiss_fft_scalar);
Packit 664db3
   
Packit 664db3
   /* Consider the input to be compused of four blocks: [a, b, c, d] */
Packit 664db3
   /* Window, shuffle, fold */
Packit 664db3
   {
Packit 664db3
      /* Temp pointers to make it really clear to the compiler what we're doing */
Packit 664db3
      const kiss_fft_scalar * restrict xp1 = in+(overlap>>1);
Packit 664db3
      const kiss_fft_scalar * restrict xp2 = in+N2-1+(overlap>>1);
Packit 664db3
      kiss_fft_scalar * restrict yp = out;
Packit 664db3
      const celt_word16_t * restrict wp1 = window+(overlap>>1);
Packit 664db3
      const celt_word16_t * restrict wp2 = window+(overlap>>1)-1;
Packit 664db3
      for(i=0;i<(overlap>>2);i++)
Packit 664db3
      {
Packit 664db3
         /* Real part arranged as -d-cR, Imag part arranged as -b+aR*/
Packit 664db3
         *yp++ = MULT16_32_Q15(*wp2, xp1[N2]) + MULT16_32_Q15(*wp1,*xp2);
Packit 664db3
         *yp++ = MULT16_32_Q15(*wp1, *xp1)    - MULT16_32_Q15(*wp2, xp2[-N2]);
Packit 664db3
         xp1+=2;
Packit 664db3
         xp2-=2;
Packit 664db3
         wp1+=2;
Packit 664db3
         wp2-=2;
Packit 664db3
      }
Packit 664db3
      wp1 = window;
Packit 664db3
      wp2 = window+overlap-1;
Packit 664db3
      for(;i<N4-(overlap>>2);i++)
Packit 664db3
      {
Packit 664db3
         /* Real part arranged as a-bR, Imag part arranged as -c-dR */
Packit 664db3
         *yp++ = *xp2;
Packit 664db3
         *yp++ = *xp1;
Packit 664db3
         xp1+=2;
Packit 664db3
         xp2-=2;
Packit 664db3
      }
Packit 664db3
      for(;i
Packit 664db3
      {
Packit 664db3
         /* Real part arranged as a-bR, Imag part arranged as -c-dR */
Packit 664db3
         *yp++ =  -MULT16_32_Q15(*wp1, xp1[-N2]) + MULT16_32_Q15(*wp2, *xp2);
Packit 664db3
         *yp++ = MULT16_32_Q15(*wp2, *xp1)     + MULT16_32_Q15(*wp1, xp2[N2]);
Packit 664db3
         xp1+=2;
Packit 664db3
         xp2-=2;
Packit 664db3
         wp1+=2;
Packit 664db3
         wp2-=2;
Packit 664db3
      }
Packit 664db3
   }
Packit 664db3
   /* Pre-rotation */
Packit 664db3
   {
Packit 664db3
      kiss_fft_scalar * restrict yp = out;
Packit 664db3
      kiss_fft_scalar *t = &l->trig[0];
Packit 664db3
      for(i=0;i
Packit 664db3
      {
Packit 664db3
         kiss_fft_scalar re, im;
Packit 664db3
         re = yp[0];
Packit 664db3
         im = yp[1];
Packit 664db3
         *yp++ = -S_MUL(re,t[0])  +  S_MUL(im,t[N4]);
Packit 664db3
         *yp++ = -S_MUL(im,t[0])  -  S_MUL(re,t[N4]);
Packit 664db3
         t++;
Packit 664db3
      }
Packit 664db3
   }
Packit 664db3
Packit 664db3
   /* N/4 complex FFT, down-scales by 4/N */
Packit 664db3
   cpx32_fft(l->kfft, out, f, N4);
Packit 664db3
Packit 664db3
   /* Post-rotate */
Packit 664db3
   {
Packit 664db3
      /* Temp pointers to make it really clear to the compiler what we're doing */
Packit 664db3
      const kiss_fft_scalar * restrict fp = f;
Packit 664db3
      kiss_fft_scalar * restrict yp1 = out;
Packit 664db3
      kiss_fft_scalar * restrict yp2 = out+N2-1;
Packit 664db3
      kiss_fft_scalar *t = &l->trig[0];
Packit 664db3
      /* Temp pointers to make it really clear to the compiler what we're doing */
Packit 664db3
      for(i=0;i
Packit 664db3
      {
Packit 664db3
         *yp1 = -S_MUL(fp[1],t[N4]) + S_MUL(fp[0],t[0]);
Packit 664db3
         *yp2 = -S_MUL(fp[0],t[N4]) - S_MUL(fp[1],t[0]);
Packit 664db3
         fp += 2;
Packit 664db3
         yp1 += 2;
Packit 664db3
         yp2 -= 2;
Packit 664db3
         t++;
Packit 664db3
      }
Packit 664db3
   }
Packit 664db3
   RESTORE_STACK;
Packit 664db3
}
Packit 664db3
Packit 664db3
Packit 664db3
void mdct_backward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar * restrict out, const celt_word16_t * restrict window, int overlap)
Packit 664db3
{
Packit 664db3
   int i;
Packit 664db3
   int N, N2, N4;
Packit 664db3
   VARDECL(kiss_fft_scalar, f);
Packit 664db3
   VARDECL(kiss_fft_scalar, f2);
Packit 664db3
   SAVE_STACK;
Packit 664db3
   N = l->n;
Packit 664db3
   N2 = N>>1;
Packit 664db3
   N4 = N>>2;
Packit 664db3
   ALLOC(f, N2, kiss_fft_scalar);
Packit 664db3
   ALLOC(f2, N2, kiss_fft_scalar);
Packit 664db3
   
Packit 664db3
   /* Pre-rotate */
Packit 664db3
   {
Packit 664db3
      /* Temp pointers to make it really clear to the compiler what we're doing */
Packit 664db3
      const kiss_fft_scalar * restrict xp1 = in;
Packit 664db3
      const kiss_fft_scalar * restrict xp2 = in+N2-1;
Packit 664db3
      kiss_fft_scalar * restrict yp = f2;
Packit 664db3
      kiss_fft_scalar *t = &l->trig[0];
Packit 664db3
      for(i=0;i
Packit 664db3
      {
Packit 664db3
         *yp++ = -S_MUL(*xp2, t[0])  - S_MUL(*xp1,t[N4]);
Packit 664db3
         *yp++ =  S_MUL(*xp2, t[N4]) - S_MUL(*xp1,t[0]);
Packit 664db3
         xp1+=2;
Packit 664db3
         xp2-=2;
Packit 664db3
         t++;
Packit 664db3
      }
Packit 664db3
   }
Packit 664db3
Packit 664db3
   /* Inverse N/4 complex FFT. This one should *not* downscale even in fixed-point */
Packit 664db3
   cpx32_ifft(l->kfft, f2, f, N4);
Packit 664db3
   
Packit 664db3
   /* Post-rotate */
Packit 664db3
   {
Packit 664db3
      kiss_fft_scalar * restrict fp = f;
Packit 664db3
      kiss_fft_scalar *t = &l->trig[0];
Packit 664db3
Packit 664db3
      for(i=0;i
Packit 664db3
      {
Packit 664db3
         kiss_fft_scalar re, im;
Packit 664db3
         re = fp[0];
Packit 664db3
         im = fp[1];
Packit 664db3
         /* We'd scale up by 2 here, but instead it's done when mixing the windows */
Packit 664db3
         *fp++ = S_MUL(re,*t) + S_MUL(im,t[N4]);
Packit 664db3
         *fp++ = S_MUL(im,*t) - S_MUL(re,t[N4]);
Packit 664db3
         t++;
Packit 664db3
      }
Packit 664db3
   }
Packit 664db3
   /* De-shuffle the components for the middle of the window only */
Packit 664db3
   {
Packit 664db3
      const kiss_fft_scalar * restrict fp1 = f;
Packit 664db3
      const kiss_fft_scalar * restrict fp2 = f+N2-1;
Packit 664db3
      kiss_fft_scalar * restrict yp = f2;
Packit 664db3
      for(i = 0; i < N4; i++)
Packit 664db3
      {
Packit 664db3
         *yp++ =-*fp1;
Packit 664db3
         *yp++ = *fp2;
Packit 664db3
         fp1 += 2;
Packit 664db3
         fp2 -= 2;
Packit 664db3
      }
Packit 664db3
   }
Packit 664db3
Packit 664db3
   /* Mirror on both sides for TDAC */
Packit 664db3
   {
Packit 664db3
      kiss_fft_scalar * restrict fp1 = f2+N4-1;
Packit 664db3
      kiss_fft_scalar * restrict xp1 = out+N2-1;
Packit 664db3
      kiss_fft_scalar * restrict yp1 = out+N4-overlap/2;
Packit 664db3
      const celt_word16_t * restrict wp1 = window;
Packit 664db3
      const celt_word16_t * restrict wp2 = window+overlap-1;
Packit 664db3
      for(i = 0; i< N4-overlap/2; i++)
Packit 664db3
      {
Packit 664db3
         *xp1 = *fp1;
Packit 664db3
         xp1--;
Packit 664db3
         fp1--;
Packit 664db3
      }
Packit 664db3
      for(; i < N4; i++)
Packit 664db3
      {
Packit 664db3
         kiss_fft_scalar x1;
Packit 664db3
         x1 = *fp1--;
Packit 664db3
         *yp1++ +=-MULT16_32_Q15(*wp1, x1);
Packit 664db3
         *xp1-- += MULT16_32_Q15(*wp2, x1);
Packit 664db3
         wp1++;
Packit 664db3
         wp2--;
Packit 664db3
      }
Packit 664db3
   }
Packit 664db3
   {
Packit 664db3
      kiss_fft_scalar * restrict fp2 = f2+N4;
Packit 664db3
      kiss_fft_scalar * restrict xp2 = out+N2;
Packit 664db3
      kiss_fft_scalar * restrict yp2 = out+N-1-(N4-overlap/2);
Packit 664db3
      const celt_word16_t * restrict wp1 = window;
Packit 664db3
      const celt_word16_t * restrict wp2 = window+overlap-1;
Packit 664db3
      for(i = 0; i< N4-overlap/2; i++)
Packit 664db3
      {
Packit 664db3
         *xp2 = *fp2;
Packit 664db3
         xp2++;
Packit 664db3
         fp2++;
Packit 664db3
      }
Packit 664db3
      for(; i < N4; i++)
Packit 664db3
      {
Packit 664db3
         kiss_fft_scalar x2;
Packit 664db3
         x2 = *fp2++;
Packit 664db3
         *yp2--  = MULT16_32_Q15(*wp1, x2);
Packit 664db3
         *xp2++  = MULT16_32_Q15(*wp2, x2);
Packit 664db3
         wp1++;
Packit 664db3
         wp2--;
Packit 664db3
      }
Packit 664db3
   }
Packit 664db3
   RESTORE_STACK;
Packit 664db3
}
Packit 664db3
Packit 664db3