Blame libmp3lame/fft.c

Packit 47f805
/*
Packit 47f805
** FFT and FHT routines
Packit 47f805
**  Copyright 1988, 1993; Ron Mayer
Packit 47f805
**      Copyright (c) 1999-2000 Takehiro Tominaga
Packit 47f805
**
Packit 47f805
**  fht(fz,n);
Packit 47f805
**      Does a hartley transform of "n" points in the array "fz".
Packit 47f805
**
Packit 47f805
** NOTE: This routine uses at least 2 patented algorithms, and may be
Packit 47f805
**       under the restrictions of a bunch of different organizations.
Packit 47f805
**       Although I wrote it completely myself; it is kind of a derivative
Packit 47f805
**       of a routine I once authored and released under the GPL, so it
Packit 47f805
**       may fall under the free software foundation's restrictions;
Packit 47f805
**       it was worked on as a Stanford Univ project, so they claim
Packit 47f805
**       some rights to it; it was further optimized at work here, so
Packit 47f805
**       I think this company claims parts of it.  The patents are
Packit 47f805
**       held by R. Bracewell (the FHT algorithm) and O. Buneman (the
Packit 47f805
**       trig generator), both at Stanford Univ.
Packit 47f805
**       If it were up to me, I'd say go do whatever you want with it;
Packit 47f805
**       but it would be polite to give credit to the following people
Packit 47f805
**       if you use this anywhere:
Packit 47f805
**           Euler     - probable inventor of the fourier transform.
Packit 47f805
**           Gauss     - probable inventor of the FFT.
Packit 47f805
**           Hartley   - probable inventor of the hartley transform.
Packit 47f805
**           Buneman   - for a really cool trig generator
Packit 47f805
**           Mayer(me) - for authoring this particular version and
Packit 47f805
**                       including all the optimizations in one package.
Packit 47f805
**       Thanks,
Packit 47f805
**       Ron Mayer; mayer@acuson.com
Packit 47f805
** and added some optimization by
Packit 47f805
**           Mather    - idea of using lookup table
Packit 47f805
**           Takehiro  - some dirty hack for speed up
Packit 47f805
*/
Packit 47f805
Packit 47f805
/* $Id: fft.c,v 1.39 2017/09/06 15:07:29 robert Exp $ */
Packit 47f805
Packit 47f805
#ifdef HAVE_CONFIG_H
Packit 47f805
# include <config.h>
Packit 47f805
#endif
Packit 47f805
Packit 47f805
#include "lame.h"
Packit 47f805
#include "machine.h"
Packit 47f805
#include "encoder.h"
Packit 47f805
#include "util.h"
Packit 47f805
#include "fft.h"
Packit 47f805
Packit 47f805
#include "vector/lame_intrin.h"
Packit 47f805
Packit 47f805
Packit 47f805
Packit 47f805
#define TRI_SIZE (5-1)  /* 1024 =  4**5 */
Packit 47f805
Packit 47f805
/* fft.c    */
Packit 47f805
Packit 47f805
static const FLOAT costab[TRI_SIZE * 2] = {
Packit 47f805
    9.238795325112867e-01, 3.826834323650898e-01,
Packit 47f805
    9.951847266721969e-01, 9.801714032956060e-02,
Packit 47f805
    9.996988186962042e-01, 2.454122852291229e-02,
Packit 47f805
    9.999811752826011e-01, 6.135884649154475e-03
Packit 47f805
};
Packit 47f805
Packit 47f805
static void
Packit 47f805
fht(FLOAT * fz, int n)
Packit 47f805
{
Packit 47f805
    const FLOAT *tri = costab;
Packit 47f805
    int     k4;
Packit 47f805
    FLOAT  *fi, *gi;
Packit 47f805
    FLOAT const *fn;
Packit 47f805
Packit 47f805
    n <<= 1;            /* to get BLKSIZE, because of 3DNow! ASM routine */
Packit 47f805
    fn = fz + n;
Packit 47f805
    k4 = 4;
Packit 47f805
    do {
Packit 47f805
        FLOAT   s1, c1;
Packit 47f805
        int     i, k1, k2, k3, kx;
Packit 47f805
        kx = k4 >> 1;
Packit 47f805
        k1 = k4;
Packit 47f805
        k2 = k4 << 1;
Packit 47f805
        k3 = k2 + k1;
Packit 47f805
        k4 = k2 << 1;
Packit 47f805
        fi = fz;
Packit 47f805
        gi = fi + kx;
Packit 47f805
        do {
Packit 47f805
            FLOAT   f0, f1, f2, f3;
Packit 47f805
            f1 = fi[0] - fi[k1];
Packit 47f805
            f0 = fi[0] + fi[k1];
Packit 47f805
            f3 = fi[k2] - fi[k3];
Packit 47f805
            f2 = fi[k2] + fi[k3];
Packit 47f805
            fi[k2] = f0 - f2;
Packit 47f805
            fi[0] = f0 + f2;
Packit 47f805
            fi[k3] = f1 - f3;
Packit 47f805
            fi[k1] = f1 + f3;
Packit 47f805
            f1 = gi[0] - gi[k1];
Packit 47f805
            f0 = gi[0] + gi[k1];
Packit 47f805
            f3 = SQRT2 * gi[k3];
Packit 47f805
            f2 = SQRT2 * gi[k2];
Packit 47f805
            gi[k2] = f0 - f2;
Packit 47f805
            gi[0] = f0 + f2;
Packit 47f805
            gi[k3] = f1 - f3;
Packit 47f805
            gi[k1] = f1 + f3;
Packit 47f805
            gi += k4;
Packit 47f805
            fi += k4;
Packit 47f805
        } while (fi < fn);
Packit 47f805
        c1 = tri[0];
Packit 47f805
        s1 = tri[1];
Packit 47f805
        for (i = 1; i < kx; i++) {
Packit 47f805
            FLOAT   c2, s2;
Packit 47f805
            c2 = 1 - (2 * s1) * s1;
Packit 47f805
            s2 = (2 * s1) * c1;
Packit 47f805
            fi = fz + i;
Packit 47f805
            gi = fz + k1 - i;
Packit 47f805
            do {
Packit 47f805
                FLOAT   a, b, g0, f0, f1, g1, f2, g2, f3, g3;
Packit 47f805
                b = s2 * fi[k1] - c2 * gi[k1];
Packit 47f805
                a = c2 * fi[k1] + s2 * gi[k1];
Packit 47f805
                f1 = fi[0] - a;
Packit 47f805
                f0 = fi[0] + a;
Packit 47f805
                g1 = gi[0] - b;
Packit 47f805
                g0 = gi[0] + b;
Packit 47f805
                b = s2 * fi[k3] - c2 * gi[k3];
Packit 47f805
                a = c2 * fi[k3] + s2 * gi[k3];
Packit 47f805
                f3 = fi[k2] - a;
Packit 47f805
                f2 = fi[k2] + a;
Packit 47f805
                g3 = gi[k2] - b;
Packit 47f805
                g2 = gi[k2] + b;
Packit 47f805
                b = s1 * f2 - c1 * g3;
Packit 47f805
                a = c1 * f2 + s1 * g3;
Packit 47f805
                fi[k2] = f0 - a;
Packit 47f805
                fi[0] = f0 + a;
Packit 47f805
                gi[k3] = g1 - b;
Packit 47f805
                gi[k1] = g1 + b;
Packit 47f805
                b = c1 * g2 - s1 * f3;
Packit 47f805
                a = s1 * g2 + c1 * f3;
Packit 47f805
                gi[k2] = g0 - a;
Packit 47f805
                gi[0] = g0 + a;
Packit 47f805
                fi[k3] = f1 - b;
Packit 47f805
                fi[k1] = f1 + b;
Packit 47f805
                gi += k4;
Packit 47f805
                fi += k4;
Packit 47f805
            } while (fi < fn);
Packit 47f805
            c2 = c1;
Packit 47f805
            c1 = c2 * tri[0] - s1 * tri[1];
Packit 47f805
            s1 = c2 * tri[1] + s1 * tri[0];
Packit 47f805
        }
Packit 47f805
        tri += 2;
Packit 47f805
    } while (k4 < n);
Packit 47f805
}
Packit 47f805
Packit 47f805
Packit 47f805
static const unsigned char rv_tbl[] = {
Packit 47f805
    0x00, 0x80, 0x40, 0xc0, 0x20, 0xa0, 0x60, 0xe0,
Packit 47f805
    0x10, 0x90, 0x50, 0xd0, 0x30, 0xb0, 0x70, 0xf0,
Packit 47f805
    0x08, 0x88, 0x48, 0xc8, 0x28, 0xa8, 0x68, 0xe8,
Packit 47f805
    0x18, 0x98, 0x58, 0xd8, 0x38, 0xb8, 0x78, 0xf8,
Packit 47f805
    0x04, 0x84, 0x44, 0xc4, 0x24, 0xa4, 0x64, 0xe4,
Packit 47f805
    0x14, 0x94, 0x54, 0xd4, 0x34, 0xb4, 0x74, 0xf4,
Packit 47f805
    0x0c, 0x8c, 0x4c, 0xcc, 0x2c, 0xac, 0x6c, 0xec,
Packit 47f805
    0x1c, 0x9c, 0x5c, 0xdc, 0x3c, 0xbc, 0x7c, 0xfc,
Packit 47f805
    0x02, 0x82, 0x42, 0xc2, 0x22, 0xa2, 0x62, 0xe2,
Packit 47f805
    0x12, 0x92, 0x52, 0xd2, 0x32, 0xb2, 0x72, 0xf2,
Packit 47f805
    0x0a, 0x8a, 0x4a, 0xca, 0x2a, 0xaa, 0x6a, 0xea,
Packit 47f805
    0x1a, 0x9a, 0x5a, 0xda, 0x3a, 0xba, 0x7a, 0xfa,
Packit 47f805
    0x06, 0x86, 0x46, 0xc6, 0x26, 0xa6, 0x66, 0xe6,
Packit 47f805
    0x16, 0x96, 0x56, 0xd6, 0x36, 0xb6, 0x76, 0xf6,
Packit 47f805
    0x0e, 0x8e, 0x4e, 0xce, 0x2e, 0xae, 0x6e, 0xee,
Packit 47f805
    0x1e, 0x9e, 0x5e, 0xde, 0x3e, 0xbe, 0x7e, 0xfe
Packit 47f805
};
Packit 47f805
Packit 47f805
#define ch01(index)  (buffer[chn][index])
Packit 47f805
Packit 47f805
#define ml00(f) (window[i        ] * f(i))
Packit 47f805
#define ml10(f) (window[i + 0x200] * f(i + 0x200))
Packit 47f805
#define ml20(f) (window[i + 0x100] * f(i + 0x100))
Packit 47f805
#define ml30(f) (window[i + 0x300] * f(i + 0x300))
Packit 47f805
Packit 47f805
#define ml01(f) (window[i + 0x001] * f(i + 0x001))
Packit 47f805
#define ml11(f) (window[i + 0x201] * f(i + 0x201))
Packit 47f805
#define ml21(f) (window[i + 0x101] * f(i + 0x101))
Packit 47f805
#define ml31(f) (window[i + 0x301] * f(i + 0x301))
Packit 47f805
Packit 47f805
#define ms00(f) (window_s[i       ] * f(i + k))
Packit 47f805
#define ms10(f) (window_s[0x7f - i] * f(i + k + 0x80))
Packit 47f805
#define ms20(f) (window_s[i + 0x40] * f(i + k + 0x40))
Packit 47f805
#define ms30(f) (window_s[0x3f - i] * f(i + k + 0xc0))
Packit 47f805
Packit 47f805
#define ms01(f) (window_s[i + 0x01] * f(i + k + 0x01))
Packit 47f805
#define ms11(f) (window_s[0x7e - i] * f(i + k + 0x81))
Packit 47f805
#define ms21(f) (window_s[i + 0x41] * f(i + k + 0x41))
Packit 47f805
#define ms31(f) (window_s[0x3e - i] * f(i + k + 0xc1))
Packit 47f805
Packit 47f805
void
Packit 47f805
fft_short(lame_internal_flags const *const gfc,
Packit 47f805
          FLOAT x_real[3][BLKSIZE_s], int chn, const sample_t *const buffer[2])
Packit 47f805
{
Packit 47f805
    int     i;
Packit 47f805
    int     j;
Packit 47f805
    int     b;
Packit 47f805
Packit 47f805
#define window_s gfc->cd_psy->window_s
Packit 47f805
#define window gfc->cd_psy->window
Packit 47f805
Packit 47f805
    for (b = 0; b < 3; b++) {
Packit 47f805
        FLOAT  *x = &x_real[b][BLKSIZE_s / 2];
Packit 47f805
        short const k = (576 / 3) * (b + 1);
Packit 47f805
        j = BLKSIZE_s / 8 - 1;
Packit 47f805
        do {
Packit 47f805
            FLOAT   f0, f1, f2, f3, w;
Packit 47f805
Packit 47f805
            i = rv_tbl[j << 2];
Packit 47f805
Packit 47f805
            f0 = ms00(ch01);
Packit 47f805
            w = ms10(ch01);
Packit 47f805
            f1 = f0 - w;
Packit 47f805
            f0 = f0 + w;
Packit 47f805
            f2 = ms20(ch01);
Packit 47f805
            w = ms30(ch01);
Packit 47f805
            f3 = f2 - w;
Packit 47f805
            f2 = f2 + w;
Packit 47f805
Packit 47f805
            x -= 4;
Packit 47f805
            x[0] = f0 + f2;
Packit 47f805
            x[2] = f0 - f2;
Packit 47f805
            x[1] = f1 + f3;
Packit 47f805
            x[3] = f1 - f3;
Packit 47f805
Packit 47f805
            f0 = ms01(ch01);
Packit 47f805
            w = ms11(ch01);
Packit 47f805
            f1 = f0 - w;
Packit 47f805
            f0 = f0 + w;
Packit 47f805
            f2 = ms21(ch01);
Packit 47f805
            w = ms31(ch01);
Packit 47f805
            f3 = f2 - w;
Packit 47f805
            f2 = f2 + w;
Packit 47f805
Packit 47f805
            x[BLKSIZE_s / 2 + 0] = f0 + f2;
Packit 47f805
            x[BLKSIZE_s / 2 + 2] = f0 - f2;
Packit 47f805
            x[BLKSIZE_s / 2 + 1] = f1 + f3;
Packit 47f805
            x[BLKSIZE_s / 2 + 3] = f1 - f3;
Packit 47f805
        } while (--j >= 0);
Packit 47f805
Packit 47f805
#undef window
Packit 47f805
#undef window_s
Packit 47f805
Packit 47f805
        gfc->fft_fht(x, BLKSIZE_s / 2);
Packit 47f805
        /* BLKSIZE_s/2 because of 3DNow! ASM routine */
Packit 47f805
    }
Packit 47f805
}
Packit 47f805
Packit 47f805
void
Packit 47f805
fft_long(lame_internal_flags const *const gfc,
Packit 47f805
         FLOAT x[BLKSIZE], int chn, const sample_t *const buffer[2])
Packit 47f805
{
Packit 47f805
    int     i;
Packit 47f805
    int     jj = BLKSIZE / 8 - 1;
Packit 47f805
    x += BLKSIZE / 2;
Packit 47f805
Packit 47f805
#define window_s gfc->cd_psy->window_s
Packit 47f805
#define window gfc->cd_psy->window
Packit 47f805
Packit 47f805
    do {
Packit 47f805
        FLOAT   f0, f1, f2, f3, w;
Packit 47f805
Packit 47f805
        i = rv_tbl[jj];
Packit 47f805
        f0 = ml00(ch01);
Packit 47f805
        w = ml10(ch01);
Packit 47f805
        f1 = f0 - w;
Packit 47f805
        f0 = f0 + w;
Packit 47f805
        f2 = ml20(ch01);
Packit 47f805
        w = ml30(ch01);
Packit 47f805
        f3 = f2 - w;
Packit 47f805
        f2 = f2 + w;
Packit 47f805
Packit 47f805
        x -= 4;
Packit 47f805
        x[0] = f0 + f2;
Packit 47f805
        x[2] = f0 - f2;
Packit 47f805
        x[1] = f1 + f3;
Packit 47f805
        x[3] = f1 - f3;
Packit 47f805
Packit 47f805
        f0 = ml01(ch01);
Packit 47f805
        w = ml11(ch01);
Packit 47f805
        f1 = f0 - w;
Packit 47f805
        f0 = f0 + w;
Packit 47f805
        f2 = ml21(ch01);
Packit 47f805
        w = ml31(ch01);
Packit 47f805
        f3 = f2 - w;
Packit 47f805
        f2 = f2 + w;
Packit 47f805
Packit 47f805
        x[BLKSIZE / 2 + 0] = f0 + f2;
Packit 47f805
        x[BLKSIZE / 2 + 2] = f0 - f2;
Packit 47f805
        x[BLKSIZE / 2 + 1] = f1 + f3;
Packit 47f805
        x[BLKSIZE / 2 + 3] = f1 - f3;
Packit 47f805
    } while (--jj >= 0);
Packit 47f805
Packit 47f805
#undef window
Packit 47f805
#undef window_s
Packit 47f805
Packit 47f805
    gfc->fft_fht(x, BLKSIZE / 2);
Packit 47f805
    /* BLKSIZE/2 because of 3DNow! ASM routine */
Packit 47f805
}
Packit 47f805
Packit 47f805
#ifdef HAVE_NASM
Packit 47f805
extern void fht_3DN(FLOAT * fz, int n);
Packit 47f805
extern void fht_SSE(FLOAT * fz, int n);
Packit 47f805
#endif
Packit 47f805
Packit 47f805
void
Packit 47f805
init_fft(lame_internal_flags * const gfc)
Packit 47f805
{
Packit 47f805
    int     i;
Packit 47f805
Packit 47f805
    /* The type of window used here will make no real difference, but */
Packit 47f805
    /* in the interest of merging nspsytune stuff - switch to blackman window */
Packit 47f805
    for (i = 0; i < BLKSIZE; i++)
Packit 47f805
        /* blackman window */
Packit 47f805
        gfc->cd_psy->window[i] = 0.42 - 0.5 * cos(2 * PI * (i + .5) / BLKSIZE) +
Packit 47f805
            0.08 * cos(4 * PI * (i + .5) / BLKSIZE);
Packit 47f805
Packit 47f805
    for (i = 0; i < BLKSIZE_s / 2; i++)
Packit 47f805
        gfc->cd_psy->window_s[i] = 0.5 * (1.0 - cos(2.0 * PI * (i + 0.5) / BLKSIZE_s));
Packit 47f805
Packit 47f805
    gfc->fft_fht = fht;
Packit 47f805
#ifdef HAVE_NASM
Packit 47f805
    if (gfc->CPU_features.AMD_3DNow) {
Packit 47f805
        gfc->fft_fht = fht_3DN;
Packit 47f805
    }
Packit 47f805
    else if (gfc->CPU_features.SSE) {
Packit 47f805
        gfc->fft_fht = fht_SSE;
Packit 47f805
    }
Packit 47f805
    else {
Packit 47f805
        gfc->fft_fht = fht;
Packit 47f805
    }
Packit 47f805
#else
Packit 47f805
#ifdef HAVE_XMMINTRIN_H
Packit 47f805
#ifdef MIN_ARCH_SSE
Packit 47f805
    gfc->fft_fht = fht_SSE2;
Packit 47f805
#endif
Packit 47f805
#endif
Packit 47f805
#endif
Packit 47f805
}