/*
	tabinit.c: initialize tables...

	copyright ?-2008 by the mpg123 project - free software under the terms of the LGPL 2.1
	see COPYING and AUTHORS files in distribution or http://mpg123.org
	initially written by Michael Hipp
*/

#include "mpg123lib_intern.h"
#include "debug.h"
#include <limits.h>

/* That altivec alignment part here should not hurt generic code, I hope */
Packit c32a2d
#ifdef OPT_ALTIVEC
Packit c32a2d
static ALIGNED(16) real cos64[16];
Packit c32a2d
static ALIGNED(16) real cos32[8];
Packit c32a2d
static ALIGNED(16) real cos16[4];
Packit c32a2d
static ALIGNED(16) real cos8[2];
Packit c32a2d
static ALIGNED(16) real cos4[1];
Packit c32a2d
#elif defined(REAL_IS_FIXED) && defined(PRECALC_TABLES)
Packit c32a2d
static real cos64[16] = 
Packit c32a2d
{
Packit c32a2d
	8398725,8480395,8647771,8909416,9279544,9780026,10443886,11321405,
Packit c32a2d
	12491246,14081950,16316987,19619946,24900150,34523836,57170182,170959967
Packit c32a2d
};
Packit c32a2d
static real cos32[8] =
Packit c32a2d
{
Packit c32a2d
	8429197,8766072,9511743,10851869,13223040,17795219,28897867,85583072
Packit c32a2d
};
Packit c32a2d
static real cos16[4] =
Packit c32a2d
{
Packit c32a2d
	8552951,10088893,15099095,42998586
Packit c32a2d
};
Packit c32a2d
static real cos8[2] =
Packit c32a2d
{
Packit c32a2d
	9079764,21920489
Packit c32a2d
};
Packit c32a2d
static real cos4[1] =
Packit c32a2d
{
Packit c32a2d
	11863283
Packit c32a2d
};
Packit c32a2d
#else
Packit c32a2d
static real cos64[16],cos32[8],cos16[4],cos8[2],cos4[1];
Packit c32a2d
#endif
Packit c32a2d
Packit c32a2d
real *pnts[] = { cos64,cos32,cos16,cos8,cos4 };
Packit c32a2d
Packit c32a2d
Packit c32a2d
/*
	Integer base values of the window table: 257 entries, index 0..256.
	The make_decode_tables*() functions walk it upwards (0..255) and then
	back downwards (256..1), multiplying by the output scale.
	It is only ever read, hence const.
*/
static const long intwinbase[] = {
     0,    -1,    -1,    -1,    -1,    -1,    -1,    -2,    -2,    -2,
    -2,    -3,    -3,    -4,    -4,    -5,    -5,    -6,    -7,    -7,
    -8,    -9,   -10,   -11,   -13,   -14,   -16,   -17,   -19,   -21,
   -24,   -26,   -29,   -31,   -35,   -38,   -41,   -45,   -49,   -53,
   -58,   -63,   -68,   -73,   -79,   -85,   -91,   -97,  -104,  -111,
  -117,  -125,  -132,  -139,  -147,  -154,  -161,  -169,  -176,  -183,
  -190,  -196,  -202,  -208,  -213,  -218,  -222,  -225,  -227,  -228,
  -228,  -227,  -224,  -221,  -215,  -208,  -200,  -189,  -177,  -163,
  -146,  -127,  -106,   -83,   -57,   -29,     2,    36,    72,   111,
   153,   197,   244,   294,   347,   401,   459,   519,   581,   645,
   711,   779,   848,   919,   991,  1064,  1137,  1210,  1283,  1356,
  1428,  1498,  1567,  1634,  1698,  1759,  1817,  1870,  1919,  1962,
  2001,  2032,  2057,  2075,  2085,  2087,  2080,  2063,  2037,  2000,
  1952,  1893,  1822,  1739,  1644,  1535,  1414,  1280,  1131,   970,
   794,   605,   402,   185,   -45,  -288,  -545,  -814, -1095, -1388,
 -1692, -2006, -2330, -2663, -3004, -3351, -3705, -4063, -4425, -4788,
 -5153, -5517, -5879, -6237, -6589, -6935, -7271, -7597, -7910, -8209,
 -8491, -8755, -8998, -9219, -9416, -9585, -9727, -9838, -9916, -9959,
 -9966, -9935, -9863, -9750, -9592, -9389, -9139, -8840, -8492, -8092,
 -7640, -7134, -6574, -5959, -5288, -4561, -3776, -2935, -2037, -1082,
   -70,   998,  2122,  3300,  4533,  5818,  7154,  8540,  9975, 11455,
 12980, 14548, 16155, 17799, 19478, 21189, 22929, 24694, 26482, 28289,
 30112, 31947, 33791, 35640, 37489, 39336, 41176, 43006, 44821, 46617,
 48390, 50137, 51853, 53534, 55178, 56778, 58333, 59838, 61289, 62684,
 64019, 65290, 66494, 67629, 68692, 69679, 70590, 71420, 72169, 72835,
 73415, 73908, 74313, 74630, 74856, 74992, 75038 };

void prepare_decode_tables()
Packit c32a2d
{
Packit c32a2d
#if !defined(REAL_IS_FIXED) || !defined(PRECALC_TABLES)
Packit c32a2d
  int i,k,kr,divv;
Packit c32a2d
  real *costab;
Packit c32a2d
Packit c32a2d
  for(i=0;i<5;i++)
Packit c32a2d
  {
Packit c32a2d
    kr=0x10>>i; divv=0x40>>i;
Packit c32a2d
    costab = pnts[i];
Packit c32a2d
    for(k=0;k
Packit c32a2d
      costab[k] = DOUBLE_TO_REAL(1.0 / (2.0 * cos(M_PI * ((double) k * 2.0 + 1.0) / (double) divv)));
Packit c32a2d
  }
Packit c32a2d
#endif
Packit c32a2d
}
Packit c32a2d
Packit c32a2d
#ifdef OPT_MMXORSSE
#if !defined(OPT_X86_64) && !defined(OPT_NEON) && !defined(OPT_NEON64) && !defined(OPT_AVX)
/* Table generator implemented outside this file (only the prototype is here). */
void make_decode_tables_mmx_asm(long scaleval, float* decwin_mmx, float *decwins);

/*
	Build the MMX decode window tables of the handle by delegating to
	make_decode_tables_mmx_asm().
	The scale passed on is fr->lastscale, or fr->p.outscale when lastscale
	is negative, multiplied by SHORT_SCALE and truncated to long.
*/
void make_decode_tables_mmx(mpg123_handle *fr)
{
	debug("MMX decode tables");
	/* Take care: The scale should be like before, when we didn't have float output all around. */
	make_decode_tables_mmx_asm(
		(long)((fr->lastscale < 0 ? fr->p.outscale : fr->lastscale)*SHORT_SCALE),
		fr->decwin_mmx, fr->decwins);
	debug("MMX decode tables done");
}
#else

/* This mimics round() as defined in C99. We stay C89.
   Halfway cases round away from zero.
   Unlike a plain cast, the result saturates at INT_MIN / INT_MAX and NaN
   yields 0, because converting an out-of-range double to int is undefined. */
static int rounded(double f)
{
	if(f != f) return 0; /* NaN */
	if(f >= (double)INT_MAX) return INT_MAX;
	if(f <= (double)INT_MIN) return INT_MIN;
	/* The cast truncates towards zero, which equals floor() for the positive
	   and ceil() for the non-positive argument used here. */
	return (int)(f>0 ? f+0.5 : f-0.5);
}

/* x86-64 doesn't use asm version */
/*
	C version: fill fr->decwin_mmx from intwinbase and derive the 16 bit
	table in fr->decwins from it.

	Layout written to fr->decwins (viewed as short):
	  [0, 512)      decwin_mmx[i] * 0.5 for odd i, * -0.5 for even i
	  [512, 544)    decwin_mmx[i] * 0.5 for odd i, 0 for even i
	  [544, 1056)   decwin_mmx[511-i] * -0.5, i.e. the first 512 reversed
	All values are rounded and clipped to the range of a signed 16 bit sample.
*/
void make_decode_tables_mmx(mpg123_handle *fr)
{
	int i,j,val;
	int idx = 0;
	short *ptr = (short *)fr->decwins;
	/* Scale is always based on 1.0 . */
	double scaleval = -0.5*(fr->lastscale < 0 ? fr->p.outscale : fr->lastscale);

	debug1("MMX decode tables with scaleval %g", scaleval);

	/* First half: walk intwinbase upwards. idx advances by 32 and is pulled
	   back by 1023 after every 32 steps, the sign flips after every 64. */
	for(i=0,j=0;i<256;i++,j++,idx+=32)
	{
		if(idx < 512+16)
		{
			fr->decwin_mmx[idx+16] = fr->decwin_mmx[idx] = DOUBLE_TO_REAL((double) intwinbase[j] * scaleval);
		}

		if(i % 32 == 31)
		{
			idx -= 1023;
		}
		if(i % 64 == 63)
		{
			scaleval = - scaleval;
		}
	}

	/* Second half: same index walk, intwinbase read downwards from j=256. */
	for( /* i=256 */ ;i<512;i++,j--,idx+=32)
	{
		if(idx < 512+16)
		{
			fr->decwin_mmx[idx+16] = fr->decwin_mmx[idx] = DOUBLE_TO_REAL((double) intwinbase[j] * scaleval);
		}

		if(i % 32 == 31)
		{
			idx -= 1023;
		}
		if(i % 64 == 63)
		{
			scaleval = - scaleval;
		}
	}

	/* 16 bit table, part 1: alternating sign. */
	for(i=0; i<512; i++)
	{
		if(i&1) val = rounded(fr->decwin_mmx[i]*0.5);
		else val = rounded(fr->decwin_mmx[i]*-0.5);

		if(val > 32767) val = 32767;
		else if(val < -32768) val = -32768;
		ptr[i] = val;
	}

	/* Part 2: only the odd entries carry a value. */
	for(i=512; i<512+32; i++)
	{
		if(i&1) val = rounded(fr->decwin_mmx[i]*0.5);
		else val = 0;

		if(val > 32767) val = 32767;
		else if(val < -32768) val = -32768;
		ptr[i] = val;
	}

	/* Part 3: the first 512 entries reversed and negated. */
	for(i=0; i<512; i++)
	{
		val = rounded(fr->decwin_mmx[511-i]*-0.5);

		if(val > 32767) val = 32767;
		else if(val < -32768) val = -32768;
		ptr[512+32+i] = val;
	}
	debug("decode tables done");
}
#endif
#endif

#ifdef REAL_IS_FIXED
/* Need saturating multiplication that keeps table values in 32 bit range,
   with the option to swap sign at will (so -2**31 is out).
   This code is far from the decoder core and so assembly optimization might
   be overkill.
   Returns a*b, clipped to the symmetric range [-2147483647, 2147483647]. */
static int32_t sat_mul32(int32_t a, int32_t b)
{
	/* The 64 bit product of two 32 bit values cannot overflow. */
	int64_t prod = (int64_t)a * (int64_t)b;
	/* TODO: record the clipping? An extra flag? */
	if(prod >  2147483647L) return  2147483647L;
	if(prod < -2147483647L) return -2147483647L;
	return (int32_t)prod;
}
#endif

/*
	Fill fr->decwin from intwinbase, scaled by -0.5 times the output scale
	(fr->lastscale, or fr->p.outscale when lastscale is negative).

	With REAL_IS_FIXED the scale is converted via DOUBLE_TO_REAL_15 and the
	products are formed with the saturating sat_mul32(); a warning is printed
	when the scale magnitude exceeds 28618 (75038, the largest table entry,
	times 28619 no longer fits into 32 bits).

	For the CPU types listed below, 512+32 further entries are derived after
	the first 512: the even entries of [512, 544) are zeroed and
	[544, 1056) receives the first 512 entries reversed and negated.
	For NEON types the even entries of the first 512 are negated as well.
*/
void make_decode_tables(mpg123_handle *fr)
{
	int i,j;
	int idx = 0;
	double scaleval;
#ifdef REAL_IS_FIXED
	real scaleval_long;
#endif

	/* Scale is always based on 1.0 . */
	scaleval = -0.5*(fr->lastscale < 0 ? fr->p.outscale : fr->lastscale);
	debug1("decode tables with scaleval %g", scaleval);
#ifdef REAL_IS_FIXED
	scaleval_long = DOUBLE_TO_REAL_15(scaleval);
	debug1("decode table with fixed scaleval %li", (long)scaleval_long);
	if(scaleval_long > 28618 || scaleval_long < -28618)
	{
		/* TODO: Limit the scaleval itself or limit the multiplication afterwards?
		   The former basically disables significant amplification for fixed-point
		   decoders, but avoids (possibly subtle) distortion. */
		/* This would limit the amplification instead:
		   scaleval_long = scaleval_long < 0 ? -28618 : 28618; */
		if(NOQUIET) warning("Desired amplification may introduce distortion.");
	}
#endif

	/* First half: walk intwinbase upwards. idx advances by 32 and is pulled
	   back by 1023 after every 32 steps, the sign flips after every 64. */
	for(i=0,j=0;i<256;i++,j++,idx+=32)
	{
		if(idx < 512+16)
		{
#ifdef REAL_IS_FIXED
			fr->decwin[idx+16] = fr->decwin[idx] =
				REAL_SCALE_WINDOW(sat_mul32(intwinbase[j],scaleval_long));
#else
			fr->decwin[idx+16] = fr->decwin[idx] = DOUBLE_TO_REAL((double) intwinbase[j] * scaleval);
#endif
		}

		if(i % 32 == 31)
		{
			idx -= 1023;
		}
		if(i % 64 == 63)
		{
#ifdef REAL_IS_FIXED
			scaleval_long = - scaleval_long;
#else
			scaleval = - scaleval;
#endif
		}
	}

	/* Second half: same index walk, intwinbase read downwards from j=256. */
	for( /* i=256 */ ;i<512;i++,j--,idx+=32)
	{
		if(idx < 512+16)
		{
#ifdef REAL_IS_FIXED
			fr->decwin[idx+16] = fr->decwin[idx] =
				REAL_SCALE_WINDOW(sat_mul32(intwinbase[j],scaleval_long));
#else
			fr->decwin[idx+16] = fr->decwin[idx] = DOUBLE_TO_REAL((double) intwinbase[j] * scaleval);
#endif
		}

		if(i % 32 == 31)
		{
			idx -= 1023;
		}
		if(i % 64 == 63)
		{
#ifdef REAL_IS_FIXED
			scaleval_long = - scaleval_long;
#else
			scaleval = - scaleval;
#endif
		}
	}
#if defined(OPT_X86_64) || defined(OPT_ALTIVEC) || defined(OPT_SSE) || defined(OPT_SSE_VINTAGE) || defined(OPT_ARM) || defined(OPT_NEON) || defined(OPT_NEON64) || defined(OPT_AVX)
	if(  fr->cpu_opts.type == x86_64
	  || fr->cpu_opts.type == altivec
	  || fr->cpu_opts.type == sse
	  || fr->cpu_opts.type == sse_vintage
	  || fr->cpu_opts.type == arm
	  || fr->cpu_opts.type == neon
	  || fr->cpu_opts.type == neon64
	  || fr->cpu_opts.type == avx )
	{ /* for float SSE / AltiVec / ARM decoder */
		/* Keep the odd entries of [512, 544), clear the even ones. */
		for(i=512; i<512+32; i++)
		{
			fr->decwin[i] = (i&1) ? fr->decwin[i] : 0;
		}
		/* Append the first 512 entries reversed and negated. */
		for(i=0; i<512; i++)
		{
			fr->decwin[512+32+i] = -fr->decwin[511-i];
		}
#if defined(OPT_NEON) || defined(OPT_NEON64)
		if(fr->cpu_opts.type == neon || fr->cpu_opts.type == neon64)
		{
			/* Negate the even entries of the first 512. */
			for(i=0; i<512; i+=2)
			{
				fr->decwin[i] = -fr->decwin[i];
			}
		}
#endif
	}
#endif
	debug("decode tables done");
}

#ifndef NO_8BIT
/*
	Build the 8 bit conversion table fr->conv16to8 for the encoding in
	fr->af.dec_enc (u-law, signed, unsigned or A-law).

	The table has 8192 entries and is addressed with indices -4096..4095;
	its buffer is allocated on first use and kept in fr->conv16to8_buf.

	Returns 0 on success, -1 on failure. Every failure path sets
	fr->err = MPG123_ERR_16TO8TABLE.
*/
int make_conv16to8_table(mpg123_handle *fr)
{
	int i;
	int mode = fr->af.dec_enc;

	/*
	 * ????: 8.0 is right but on SB cards '2.0' is a better value ???
	 */
	const double mul = 8.0;

	if(!fr->conv16to8_buf)
	{
		fr->conv16to8_buf = (unsigned char *) malloc(8192);
		if(!fr->conv16to8_buf)
		{
			fr->err = MPG123_ERR_16TO8TABLE;
			if(NOQUIET) error("Can't allocate 16 to 8 converter table!");
			return -1;
		}
		/* Point into the middle so that negative indices are valid. */
		fr->conv16to8 = fr->conv16to8_buf + 4096;
	}

	switch(mode)
	{
	case MPG123_ENC_ULAW_8:
	{
		double m=127.0 / log(256.0);
		int c1;

		for(i=-4096;i<4096;i++)
		{
			/* dunno whether this is a valid transformation rule ?!?!? */
			if(i < 0)
				c1 = 127 - (int) (log( 1.0 - 255.0 * (double) i*mul / 32768.0 ) * m);
			else
				c1 = 255 - (int) (log( 1.0 + 255.0 * (double) i*mul / 32768.0 ) * m);

			if(c1 < 0 || c1 > 255)
			{
				/* Report through fr->err like the other failure paths. */
				fr->err = MPG123_ERR_16TO8TABLE;
				if(NOQUIET) error2("Converror %d %d",i,c1);
				return -1;
			}
			/* A code of 0 is never stored; 2 is used in its place. */
			if(c1 == 0)
				c1 = 2;
			fr->conv16to8[i] = (unsigned char) c1;
		}
	}
	break;
	case MPG123_ENC_SIGNED_8:
		for(i=-4096;i<4096;i++)
			fr->conv16to8[i] = i>>5;
	break;
	case MPG123_ENC_UNSIGNED_8:
		for(i=-4096;i<4096;i++)
			fr->conv16to8[i] = (i>>5)+128;
	break;
	case MPG123_ENC_ALAW_8:
	{
		/*
			Let's believe Wikipedia (http://en.wikipedia.org/wiki/G.711) that this
			is the correct table:

			s0000000wxyza... 	n000wxyz  [0-31] -> [0-15]
			s0000001wxyza... 	n001wxyz  [32-63] -> [16-31]
			s000001wxyzab... 	n010wxyz  [64-127] -> [32-47]
			s00001wxyzabc... 	n011wxyz  [128-255] -> [48-63]
			s0001wxyzabcd... 	n100wxyz  [256-511] -> [64-79]
			s001wxyzabcde... 	n101wxyz  [512-1023] -> [80-95]
			s01wxyzabcdef... 	n110wxyz  [1024-2047] -> [96-111]
			s1wxyzabcdefg... 	n111wxyz  [2048-4095] -> [112-127]

			Let's extend to -4096, too.
			Also, bytes are xored with 0x55 for transmission.

			Since it sounds OK, I assume it is fine;-)
		*/
		int seg;

		/* The two lowest segments are a plain halving. */
		for(i=0; i<64; ++i)
			fr->conv16to8[i] = ((unsigned int)i)>>1;

		/* Segment seg (2..7) covers [2^(seg+4), 2^(seg+5)): four mantissa
		   bits taken after a shift by seg, segment number in bits 4-6. */
		for(seg=2; seg<8; ++seg)
		{
			for(i=1<<(seg+4); i<(1<<(seg+5)); ++i)
				fr->conv16to8[i] = ((((unsigned int)i)>>seg) & 0xf) | (seg<<4);
		}

		/* Negative inputs mirror the positive ones with the top bit set. */
		for(i=-4095; i<0; ++i)
			fr->conv16to8[i] = fr->conv16to8[-i] | 0x80;

		fr->conv16to8[-4096] = fr->conv16to8[-4095];

		for(i=-4096;i<4096;i++)
		{
			/* fr->conv16to8[i] = - i>>5; */
			/* fprintf(stderr, "table %i %i\n", i<<AUSHIFT, fr->conv16to8[i]); */
			fr->conv16to8[i] ^= 0x55;
		}
	}
	break;
	default:
		fr->err = MPG123_ERR_16TO8TABLE;
		if(NOQUIET) error("Unknown 8 bit encoding choice.");
		return -1;
	}

	return 0;
}
#endif