/* stat.c -- statistical tests of random number sequences. */

/*
Copyright 1999, 2000 Free Software Foundation, Inc.

This file is part of the GNU MP Library test suite.

The GNU MP Library test suite is free software; you can redistribute it
and/or modify it under the terms of the GNU General Public License as
published by the Free Software Foundation; either version 3 of the License,
or (at your option) any later version.

The GNU MP Library test suite is distributed in the hope that it will be
useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
Public License for more details.

You should have received a copy of the GNU General Public License along with
the GNU MP Library test suite.  If not, see https://www.gnu.org/licenses/.  */

/* Examples:

  $ gen 1000 | stat
Test 1000 real numbers.

  $ gen 30000 | stat -2 1000
Test 1000 real numbers 30 times and then test the 30 results in a
``second level''.

  $ gen -f mpz_urandomb 1000 | stat -i 0xffffffff
Test 1000 integers 0 <= X <= 2^32-1.

  $ gen -f mpz_urandomb -z 34 1000 | stat -i 0x3ffffffff
Test 1000 integers 0 <= X <= 2^34-1.

*/

#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <math.h>
#include "gmp.h"
#include "gmpstat.h"

#if !HAVE_DECL_OPTARG
extern char *optarg;
extern int optind, opterr;
#endif

#define FVECSIZ (100000L)

int g_debug = 0;

static void
print_ks_results (mpf_t f_p, mpf_t f_p_prob,
		  mpf_t f_m, mpf_t f_m_prob,
		  FILE *fp)
{
  double p, pp, m, mp;

  p = mpf_get_d (f_p);
  m = mpf_get_d (f_m);
  pp = mpf_get_d (f_p_prob);
  mp = mpf_get_d (f_m_prob);

  fprintf (fp, "%.4f (%.0f%%)\t", p, pp * 100.0);
  fprintf (fp, "%.4f (%.0f%%)\n", m, mp * 100.0);
}

static void
print_x2_table (unsigned int v, FILE *fp)
{
  double t[7];
  int f;


  fprintf (fp, "Chi-square table for v=%u\n", v);
  fprintf (fp, "1%%\t5%%\t25%%\t50%%\t75%%\t95%%\t99%%\n");
  x2_table (t, v);
  for (f = 0; f < 7; f++)
    fprintf (fp, "%.2f\t", t[f]);
  fputs ("\n", fp);
}


/* Pks () -- Distribution function for KS results with a big n (like 1000
   or so):  F(x) = 1 - pow(e, -2*x^2) [Knuth, vol 2, p.51]. */
/* gnuplot: plot [0:1] Pks(x), Pks(x) = 1-exp(-2*x**2)  */

static void
Pks (mpf_t p, mpf_t x)
{
  double dt;			/* temp double */

  mpf_set (p, x);
  mpf_mul (p, p, p);		/* p = x^2 */
  mpf_mul_ui (p, p, 2);		/* p = 2*x^2 */
  mpf_neg (p, p);		/* p = -2*x^2 */
  /* No pow() in gmp.  Use doubles. */
  /* FIXME: Use exp()? */
  dt = pow (M_E, mpf_get_d (p));
  mpf_set_d (p, dt);
  mpf_ui_sub (p, 1, p);
}

/* f_freq() -- frequency test on real numbers 0<=f<1*/
static void
f_freq (const unsigned l1runs, const unsigned l2runs,
	mpf_t fvec[], const unsigned long n)
{
  unsigned f;
  mpf_t f_p, f_p_prob;
  mpf_t f_m, f_m_prob;
  mpf_t *l1res;			/* level 1 result array */

  mpf_init (f_p);  mpf_init (f_m);
  mpf_init (f_p_prob);  mpf_init (f_m_prob);


  /* Allocate space for 1st level results. */
  l1res = (mpf_t *) malloc (l2runs * 2 * sizeof (mpf_t));
  if (NULL == l1res)
    {
      fprintf (stderr, "stat: malloc failure\n");
      exit (1);
    }

  printf ("\nEquidistribution/Frequency test on real numbers (0<=X<1):\n");
  printf ("\tKp\t\tKm\n");

  for (f = 0; f < l2runs; f++)
    {
      /*  f_printvec (fvec, n); */
      mpf_freqt (f_p, f_m, fvec + f * n, n);

      /* what's the probability of getting these results? */
      ks_table (f_p_prob, f_p, n);
      ks_table (f_m_prob, f_m, n);

      if (l1runs == 0)
	{
	  /*printf ("%u:\t", f + 1);*/
	  print_ks_results (f_p, f_p_prob, f_m, f_m_prob, stdout);
	}
      else
	{
	  /* save result */
	  mpf_init_set (l1res[f], f_p);
	  mpf_init_set (l1res[f + l2runs], f_m);
	}
    }

  /* Now, apply the KS test on the results from the 1st level rounds
     with the distribution
     F(x) = 1 - pow(e, -2*x^2)	[Knuth, vol 2, p.51] */

  if (l1runs != 0)
    {
      /*printf ("-------------------------------------\n");*/

      /* The Kp's. */
      ks (f_p, f_m, l1res, Pks, l2runs);
      ks_table (f_p_prob, f_p, l2runs);
      ks_table (f_m_prob, f_m, l2runs);
      printf ("Kp:\t");
      print_ks_results (f_p, f_p_prob, f_m, f_m_prob, stdout);

      /* The Km's. */
      ks (f_p, f_m, l1res + l2runs, Pks, l2runs);
      ks_table (f_p_prob, f_p, l2runs);
      ks_table (f_m_prob, f_m, l2runs);
      printf ("Km:\t");
      print_ks_results (f_p, f_p_prob, f_m, f_m_prob, stdout);
    }

  mpf_clear (f_p);  mpf_clear (f_m);
  mpf_clear (f_p_prob);  mpf_clear (f_m_prob);
  free (l1res);
}

/* z_freq(l1runs, l2runs, zvec, n, max) -- frequency test on integers
   0<=z<=MAX */
static void
z_freq (const unsigned l1runs,
	const unsigned l2runs,
	mpz_t zvec[],
	const unsigned long n,
	unsigned int max)
{
  mpf_t V;			/* result */
  double d_V;			/* result as a double */

  mpf_init (V);


  printf ("\nEquidistribution/Frequency test on integers (0<=X<=%u):\n", max);
  print_x2_table (max, stdout);

  mpz_freqt (V, zvec, max, n);

  d_V = mpf_get_d (V);
  printf ("V = %.2f (n = %lu)\n", d_V, n);

  mpf_clear (V);
}

unsigned int stat_debug = 0;

int
main (argc, argv)
     int argc;
     char *argv[];
{
  const char usage[] =
    "usage: stat [-d] [-2 runs] [-i max | -r max] [file]\n" \
    "       file     filename\n" \
    "       -2 runs  perform 2-level test with RUNS runs on 1st level\n" \
    "       -d       increase debugging level\n" \
    "       -i max   input is integers 0 <= Z <= MAX\n" \
    "       -r max   input is real numbers 0 <= R < 1 and use MAX as\n" \
    "                maximum value when converting real numbers to integers\n" \
    "";

  mpf_t fvec[FVECSIZ];
  mpz_t zvec[FVECSIZ];
  unsigned long int f, n, vecentries;
  char *filen;
  FILE *fp;
  int c;
  int omitoutput = 0;
  int realinput = -1;		/* 1: input is real numbers 0<=R<1;
				   0: input is integers 0 <= Z <= MAX. */
  long l1runs = 0,		/* 1st level runs */
    l2runs = 1;			/* 2nd level runs */
  mpf_t f_temp;
  mpz_t z_imax;			/* max value when converting between
				   real number and integer. */
  mpf_t f_imax_plus1;		/* f_imax + 1 stored in an mpf_t for
				   convenience */
  mpf_t f_imax_minus1;		/* f_imax - 1 stored in an mpf_t for
				   convenience */


  mpf_init (f_temp);
  mpz_init_set_ui (z_imax, 0x7fffffff);
  mpf_init (f_imax_plus1);
  mpf_init (f_imax_minus1);

  while ((c = getopt (argc, argv, "d2:i:r:")) != -1)
    switch (c)
      {
      case '2':
	l1runs = atol (optarg);
	l2runs = -1;		/* set later on */
	break;
      case 'd':			/* increase debug level */
	stat_debug++;
	break;
      case 'i':
	if (1 == realinput)
	  {
	    fputs ("stat: options -i and -r are mutually exclusive\n", stderr);
	    exit (1);
	  }
	if (mpz_set_str (z_imax, optarg, 0))
	  {
	    fprintf (stderr, "stat: bad max value %s\n", optarg);
	    exit (1);
	  }
	realinput = 0;
	break;
      case 'r':
	if (0 == realinput)
	  {
	    fputs ("stat: options -i and -r are mutually exclusive\n", stderr);
	    exit (1);
	  }
	if (mpz_set_str (z_imax, optarg, 0))
	  {
	    fprintf (stderr, "stat: bad max value %s\n", optarg);
	    exit (1);
	  }
	realinput = 1;
	break;
      case 'o':
	omitoutput = atoi (optarg);
	break;
      case '?':
      default:
	fputs (usage, stderr);
	exit (1);
      }
  argc -= optind;
  argv += optind;

  if (argc < 1)
    fp = stdin;
  else
    filen = argv[0];

  if (fp != stdin)
    if (NULL == (fp = fopen (filen, "r")))
      {
	perror (filen);
	exit (1);
      }

  if (-1 == realinput)
    realinput = 1;		/* default is real numbers */

  /* read file and fill appropriate vec */
  if (1 == realinput)		/* real input */
    {
      for (f = 0; f < FVECSIZ ; f++)
	{
	  mpf_init (fvec[f]);
	  if (!mpf_inp_str (fvec[f], fp, 10))
	    break;
	}
    }
  else				/* integer input */
    {
      for (f = 0; f < FVECSIZ ; f++)
	{
	  mpz_init (zvec[f]);
	  if (!mpz_inp_str (zvec[f], fp, 10))
	    break;
	}
    }
  vecentries = n = f;		/* number of entries read */
  fclose (fp);

  if (FVECSIZ == f)
    fprintf (stderr, "stat: warning: discarding input due to lazy allocation "\
	     "of only %ld entries.  sorry.\n", FVECSIZ);

  printf ("Got %lu numbers.\n", n);

  /* convert and fill the other vec */
  /* since fvec[] contains 0<=f<1 and we want ivec[] to contain
     0<=z<=imax and we are truncating all fractions when
     converting float to int, we have to add 1 to imax.*/
  mpf_set_z (f_imax_plus1, z_imax);
  mpf_add_ui (f_imax_plus1, f_imax_plus1, 1);
  if (1 == realinput)		/* fill zvec[] */
    {
      for (f = 0; f < n; f++)
	{
	  mpf_mul (f_temp, fvec[f], f_imax_plus1);
	  mpz_init (zvec[f]);
	  mpz_set_f (zvec[f], f_temp); /* truncating fraction */
	  if (stat_debug > 1)
	    {
	      mpz_out_str (stderr, 10, zvec[f]);
	      fputs ("\n", stderr);
	    }
	}
    }
  else				/* integer input; fill fvec[] */
    {
      /*    mpf_set_z (f_imax_minus1, z_imax);
	    mpf_sub_ui (f_imax_minus1, f_imax_minus1, 1);*/
      for (f = 0; f < n; f++)
	{
	  mpf_init (fvec[f]);
	  mpf_set_z (fvec[f], zvec[f]);
	  mpf_div (fvec[f], fvec[f], f_imax_plus1);
	  if (stat_debug > 1)
	    {
	      mpf_out_str (stderr, 10, 0, fvec[f]);
	      fputs ("\n", stderr);
	    }
	}
    }

  /* 2 levels? */
  if (1 != l2runs)
    {
      l2runs = n / l1runs;
      printf ("Doing %ld second level rounds "\
	      "with %ld entries in each round", l2runs, l1runs);
      if (n % l1runs)
	printf (" (discarding %ld entr%s)", n % l1runs,
		n % l1runs == 1 ? "y" : "ies");
      puts (".");
      n = l1runs;
    }

#ifndef DONT_FFREQ
  f_freq (l1runs, l2runs, fvec, n);
#endif
#ifdef DO_ZFREQ
  z_freq (l1runs, l2runs, zvec, n, mpz_get_ui (z_imax));
#endif

  mpf_clear (f_temp); mpz_clear (z_imax);
  mpf_clear (f_imax_plus1);
  mpf_clear (f_imax_minus1);
  for (f = 0; f < vecentries; f++)
    {
      mpf_clear (fvec[f]);
      mpz_clear (zvec[f]);
    }

  return 0;
}