/* stat.c -- statistical tests of random number sequences. */ /* Copyright 1999, 2000 Free Software Foundation, Inc. This file is part of the GNU MP Library test suite. The GNU MP Library test suite is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 3 of the License, or (at your option) any later version. The GNU MP Library test suite is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with the GNU MP Library test suite. If not, see https://www.gnu.org/licenses/. */ /* Examples: $ gen 1000 | stat Test 1000 real numbers. $ gen 30000 | stat -2 1000 Test 1000 real numbers 30 times and then test the 30 results in a ``second level''. $ gen -f mpz_urandomb 1000 | stat -i 0xffffffff Test 1000 integers 0 <= X <= 2^32-1. $ gen -f mpz_urandomb -z 34 1000 | stat -i 0x3ffffffff Test 1000 integers 0 <= X <= 2^34-1. */ #include #include #include #include #include "gmp.h" #include "gmpstat.h" #if !HAVE_DECL_OPTARG extern char *optarg; extern int optind, opterr; #endif #define FVECSIZ (100000L) int g_debug = 0; static void print_ks_results (mpf_t f_p, mpf_t f_p_prob, mpf_t f_m, mpf_t f_m_prob, FILE *fp) { double p, pp, m, mp; p = mpf_get_d (f_p); m = mpf_get_d (f_m); pp = mpf_get_d (f_p_prob); mp = mpf_get_d (f_m_prob); fprintf (fp, "%.4f (%.0f%%)\t", p, pp * 100.0); fprintf (fp, "%.4f (%.0f%%)\n", m, mp * 100.0); } static void print_x2_table (unsigned int v, FILE *fp) { double t[7]; int f; fprintf (fp, "Chi-square table for v=%u\n", v); fprintf (fp, "1%%\t5%%\t25%%\t50%%\t75%%\t95%%\t99%%\n"); x2_table (t, v); for (f = 0; f < 7; f++) fprintf (fp, "%.2f\t", t[f]); fputs ("\n", fp); } /* Pks () -- Distribution function for KS results with a big n (like 1000 or so): F(x) = 1 - pow(e, -2*x^2) [Knuth, vol 2, p.51]. */ /* gnuplot: plot [0:1] Pks(x), Pks(x) = 1-exp(-2*x**2) */ static void Pks (mpf_t p, mpf_t x) { double dt; /* temp double */ mpf_set (p, x); mpf_mul (p, p, p); /* p = x^2 */ mpf_mul_ui (p, p, 2); /* p = 2*x^2 */ mpf_neg (p, p); /* p = -2*x^2 */ /* No pow() in gmp. Use doubles. */ /* FIXME: Use exp()? */ dt = pow (M_E, mpf_get_d (p)); mpf_set_d (p, dt); mpf_ui_sub (p, 1, p); } /* f_freq() -- frequency test on real numbers 0<=f<1*/ static void f_freq (const unsigned l1runs, const unsigned l2runs, mpf_t fvec[], const unsigned long n) { unsigned f; mpf_t f_p, f_p_prob; mpf_t f_m, f_m_prob; mpf_t *l1res; /* level 1 result array */ mpf_init (f_p); mpf_init (f_m); mpf_init (f_p_prob); mpf_init (f_m_prob); /* Allocate space for 1st level results. */ l1res = (mpf_t *) malloc (l2runs * 2 * sizeof (mpf_t)); if (NULL == l1res) { fprintf (stderr, "stat: malloc failure\n"); exit (1); } printf ("\nEquidistribution/Frequency test on real numbers (0<=X<1):\n"); printf ("\tKp\t\tKm\n"); for (f = 0; f < l2runs; f++) { /* f_printvec (fvec, n); */ mpf_freqt (f_p, f_m, fvec + f * n, n); /* what's the probability of getting these results? */ ks_table (f_p_prob, f_p, n); ks_table (f_m_prob, f_m, n); if (l1runs == 0) { /*printf ("%u:\t", f + 1);*/ print_ks_results (f_p, f_p_prob, f_m, f_m_prob, stdout); } else { /* save result */ mpf_init_set (l1res[f], f_p); mpf_init_set (l1res[f + l2runs], f_m); } } /* Now, apply the KS test on the results from the 1st level rounds with the distribution F(x) = 1 - pow(e, -2*x^2) [Knuth, vol 2, p.51] */ if (l1runs != 0) { /*printf ("-------------------------------------\n");*/ /* The Kp's. */ ks (f_p, f_m, l1res, Pks, l2runs); ks_table (f_p_prob, f_p, l2runs); ks_table (f_m_prob, f_m, l2runs); printf ("Kp:\t"); print_ks_results (f_p, f_p_prob, f_m, f_m_prob, stdout); /* The Km's. */ ks (f_p, f_m, l1res + l2runs, Pks, l2runs); ks_table (f_p_prob, f_p, l2runs); ks_table (f_m_prob, f_m, l2runs); printf ("Km:\t"); print_ks_results (f_p, f_p_prob, f_m, f_m_prob, stdout); } mpf_clear (f_p); mpf_clear (f_m); mpf_clear (f_p_prob); mpf_clear (f_m_prob); free (l1res); } /* z_freq(l1runs, l2runs, zvec, n, max) -- frequency test on integers 0<=z<=MAX */ static void z_freq (const unsigned l1runs, const unsigned l2runs, mpz_t zvec[], const unsigned long n, unsigned int max) { mpf_t V; /* result */ double d_V; /* result as a double */ mpf_init (V); printf ("\nEquidistribution/Frequency test on integers (0<=X<=%u):\n", max); print_x2_table (max, stdout); mpz_freqt (V, zvec, max, n); d_V = mpf_get_d (V); printf ("V = %.2f (n = %lu)\n", d_V, n); mpf_clear (V); } unsigned int stat_debug = 0; int main (argc, argv) int argc; char *argv[]; { const char usage[] = "usage: stat [-d] [-2 runs] [-i max | -r max] [file]\n" \ " file filename\n" \ " -2 runs perform 2-level test with RUNS runs on 1st level\n" \ " -d increase debugging level\n" \ " -i max input is integers 0 <= Z <= MAX\n" \ " -r max input is real numbers 0 <= R < 1 and use MAX as\n" \ " maximum value when converting real numbers to integers\n" \ ""; mpf_t fvec[FVECSIZ]; mpz_t zvec[FVECSIZ]; unsigned long int f, n, vecentries; char *filen; FILE *fp; int c; int omitoutput = 0; int realinput = -1; /* 1: input is real numbers 0<=R<1; 0: input is integers 0 <= Z <= MAX. */ long l1runs = 0, /* 1st level runs */ l2runs = 1; /* 2nd level runs */ mpf_t f_temp; mpz_t z_imax; /* max value when converting between real number and integer. */ mpf_t f_imax_plus1; /* f_imax + 1 stored in an mpf_t for convenience */ mpf_t f_imax_minus1; /* f_imax - 1 stored in an mpf_t for convenience */ mpf_init (f_temp); mpz_init_set_ui (z_imax, 0x7fffffff); mpf_init (f_imax_plus1); mpf_init (f_imax_minus1); while ((c = getopt (argc, argv, "d2:i:r:")) != -1) switch (c) { case '2': l1runs = atol (optarg); l2runs = -1; /* set later on */ break; case 'd': /* increase debug level */ stat_debug++; break; case 'i': if (1 == realinput) { fputs ("stat: options -i and -r are mutually exclusive\n", stderr); exit (1); } if (mpz_set_str (z_imax, optarg, 0)) { fprintf (stderr, "stat: bad max value %s\n", optarg); exit (1); } realinput = 0; break; case 'r': if (0 == realinput) { fputs ("stat: options -i and -r are mutually exclusive\n", stderr); exit (1); } if (mpz_set_str (z_imax, optarg, 0)) { fprintf (stderr, "stat: bad max value %s\n", optarg); exit (1); } realinput = 1; break; case 'o': omitoutput = atoi (optarg); break; case '?': default: fputs (usage, stderr); exit (1); } argc -= optind; argv += optind; if (argc < 1) fp = stdin; else filen = argv[0]; if (fp != stdin) if (NULL == (fp = fopen (filen, "r"))) { perror (filen); exit (1); } if (-1 == realinput) realinput = 1; /* default is real numbers */ /* read file and fill appropriate vec */ if (1 == realinput) /* real input */ { for (f = 0; f < FVECSIZ ; f++) { mpf_init (fvec[f]); if (!mpf_inp_str (fvec[f], fp, 10)) break; } } else /* integer input */ { for (f = 0; f < FVECSIZ ; f++) { mpz_init (zvec[f]); if (!mpz_inp_str (zvec[f], fp, 10)) break; } } vecentries = n = f; /* number of entries read */ fclose (fp); if (FVECSIZ == f) fprintf (stderr, "stat: warning: discarding input due to lazy allocation "\ "of only %ld entries. sorry.\n", FVECSIZ); printf ("Got %lu numbers.\n", n); /* convert and fill the other vec */ /* since fvec[] contains 0<=f<1 and we want ivec[] to contain 0<=z<=imax and we are truncating all fractions when converting float to int, we have to add 1 to imax.*/ mpf_set_z (f_imax_plus1, z_imax); mpf_add_ui (f_imax_plus1, f_imax_plus1, 1); if (1 == realinput) /* fill zvec[] */ { for (f = 0; f < n; f++) { mpf_mul (f_temp, fvec[f], f_imax_plus1); mpz_init (zvec[f]); mpz_set_f (zvec[f], f_temp); /* truncating fraction */ if (stat_debug > 1) { mpz_out_str (stderr, 10, zvec[f]); fputs ("\n", stderr); } } } else /* integer input; fill fvec[] */ { /* mpf_set_z (f_imax_minus1, z_imax); mpf_sub_ui (f_imax_minus1, f_imax_minus1, 1);*/ for (f = 0; f < n; f++) { mpf_init (fvec[f]); mpf_set_z (fvec[f], zvec[f]); mpf_div (fvec[f], fvec[f], f_imax_plus1); if (stat_debug > 1) { mpf_out_str (stderr, 10, 0, fvec[f]); fputs ("\n", stderr); } } } /* 2 levels? */ if (1 != l2runs) { l2runs = n / l1runs; printf ("Doing %ld second level rounds "\ "with %ld entries in each round", l2runs, l1runs); if (n % l1runs) printf (" (discarding %ld entr%s)", n % l1runs, n % l1runs == 1 ? "y" : "ies"); puts ("."); n = l1runs; } #ifndef DONT_FFREQ f_freq (l1runs, l2runs, fvec, n); #endif #ifdef DO_ZFREQ z_freq (l1runs, l2runs, zvec, n, mpz_get_ui (z_imax)); #endif mpf_clear (f_temp); mpz_clear (z_imax); mpf_clear (f_imax_plus1); mpf_clear (f_imax_minus1); for (f = 0; f < vecentries; f++) { mpf_clear (fvec[f]); mpz_clear (zvec[f]); } return 0; }