Blame statistics/test_robust.c

Packit 67cb25
/* statistics/test_robust.c
Packit 67cb25
 * 
Packit 67cb25
 * Copyright (C) 2018 Patrick Alken
Packit 67cb25
 * 
Packit 67cb25
 * This program is free software; you can redistribute it and/or modify
Packit 67cb25
 * it under the terms of the GNU General Public License as published by
Packit 67cb25
 * the Free Software Foundation; either version 3 of the License, or (at
Packit 67cb25
 * your option) any later version.
Packit 67cb25
 * 
Packit 67cb25
 * This program is distributed in the hope that it will be useful, but
Packit 67cb25
 * WITHOUT ANY WARRANTY; without even the implied warranty of
Packit 67cb25
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
Packit 67cb25
 * General Public License for more details.
Packit 67cb25
 * 
Packit 67cb25
 * You should have received a copy of the GNU General Public License
Packit 67cb25
 * along with this program; if not, write to the Free Software
Packit 67cb25
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
Packit 67cb25
 */
Packit 67cb25
Packit 67cb25
#include <config.h>
Packit 67cb25
#include <stdlib.h>
Packit 67cb25
#include <math.h>
Packit 67cb25
Packit 67cb25
#include <gsl/gsl_math.h>
Packit 67cb25
#include <gsl/gsl_test.h>
Packit 67cb25
#include <gsl/gsl_errno.h>
Packit 67cb25
#include <gsl/gsl_statistics.h>
Packit 67cb25
#include <gsl/gsl_sort.h>
Packit 67cb25
#include <gsl/gsl_rng.h>
Packit 67cb25
#include <gsl/gsl_vector.h>
Packit 67cb25
#include <gsl/gsl_ieee_utils.h>
Packit 67cb25
Packit 67cb25
int test_robust (void);
Packit 67cb25
Packit 67cb25
/* random vector in [-1,1] */
Packit 67cb25
static int
Packit 67cb25
random_array(const size_t n, double * x, gsl_rng * r)
Packit 67cb25
{
Packit 67cb25
  size_t i;
Packit 67cb25
Packit 67cb25
  for (i = 0; i < n; ++i)
Packit 67cb25
    x[i] = 2.0 * gsl_rng_uniform(r) - 1.0;
Packit 67cb25
Packit 67cb25
  return 0;
Packit 67cb25
}
Packit 67cb25
Packit 67cb25
/* calculate MAD statistic for input vector using slow/naive algorithm */
Packit 67cb25
static double
Packit 67cb25
slow_MAD(const size_t n, const double x[])
Packit 67cb25
{
Packit 67cb25
  double *work = malloc(n * sizeof(double));
Packit 67cb25
  double median, mad;
Packit 67cb25
  size_t i;
Packit 67cb25
Packit 67cb25
  for (i = 0; i < n; ++i)
Packit 67cb25
    work[i] = x[i];
Packit 67cb25
Packit 67cb25
  gsl_sort(work, 1, n);
Packit 67cb25
  median = gsl_stats_median_from_sorted_data(work, 1, n);
Packit 67cb25
Packit 67cb25
  for (i = 0; i < n; ++i)
Packit 67cb25
    work[i] = fabs(x[i] - median);
Packit 67cb25
Packit 67cb25
  gsl_sort(work, 1, n);
Packit 67cb25
  mad = gsl_stats_median_from_sorted_data(work, 1, n);
Packit 67cb25
Packit 67cb25
  free(work);
Packit 67cb25
Packit 67cb25
  return mad;
Packit 67cb25
}
Packit 67cb25
Packit 67cb25
/* calculate S_n statistic for input vector using slow/naive algorithm */
Packit 67cb25
static double
Packit 67cb25
slow_Sn0(const size_t n, const double x[])
Packit 67cb25
{
Packit 67cb25
  double *work1 = malloc(n * sizeof(double));
Packit 67cb25
  double *work2 = malloc(n * sizeof(double));
Packit 67cb25
  double Sn;
Packit 67cb25
  size_t i, j;
Packit 67cb25
Packit 67cb25
  for (i = 0; i < n; ++i)
Packit 67cb25
    {
Packit 67cb25
      for (j = 0; j < n; ++j)
Packit 67cb25
        work1[j] = fabs(x[i] - x[j]);
Packit 67cb25
Packit 67cb25
      /* find himed_j | x_i - x_j | */
Packit 67cb25
      gsl_sort(work1, 1, n);
Packit 67cb25
      work2[i] = work1[n / 2];
Packit 67cb25
    }
Packit 67cb25
Packit 67cb25
  /* find lomed_i { himed_j | x_i - x_j | } */
Packit 67cb25
  gsl_sort(work2, 1, n);
Packit 67cb25
  Sn = work2[(n + 1) / 2 - 1];
Packit 67cb25
Packit 67cb25
  free(work1);
Packit 67cb25
  free(work2);
Packit 67cb25
Packit 67cb25
  return Sn;
Packit 67cb25
}
Packit 67cb25
Packit 67cb25
/* calculate Q_n statistic for input vector using slow/naive algorithm */
Packit 67cb25
static double
Packit 67cb25
slow_Qn0(const size_t n, const double x[])
Packit 67cb25
{
Packit 67cb25
  const size_t wsize = n * (n - 1) / 2;
Packit 67cb25
  const size_t n_2 = n / 2;
Packit 67cb25
  const size_t k = ((n_2 + 1) * n_2) / 2;
Packit 67cb25
  double *work;
Packit 67cb25
  double Qn;
Packit 67cb25
  size_t idx = 0;
Packit 67cb25
  size_t i, j;
Packit 67cb25
Packit 67cb25
  if (n < 2)
Packit 67cb25
    return (0.0);
Packit 67cb25
Packit 67cb25
  work = malloc(wsize * sizeof(double));
Packit 67cb25
Packit 67cb25
  for (i = 0; i < n; ++i)
Packit 67cb25
    {
Packit 67cb25
      for (j = i + 1; j < n; ++j)
Packit 67cb25
        work[idx++] = fabs(x[i] - x[j]);
Packit 67cb25
    }
Packit 67cb25
Packit 67cb25
  gsl_sort(work, 1, idx);
Packit 67cb25
  Qn = work[k - 1];
Packit 67cb25
Packit 67cb25
  free(work);
Packit 67cb25
Packit 67cb25
  return Qn;
Packit 67cb25
}
Packit 67cb25
Packit 67cb25
static int
Packit 67cb25
test_median(const double tol, const size_t n, gsl_rng * r)
Packit 67cb25
{
Packit 67cb25
  double * x = malloc(n * sizeof(double));
Packit 67cb25
  double median1, median2;
Packit 67cb25
Packit 67cb25
  random_array(n, x, r);
Packit 67cb25
Packit 67cb25
  median1 = gsl_stats_median(x, 1, n);
Packit 67cb25
Packit 67cb25
  gsl_sort(x, 1, n);
Packit 67cb25
  median2 = gsl_stats_median_from_sorted_data(x, 1, n);
Packit 67cb25
Packit 67cb25
  gsl_test_rel(median1, median2, tol, "test_median n=%zu", n);
Packit 67cb25
Packit 67cb25
  free(x);
Packit 67cb25
Packit 67cb25
  return 0;
Packit 67cb25
}
Packit 67cb25
Packit 67cb25
static int
Packit 67cb25
test_mad(const double tol, const size_t n, gsl_rng * r)
Packit 67cb25
{
Packit 67cb25
  double * x = malloc(n * sizeof(double));
Packit 67cb25
  double * work = malloc(n * sizeof(double));
Packit 67cb25
  double mad1, mad2;
Packit 67cb25
Packit 67cb25
  random_array(n, x, r);
Packit 67cb25
Packit 67cb25
  mad1 = slow_MAD(n, x);
Packit 67cb25
Packit 67cb25
  gsl_sort(x, 1, n);
Packit 67cb25
  mad2 = gsl_stats_mad0(x, 1, n, work);
Packit 67cb25
Packit 67cb25
  gsl_test_rel(mad1, mad2, tol, "test_mad n=%zu", n);
Packit 67cb25
Packit 67cb25
  free(x);
Packit 67cb25
  free(work);
Packit 67cb25
Packit 67cb25
  return 0;
Packit 67cb25
}
Packit 67cb25
Packit 67cb25
static int
Packit 67cb25
test_Sn(const double tol, const size_t n, gsl_rng * r)
Packit 67cb25
{
Packit 67cb25
  double * x = malloc(n * sizeof(double));
Packit 67cb25
  double * work = malloc(n * sizeof(double));
Packit 67cb25
  double Sn1, Sn2;
Packit 67cb25
Packit 67cb25
  random_array(n, x, r);
Packit 67cb25
Packit 67cb25
  /* compute S_n with slow/naive algorithm */
Packit 67cb25
  Sn1 = slow_Sn0(n, x);
Packit 67cb25
Packit 67cb25
  /* compute S_n with efficient algorithm */
Packit 67cb25
  gsl_sort(x, 1, n);
Packit 67cb25
  Sn2 = gsl_stats_Sn0_from_sorted_data(x, 1, n, work);
Packit 67cb25
Packit 67cb25
  gsl_test_rel(Sn2, Sn1, tol, "test_Sn n=%zu", n);
Packit 67cb25
Packit 67cb25
  free(x);
Packit 67cb25
  free(work);
Packit 67cb25
Packit 67cb25
  return 0;
Packit 67cb25
}
Packit 67cb25
Packit 67cb25
static int
Packit 67cb25
test_Qn(const double tol, const size_t n, gsl_rng * r)
Packit 67cb25
{
Packit 67cb25
  double * x = malloc(n * sizeof(double));
Packit 67cb25
  double * work = malloc(3 * n * sizeof(double));
Packit 67cb25
  int * work_int = malloc(5 * n * sizeof(int));
Packit 67cb25
  double Qn1, Qn2;
Packit 67cb25
Packit 67cb25
  random_array(n, x, r);
Packit 67cb25
Packit 67cb25
  /* compute Q_n with slow/naive algorithm */
Packit 67cb25
  Qn1 = slow_Qn0(n, x);
Packit 67cb25
Packit 67cb25
  /* compute Q_n with efficient algorithm */
Packit 67cb25
  gsl_sort(x, 1, n);
Packit 67cb25
  Qn2 = gsl_stats_Qn0_from_sorted_data(x, 1, n, work, work_int);
Packit 67cb25
Packit 67cb25
  gsl_test_rel(Qn2, Qn1, tol, "test_Qn n=%zu", n);
Packit 67cb25
Packit 67cb25
  free(x);
Packit 67cb25
  free(work);
Packit 67cb25
  free(work_int);
Packit 67cb25
Packit 67cb25
  return 0;
Packit 67cb25
}
Packit 67cb25
Packit 67cb25
int
Packit 67cb25
test_robust (void)
Packit 67cb25
{
Packit 67cb25
  const double tol = 1.0e-12;
Packit 67cb25
  gsl_rng * r = gsl_rng_alloc(gsl_rng_default);
Packit 67cb25
Packit 67cb25
  test_median(GSL_DBL_EPSILON, 1, r);
Packit 67cb25
  test_median(GSL_DBL_EPSILON, 2, r);
Packit 67cb25
  test_median(GSL_DBL_EPSILON, 3, r);
Packit 67cb25
  test_median(GSL_DBL_EPSILON, 100, r);
Packit 67cb25
  test_median(GSL_DBL_EPSILON, 101, r);
Packit 67cb25
  test_median(GSL_DBL_EPSILON, 500, r);
Packit 67cb25
  test_median(GSL_DBL_EPSILON, 501, r);
Packit 67cb25
Packit 67cb25
  test_mad(GSL_DBL_EPSILON, 1, r);
Packit 67cb25
  test_mad(GSL_DBL_EPSILON, 2, r);
Packit 67cb25
  test_mad(GSL_DBL_EPSILON, 3, r);
Packit 67cb25
  test_mad(GSL_DBL_EPSILON, 100, r);
Packit 67cb25
  test_mad(GSL_DBL_EPSILON, 101, r);
Packit 67cb25
  test_mad(GSL_DBL_EPSILON, 500, r);
Packit 67cb25
  test_mad(GSL_DBL_EPSILON, 501, r);
Packit 67cb25
Packit 67cb25
  test_Sn(tol, 1, r);
Packit 67cb25
  test_Sn(tol, 2, r);
Packit 67cb25
  test_Sn(tol, 3, r);
Packit 67cb25
  test_Sn(tol, 100, r);
Packit 67cb25
  test_Sn(tol, 101, r);
Packit 67cb25
  test_Sn(tol, 500, r);
Packit 67cb25
  test_Sn(tol, 501, r);
Packit 67cb25
Packit 67cb25
  test_Qn(tol, 1, r);
Packit 67cb25
  test_Qn(tol, 2, r);
Packit 67cb25
  test_Qn(tol, 3, r);
Packit 67cb25
  test_Qn(tol, 4, r);
Packit 67cb25
  test_Qn(tol, 5, r);
Packit 67cb25
  test_Qn(tol, 100, r);
Packit 67cb25
  test_Qn(tol, 101, r);
Packit 67cb25
  test_Qn(tol, 500, r);
Packit 67cb25
  test_Qn(tol, 501, r);
Packit 67cb25
Packit 67cb25
  gsl_rng_free(r);
Packit 67cb25
Packit 67cb25
  return 0;
Packit 67cb25
}