/* statistics/test_robust.c
 * 
 * Copyright (C) 2018 Patrick Alken
 * 
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or (at
 * your option) any later version.
 * 
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 * 
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 */

#include <config.h>
#include <stdio.h>
#include <stdlib.h>
#include <math.h>

#include <gsl/gsl_math.h>
#include <gsl/gsl_test.h>
#include <gsl/gsl_errno.h>
#include <gsl/gsl_statistics.h>
#include <gsl/gsl_sort.h>
#include <gsl/gsl_rng.h>
#include <gsl/gsl_vector.h>
#include <gsl/gsl_ieee_utils.h>

/* Runs every robust-statistics test in this file; the definition appears
 * later in this file. NOTE(review): the function is non-static, so it is
 * presumably invoked from a test driver in another translation unit --
 * confirm against the caller. */
int test_robust (void);

/*
 * random_array()
 *   Fill x[0], ..., x[n-1] with pseudo-random values of the form
 * 2*u - 1, where each u is one draw from gsl_rng_uniform(r). Draws are
 * consumed in index order, one per element.
 *
 * Inputs: n - number of elements to generate
 *         x - (output) array of length n
 *         r - random number generator supplying the draws
 *
 * Return: always 0
 */
static int
random_array(const size_t n, double * x, gsl_rng * r)
{
  size_t remaining = n;
  double * dest = x;

  while (remaining > 0)
    {
      *dest = 2.0 * gsl_rng_uniform(r) - 1.0;
      ++dest;
      --remaining;
    }

  return 0;
}

/*
 * slow_MAD()
 *   Reference (slow/naive) computation of the MAD statistic, used to
 * cross-check the library implementation.
 *
 * Algorithm:
 *   1. copy x into a scratch array, sort it and take its median
 *   2. overwrite the scratch array with |x[i] - median|
 *   3. sort again and take the median of those absolute deviations
 *
 * Inputs: n - number of elements in x
 *         x - input data (not modified; need not be sorted)
 *
 * Return: median of |x[i] - median(x)|; no scaling factor is applied.
 *
 * The scratch array is allocated and released internally. If the
 * allocation fails the process is aborted with a message on stderr,
 * since a reference value cannot be produced.
 */
static double
slow_MAD(const size_t n, const double x[])
{
  double *work = malloc(n * sizeof(double));
  double median, mad;
  size_t i;

  /* malloc(0) is allowed to return NULL, so only treat NULL as a failure
   * when storage was actually requested */
  if (work == NULL && n > 0)
    {
      fprintf(stderr, "slow_MAD: failed to allocate workspace for n=%zu\n", n);
      abort();
    }

  for (i = 0; i < n; ++i)
    work[i] = x[i];

  gsl_sort(work, 1, n);
  median = gsl_stats_median_from_sorted_data(work, 1, n);

  /* deviations are taken from the original (unsorted) data; order is
   * irrelevant because the array is sorted again below */
  for (i = 0; i < n; ++i)
    work[i] = fabs(x[i] - median);

  gsl_sort(work, 1, n);
  mad = gsl_stats_median_from_sorted_data(work, 1, n);

  free(work);

  return mad;
}

/*
 * slow_Sn0()
 *   Reference (slow/naive) O(n^2 log n) computation of the S_n statistic,
 * used to cross-check the library implementation.
 *
 *   S_n = lomed_i { himed_j | x_i - x_j | }
 *
 * where, for a sorted array of length n (0-based indexing), himed is the
 * element at index n/2 and lomed is the element at index (n+1)/2 - 1.
 *
 * Inputs: n - number of elements in x
 *         x - input data (not modified; need not be sorted)
 *
 * Return: S_n as defined above, with no scaling factor applied;
 *         0.0 when n == 0 (the statistic is undefined for empty input,
 *         and the lomed index would otherwise wrap around).
 *
 * Scratch storage is allocated and released internally. If an
 * allocation fails the process is aborted with a message on stderr.
 */
static double
slow_Sn0(const size_t n, const double x[])
{
  double *work1;
  double *work2;
  double Sn;
  size_t i, j;

  /* guard: (n + 1) / 2 - 1 underflows for n == 0 */
  if (n == 0)
    return (0.0);

  work1 = malloc(n * sizeof(double));
  work2 = malloc(n * sizeof(double));

  if (work1 == NULL || work2 == NULL)
    {
      fprintf(stderr, "slow_Sn0: failed to allocate workspace for n=%zu\n", n);
      abort();
    }

  for (i = 0; i < n; ++i)
    {
      for (j = 0; j < n; ++j)
        work1[j] = fabs(x[i] - x[j]);

      /* find himed_j | x_i - x_j | */
      gsl_sort(work1, 1, n);
      work2[i] = work1[n / 2];
    }

  /* find lomed_i { himed_j | x_i - x_j | } */
  gsl_sort(work2, 1, n);
  Sn = work2[(n + 1) / 2 - 1];

  free(work1);
  free(work2);

  return Sn;
}

/*
 * slow_Qn0()
 *   Reference (slow/naive) computation of the Q_n statistic, used to
 * cross-check the library implementation.
 *
 * All n*(n-1)/2 pairwise distances |x_i - x_j|, i < j, are formed and
 * sorted; Q_n is the k-th smallest of them (1-based), where
 *
 *   k = h * (h - 1) / 2,   h = floor(n/2) + 1
 *
 * Inputs: n - number of elements in x
 *         x - input data (not modified; need not be sorted)
 *
 * Return: the k-th order statistic described above, with no scaling
 *         factor applied; 0.0 when n < 2 (no pairs exist).
 *
 * Scratch storage is allocated and released internally. If the
 * allocation fails the process is aborted with a message on stderr.
 */
static double
slow_Qn0(const size_t n, const double x[])
{
  size_t wsize, n_2, k;
  double *work;
  double Qn;
  size_t idx = 0;
  size_t i, j;

  if (n < 2)
    return (0.0);

  /* sizes are derived only after the guard so that n - 1 cannot wrap */
  wsize = n * (n - 1) / 2;
  n_2 = n / 2;
  k = ((n_2 + 1) * n_2) / 2;

  work = malloc(wsize * sizeof(double));

  if (work == NULL)
    {
      fprintf(stderr, "slow_Qn0: failed to allocate workspace for n=%zu\n", n);
      abort();
    }

  /* collect the distance of every unordered pair exactly once */
  for (i = 0; i < n; ++i)
    {
      for (j = i + 1; j < n; ++j)
        work[idx++] = fabs(x[i] - x[j]);
    }

  gsl_sort(work, 1, idx);
  Qn = work[k - 1]; /* k is 1-based and k >= 1 whenever n >= 2 */

  free(work);

  return Qn;
}

/*
 * test_median()
 *   Check gsl_stats_median() against gsl_stats_median_from_sorted_data()
 * on the same n random values.
 *
 * Inputs: tol - relative tolerance passed to gsl_test_rel
 *         n   - number of random samples to generate
 *         r   - random number generator used to build the data set
 *
 * Return: always 0; the outcome is recorded through gsl_test_rel.
 *
 * If the data array cannot be allocated the process is aborted with a
 * message on stderr rather than dereferencing a null pointer.
 */
static int
test_median(const double tol, const size_t n, gsl_rng * r)
{
  double * x = malloc(n * sizeof(double));
  double median1, median2;

  /* malloc(0) is allowed to return NULL, so only n > 0 is an error */
  if (x == NULL && n > 0)
    {
      fprintf(stderr, "test_median: failed to allocate data for n=%zu\n", n);
      abort();
    }

  random_array(n, x, r);

  /* value under test: median of the data as generated */
  median1 = gsl_stats_median(x, 1, n);

  /* reference value: median read off the explicitly sorted data */
  gsl_sort(x, 1, n);
  median2 = gsl_stats_median_from_sorted_data(x, 1, n);

  gsl_test_rel(median1, median2, tol, "test_median n=%zu", n);

  free(x);

  return 0;
}

/*
 * test_mad()
 *   Check gsl_stats_mad0() against the naive reference slow_MAD() on the
 * same n random values.
 *
 * Inputs: tol - relative tolerance passed to gsl_test_rel
 *         n   - number of random samples to generate
 *         r   - random number generator used to build the data set
 *
 * Return: always 0; the outcome is recorded through gsl_test_rel.
 *
 * If an array cannot be allocated the process is aborted with a message
 * on stderr rather than dereferencing a null pointer.
 */
static int
test_mad(const double tol, const size_t n, gsl_rng * r)
{
  double * x = malloc(n * sizeof(double));
  double * work = malloc(n * sizeof(double));
  double mad1, mad2;

  /* malloc(0) is allowed to return NULL, so only n > 0 is an error */
  if ((x == NULL || work == NULL) && n > 0)
    {
      fprintf(stderr, "test_mad: failed to allocate workspace for n=%zu\n", n);
      abort();
    }

  random_array(n, x, r);

  /* reference value from the slow/naive algorithm */
  mad1 = slow_MAD(n, x);

  /* value under test; x is sorted in place first.
   * NOTE(review): confirm whether gsl_stats_mad0 needs sorted input */
  gsl_sort(x, 1, n);
  mad2 = gsl_stats_mad0(x, 1, n, work);

  /* gsl_test_rel takes (result, expected): the library value is the
   * result and the naive value the expectation, matching the argument
   * order used by test_Sn and test_Qn */
  gsl_test_rel(mad2, mad1, tol, "test_mad n=%zu", n);

  free(x);
  free(work);

  return 0;
}

/*
 * test_Sn()
 *   Check gsl_stats_Sn0_from_sorted_data() against the naive reference
 * slow_Sn0() on the same n random values.
 *
 * Inputs: tol - relative tolerance passed to gsl_test_rel
 *         n   - number of random samples to generate
 *         r   - random number generator used to build the data set
 *
 * Return: always 0; the outcome is recorded through gsl_test_rel.
 *
 * If an array cannot be allocated the process is aborted with a message
 * on stderr rather than dereferencing a null pointer.
 */
static int
test_Sn(const double tol, const size_t n, gsl_rng * r)
{
  double * x = malloc(n * sizeof(double));
  double * work = malloc(n * sizeof(double));
  double Sn1, Sn2;

  /* malloc(0) is allowed to return NULL, so only n > 0 is an error */
  if ((x == NULL || work == NULL) && n > 0)
    {
      fprintf(stderr, "test_Sn: failed to allocate workspace for n=%zu\n", n);
      abort();
    }

  random_array(n, x, r);

  /* compute S_n with slow/naive algorithm */
  Sn1 = slow_Sn0(n, x);

  /* compute S_n with efficient algorithm (operates on sorted data) */
  gsl_sort(x, 1, n);
  Sn2 = gsl_stats_Sn0_from_sorted_data(x, 1, n, work);

  gsl_test_rel(Sn2, Sn1, tol, "test_Sn n=%zu", n);

  free(x);
  free(work);

  return 0;
}

/*
 * test_Qn()
 *   Check gsl_stats_Qn0_from_sorted_data() against the naive reference
 * slow_Qn0() on the same n random values.
 *
 * Inputs: tol - relative tolerance passed to gsl_test_rel
 *         n   - number of random samples to generate
 *         r   - random number generator used to build the data set
 *
 * Return: always 0; the outcome is recorded through gsl_test_rel.
 *
 * The library routine is handed a double workspace of 3*n elements and
 * an int workspace of 5*n elements. If any array cannot be allocated the
 * process is aborted with a message on stderr rather than dereferencing
 * a null pointer.
 */
static int
test_Qn(const double tol, const size_t n, gsl_rng * r)
{
  double * x = malloc(n * sizeof(double));
  double * work = malloc(3 * n * sizeof(double));
  int * work_int = malloc(5 * n * sizeof(int));
  double Qn1, Qn2;

  /* malloc(0) is allowed to return NULL, so only n > 0 is an error */
  if ((x == NULL || work == NULL || work_int == NULL) && n > 0)
    {
      fprintf(stderr, "test_Qn: failed to allocate workspace for n=%zu\n", n);
      abort();
    }

  random_array(n, x, r);

  /* compute Q_n with slow/naive algorithm */
  Qn1 = slow_Qn0(n, x);

  /* compute Q_n with efficient algorithm (operates on sorted data) */
  gsl_sort(x, 1, n);
  Qn2 = gsl_stats_Qn0_from_sorted_data(x, 1, n, work, work_int);

  gsl_test_rel(Qn2, Qn1, tol, "test_Qn n=%zu", n);

  free(x);
  free(work);
  free(work_int);

  return 0;
}

/*
 * test_robust()
 *   Driver for the robust-statistics checks in this file. A single
 * generator of type gsl_rng_default is shared by all checks, which run
 * in this fixed order: median, MAD, S_n, Q_n. Each check is repeated for
 * a list of sample sizes covering tiny inputs and both even and odd
 * lengths.
 *
 * The median and MAD checks use a tolerance of GSL_DBL_EPSILON; the S_n
 * and Q_n checks use 1.0e-12.
 *
 * Return: always 0; individual outcomes are recorded by the checks.
 */
int
test_robust (void)
{
  /* sample sizes shared by the median, MAD and S_n checks */
  static const size_t common_sizes[] = { 1, 2, 3, 100, 101, 500, 501 };
  /* Q_n additionally exercises n = 4 and n = 5 */
  static const size_t Qn_sizes[] = { 1, 2, 3, 4, 5, 100, 101, 500, 501 };
  const size_t n_common = sizeof(common_sizes) / sizeof(common_sizes[0]);
  const size_t n_Qn = sizeof(Qn_sizes) / sizeof(Qn_sizes[0]);
  const double tol = 1.0e-12;
  gsl_rng * r = gsl_rng_alloc(gsl_rng_default);
  size_t s;

  for (s = 0; s < n_common; ++s)
    test_median(GSL_DBL_EPSILON, common_sizes[s], r);

  for (s = 0; s < n_common; ++s)
    test_mad(GSL_DBL_EPSILON, common_sizes[s], r);

  for (s = 0; s < n_common; ++s)
    test_Sn(tol, common_sizes[s], r);

  for (s = 0; s < n_Qn; ++s)
    test_Qn(tol, Qn_sizes[s], r);

  gsl_rng_free(r);

  return 0;
}