/* statistics/test_robust.c
 *
 * Copyright (C) 2018 Patrick Alken
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or (at
 * your option) any later version.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 */
|
|
Packit |
67cb25 |
|
|
Packit |
67cb25 |
#include <config.h>
|
|
Packit |
67cb25 |
#include <stdlib.h>
|
|
Packit |
67cb25 |
#include <math.h>
|
|
Packit |
67cb25 |
|
|
Packit |
67cb25 |
#include <gsl/gsl_math.h>
|
|
Packit |
67cb25 |
#include <gsl/gsl_test.h>
|
|
Packit |
67cb25 |
#include <gsl/gsl_errno.h>
|
|
Packit |
67cb25 |
#include <gsl/gsl_statistics.h>
|
|
Packit |
67cb25 |
#include <gsl/gsl_sort.h>
|
|
Packit |
67cb25 |
#include <gsl/gsl_rng.h>
|
|
Packit |
67cb25 |
#include <gsl/gsl_vector.h>
|
|
Packit |
67cb25 |
#include <gsl/gsl_ieee_utils.h>
|
|
Packit |
67cb25 |
|
|
Packit |
67cb25 |
/* entry point of the robust-statistics tests; non-static, defined at the end of this file */
int test_robust (void);
|
|
Packit |
67cb25 |
|
|
Packit |
67cb25 |
/* random vector in [-1,1] */
|
|
Packit |
67cb25 |
static int
|
|
Packit |
67cb25 |
random_array(const size_t n, double * x, gsl_rng * r)
|
|
Packit |
67cb25 |
{
|
|
Packit |
67cb25 |
size_t i;
|
|
Packit |
67cb25 |
|
|
Packit |
67cb25 |
for (i = 0; i < n; ++i)
|
|
Packit |
67cb25 |
x[i] = 2.0 * gsl_rng_uniform(r) - 1.0;
|
|
Packit |
67cb25 |
|
|
Packit |
67cb25 |
return 0;
|
|
Packit |
67cb25 |
}
|
|
Packit |
67cb25 |
|
|
Packit |
67cb25 |
/* calculate MAD statistic for input vector using slow/naive algorithm */
|
|
Packit |
67cb25 |
static double
|
|
Packit |
67cb25 |
slow_MAD(const size_t n, const double x[])
|
|
Packit |
67cb25 |
{
|
|
Packit |
67cb25 |
double *work = malloc(n * sizeof(double));
|
|
Packit |
67cb25 |
double median, mad;
|
|
Packit |
67cb25 |
size_t i;
|
|
Packit |
67cb25 |
|
|
Packit |
67cb25 |
for (i = 0; i < n; ++i)
|
|
Packit |
67cb25 |
work[i] = x[i];
|
|
Packit |
67cb25 |
|
|
Packit |
67cb25 |
gsl_sort(work, 1, n);
|
|
Packit |
67cb25 |
median = gsl_stats_median_from_sorted_data(work, 1, n);
|
|
Packit |
67cb25 |
|
|
Packit |
67cb25 |
for (i = 0; i < n; ++i)
|
|
Packit |
67cb25 |
work[i] = fabs(x[i] - median);
|
|
Packit |
67cb25 |
|
|
Packit |
67cb25 |
gsl_sort(work, 1, n);
|
|
Packit |
67cb25 |
mad = gsl_stats_median_from_sorted_data(work, 1, n);
|
|
Packit |
67cb25 |
|
|
Packit |
67cb25 |
free(work);
|
|
Packit |
67cb25 |
|
|
Packit |
67cb25 |
return mad;
|
|
Packit |
67cb25 |
}
|
|
Packit |
67cb25 |
|
|
Packit |
67cb25 |
/* calculate S_n statistic for input vector using slow/naive algorithm */
|
|
Packit |
67cb25 |
static double
|
|
Packit |
67cb25 |
slow_Sn0(const size_t n, const double x[])
|
|
Packit |
67cb25 |
{
|
|
Packit |
67cb25 |
double *work1 = malloc(n * sizeof(double));
|
|
Packit |
67cb25 |
double *work2 = malloc(n * sizeof(double));
|
|
Packit |
67cb25 |
double Sn;
|
|
Packit |
67cb25 |
size_t i, j;
|
|
Packit |
67cb25 |
|
|
Packit |
67cb25 |
for (i = 0; i < n; ++i)
|
|
Packit |
67cb25 |
{
|
|
Packit |
67cb25 |
for (j = 0; j < n; ++j)
|
|
Packit |
67cb25 |
work1[j] = fabs(x[i] - x[j]);
|
|
Packit |
67cb25 |
|
|
Packit |
67cb25 |
/* find himed_j | x_i - x_j | */
|
|
Packit |
67cb25 |
gsl_sort(work1, 1, n);
|
|
Packit |
67cb25 |
work2[i] = work1[n / 2];
|
|
Packit |
67cb25 |
}
|
|
Packit |
67cb25 |
|
|
Packit |
67cb25 |
/* find lomed_i { himed_j | x_i - x_j | } */
|
|
Packit |
67cb25 |
gsl_sort(work2, 1, n);
|
|
Packit |
67cb25 |
Sn = work2[(n + 1) / 2 - 1];
|
|
Packit |
67cb25 |
|
|
Packit |
67cb25 |
free(work1);
|
|
Packit |
67cb25 |
free(work2);
|
|
Packit |
67cb25 |
|
|
Packit |
67cb25 |
return Sn;
|
|
Packit |
67cb25 |
}
|
|
Packit |
67cb25 |
|
|
Packit |
67cb25 |
/* calculate Q_n statistic for input vector using slow/naive algorithm */
|
|
Packit |
67cb25 |
static double
|
|
Packit |
67cb25 |
slow_Qn0(const size_t n, const double x[])
|
|
Packit |
67cb25 |
{
|
|
Packit |
67cb25 |
const size_t wsize = n * (n - 1) / 2;
|
|
Packit |
67cb25 |
const size_t n_2 = n / 2;
|
|
Packit |
67cb25 |
const size_t k = ((n_2 + 1) * n_2) / 2;
|
|
Packit |
67cb25 |
double *work;
|
|
Packit |
67cb25 |
double Qn;
|
|
Packit |
67cb25 |
size_t idx = 0;
|
|
Packit |
67cb25 |
size_t i, j;
|
|
Packit |
67cb25 |
|
|
Packit |
67cb25 |
if (n < 2)
|
|
Packit |
67cb25 |
return (0.0);
|
|
Packit |
67cb25 |
|
|
Packit |
67cb25 |
work = malloc(wsize * sizeof(double));
|
|
Packit |
67cb25 |
|
|
Packit |
67cb25 |
for (i = 0; i < n; ++i)
|
|
Packit |
67cb25 |
{
|
|
Packit |
67cb25 |
for (j = i + 1; j < n; ++j)
|
|
Packit |
67cb25 |
work[idx++] = fabs(x[i] - x[j]);
|
|
Packit |
67cb25 |
}
|
|
Packit |
67cb25 |
|
|
Packit |
67cb25 |
gsl_sort(work, 1, idx);
|
|
Packit |
67cb25 |
Qn = work[k - 1];
|
|
Packit |
67cb25 |
|
|
Packit |
67cb25 |
free(work);
|
|
Packit |
67cb25 |
|
|
Packit |
67cb25 |
return Qn;
|
|
Packit |
67cb25 |
}
|
|
Packit |
67cb25 |
|
|
Packit |
67cb25 |
static int
|
|
Packit |
67cb25 |
test_median(const double tol, const size_t n, gsl_rng * r)
|
|
Packit |
67cb25 |
{
|
|
Packit |
67cb25 |
double * x = malloc(n * sizeof(double));
|
|
Packit |
67cb25 |
double median1, median2;
|
|
Packit |
67cb25 |
|
|
Packit |
67cb25 |
random_array(n, x, r);
|
|
Packit |
67cb25 |
|
|
Packit |
67cb25 |
median1 = gsl_stats_median(x, 1, n);
|
|
Packit |
67cb25 |
|
|
Packit |
67cb25 |
gsl_sort(x, 1, n);
|
|
Packit |
67cb25 |
median2 = gsl_stats_median_from_sorted_data(x, 1, n);
|
|
Packit |
67cb25 |
|
|
Packit |
67cb25 |
gsl_test_rel(median1, median2, tol, "test_median n=%zu", n);
|
|
Packit |
67cb25 |
|
|
Packit |
67cb25 |
free(x);
|
|
Packit |
67cb25 |
|
|
Packit |
67cb25 |
return 0;
|
|
Packit |
67cb25 |
}
|
|
Packit |
67cb25 |
|
|
Packit |
67cb25 |
static int
|
|
Packit |
67cb25 |
test_mad(const double tol, const size_t n, gsl_rng * r)
|
|
Packit |
67cb25 |
{
|
|
Packit |
67cb25 |
double * x = malloc(n * sizeof(double));
|
|
Packit |
67cb25 |
double * work = malloc(n * sizeof(double));
|
|
Packit |
67cb25 |
double mad1, mad2;
|
|
Packit |
67cb25 |
|
|
Packit |
67cb25 |
random_array(n, x, r);
|
|
Packit |
67cb25 |
|
|
Packit |
67cb25 |
mad1 = slow_MAD(n, x);
|
|
Packit |
67cb25 |
|
|
Packit |
67cb25 |
gsl_sort(x, 1, n);
|
|
Packit |
67cb25 |
mad2 = gsl_stats_mad0(x, 1, n, work);
|
|
Packit |
67cb25 |
|
|
Packit |
67cb25 |
gsl_test_rel(mad1, mad2, tol, "test_mad n=%zu", n);
|
|
Packit |
67cb25 |
|
|
Packit |
67cb25 |
free(x);
|
|
Packit |
67cb25 |
free(work);
|
|
Packit |
67cb25 |
|
|
Packit |
67cb25 |
return 0;
|
|
Packit |
67cb25 |
}
|
|
Packit |
67cb25 |
|
|
Packit |
67cb25 |
static int
|
|
Packit |
67cb25 |
test_Sn(const double tol, const size_t n, gsl_rng * r)
|
|
Packit |
67cb25 |
{
|
|
Packit |
67cb25 |
double * x = malloc(n * sizeof(double));
|
|
Packit |
67cb25 |
double * work = malloc(n * sizeof(double));
|
|
Packit |
67cb25 |
double Sn1, Sn2;
|
|
Packit |
67cb25 |
|
|
Packit |
67cb25 |
random_array(n, x, r);
|
|
Packit |
67cb25 |
|
|
Packit |
67cb25 |
/* compute S_n with slow/naive algorithm */
|
|
Packit |
67cb25 |
Sn1 = slow_Sn0(n, x);
|
|
Packit |
67cb25 |
|
|
Packit |
67cb25 |
/* compute S_n with efficient algorithm */
|
|
Packit |
67cb25 |
gsl_sort(x, 1, n);
|
|
Packit |
67cb25 |
Sn2 = gsl_stats_Sn0_from_sorted_data(x, 1, n, work);
|
|
Packit |
67cb25 |
|
|
Packit |
67cb25 |
gsl_test_rel(Sn2, Sn1, tol, "test_Sn n=%zu", n);
|
|
Packit |
67cb25 |
|
|
Packit |
67cb25 |
free(x);
|
|
Packit |
67cb25 |
free(work);
|
|
Packit |
67cb25 |
|
|
Packit |
67cb25 |
return 0;
|
|
Packit |
67cb25 |
}
|
|
Packit |
67cb25 |
|
|
Packit |
67cb25 |
static int
|
|
Packit |
67cb25 |
test_Qn(const double tol, const size_t n, gsl_rng * r)
|
|
Packit |
67cb25 |
{
|
|
Packit |
67cb25 |
double * x = malloc(n * sizeof(double));
|
|
Packit |
67cb25 |
double * work = malloc(3 * n * sizeof(double));
|
|
Packit |
67cb25 |
int * work_int = malloc(5 * n * sizeof(int));
|
|
Packit |
67cb25 |
double Qn1, Qn2;
|
|
Packit |
67cb25 |
|
|
Packit |
67cb25 |
random_array(n, x, r);
|
|
Packit |
67cb25 |
|
|
Packit |
67cb25 |
/* compute Q_n with slow/naive algorithm */
|
|
Packit |
67cb25 |
Qn1 = slow_Qn0(n, x);
|
|
Packit |
67cb25 |
|
|
Packit |
67cb25 |
/* compute Q_n with efficient algorithm */
|
|
Packit |
67cb25 |
gsl_sort(x, 1, n);
|
|
Packit |
67cb25 |
Qn2 = gsl_stats_Qn0_from_sorted_data(x, 1, n, work, work_int);
|
|
Packit |
67cb25 |
|
|
Packit |
67cb25 |
gsl_test_rel(Qn2, Qn1, tol, "test_Qn n=%zu", n);
|
|
Packit |
67cb25 |
|
|
Packit |
67cb25 |
free(x);
|
|
Packit |
67cb25 |
free(work);
|
|
Packit |
67cb25 |
free(work_int);
|
|
Packit |
67cb25 |
|
|
Packit |
67cb25 |
return 0;
|
|
Packit |
67cb25 |
}
|
|
Packit |
67cb25 |
|
|
Packit |
67cb25 |
int
|
|
Packit |
67cb25 |
test_robust (void)
|
|
Packit |
67cb25 |
{
|
|
Packit |
67cb25 |
const double tol = 1.0e-12;
|
|
Packit |
67cb25 |
gsl_rng * r = gsl_rng_alloc(gsl_rng_default);
|
|
Packit |
67cb25 |
|
|
Packit |
67cb25 |
test_median(GSL_DBL_EPSILON, 1, r);
|
|
Packit |
67cb25 |
test_median(GSL_DBL_EPSILON, 2, r);
|
|
Packit |
67cb25 |
test_median(GSL_DBL_EPSILON, 3, r);
|
|
Packit |
67cb25 |
test_median(GSL_DBL_EPSILON, 100, r);
|
|
Packit |
67cb25 |
test_median(GSL_DBL_EPSILON, 101, r);
|
|
Packit |
67cb25 |
test_median(GSL_DBL_EPSILON, 500, r);
|
|
Packit |
67cb25 |
test_median(GSL_DBL_EPSILON, 501, r);
|
|
Packit |
67cb25 |
|
|
Packit |
67cb25 |
test_mad(GSL_DBL_EPSILON, 1, r);
|
|
Packit |
67cb25 |
test_mad(GSL_DBL_EPSILON, 2, r);
|
|
Packit |
67cb25 |
test_mad(GSL_DBL_EPSILON, 3, r);
|
|
Packit |
67cb25 |
test_mad(GSL_DBL_EPSILON, 100, r);
|
|
Packit |
67cb25 |
test_mad(GSL_DBL_EPSILON, 101, r);
|
|
Packit |
67cb25 |
test_mad(GSL_DBL_EPSILON, 500, r);
|
|
Packit |
67cb25 |
test_mad(GSL_DBL_EPSILON, 501, r);
|
|
Packit |
67cb25 |
|
|
Packit |
67cb25 |
test_Sn(tol, 1, r);
|
|
Packit |
67cb25 |
test_Sn(tol, 2, r);
|
|
Packit |
67cb25 |
test_Sn(tol, 3, r);
|
|
Packit |
67cb25 |
test_Sn(tol, 100, r);
|
|
Packit |
67cb25 |
test_Sn(tol, 101, r);
|
|
Packit |
67cb25 |
test_Sn(tol, 500, r);
|
|
Packit |
67cb25 |
test_Sn(tol, 501, r);
|
|
Packit |
67cb25 |
|
|
Packit |
67cb25 |
test_Qn(tol, 1, r);
|
|
Packit |
67cb25 |
test_Qn(tol, 2, r);
|
|
Packit |
67cb25 |
test_Qn(tol, 3, r);
|
|
Packit |
67cb25 |
test_Qn(tol, 4, r);
|
|
Packit |
67cb25 |
test_Qn(tol, 5, r);
|
|
Packit |
67cb25 |
test_Qn(tol, 100, r);
|
|
Packit |
67cb25 |
test_Qn(tol, 101, r);
|
|
Packit |
67cb25 |
test_Qn(tol, 500, r);
|
|
Packit |
67cb25 |
test_Qn(tol, 501, r);
|
|
Packit |
67cb25 |
|
|
Packit |
67cb25 |
gsl_rng_free(r);
|
|
Packit |
67cb25 |
|
|
Packit |
67cb25 |
return 0;
|
|
Packit |
67cb25 |
}
|