/* statistics/test_robust.c * * Copyright (C) 2018 Patrick Alken * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 3 of the License, or (at * your option) any later version. * * This program is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ #include #include #include #include #include #include #include #include #include #include #include int test_robust (void); /* random vector in [-1,1] */ static int random_array(const size_t n, double * x, gsl_rng * r) { size_t i; for (i = 0; i < n; ++i) x[i] = 2.0 * gsl_rng_uniform(r) - 1.0; return 0; } /* calculate MAD statistic for input vector using slow/naive algorithm */ static double slow_MAD(const size_t n, const double x[]) { double *work = malloc(n * sizeof(double)); double median, mad; size_t i; for (i = 0; i < n; ++i) work[i] = x[i]; gsl_sort(work, 1, n); median = gsl_stats_median_from_sorted_data(work, 1, n); for (i = 0; i < n; ++i) work[i] = fabs(x[i] - median); gsl_sort(work, 1, n); mad = gsl_stats_median_from_sorted_data(work, 1, n); free(work); return mad; } /* calculate S_n statistic for input vector using slow/naive algorithm */ static double slow_Sn0(const size_t n, const double x[]) { double *work1 = malloc(n * sizeof(double)); double *work2 = malloc(n * sizeof(double)); double Sn; size_t i, j; for (i = 0; i < n; ++i) { for (j = 0; j < n; ++j) work1[j] = fabs(x[i] - x[j]); /* find himed_j | x_i - x_j | */ gsl_sort(work1, 1, n); work2[i] = work1[n / 2]; } /* find lomed_i { himed_j | x_i - x_j | } */ gsl_sort(work2, 1, n); Sn = work2[(n + 1) / 2 - 1]; free(work1); free(work2); return Sn; } /* calculate Q_n statistic for input vector using slow/naive algorithm */ static double slow_Qn0(const size_t n, const double x[]) { const size_t wsize = n * (n - 1) / 2; const size_t n_2 = n / 2; const size_t k = ((n_2 + 1) * n_2) / 2; double *work; double Qn; size_t idx = 0; size_t i, j; if (n < 2) return (0.0); work = malloc(wsize * sizeof(double)); for (i = 0; i < n; ++i) { for (j = i + 1; j < n; ++j) work[idx++] = fabs(x[i] - x[j]); } gsl_sort(work, 1, idx); Qn = work[k - 1]; free(work); return Qn; } static int test_median(const double tol, const size_t n, gsl_rng * r) { double * x = malloc(n * sizeof(double)); double median1, median2; random_array(n, x, r); median1 = gsl_stats_median(x, 1, n); gsl_sort(x, 1, n); median2 = gsl_stats_median_from_sorted_data(x, 1, n); gsl_test_rel(median1, median2, tol, "test_median n=%zu", n); free(x); return 0; } static int test_mad(const double tol, const size_t n, gsl_rng * r) { double * x = malloc(n * sizeof(double)); double * work = malloc(n * sizeof(double)); double mad1, mad2; random_array(n, x, r); mad1 = slow_MAD(n, x); gsl_sort(x, 1, n); mad2 = gsl_stats_mad0(x, 1, n, work); gsl_test_rel(mad1, mad2, tol, "test_mad n=%zu", n); free(x); free(work); return 0; } static int test_Sn(const double tol, const size_t n, gsl_rng * r) { double * x = malloc(n * sizeof(double)); double * work = malloc(n * sizeof(double)); double Sn1, Sn2; random_array(n, x, r); /* compute S_n with slow/naive algorithm */ Sn1 = slow_Sn0(n, x); /* compute S_n with efficient algorithm */ gsl_sort(x, 1, n); Sn2 = gsl_stats_Sn0_from_sorted_data(x, 1, n, work); gsl_test_rel(Sn2, Sn1, tol, "test_Sn n=%zu", n); free(x); free(work); return 0; } static int test_Qn(const double tol, const size_t n, gsl_rng * r) { double * x = malloc(n * sizeof(double)); double * work = malloc(3 * n * sizeof(double)); int * work_int = malloc(5 * n * sizeof(int)); double Qn1, Qn2; random_array(n, x, r); /* compute Q_n with slow/naive algorithm */ Qn1 = slow_Qn0(n, x); /* compute Q_n with efficient algorithm */ gsl_sort(x, 1, n); Qn2 = gsl_stats_Qn0_from_sorted_data(x, 1, n, work, work_int); gsl_test_rel(Qn2, Qn1, tol, "test_Qn n=%zu", n); free(x); free(work); free(work_int); return 0; } int test_robust (void) { const double tol = 1.0e-12; gsl_rng * r = gsl_rng_alloc(gsl_rng_default); test_median(GSL_DBL_EPSILON, 1, r); test_median(GSL_DBL_EPSILON, 2, r); test_median(GSL_DBL_EPSILON, 3, r); test_median(GSL_DBL_EPSILON, 100, r); test_median(GSL_DBL_EPSILON, 101, r); test_median(GSL_DBL_EPSILON, 500, r); test_median(GSL_DBL_EPSILON, 501, r); test_mad(GSL_DBL_EPSILON, 1, r); test_mad(GSL_DBL_EPSILON, 2, r); test_mad(GSL_DBL_EPSILON, 3, r); test_mad(GSL_DBL_EPSILON, 100, r); test_mad(GSL_DBL_EPSILON, 101, r); test_mad(GSL_DBL_EPSILON, 500, r); test_mad(GSL_DBL_EPSILON, 501, r); test_Sn(tol, 1, r); test_Sn(tol, 2, r); test_Sn(tol, 3, r); test_Sn(tol, 100, r); test_Sn(tol, 101, r); test_Sn(tol, 500, r); test_Sn(tol, 501, r); test_Qn(tol, 1, r); test_Qn(tol, 2, r); test_Qn(tol, 3, r); test_Qn(tol, 4, r); test_Qn(tol, 5, r); test_Qn(tol, 100, r); test_Qn(tol, 101, r); test_Qn(tol, 500, r); test_Qn(tol, 501, r); gsl_rng_free(r); return 0; }