|
Packit |
67cb25 |
/* statistics/Qn_source.c
|
|
Packit |
67cb25 |
*
|
|
Packit |
67cb25 |
* Copyright (C) 2018 Patrick Alken
|
|
Packit |
67cb25 |
* Copyright (C) 2005, 2006, 2007 Martin Maechler, ETH Zurich
|
|
Packit |
67cb25 |
*
|
|
Packit |
67cb25 |
* This program is free software; you can redistribute it and/or modify
|
|
Packit |
67cb25 |
* it under the terms of the GNU General Public License as published by
|
|
Packit |
67cb25 |
* the Free Software Foundation; either version 3 of the License, or (at
|
|
Packit |
67cb25 |
* your option) any later version.
|
|
Packit |
67cb25 |
*
|
|
Packit |
67cb25 |
* This program is distributed in the hope that it will be useful, but
|
|
Packit |
67cb25 |
* WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
Packit |
67cb25 |
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
Packit |
67cb25 |
* General Public License for more details.
|
|
Packit |
67cb25 |
*
|
|
Packit |
67cb25 |
* You should have received a copy of the GNU General Public License
|
|
Packit |
67cb25 |
* along with this program; if not, write to the Free Software
|
|
Packit |
67cb25 |
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
|
Packit |
67cb25 |
*/
|
|
Packit |
67cb25 |
|
|
Packit |
67cb25 |
/* This is a merge of the C version of original files qn.f and sn.f,
|
|
Packit |
67cb25 |
* translated by f2c (version 20010821). ==== ====
|
|
Packit |
67cb25 |
* and then by f2c-clean,v 1.9 2000/01/13 13:46:53
|
|
Packit |
67cb25 |
* and further clean-edited manually by Martin Maechler.
|
|
Packit |
67cb25 |
*
|
|
Packit |
67cb25 |
* Further added interface functions to be called via .C() from R or S-plus
|
|
Packit |
67cb25 |
* Note that Peter Rousseeuw has explicitely given permission to
|
|
Packit |
67cb25 |
* use his code under the GPL for the R project.
|
|
Packit |
67cb25 |
*/
|
|
Packit |
67cb25 |
|
|
Packit |
67cb25 |
/* Original comments by the authors of the Fortran original code,
|
|
Packit |
67cb25 |
* (merged for Qn & Sn in one file by M.M.):
|
|
Packit |
67cb25 |
|
|
Packit |
67cb25 |
This file contains fortran functions for two new robust estimators
|
|
Packit |
67cb25 |
of scale denoted as Qn and Sn, decribed in Rousseeuw and Croux (1993).
|
|
Packit |
67cb25 |
These estimators have a high breakdown point and a bounded influence
|
|
Packit |
67cb25 |
function. The implementation given here is very fast (running in
|
|
Packit |
67cb25 |
O(n logn) time) and needs little storage space.
|
|
Packit |
67cb25 |
|
|
Packit |
67cb25 |
Rousseeuw, P.J. and Croux, C. (1993)
|
|
Packit |
67cb25 |
Alternatives to the Median Absolute Deviation",
|
|
Packit |
67cb25 |
Journal of the American Statistical Association, Vol. 88, 1273-1283.
|
|
Packit |
67cb25 |
|
|
Packit |
67cb25 |
For both estimators, implementations in the pascal language can be
|
|
Packit |
67cb25 |
obtained from the original authors.
|
|
Packit |
67cb25 |
|
|
Packit |
67cb25 |
This software may be used and copied freely for scientific
|
|
Packit |
67cb25 |
and/or non-commercial purposes, provided reference is made
|
|
Packit |
67cb25 |
to the abovementioned paper.
|
|
Packit |
67cb25 |
|
|
Packit |
67cb25 |
Note by MM: We have explicit permission from P.Rousseeuw to
|
|
Packit |
67cb25 |
licence it under the GNU Public Licence.
|
|
Packit |
67cb25 |
*/
|
|
Packit |
67cb25 |
|
|
Packit |
67cb25 |
#ifndef int64_t
|
|
Packit |
67cb25 |
#define int64_t long int
|
|
Packit |
67cb25 |
#endif
|
|
Packit |
67cb25 |
|
|
Packit |
67cb25 |
static BASE FUNCTION(Qn,whimed)(BASE * a, int * w, int n, BASE * a_cand, BASE * a_srt, int * w_cand);
|
|
Packit |
67cb25 |
|
|
Packit |
67cb25 |
/*
|
|
Packit |
67cb25 |
gsl_stats_Qn0_from_sorted_data()
|
|
Packit |
67cb25 |
Efficient algorithm for the scale estimator:
|
|
Packit |
67cb25 |
|
|
Packit |
67cb25 |
Q_n0 = { |x_i - x_j|; i
|
|
Packit |
67cb25 |
|
|
Packit |
67cb25 |
i.e. the k-th order statistic of the |x_i - x_j|, where:
|
|
Packit |
67cb25 |
|
|
Packit |
67cb25 |
k = (floor(n/2) + 1 choose 2)
|
|
Packit |
67cb25 |
|
|
Packit |
67cb25 |
Inputs: sorted_data - sorted array containing the observations
|
|
Packit |
67cb25 |
stride - stride
|
|
Packit |
67cb25 |
n - length of 'sorted_data'
|
|
Packit |
67cb25 |
work - workspace of length 3n of type BASE
|
|
Packit |
67cb25 |
work_int - workspace of length 5n of type int
|
|
Packit |
67cb25 |
|
|
Packit |
67cb25 |
Return: Q_n statistic (without scale/correction factor); same type as input data
|
|
Packit |
67cb25 |
*/
|
|
Packit |
67cb25 |
|
|
Packit |
67cb25 |
BASE
|
|
Packit |
67cb25 |
FUNCTION(gsl_stats,Qn0_from_sorted_data) (const BASE sorted_data[],
|
|
Packit |
67cb25 |
const size_t stride,
|
|
Packit |
67cb25 |
const size_t n,
|
|
Packit |
67cb25 |
BASE work[],
|
|
Packit |
67cb25 |
int work_int[])
|
|
Packit |
67cb25 |
{
|
|
Packit |
67cb25 |
const int ni = (int) n;
|
|
Packit |
67cb25 |
BASE * a_srt = &work[n];
|
|
Packit |
67cb25 |
BASE * a_cand = &work[2*n];
|
|
Packit |
67cb25 |
|
|
Packit |
67cb25 |
int *left = &work_int[0];
|
|
Packit |
67cb25 |
int *right = &work_int[n];
|
|
Packit |
67cb25 |
int *p = &work_int[2*n];
|
|
Packit |
67cb25 |
int *q = &work_int[3*n];
|
|
Packit |
67cb25 |
int *weight = &work_int[4*n];
|
|
Packit |
67cb25 |
|
|
Packit |
67cb25 |
BASE trial = (BASE) 0.0;
|
|
Packit |
67cb25 |
int found = 0;
|
|
Packit |
67cb25 |
|
|
Packit |
67cb25 |
int h, i, j, jh;
|
|
Packit |
67cb25 |
|
|
Packit |
67cb25 |
/* following should be `long long int' : they can be of order n^2 */
|
|
Packit |
67cb25 |
int64_t k, knew, nl,nr, sump, sumq;
|
|
Packit |
67cb25 |
|
|
Packit |
67cb25 |
/* check for quick return */
|
|
Packit |
67cb25 |
if (n < 2)
|
|
Packit |
67cb25 |
return ((BASE) 0.0);
|
|
Packit |
67cb25 |
|
|
Packit |
67cb25 |
h = n / 2 + 1;
|
|
Packit |
67cb25 |
k = (int64_t)h * (h - 1) / 2;
|
|
Packit |
67cb25 |
|
|
Packit |
67cb25 |
for (i = 0; i < ni; ++i)
|
|
Packit |
67cb25 |
{
|
|
Packit |
67cb25 |
left[i] = ni - i + 1;
|
|
Packit |
67cb25 |
right[i] = (i <= h) ? ni : ni - (i - h);
|
|
Packit |
67cb25 |
|
|
Packit |
67cb25 |
/* the n - (i-h) is from the paper; original code had `n' */
|
|
Packit |
67cb25 |
}
|
|
Packit |
67cb25 |
|
|
Packit |
67cb25 |
nl = (int64_t)n * (n + 1) / 2;
|
|
Packit |
67cb25 |
nr = (int64_t)n * n;
|
|
Packit |
67cb25 |
knew = k + nl;/* = k + (n+1 \over 2) */
|
|
Packit |
67cb25 |
|
|
Packit |
67cb25 |
/* L200: */
|
|
Packit |
67cb25 |
while (!found && nr - nl > ni)
|
|
Packit |
67cb25 |
{
|
|
Packit |
67cb25 |
j = 0;
|
|
Packit |
67cb25 |
/* Truncation to float : try to make sure that the same values are got later (guard bits !) */
|
|
Packit |
67cb25 |
for (i = 1; i < ni; ++i)
|
|
Packit |
67cb25 |
{
|
|
Packit |
67cb25 |
if (left[i] <= right[i])
|
|
Packit |
67cb25 |
{
|
|
Packit |
67cb25 |
weight[j] = right[i] - left[i] + 1;
|
|
Packit |
67cb25 |
jh = left[i] + weight[j] / 2;
|
|
Packit |
67cb25 |
work[j] = sorted_data[i * stride] - sorted_data[(ni - jh) * stride];
|
|
Packit |
67cb25 |
++j;
|
|
Packit |
67cb25 |
}
|
|
Packit |
67cb25 |
}
|
|
Packit |
67cb25 |
|
|
Packit |
67cb25 |
trial = FUNCTION(Qn,whimed)(work, weight, j, a_cand, a_srt, /*iw_cand*/ p);
|
|
Packit |
67cb25 |
|
|
Packit |
67cb25 |
j = 0;
|
|
Packit |
67cb25 |
for (i = ni - 1; i >= 0; --i)
|
|
Packit |
67cb25 |
{
|
|
Packit |
67cb25 |
while (j < ni && ((double)(sorted_data[i * stride] - sorted_data[(ni - j - 1) * stride])) < trial)
|
|
Packit |
67cb25 |
++j;
|
|
Packit |
67cb25 |
|
|
Packit |
67cb25 |
p[i] = j;
|
|
Packit |
67cb25 |
}
|
|
Packit |
67cb25 |
|
|
Packit |
67cb25 |
j = ni + 1;
|
|
Packit |
67cb25 |
for (i = 0; i < ni; ++i)
|
|
Packit |
67cb25 |
{
|
|
Packit |
67cb25 |
while ((double)(sorted_data[i * stride] - sorted_data[(ni - j + 1) * stride]) > trial)
|
|
Packit |
67cb25 |
--j;
|
|
Packit |
67cb25 |
|
|
Packit |
67cb25 |
q[i] = j;
|
|
Packit |
67cb25 |
}
|
|
Packit |
67cb25 |
|
|
Packit |
67cb25 |
sump = 0;
|
|
Packit |
67cb25 |
sumq = 0;
|
|
Packit |
67cb25 |
|
|
Packit |
67cb25 |
for (i = 0; i < ni; ++i)
|
|
Packit |
67cb25 |
{
|
|
Packit |
67cb25 |
sump += p[i];
|
|
Packit |
67cb25 |
sumq += q[i] - 1;
|
|
Packit |
67cb25 |
}
|
|
Packit |
67cb25 |
|
|
Packit |
67cb25 |
if (knew <= sump)
|
|
Packit |
67cb25 |
{
|
|
Packit |
67cb25 |
for (i = 0; i < ni; ++i)
|
|
Packit |
67cb25 |
right[i] = p[i];
|
|
Packit |
67cb25 |
|
|
Packit |
67cb25 |
nr = sump;
|
|
Packit |
67cb25 |
}
|
|
Packit |
67cb25 |
else if (knew > sumq)
|
|
Packit |
67cb25 |
{
|
|
Packit |
67cb25 |
for (i = 0; i < ni; ++i)
|
|
Packit |
67cb25 |
left[i] = q[i];
|
|
Packit |
67cb25 |
|
|
Packit |
67cb25 |
nl = sumq;
|
|
Packit |
67cb25 |
}
|
|
Packit |
67cb25 |
else /* sump < knew <= sumq */
|
|
Packit |
67cb25 |
{
|
|
Packit |
67cb25 |
found = 1;
|
|
Packit |
67cb25 |
}
|
|
Packit |
67cb25 |
} /* while */
|
|
Packit |
67cb25 |
|
|
Packit |
67cb25 |
if (found)
|
|
Packit |
67cb25 |
{
|
|
Packit |
67cb25 |
return trial;
|
|
Packit |
67cb25 |
}
|
|
Packit |
67cb25 |
else
|
|
Packit |
67cb25 |
{
|
|
Packit |
67cb25 |
j = 0;
|
|
Packit |
67cb25 |
for (i = 1; i < ni; ++i)
|
|
Packit |
67cb25 |
{
|
|
Packit |
67cb25 |
int jj;
|
|
Packit |
67cb25 |
|
|
Packit |
67cb25 |
for (jj = left[i]; jj <= right[i]; ++jj)
|
|
Packit |
67cb25 |
{
|
|
Packit |
67cb25 |
work[j] = sorted_data[i * stride] - sorted_data[(ni - jj) * stride];
|
|
Packit |
67cb25 |
j++;
|
|
Packit |
67cb25 |
}/* j will be = sum_{i=2}^n (right[i] - left[i] + 1)_{+} */
|
|
Packit |
67cb25 |
}
|
|
Packit |
67cb25 |
|
|
Packit |
67cb25 |
/* return pull(work, j - 1, knew - nl) : */
|
|
Packit |
67cb25 |
knew -= (nl + 1); /* -1: 0-indexing */
|
|
Packit |
67cb25 |
|
|
Packit |
67cb25 |
/* sort work array */
|
|
Packit |
67cb25 |
TYPE (gsl_sort) (work, 1, j);
|
|
Packit |
67cb25 |
|
|
Packit |
67cb25 |
return (work[knew]);
|
|
Packit |
67cb25 |
}
|
|
Packit |
67cb25 |
}
|
|
Packit |
67cb25 |
|
|
Packit |
67cb25 |
/*
|
|
Packit |
67cb25 |
gsl_stats_Qn_from_sorted_data()
|
|
Packit |
67cb25 |
Efficient algorithm for the scale estimator:
|
|
Packit |
67cb25 |
|
|
Packit |
67cb25 |
Q_n = 2.219 * d_n * { |x_i - x_j|; i
|
|
Packit |
67cb25 |
|
|
Packit |
67cb25 |
with:
|
|
Packit |
67cb25 |
|
|
Packit |
67cb25 |
k = (floor(n/2) + 1 choose 2)
|
|
Packit |
67cb25 |
|
|
Packit |
67cb25 |
and d_n is a correction factor for finite sample bias
|
|
Packit |
67cb25 |
|
|
Packit |
67cb25 |
Inputs: sorted_data - sorted array containing the observations
|
|
Packit |
67cb25 |
stride - stride
|
|
Packit |
67cb25 |
n - length of 'sorted_data'
|
|
Packit |
67cb25 |
work - workspace of length 3n of type BASE
|
|
Packit |
67cb25 |
work_int - workspace of length 5n of type int
|
|
Packit |
67cb25 |
|
|
Packit |
67cb25 |
Return: Q_n statistic
|
|
Packit |
67cb25 |
*/
|
|
Packit |
67cb25 |
|
|
Packit |
67cb25 |
double
|
|
Packit |
67cb25 |
FUNCTION(gsl_stats,Qn_from_sorted_data) (const BASE sorted_data[],
|
|
Packit |
67cb25 |
const size_t stride,
|
|
Packit |
67cb25 |
const size_t n,
|
|
Packit |
67cb25 |
BASE work[],
|
|
Packit |
67cb25 |
int work_int[])
|
|
Packit |
67cb25 |
{
|
|
Packit |
67cb25 |
const double scale = 2.21914; /* asymptotic consistency for sigma^2 */
|
|
Packit |
67cb25 |
double Qn0 = (double) FUNCTION(gsl_stats,Qn0_from_sorted_data)(sorted_data, stride, n, work, work_int);
|
|
Packit |
67cb25 |
double dn = 1.0;
|
|
Packit |
67cb25 |
double Qn;
|
|
Packit |
67cb25 |
|
|
Packit |
67cb25 |
/* this correction factor deviates from the original paper Croux and Rousseeuw, 1992, and
|
|
Packit |
67cb25 |
* comes from the 'robustbase' R package */
|
|
Packit |
67cb25 |
if (n <= 12)
|
|
Packit |
67cb25 |
{
|
|
Packit |
67cb25 |
if (n == 2) dn = .399356;
|
|
Packit |
67cb25 |
else if (n == 3) dn = .99365;
|
|
Packit |
67cb25 |
else if (n == 4) dn = .51321;
|
|
Packit |
67cb25 |
else if (n == 5) dn = .84401;
|
|
Packit |
67cb25 |
else if (n == 6) dn = .61220;
|
|
Packit |
67cb25 |
else if (n == 7) dn = .85877;
|
|
Packit |
67cb25 |
else if (n == 8) dn = .66993;
|
|
Packit |
67cb25 |
else if (n == 9) dn = .87344;
|
|
Packit |
67cb25 |
else if (n == 10) dn = .72014;
|
|
Packit |
67cb25 |
else if (n == 11) dn = .88906;
|
|
Packit |
67cb25 |
else if (n == 12) dn = .75743;
|
|
Packit |
67cb25 |
}
|
|
Packit |
67cb25 |
else
|
|
Packit |
67cb25 |
{
|
|
Packit |
67cb25 |
if (n % 2 == 1) /* n odd */
|
|
Packit |
67cb25 |
dn = 1.60188 + (-2.1284 - 5.172 / n) / n;
|
|
Packit |
67cb25 |
else /* n even */
|
|
Packit |
67cb25 |
dn = 3.67561 + (1.9654 + (6.987 - 77.0 / n) / n) / n;
|
|
Packit |
67cb25 |
|
|
Packit |
67cb25 |
dn = 1.0 / (dn / (double)n + 1.0);
|
|
Packit |
67cb25 |
}
|
|
Packit |
67cb25 |
|
|
Packit |
67cb25 |
Qn = scale * dn * Qn0;
|
|
Packit |
67cb25 |
|
|
Packit |
67cb25 |
return Qn;
|
|
Packit |
67cb25 |
}
|
|
Packit |
67cb25 |
|
|
Packit |
67cb25 |
/*
|
|
Packit |
67cb25 |
Algorithm to compute the weighted high median in O(n) time.
|
|
Packit |
67cb25 |
|
|
Packit |
67cb25 |
The whimed is defined as the smallest a[j] such that the sum
|
|
Packit |
67cb25 |
of the weights of all a[i] <= a[j] is strictly greater than
|
|
Packit |
67cb25 |
half of the total weight.
|
|
Packit |
67cb25 |
|
|
Packit |
67cb25 |
Arguments:
|
|
Packit |
67cb25 |
|
|
Packit |
67cb25 |
a: double array containing the observations
|
|
Packit |
67cb25 |
n: number of observations
|
|
Packit |
67cb25 |
w: array of (int/double) weights of the observations.
|
|
Packit |
67cb25 |
*/
|
|
Packit |
67cb25 |
|
|
Packit |
67cb25 |
static BASE
|
|
Packit |
67cb25 |
FUNCTION(Qn,whimed)(BASE * a, int * w, int n, BASE * a_cand, BASE * a_srt, int * w_cand)
|
|
Packit |
67cb25 |
{
|
|
Packit |
67cb25 |
int n2, i, kcand;
|
|
Packit |
67cb25 |
/* sum of weights: `int' do overflow when n ~>= 1e5 */
|
|
Packit |
67cb25 |
int64_t wleft, wmid, wright, w_tot, wrest;
|
|
Packit |
67cb25 |
|
|
Packit |
67cb25 |
BASE trial;
|
|
Packit |
67cb25 |
|
|
Packit |
67cb25 |
w_tot = 0;
|
|
Packit |
67cb25 |
for (i = 0; i < n; ++i)
|
|
Packit |
67cb25 |
w_tot += w[i];
|
|
Packit |
67cb25 |
|
|
Packit |
67cb25 |
wrest = 0;
|
|
Packit |
67cb25 |
|
|
Packit |
67cb25 |
/* REPEAT : */
|
|
Packit |
67cb25 |
do
|
|
Packit |
67cb25 |
{
|
|
Packit |
67cb25 |
for (i = 0; i < n; ++i)
|
|
Packit |
67cb25 |
a_srt[i] = a[i];
|
|
Packit |
67cb25 |
|
|
Packit |
67cb25 |
n2 = n/2; /* =^= n/2 +1 with 0-indexing */
|
|
Packit |
67cb25 |
#if 0
|
|
Packit |
67cb25 |
rPsort(a_srt, n, n2);
|
|
Packit |
67cb25 |
#else
|
|
Packit |
67cb25 |
TYPE (gsl_sort) (a_srt, 1, n);
|
|
Packit |
67cb25 |
#endif
|
|
Packit |
67cb25 |
trial = a_srt[n2];
|
|
Packit |
67cb25 |
|
|
Packit |
67cb25 |
wleft = 0;
|
|
Packit |
67cb25 |
wmid = 0;
|
|
Packit |
67cb25 |
wright = 0;
|
|
Packit |
67cb25 |
|
|
Packit |
67cb25 |
for (i = 0; i < n; ++i)
|
|
Packit |
67cb25 |
{
|
|
Packit |
67cb25 |
if (a[i] < trial)
|
|
Packit |
67cb25 |
wleft += w[i];
|
|
Packit |
67cb25 |
else if (a[i] > trial)
|
|
Packit |
67cb25 |
wright += w[i];
|
|
Packit |
67cb25 |
else
|
|
Packit |
67cb25 |
wmid += w[i];
|
|
Packit |
67cb25 |
}
|
|
Packit |
67cb25 |
|
|
Packit |
67cb25 |
/* wleft = sum_{i; a[i] < trial} w[i]
|
|
Packit |
67cb25 |
* wmid = sum_{i; a[i] == trial} w[i] at least one 'i' since trial is one a[]!
|
|
Packit |
67cb25 |
* wright= sum_{i; a[i] > trial} w[i]
|
|
Packit |
67cb25 |
*/
|
|
Packit |
67cb25 |
kcand = 0;
|
|
Packit |
67cb25 |
if (2 * (wrest + wleft) > w_tot)
|
|
Packit |
67cb25 |
{
|
|
Packit |
67cb25 |
for (i = 0; i < n; ++i)
|
|
Packit |
67cb25 |
{
|
|
Packit |
67cb25 |
if (a[i] < trial)
|
|
Packit |
67cb25 |
{
|
|
Packit |
67cb25 |
a_cand[kcand] = a[i];
|
|
Packit |
67cb25 |
w_cand[kcand] = w[i];
|
|
Packit |
67cb25 |
++kcand;
|
|
Packit |
67cb25 |
}
|
|
Packit |
67cb25 |
}
|
|
Packit |
67cb25 |
}
|
|
Packit |
67cb25 |
else if (2 * (wrest + wleft + wmid) <= w_tot)
|
|
Packit |
67cb25 |
{
|
|
Packit |
67cb25 |
for (i = 0; i < n; ++i)
|
|
Packit |
67cb25 |
{
|
|
Packit |
67cb25 |
if (a[i] > trial)
|
|
Packit |
67cb25 |
{
|
|
Packit |
67cb25 |
a_cand[kcand] = a[i];
|
|
Packit |
67cb25 |
w_cand[kcand] = w[i];
|
|
Packit |
67cb25 |
++kcand;
|
|
Packit |
67cb25 |
}
|
|
Packit |
67cb25 |
}
|
|
Packit |
67cb25 |
|
|
Packit |
67cb25 |
wrest += wleft + wmid;
|
|
Packit |
67cb25 |
}
|
|
Packit |
67cb25 |
else
|
|
Packit |
67cb25 |
{
|
|
Packit |
67cb25 |
return trial;
|
|
Packit |
67cb25 |
}
|
|
Packit |
67cb25 |
|
|
Packit |
67cb25 |
n = kcand;
|
|
Packit |
67cb25 |
for (i = 0; i < n; ++i)
|
|
Packit |
67cb25 |
{
|
|
Packit |
67cb25 |
a[i] = a_cand[i];
|
|
Packit |
67cb25 |
w[i] = w_cand[i];
|
|
Packit |
67cb25 |
}
|
|
Packit |
67cb25 |
} while(1);
|
|
Packit |
67cb25 |
}
|