Blame statistics/Qn_source.c

Packit 67cb25
/* statistics/Qn_source.c
Packit 67cb25
 * 
Packit 67cb25
 * Copyright (C) 2018 Patrick Alken
Packit 67cb25
 * Copyright (C) 2005, 2006, 2007 Martin Maechler, ETH Zurich
Packit 67cb25
 * 
Packit 67cb25
 * This program is free software; you can redistribute it and/or modify
Packit 67cb25
 * it under the terms of the GNU General Public License as published by
Packit 67cb25
 * the Free Software Foundation; either version 3 of the License, or (at
Packit 67cb25
 * your option) any later version.
Packit 67cb25
 * 
Packit 67cb25
 * This program is distributed in the hope that it will be useful, but
Packit 67cb25
 * WITHOUT ANY WARRANTY; without even the implied warranty of
Packit 67cb25
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
Packit 67cb25
 * General Public License for more details.
Packit 67cb25
 * 
Packit 67cb25
 * You should have received a copy of the GNU General Public License
Packit 67cb25
 * along with this program; if not, write to the Free Software
Packit 67cb25
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
Packit 67cb25
 */
Packit 67cb25
Packit 67cb25
/* This is a merge of the C version of original files  qn.f and sn.f,
Packit 67cb25
 * translated by f2c (version 20010821).	       ====	====
Packit 67cb25
 * and then by f2c-clean,v 1.9 2000/01/13 13:46:53
Packit 67cb25
 * and further clean-edited manually by Martin Maechler.
Packit 67cb25
 *
Packit 67cb25
 * Further added interface functions to be called via .C() from R or S-plus
Packit 67cb25
 * Note that Peter Rousseeuw has explicitely given permission to
Packit 67cb25
 * use his code under the GPL for the R project.
Packit 67cb25
 */
Packit 67cb25
Packit 67cb25
/* Original comments by the authors of the Fortran original code,
Packit 67cb25
 * (merged for Qn & Sn in one file by M.M.):
Packit 67cb25
Packit 67cb25
   This file contains fortran functions for two new robust estimators
Packit 67cb25
   of scale denoted as Qn and Sn, decribed in Rousseeuw and Croux (1993).
Packit 67cb25
   These estimators have a high breakdown point and a bounded influence
Packit 67cb25
   function. The implementation given here is very fast (running in
Packit 67cb25
   O(n logn) time) and needs little storage space.
Packit 67cb25
Packit 67cb25
	Rousseeuw, P.J. and Croux, C. (1993)
Packit 67cb25
	Alternatives to the Median Absolute Deviation",
Packit 67cb25
	Journal of the American Statistical Association, Vol. 88, 1273-1283.
Packit 67cb25
Packit 67cb25
   For both estimators, implementations in the pascal language can be
Packit 67cb25
   obtained from the original authors.
Packit 67cb25
Packit 67cb25
   This software may be used and copied freely for scientific
Packit 67cb25
   and/or non-commercial purposes, provided reference is made
Packit 67cb25
   to the abovementioned paper.
Packit 67cb25
Packit 67cb25
Note by MM: We have explicit permission from P.Rousseeuw to
Packit 67cb25
licence it under the GNU Public Licence.
Packit 67cb25
*/
Packit 67cb25
Packit 67cb25
#ifndef int64_t
Packit 67cb25
#define int64_t long int
Packit 67cb25
#endif
Packit 67cb25
Packit 67cb25
static BASE FUNCTION(Qn,whimed)(BASE * a, int * w, int n, BASE * a_cand, BASE * a_srt, int * w_cand);
Packit 67cb25
Packit 67cb25
/*
Packit 67cb25
gsl_stats_Qn0_from_sorted_data()
Packit 67cb25
  Efficient algorithm for the scale estimator:
Packit 67cb25
Packit 67cb25
    Q_n0 = { |x_i - x_j|; i
Packit 67cb25
Packit 67cb25
i.e. the k-th order statistic of the |x_i - x_j|, where:
Packit 67cb25
Packit 67cb25
k = (floor(n/2) + 1 choose 2)
Packit 67cb25
Packit 67cb25
Inputs: sorted_data - sorted array containing the observations
Packit 67cb25
        stride      - stride
Packit 67cb25
        n           - length of 'sorted_data'
Packit 67cb25
        work        - workspace of length 3n of type BASE
Packit 67cb25
        work_int    - workspace of length 5n of type int
Packit 67cb25
Packit 67cb25
Return: Q_n statistic (without scale/correction factor); same type as input data
Packit 67cb25
*/
Packit 67cb25
Packit 67cb25
BASE
Packit 67cb25
FUNCTION(gsl_stats,Qn0_from_sorted_data) (const BASE sorted_data[],
Packit 67cb25
                                          const size_t stride,
Packit 67cb25
                                          const size_t n,
Packit 67cb25
                                          BASE work[],
Packit 67cb25
                                          int work_int[])
Packit 67cb25
{
Packit 67cb25
  const int ni = (int) n;
Packit 67cb25
  BASE * a_srt = &work[n];
Packit 67cb25
  BASE * a_cand = &work[2*n];
Packit 67cb25
Packit 67cb25
  int *left = &work_int[0];
Packit 67cb25
  int *right = &work_int[n];
Packit 67cb25
  int *p = &work_int[2*n];
Packit 67cb25
  int *q = &work_int[3*n];
Packit 67cb25
  int *weight = &work_int[4*n];
Packit 67cb25
Packit 67cb25
  BASE trial = (BASE) 0.0;
Packit 67cb25
  int found = 0;
Packit 67cb25
Packit 67cb25
  int h, i, j, jh;
Packit 67cb25
Packit 67cb25
  /* following should be `long long int' : they can be of order n^2 */
Packit 67cb25
  int64_t k, knew, nl,nr, sump, sumq;
Packit 67cb25
Packit 67cb25
  /* check for quick return */
Packit 67cb25
  if (n < 2)
Packit 67cb25
    return ((BASE) 0.0);
Packit 67cb25
Packit 67cb25
  h = n / 2 + 1;
Packit 67cb25
  k = (int64_t)h * (h - 1) / 2;
Packit 67cb25
Packit 67cb25
  for (i = 0; i < ni; ++i)
Packit 67cb25
    {
Packit 67cb25
      left[i] = ni - i + 1;
Packit 67cb25
      right[i] = (i <= h) ? ni : ni - (i - h);
Packit 67cb25
Packit 67cb25
      /* the n - (i-h) is from the paper; original code had `n' */
Packit 67cb25
    }
Packit 67cb25
Packit 67cb25
  nl = (int64_t)n * (n + 1) / 2;
Packit 67cb25
  nr = (int64_t)n * n;
Packit 67cb25
  knew = k + nl;/* = k + (n+1 \over 2) */
Packit 67cb25
Packit 67cb25
/* L200: */
Packit 67cb25
  while (!found && nr - nl > ni)
Packit 67cb25
    {
Packit 67cb25
      j = 0;
Packit 67cb25
      /* Truncation to float : try to make sure that the same values are got later (guard bits !) */
Packit 67cb25
      for (i = 1; i < ni; ++i)
Packit 67cb25
        {
Packit 67cb25
          if (left[i] <= right[i])
Packit 67cb25
            {
Packit 67cb25
              weight[j] = right[i] - left[i] + 1;
Packit 67cb25
              jh = left[i] + weight[j] / 2;
Packit 67cb25
              work[j] = sorted_data[i * stride] - sorted_data[(ni - jh) * stride];
Packit 67cb25
              ++j;
Packit 67cb25
            }
Packit 67cb25
        }
Packit 67cb25
Packit 67cb25
      trial = FUNCTION(Qn,whimed)(work, weight, j, a_cand, a_srt, /*iw_cand*/ p);
Packit 67cb25
Packit 67cb25
      j = 0;
Packit 67cb25
      for (i = ni - 1; i >= 0; --i)
Packit 67cb25
        {
Packit 67cb25
          while (j < ni && ((double)(sorted_data[i * stride] - sorted_data[(ni - j - 1) * stride])) < trial)
Packit 67cb25
            ++j;
Packit 67cb25
Packit 67cb25
          p[i] = j;
Packit 67cb25
        }
Packit 67cb25
Packit 67cb25
      j = ni + 1;
Packit 67cb25
      for (i = 0; i < ni; ++i)
Packit 67cb25
        {
Packit 67cb25
          while ((double)(sorted_data[i * stride] - sorted_data[(ni - j + 1) * stride]) > trial)
Packit 67cb25
            --j;
Packit 67cb25
Packit 67cb25
          q[i] = j;
Packit 67cb25
        }
Packit 67cb25
Packit 67cb25
      sump = 0;
Packit 67cb25
      sumq = 0;
Packit 67cb25
Packit 67cb25
      for (i = 0; i < ni; ++i)
Packit 67cb25
        {
Packit 67cb25
          sump += p[i];
Packit 67cb25
          sumq += q[i] - 1;
Packit 67cb25
        }
Packit 67cb25
Packit 67cb25
      if (knew <= sump)
Packit 67cb25
        {
Packit 67cb25
          for (i = 0; i < ni; ++i)
Packit 67cb25
            right[i] = p[i];
Packit 67cb25
Packit 67cb25
          nr = sump;
Packit 67cb25
        }
Packit 67cb25
      else if (knew > sumq)
Packit 67cb25
        {
Packit 67cb25
          for (i = 0; i < ni; ++i)
Packit 67cb25
            left[i] = q[i];
Packit 67cb25
Packit 67cb25
          nl = sumq;
Packit 67cb25
        }
Packit 67cb25
      else /* sump < knew <= sumq */
Packit 67cb25
        {
Packit 67cb25
          found = 1;
Packit 67cb25
        }
Packit 67cb25
    } /* while */
Packit 67cb25
Packit 67cb25
  if (found)
Packit 67cb25
    {
Packit 67cb25
      return trial;
Packit 67cb25
    }
Packit 67cb25
  else
Packit 67cb25
    {
Packit 67cb25
      j = 0;
Packit 67cb25
      for (i = 1; i < ni; ++i)
Packit 67cb25
        {
Packit 67cb25
          int jj;
Packit 67cb25
Packit 67cb25
          for (jj = left[i]; jj <= right[i]; ++jj)
Packit 67cb25
            {
Packit 67cb25
              work[j] = sorted_data[i * stride] - sorted_data[(ni - jj) * stride];
Packit 67cb25
              j++;
Packit 67cb25
            }/* j will be = sum_{i=2}^n (right[i] - left[i] + 1)_{+}  */
Packit 67cb25
        }
Packit 67cb25
Packit 67cb25
      /* return pull(work, j - 1, knew - nl)	: */
Packit 67cb25
      knew -= (nl + 1); /* -1: 0-indexing */
Packit 67cb25
Packit 67cb25
      /* sort work array */
Packit 67cb25
      TYPE (gsl_sort) (work, 1, j);
Packit 67cb25
Packit 67cb25
      return (work[knew]);
Packit 67cb25
    }
Packit 67cb25
}
Packit 67cb25
Packit 67cb25
/*
Packit 67cb25
gsl_stats_Qn_from_sorted_data()
Packit 67cb25
  Efficient algorithm for the scale estimator:
Packit 67cb25
Packit 67cb25
    Q_n = 2.219 * d_n * { |x_i - x_j|; i
Packit 67cb25
Packit 67cb25
with:
Packit 67cb25
Packit 67cb25
k = (floor(n/2) + 1 choose 2)
Packit 67cb25
Packit 67cb25
and d_n is a correction factor for finite sample bias
Packit 67cb25
Packit 67cb25
Inputs: sorted_data - sorted array containing the observations
Packit 67cb25
        stride      - stride
Packit 67cb25
        n           - length of 'sorted_data'
Packit 67cb25
        work        - workspace of length 3n of type BASE
Packit 67cb25
        work_int    - workspace of length 5n of type int
Packit 67cb25
Packit 67cb25
Return: Q_n statistic
Packit 67cb25
*/
Packit 67cb25
Packit 67cb25
double
Packit 67cb25
FUNCTION(gsl_stats,Qn_from_sorted_data) (const BASE sorted_data[],
Packit 67cb25
                                         const size_t stride,
Packit 67cb25
                                         const size_t n,
Packit 67cb25
                                         BASE work[],
Packit 67cb25
                                         int work_int[])
Packit 67cb25
{
Packit 67cb25
  const double scale = 2.21914; /* asymptotic consistency for sigma^2 */
Packit 67cb25
  double Qn0 = (double) FUNCTION(gsl_stats,Qn0_from_sorted_data)(sorted_data, stride, n, work, work_int);
Packit 67cb25
  double dn = 1.0;
Packit 67cb25
  double Qn;
Packit 67cb25
Packit 67cb25
  /* this correction factor deviates from the original paper Croux and Rousseeuw, 1992, and
Packit 67cb25
   * comes from the 'robustbase' R package */
Packit 67cb25
  if (n <= 12)
Packit 67cb25
    {
Packit 67cb25
      if (n == 2) dn = .399356;
Packit 67cb25
      else if (n == 3) dn = .99365;
Packit 67cb25
      else if (n == 4) dn = .51321;
Packit 67cb25
      else if (n == 5) dn = .84401;
Packit 67cb25
      else if (n == 6) dn = .61220;
Packit 67cb25
      else if (n == 7) dn = .85877;
Packit 67cb25
      else if (n == 8) dn = .66993;
Packit 67cb25
      else if (n == 9) dn = .87344;
Packit 67cb25
      else if (n == 10) dn = .72014;
Packit 67cb25
      else if (n == 11) dn = .88906;
Packit 67cb25
      else if (n == 12) dn = .75743;
Packit 67cb25
    }
Packit 67cb25
  else
Packit 67cb25
    {
Packit 67cb25
      if (n % 2 == 1) /* n odd */
Packit 67cb25
        dn = 1.60188 + (-2.1284 - 5.172 / n) / n;
Packit 67cb25
      else            /* n even */
Packit 67cb25
        dn = 3.67561 + (1.9654 + (6.987 - 77.0 / n) / n) / n;
Packit 67cb25
Packit 67cb25
      dn = 1.0 / (dn / (double)n + 1.0);
Packit 67cb25
    }
Packit 67cb25
Packit 67cb25
  Qn = scale * dn * Qn0;
Packit 67cb25
Packit 67cb25
  return Qn;
Packit 67cb25
}
Packit 67cb25
Packit 67cb25
/*
Packit 67cb25
  Algorithm to compute the weighted high median in O(n) time.
Packit 67cb25
Packit 67cb25
  The whimed is defined as the smallest a[j] such that the sum
Packit 67cb25
  of the weights of all a[i] <= a[j] is strictly greater than
Packit 67cb25
  half of the total weight.
Packit 67cb25
Packit 67cb25
  Arguments:
Packit 67cb25
Packit 67cb25
  a: double array containing the observations
Packit 67cb25
  n: number of observations
Packit 67cb25
  w: array of (int/double) weights of the observations.
Packit 67cb25
*/
Packit 67cb25
Packit 67cb25
static BASE
Packit 67cb25
FUNCTION(Qn,whimed)(BASE * a, int * w, int n, BASE * a_cand, BASE * a_srt, int * w_cand)
Packit 67cb25
{
Packit 67cb25
  int n2, i, kcand;
Packit 67cb25
  /* sum of weights: `int' do overflow when  n ~>= 1e5 */
Packit 67cb25
  int64_t wleft, wmid, wright, w_tot, wrest;
Packit 67cb25
Packit 67cb25
  BASE trial;
Packit 67cb25
Packit 67cb25
  w_tot = 0;
Packit 67cb25
  for (i = 0; i < n; ++i)
Packit 67cb25
    w_tot += w[i];
Packit 67cb25
Packit 67cb25
  wrest = 0;
Packit 67cb25
Packit 67cb25
  /* REPEAT : */
Packit 67cb25
  do
Packit 67cb25
    {
Packit 67cb25
      for (i = 0; i < n; ++i)
Packit 67cb25
        a_srt[i] = a[i];
Packit 67cb25
Packit 67cb25
      n2 = n/2; /* =^= n/2 +1 with 0-indexing */
Packit 67cb25
#if 0
Packit 67cb25
      rPsort(a_srt, n, n2);
Packit 67cb25
#else
Packit 67cb25
      TYPE (gsl_sort) (a_srt, 1, n);
Packit 67cb25
#endif
Packit 67cb25
      trial = a_srt[n2];
Packit 67cb25
Packit 67cb25
      wleft = 0;
Packit 67cb25
      wmid = 0;
Packit 67cb25
      wright = 0;
Packit 67cb25
Packit 67cb25
      for (i = 0; i < n; ++i)
Packit 67cb25
        {
Packit 67cb25
          if (a[i] < trial)
Packit 67cb25
            wleft += w[i];
Packit 67cb25
          else if (a[i] > trial)
Packit 67cb25
            wright += w[i];
Packit 67cb25
          else
Packit 67cb25
            wmid += w[i];
Packit 67cb25
        }
Packit 67cb25
Packit 67cb25
      /* wleft = sum_{i; a[i]	 < trial}  w[i]
Packit 67cb25
       * wmid	 = sum_{i; a[i] == trial}  w[i] at least one 'i' since trial is one a[]!
Packit 67cb25
       * wright= sum_{i; a[i]	 > trial}  w[i]
Packit 67cb25
       */
Packit 67cb25
      kcand = 0;
Packit 67cb25
      if (2 * (wrest + wleft) > w_tot)
Packit 67cb25
        {
Packit 67cb25
          for (i = 0; i < n; ++i)
Packit 67cb25
            {
Packit 67cb25
              if (a[i] < trial)
Packit 67cb25
                {
Packit 67cb25
                  a_cand[kcand] = a[i];
Packit 67cb25
                  w_cand[kcand] = w[i];
Packit 67cb25
                  ++kcand;
Packit 67cb25
                }
Packit 67cb25
            }
Packit 67cb25
        }
Packit 67cb25
      else if (2 * (wrest + wleft + wmid) <= w_tot)
Packit 67cb25
        {
Packit 67cb25
          for (i = 0; i < n; ++i)
Packit 67cb25
            {
Packit 67cb25
              if (a[i] > trial)
Packit 67cb25
                {
Packit 67cb25
                  a_cand[kcand] = a[i];
Packit 67cb25
                  w_cand[kcand] = w[i];
Packit 67cb25
                  ++kcand;
Packit 67cb25
                }
Packit 67cb25
            }
Packit 67cb25
Packit 67cb25
          wrest += wleft + wmid;
Packit 67cb25
        }
Packit 67cb25
      else
Packit 67cb25
        {
Packit 67cb25
          return trial;
Packit 67cb25
        }
Packit 67cb25
Packit 67cb25
      n = kcand;
Packit 67cb25
      for (i = 0; i < n; ++i)
Packit 67cb25
        {
Packit 67cb25
          a[i] = a_cand[i];
Packit 67cb25
          w[i] = w_cand[i];
Packit 67cb25
        }
Packit 67cb25
    } while(1);
Packit 67cb25
}