Blame statistics/Sn_source.c

Packit 67cb25
/* statistics/Sn_source.c
Packit 67cb25
 * 
Packit 67cb25
 * Copyright (C) 2018 Patrick Alken
Packit 67cb25
 * Copyright (C) 2005, 2006, 2007 Martin Maechler, ETH Zurich
Packit 67cb25
 * 
Packit 67cb25
 * This program is free software; you can redistribute it and/or modify
Packit 67cb25
 * it under the terms of the GNU General Public License as published by
Packit 67cb25
 * the Free Software Foundation; either version 3 of the License, or (at
Packit 67cb25
 * your option) any later version.
Packit 67cb25
 * 
Packit 67cb25
 * This program is distributed in the hope that it will be useful, but
Packit 67cb25
 * WITHOUT ANY WARRANTY; without even the implied warranty of
Packit 67cb25
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
Packit 67cb25
 * General Public License for more details.
Packit 67cb25
 * 
Packit 67cb25
 * You should have received a copy of the GNU General Public License
Packit 67cb25
 * along with this program; if not, write to the Free Software
Packit 67cb25
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
Packit 67cb25
 */
Packit 67cb25
Packit 67cb25
/* This is a merge of the C version of original files  qn.f and sn.f,
Packit 67cb25
 * translated by f2c (version 20010821).	       ====	====
Packit 67cb25
 * and then by f2c-clean,v 1.9 2000/01/13 13:46:53
Packit 67cb25
 * and further clean-edited manually by Martin Maechler.
Packit 67cb25
 *
Packit 67cb25
 * Further added interface functions to be called via .C() from R or S-plus
Packit 67cb25
 * Note that Peter Rousseeuw has explicitely given permission to
Packit 67cb25
 * use his code under the GPL for the R project.
Packit 67cb25
 */
Packit 67cb25
Packit 67cb25
/* Original comments by the authors of the Fortran original code,
Packit 67cb25
 * (merged for Qn & Sn in one file by M.M.):
Packit 67cb25
Packit 67cb25
   This file contains fortran functions for two new robust estimators
Packit 67cb25
   of scale denoted as Qn and Sn, decribed in Rousseeuw and Croux (1993).
Packit 67cb25
   These estimators have a high breakdown point and a bounded influence
Packit 67cb25
   function. The implementation given here is very fast (running in
Packit 67cb25
   O(n logn) time) and needs little storage space.
Packit 67cb25
Packit 67cb25
	Rousseeuw, P.J. and Croux, C. (1993)
Packit 67cb25
	Alternatives to the Median Absolute Deviation",
Packit 67cb25
	Journal of the American Statistical Association, Vol. 88, 1273-1283.
Packit 67cb25
Packit 67cb25
   For both estimators, implementations in the pascal language can be
Packit 67cb25
   obtained from the original authors.
Packit 67cb25
Packit 67cb25
   This software may be used and copied freely for scientific
Packit 67cb25
   and/or non-commercial purposes, provided reference is made
Packit 67cb25
   to the abovementioned paper.
Packit 67cb25
Packit 67cb25
Note by MM: We have explicit permission from P.Rousseeuw to
Packit 67cb25
licence it under the GNU Public Licence.
Packit 67cb25
*/
Packit 67cb25
Packit 67cb25
/*
Packit 67cb25
gsl_stats_Sn0_from_sorted_data()
Packit 67cb25
  Efficient algorithm for the scale estimator:
Packit 67cb25
Packit 67cb25
    S_n0 = LOMED_{i} HIMED_{i} |x_i - x_j|
Packit 67cb25
Packit 67cb25
which can equivalently be written as
Packit 67cb25
Packit 67cb25
    S_n0 = LOMED_{i} LOMED_{j != i} |x_i - x_j|
Packit 67cb25
Packit 67cb25
Inputs: sorted_data - sorted array containing the observations
Packit 67cb25
        stride      - stride
Packit 67cb25
        n           - length of 'sorted_data'
Packit 67cb25
        work        - workspace of length n
Packit 67cb25
                      work[i] := LOMED_{j != i} | x_i - x_j |
Packit 67cb25
Packit 67cb25
Return: S_n statistic (without scale/correction factor)
Packit 67cb25
*/
Packit 67cb25
Packit 67cb25
BASE
Packit 67cb25
FUNCTION(gsl_stats,Sn0_from_sorted_data) (const BASE sorted_data[],
Packit 67cb25
                                          const size_t stride,
Packit 67cb25
                                          const size_t n,
Packit 67cb25
                                          BASE work[])
Packit 67cb25
{
Packit 67cb25
  /* Local variables */
Packit 67cb25
  double medA, medB;
Packit 67cb25
  int i, diff, half, Amin, Amax, even, length;
Packit 67cb25
  int leftA, leftB, nA, nB, tryA, tryB, rightA, rightB;
Packit 67cb25
  int np1_2 = (n + 1) / 2;
Packit 67cb25
Packit 67cb25
  work[0] = sorted_data[n / 2 * stride] - sorted_data[0];
Packit 67cb25
Packit 67cb25
  /* first half for() loop : */
Packit 67cb25
  for (i = 2; i <= np1_2; ++i)
Packit 67cb25
    {
Packit 67cb25
      nA = i - 1;
Packit 67cb25
      nB = n - i;
Packit 67cb25
      diff = nB - nA;
Packit 67cb25
      leftA = leftB	= 1;
Packit 67cb25
      rightA = rightB = nB;
Packit 67cb25
      Amin = diff / 2 + 1;
Packit 67cb25
      Amax = diff / 2 + nA;
Packit 67cb25
Packit 67cb25
      while (leftA < rightA)
Packit 67cb25
        {
Packit 67cb25
          length = rightA - leftA + 1;
Packit 67cb25
          even = 1 - length % 2;
Packit 67cb25
          half = (length - 1) / 2;
Packit 67cb25
          tryA = leftA + half;
Packit 67cb25
          tryB = leftB + half;
Packit 67cb25
          if (tryA < Amin)
Packit 67cb25
            {
Packit 67cb25
              rightB = tryB;
Packit 67cb25
              leftA = tryA + even;
Packit 67cb25
            }
Packit 67cb25
          else
Packit 67cb25
            {
Packit 67cb25
              if (tryA > Amax)
Packit 67cb25
                {
Packit 67cb25
                  rightA = tryA;
Packit 67cb25
                  leftB = tryB + even;
Packit 67cb25
                }
Packit 67cb25
              else
Packit 67cb25
                {
Packit 67cb25
                  medA = sorted_data[(i - 1) * stride] - sorted_data[(i - tryA + Amin - 2) * stride];
Packit 67cb25
                  medB = sorted_data[(tryB + i - 1) * stride] - sorted_data[(i - 1) * stride];
Packit 67cb25
                  if (medA >= medB)
Packit 67cb25
                    {
Packit 67cb25
                      rightA = tryA;
Packit 67cb25
                      leftB = tryB + even;
Packit 67cb25
                    }
Packit 67cb25
                  else
Packit 67cb25
                    {
Packit 67cb25
                      rightB = tryB;
Packit 67cb25
                      leftA = tryA + even;
Packit 67cb25
                    }
Packit 67cb25
                }
Packit 67cb25
            }
Packit 67cb25
        } /* while */
Packit 67cb25
Packit 67cb25
      if (leftA > Amax)
Packit 67cb25
        {
Packit 67cb25
          work[i - 1] = sorted_data[(leftB + i - 1) * stride] - sorted_data[(i - 1) * stride];
Packit 67cb25
        }
Packit 67cb25
      else
Packit 67cb25
        {
Packit 67cb25
          medA = sorted_data[(i - 1) * stride] - sorted_data[(i - leftA + Amin - 2) * stride];
Packit 67cb25
          medB = sorted_data[(leftB + i - 1) * stride] - sorted_data[(i - 1) * stride];
Packit 67cb25
          work[i - 1] = GSL_MIN(medA, medB);
Packit 67cb25
        }
Packit 67cb25
    }
Packit 67cb25
Packit 67cb25
  /* second half for() loop : */
Packit 67cb25
  for (i = np1_2 + 1; i <= (int) n - 1; ++i)
Packit 67cb25
    {
Packit 67cb25
      nA = n - i;
Packit 67cb25
      nB = i - 1;
Packit 67cb25
      diff = nB - nA;
Packit 67cb25
      leftA  = leftB	= 1;
Packit 67cb25
      rightA = rightB = nB;
Packit 67cb25
      Amin = diff / 2 + 1;
Packit 67cb25
      Amax = diff / 2 + nA;
Packit 67cb25
Packit 67cb25
      while (leftA < rightA)
Packit 67cb25
        {
Packit 67cb25
          length = rightA - leftA + 1;
Packit 67cb25
          even = 1 - length % 2;
Packit 67cb25
          half = (length - 1) / 2;
Packit 67cb25
          tryA = leftA + half;
Packit 67cb25
          tryB = leftB + half;
Packit 67cb25
Packit 67cb25
          if (tryA < Amin)
Packit 67cb25
            {
Packit 67cb25
              rightB = tryB;
Packit 67cb25
              leftA = tryA + even;
Packit 67cb25
            }
Packit 67cb25
          else
Packit 67cb25
            {
Packit 67cb25
              if (tryA > Amax)
Packit 67cb25
                {
Packit 67cb25
                  rightA = tryA;
Packit 67cb25
                  leftB = tryB + even;
Packit 67cb25
                }
Packit 67cb25
              else
Packit 67cb25
                {
Packit 67cb25
                  medA = sorted_data[(i + tryA - Amin) * stride] - sorted_data[(i - 1) * stride];
Packit 67cb25
                  medB = sorted_data[(i - 1) * stride] - sorted_data[(i - tryB - 1) * stride];
Packit 67cb25
                  if (medA >= medB)
Packit 67cb25
                    {
Packit 67cb25
                      rightA = tryA;
Packit 67cb25
                      leftB = tryB + even;
Packit 67cb25
                    }
Packit 67cb25
                  else
Packit 67cb25
                    {
Packit 67cb25
                      rightB = tryB;
Packit 67cb25
                      leftA = tryA + even;
Packit 67cb25
                    }
Packit 67cb25
                }
Packit 67cb25
            }
Packit 67cb25
        } /* while */
Packit 67cb25
Packit 67cb25
      if (leftA > Amax)
Packit 67cb25
        {
Packit 67cb25
          work[i - 1] = sorted_data[(i - 1) * stride] - sorted_data[(i - leftB - 1) * stride];
Packit 67cb25
        }
Packit 67cb25
      else
Packit 67cb25
        {
Packit 67cb25
          medA = sorted_data[(i + leftA - Amin) * stride] - sorted_data[(i - 1) * stride];
Packit 67cb25
          medB = sorted_data[(i - 1) * stride] - sorted_data[(i - leftB - 1) * stride];
Packit 67cb25
          work[i - 1] = GSL_MIN(medA, medB);
Packit 67cb25
        }
Packit 67cb25
    }
Packit 67cb25
Packit 67cb25
  work[n - 1] = sorted_data[(n - 1) * stride] - sorted_data[(np1_2 - 1) * stride];
Packit 67cb25
Packit 67cb25
  /* sort work array */
Packit 67cb25
  TYPE (gsl_sort) (work, 1, n);
Packit 67cb25
Packit 67cb25
  return work[np1_2 - 1];
Packit 67cb25
}
Packit 67cb25
Packit 67cb25
/*
Packit 67cb25
gsl_stats_Sn_from_sorted_data()
Packit 67cb25
  Efficient algorithm for the scale estimator:
Packit 67cb25
Packit 67cb25
    S_n = 1.1926 * c_n LOMED_{i} HIMED_{i} |x_i - x_j|
Packit 67cb25
Packit 67cb25
which can equivalently be written as
Packit 67cb25
Packit 67cb25
    S_n = 1.1926 * c_n * LOMED_{i} LOMED_{j != i} |x_i - x_j|
Packit 67cb25
Packit 67cb25
and c_n is a correction factor for small sample bias
Packit 67cb25
Packit 67cb25
Inputs: sorted_data - sorted array containing the observations
Packit 67cb25
        stride      - stride
Packit 67cb25
        n           - length of 'sorted_data'
Packit 67cb25
        work        - workspace of length n
Packit 67cb25
                      work[i] := LOMED_{j != i} | x_i - x_j |
Packit 67cb25
Packit 67cb25
Return: S_n statistic
Packit 67cb25
*/
Packit 67cb25
Packit 67cb25
double
Packit 67cb25
FUNCTION(gsl_stats,Sn_from_sorted_data) (const BASE sorted_data[],
Packit 67cb25
                                         const size_t stride,
Packit 67cb25
                                         const size_t n,
Packit 67cb25
                                         BASE work[])
Packit 67cb25
{
Packit 67cb25
  const double scale = 1.1926; /* asymptotic consistency for sigma^2 */
Packit 67cb25
  double Sn0 = (double) FUNCTION(gsl_stats,Sn0_from_sorted_data)(sorted_data, stride, n, work);
Packit 67cb25
  double cn = 1.0;
Packit 67cb25
  double Sn;
Packit 67cb25
Packit 67cb25
  /* determine correction factor for finite sample bias */
Packit 67cb25
  if (n <= 9)
Packit 67cb25
    {
Packit 67cb25
      if (n == 2) cn = 0.743;
Packit 67cb25
      else if (n == 3) cn = 1.851;
Packit 67cb25
      else if (n == 4) cn = 0.954;
Packit 67cb25
      else if (n == 5) cn = 1.351;
Packit 67cb25
      else if (n == 6) cn = 0.993;
Packit 67cb25
      else if (n == 7) cn = 1.198;
Packit 67cb25
      else if (n == 8) cn = 1.005;
Packit 67cb25
      else if (n == 9) cn = 1.131;
Packit 67cb25
    }
Packit 67cb25
  else if (n % 2 == 1) /* n odd, >= 11 */
Packit 67cb25
    {
Packit 67cb25
      cn = (double) n / (n - 0.9);
Packit 67cb25
    }
Packit 67cb25
Packit 67cb25
  Sn = scale * cn * Sn0;
Packit 67cb25
Packit 67cb25
  return Sn;
Packit 67cb25
}