/* rstat/rquantile.c
*
* Copyright (C) 2015 Patrick Alken
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or (at
* your option) any later version.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
#include <config.h>
#include <stdlib.h>
#include <math.h>
#include <gsl/gsl_math.h>
#include <gsl/gsl_sort.h>
#include <gsl/gsl_statistics.h>
#include <gsl/gsl_rstat.h>
/*
* Running quantile calculation based on the paper
*
* [1] R. Jain and I. Chlamtac, "The P^2 algorithm for dynamic
* calculation of quantiles and histograms without storing
* observations", Communications of the ACM, October 1985
*/
/* forward declaration of the marker-height helper defined further below;
 * it is called from gsl_rstat_quantile_add() */
static double calc_psq(const double qp1, const double q, const double qm1,
const double d, const double np1, const double n, const double nm1);
/*
 * gsl_rstat_quantile_alloc()
 *   Create a running-quantile workspace for the p-quantile.
 *
 * Inputs: p - quantile to track; stored in the workspace as given
 *
 * Return: pointer to a zero-filled workspace whose marker state has been
 *         set up by gsl_rstat_quantile_reset(); if the allocation fails,
 *         the GSL error handler is invoked with GSL_ENOMEM via
 *         GSL_ERROR_NULL
 */
gsl_rstat_quantile_workspace *
gsl_rstat_quantile_alloc(const double p)
{
  gsl_rstat_quantile_workspace *ws = calloc(1, sizeof *ws);

  if (ws == NULL)
    {
      GSL_ERROR_NULL ("failed to allocate space for workspace", GSL_ENOMEM);
    }

  /* reset() reads w->p, so the quantile must be stored first */
  ws->p = p;
  gsl_rstat_quantile_reset(ws);

  return ws;
} /* gsl_rstat_quantile_alloc() */
/*
 * gsl_rstat_quantile_free()
 *   Release a running-quantile workspace.
 *
 * Inputs: w - workspace pointer; it is handed directly to free(), which
 *             matches the calloc() used in gsl_rstat_quantile_alloc()
 */
void
gsl_rstat_quantile_free(gsl_rstat_quantile_workspace *w)
{
free(w);
} /* gsl_rstat_quantile_free() */
/*
 * gsl_rstat_quantile_reset()
 *   Return a workspace to its "no data seen" state while keeping the
 *   quantile w->p that it was created with.
 *
 * Inputs: w - workspace; w->p is read, and npos[], np[], dnp[] and n
 *             are overwritten. The heights q[] are not touched.
 *
 * Return: GSL_SUCCESS
 */
int
gsl_rstat_quantile_reset(gsl_rstat_quantile_workspace *w)
{
  const double prob = w->p;
  int marker;

  /* no observations recorded */
  w->n = 0;

  /* actual marker positions n_i = 1, 2, 3, 4, 5 */
  for (marker = 0; marker < 5; ++marker)
    {
      w->npos[marker] = marker + 1;
    }

  /* desired marker positions n_i' */
  w->np[0] = 1.0;
  w->np[1] = 1.0 + 2.0 * prob;
  w->np[2] = 1.0 + 4.0 * prob;
  w->np[3] = 3.0 + 2.0 * prob;
  w->np[4] = 5.0;

  /* per-observation increments dn_i' of the desired positions */
  w->dnp[0] = 0.0;
  w->dnp[1] = 0.5 * prob;
  w->dnp[2] = prob;
  w->dnp[3] = 0.5 * (1.0 + prob);
  w->dnp[4] = 1.0;

  return GSL_SUCCESS;
}
/*
 * gsl_rstat_quantile_add()
 *   Feed one observation into the running quantile estimate.
 *
 * Inputs: x - new observation; NaN is rejected
 *         w - workspace
 *
 * Return: GSL_SUCCESS on success. If x is NaN (or no bracketing cell can
 *         be found for it) the GSL error handler is invoked with
 *         GSL_EINVAL and the observation is not counted.
 *
 * Notes: 1) The first five observations are only stored in w->q[]; they
 *           are sorted into the initial marker heights when the sixth
 *           observation arrives (w->n == 5 on entry).
 *
 *        2) NaN is rejected up front for every call. Previously it was
 *           detected only once five samples were stored, so a NaN among
 *           the first five was silently written into q[] and then fed
 *           to the sort that builds the initial marker heights.
 */
int
gsl_rstat_quantile_add(const double x, gsl_rstat_quantile_workspace *w)
{
  if (isnan(x))
    {
      /* checked before any state is modified */
      GSL_ERROR ("invalid input argument x", GSL_EINVAL);
    }

  if (w->n < 5)
    {
      /* still collecting the initial five heights */
      w->q[w->n] = x;
    }
  else
    {
      int i;
      int k = -1;

      if (w->n == 5)
        {
          /* initialization: sort the first five heights */
          gsl_sort(w->q, 1, w->n);
        }

      /* step B1: find k such that q_k <= x < q_{k+1} */
      if (x < w->q[0])
        {
          /* new minimum: lowest marker height follows the data */
          w->q[0] = x;
          k = 0;
        }
      else if (x >= w->q[4])
        {
          /* new maximum (or tie): highest marker height follows the data */
          w->q[4] = x;
          k = 3;
        }
      else
        {
          for (i = 0; i <= 3; ++i)
            {
              if (w->q[i] <= x && x < w->q[i + 1])
                {
                  k = i;
                  break;
                }
            }
        }

      if (k < 0)
        {
          /*
           * x is not NaN here, so no cell bracketing x was found among
           * the stored heights (presumably only possible if a height
           * itself is NaN); treat it as invalid input
           */
          GSL_ERROR ("invalid input argument x", GSL_EINVAL);
        }

      /* step B2(a): update n_i for all markers above the cell of x */
      for (i = k + 1; i <= 4; ++i)
        ++(w->npos[i]);

      /* step B2(b): update n_i' */
      for (i = 0; i < 5; ++i)
        w->np[i] += w->dnp[i];

      /* step B3: update heights of the three interior markers */
      for (i = 1; i <= 3; ++i)
        {
          double ni = (double) w->npos[i];
          double d = w->np[i] - ni;

          /*
           * move marker i by one position only if it is at least one
           * position away from its desired location and the neighbour
           * on that side is more than one position away
           */
          if ((d >= 1.0 && (w->npos[i + 1] - w->npos[i] > 1)) ||
              (d <= -1.0 && (w->npos[i - 1] - w->npos[i] < -1)))
            {
              int dsign = (d > 0.0) ? 1 : -1;
              double qp1 = w->q[i + 1];
              double qi = w->q[i];
              double qm1 = w->q[i - 1];
              double np1 = (double) w->npos[i + 1];
              double nm1 = (double) w->npos[i - 1];
              double qp = calc_psq(qp1, qi, qm1, (double) dsign,
                                   np1, ni, nm1);

              if (qm1 < qp && qp < qp1)
                {
                  /* prediction keeps the heights strictly ordered */
                  w->q[i] = qp;
                }
              else
                {
                  /* use linear formula */
                  w->q[i] += dsign * (w->q[i + dsign] - qi) / ((double) w->npos[i + dsign] - ni);
                }

              w->npos[i] += dsign;
            }
        }
    }

  ++(w->n);

  return GSL_SUCCESS;
} /* gsl_rstat_quantile_add() */
/*
 * gsl_rstat_quantile_get()
 *   Return the current estimate of the p-quantile.
 *
 * Inputs: w - workspace; when at most five observations are stored,
 *             w->q[0 .. n-1] is sorted in place as a side effect
 *
 * Return: the middle marker height q[2] once more than five
 *         observations have been added; otherwise the quantile computed
 *         directly from the (sorted) stored observations.
 *
 * Notes: gsl_rstat_quantile_add() sorts the first five heights only
 *        when the sixth observation arrives. With exactly five
 *        observations q[] therefore still holds the raw samples in
 *        insertion order, and q[2] is merely the third value added, so
 *        that case must take the sort-and-compute path (n > 5 here,
 *        not n >= 5).
 */
double
gsl_rstat_quantile_get(gsl_rstat_quantile_workspace *w)
{
  if (w->n > 5)
    {
      /* markers are initialized and maintained by add() */
      return w->q[2];
    }
  else
    {
      /* not yet initialized: work on the stored samples directly */
      gsl_sort(w->q, 1, w->n);
      return gsl_stats_quantile_from_sorted_data(w->q, 1, w->n, w->p);
    }
} /* gsl_rstat_quantile_get() */
/*
 * calc_psq()
 *   Evaluate the P^2 height-adjustment formula of reference [1] for one
 *   marker.
 *
 * Inputs: qp1 - height of the marker above
 *         q   - height of the marker being moved
 *         qm1 - height of the marker below
 *         d   - direction of the move (the caller passes +1 or -1)
 *         np1 - position of the marker above
 *         n   - position of the marker being moved
 *         nm1 - position of the marker below
 *
 * Return: predicted new height for the marker. The caller decides
 *         whether to accept it. No check is made for coincident
 *         positions, which would divide by zero.
 */
static double
calc_psq(const double qp1, const double q, const double qm1,
         const double d, const double np1, const double n, const double nm1)
{
  const double gap_above = np1 - n;
  const double gap_below = n - nm1;
  const double upper_term = (gap_below + d) * (qp1 - q) / gap_above;
  const double lower_term = (gap_above - d) * (q - qm1) / gap_below;
  const double scale = d / (np1 - nm1);

  return q + scale * (upper_term + lower_term);
} /* calc_psq() */