Blob Blame History Raw
/* filter/impulse.c
 *
 * Impulse detecting filters
 * 
 * Copyright (C) 2018 Patrick Alken
 * 
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or (at
 * your option) any later version.
 * 
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 * 
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 */
 
#include <config.h>
#include <stdlib.h>
#include <math.h>

#include <gsl/gsl_math.h>
#include <gsl/gsl_vector.h>
#include <gsl/gsl_filter.h>

static int filter_impulse(const double scale, const double epsilon, const double t, const gsl_vector * x, const gsl_vector * xmedian,
                          gsl_vector * y, gsl_vector * xsigma, size_t * noutlier, gsl_vector_int * ioutlier);
 
/*
gsl_filter_impulse_alloc()
  Allocate a workspace for impulse detection filtering.

Inputs: K - number of samples in window; if even, it is rounded up to
            the next odd, to have a symmetric window

Return: pointer to workspace
*/

gsl_filter_impulse_workspace *
gsl_filter_impulse_alloc(const size_t K)
{
  gsl_filter_impulse_workspace *w;

  w = calloc(1, sizeof(gsl_filter_impulse_workspace));
  if (w == 0)
    {
      GSL_ERROR_NULL ("failed to allocate space for workspace", GSL_ENOMEM);
    }

  w->movstat_workspace_p = gsl_movstat_alloc(K);
  if (w->movstat_workspace_p == 0)
    {
      gsl_filter_impulse_free(w);
      return NULL;
    }

  return w;
}

void
gsl_filter_impulse_free(gsl_filter_impulse_workspace * w)
{
  if (w->movstat_workspace_p)
    gsl_movstat_free(w->movstat_workspace_p);

  free(w);
}

/*
gsl_filter_impulse()
  Apply an impulse detection filter to an input vector. The filter output is

y_i = { x_i, |x_i - m_i| <= t * S_i
      { m_i, |x_i - m_i| > t * S_i

where m_i is the median of the window W_i^H and S_i is the scale estimate (MAD, IQR, S_n, Q_n)

Inputs: endtype    - how to handle signal end points
        scale_type - which statistic to use for scale estimate (MAD, IQR, etc)
        t          - number of standard deviations required to identity outliers (>= 0)
        x          - input vector, size n
        y          - (output) filtered vector, size n
        xmedian    - (output) vector of median values of x, size n
                     xmedian_i = median of window centered on x_i
        xsigma     - (output) vector of estimated local standard deviations of x, size n
                     xsigma_i = sigma for i-th window: scale*MAD
        noutlier   - (output) number of outliers detected
        ioutlier   - (output) boolean array indicating outliers identified, size n; may be NULL
                     ioutlier_i = 1 if outlier detected, 0 if not
        w          - workspace

Notes:
*/

int
gsl_filter_impulse(const gsl_filter_end_t endtype, const gsl_filter_scale_t scale_type, const double t,
                   const gsl_vector * x, gsl_vector * y, gsl_vector * xmedian, gsl_vector * xsigma, size_t * noutlier,
                   gsl_vector_int * ioutlier, gsl_filter_impulse_workspace * w)
{
  const size_t n = x->size;

  if (n != y->size)
    {
      GSL_ERROR("input and output vectors must have same length", GSL_EBADLEN);
    }
  else if (xmedian->size != n)
    {
      GSL_ERROR("xmedian vector must match input size", GSL_EBADLEN);
    }
  else if (xsigma->size != n)
    {
      GSL_ERROR("xsigma vector must match input size", GSL_EBADLEN);
    }
  else if ((ioutlier != NULL) && (ioutlier->size != n))
    {
      GSL_ERROR("ioutlier vector must match input size", GSL_EBADLEN);
    }
  else if (t < 0.0)
    {
      GSL_ERROR("t must be non-negative", GSL_EDOM);
    }
  else
    {
      int status;
      double scale = 1.0;

      switch (scale_type)
        {
          case GSL_FILTER_SCALE_MAD:
            {
              /* compute window medians and MADs */
              gsl_movstat_mad(endtype, x, xmedian, xsigma, w->movstat_workspace_p);

              break;
            }

          case GSL_FILTER_SCALE_IQR:
            {
              /* multiplication factor for IQR to estimate stddev for Gaussian signal */
              scale = 0.741301109252801;

              /* calculate the window medians */
              gsl_movstat_median(endtype, x, xmedian, w->movstat_workspace_p);
      
              /* calculate window IQRs */
              gsl_movstat_qqr(endtype, x, 0.25, xsigma, w->movstat_workspace_p);

              break;
            }

          case GSL_FILTER_SCALE_SN:
            {
              /* calculate the window medians */
              gsl_movstat_median(endtype, x, xmedian, w->movstat_workspace_p);
      
              /* calculate window S_n values */
              gsl_movstat_Sn(endtype, x, xsigma, w->movstat_workspace_p);

              break;
            }

          case GSL_FILTER_SCALE_QN:
            {
              /* calculate the window medians */
              gsl_movstat_median(endtype, x, xmedian, w->movstat_workspace_p);
      
              /* calculate window Q_n values */
              gsl_movstat_Qn(endtype, x, xsigma, w->movstat_workspace_p);

              break;
            }

          default:
            GSL_ERROR("unknown scale type", GSL_EDOM);
            break;
        }

      /* apply impulse detecting filter using previously computed scale estimate */
      status = filter_impulse(scale, 0.0, t, x, xmedian, y, xsigma, noutlier, ioutlier);

      return status;
    }
}

/*
filter_impulse()
  Apply an impulse detection filter to an input vector. The filter output is

y_i = { x_i, |x_i - m_i| <= t * S_i OR S_i < epsilon
      { m_i, |x_i - m_i| > t * S_i

where m_i is the median of the window W_i^H and S_i is the scale estimate (MAD, IQR, etc)

Inputs: scale    - scale factor to multiply xsigma to get unbiased estimate of stddev for Gaussian data
        epsilon  - minimum allowed scale estimate for identifying outliers
        t        - number of standard deviations required to identity outliers (>= 0)
        x        - input vector, size n
        xmedian  - vector of median values of x, size n
                   xmedian_i = median of window centered on x_i
        y        - (output) filtered vector, size n
        xsigma   - (output) vector of estimated local standard deviations of x, size n
                   xsigma_i = S_n for i-th window
        noutlier - (output) number of outliers detected
        ioutlier - (output) boolean array indicating outliers identified, size n; may be NULL
                   ioutlier_i = 1 if outlier detected, 0 if not

Notes:
1) If S_i = 0 or is very small for a particular sample, then the filter may erroneously flag the
sample as an outlier, since it will act as a standard median filter. To avoid this scenario, the
parameter epsilon specifies the minimum value of S_i which can be used in the filter test. Any
samples for which S_i < epsilon are passed through unchanged.
*/

static int
filter_impulse(const double scale, const double epsilon, const double t, const gsl_vector * x, const gsl_vector * xmedian,
               gsl_vector * y, gsl_vector * xsigma, size_t * noutlier, gsl_vector_int * ioutlier)
{
  const size_t n = x->size;

  if (n != y->size)
    {
      GSL_ERROR("input and output vectors must have same length", GSL_EBADLEN);
    }
  else if (xmedian->size != n)
    {
      GSL_ERROR("xmedian vector must match input size", GSL_EBADLEN);
    }
  else if (xsigma->size != n)
    {
      GSL_ERROR("xsigma vector must match input size", GSL_EBADLEN);
    }
  else if ((ioutlier != NULL) && (ioutlier->size != n))
    {
      GSL_ERROR("ioutlier vector must match input size", GSL_EBADLEN);
    }
  else if (t < 0.0)
    {
      GSL_ERROR("t must be non-negative", GSL_EDOM);
    }
  else
    {
      size_t i;

      *noutlier = 0;

      /* build output vector */
      for (i = 0; i < n; ++i)
        {
          double xi = gsl_vector_get(x, i);
          double xmedi = gsl_vector_get(xmedian, i);
          double absdevi = fabs(xi - xmedi); /* absolute deviation for this sample */
          double *xsigmai = gsl_vector_ptr(xsigma, i);

          /* multiply by scale factor to get estimate of standard deviation */
          *xsigmai *= scale;

          /*
           * If the absolute deviation for this sample is more than t stddevs
           * for this window (and S_i is sufficiently large to avoid scale implosion),
           * set the output value to the window median, otherwise use the original sample
           */
          if ((*xsigmai >= epsilon) && (absdevi > t * (*xsigmai)))
            {
              gsl_vector_set(y, i, xmedi);
              ++(*noutlier);
              if (ioutlier)
                gsl_vector_int_set(ioutlier, i, 1);
            }
          else
            {
              gsl_vector_set(y, i, xi);
              if (ioutlier)
                gsl_vector_int_set(ioutlier, i, 0);
            }
        }

      return GSL_SUCCESS;
    }
}