/* filter/impulse.c * * Impulse detecting filters * * Copyright (C) 2018 Patrick Alken * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 3 of the License, or (at * your option) any later version. * * This program is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ #include #include #include #include #include #include static int filter_impulse(const double scale, const double epsilon, const double t, const gsl_vector * x, const gsl_vector * xmedian, gsl_vector * y, gsl_vector * xsigma, size_t * noutlier, gsl_vector_int * ioutlier); /* gsl_filter_impulse_alloc() Allocate a workspace for impulse detection filtering. Inputs: K - number of samples in window; if even, it is rounded up to the next odd, to have a symmetric window Return: pointer to workspace */ gsl_filter_impulse_workspace * gsl_filter_impulse_alloc(const size_t K) { gsl_filter_impulse_workspace *w; w = calloc(1, sizeof(gsl_filter_impulse_workspace)); if (w == 0) { GSL_ERROR_NULL ("failed to allocate space for workspace", GSL_ENOMEM); } w->movstat_workspace_p = gsl_movstat_alloc(K); if (w->movstat_workspace_p == 0) { gsl_filter_impulse_free(w); return NULL; } return w; } void gsl_filter_impulse_free(gsl_filter_impulse_workspace * w) { if (w->movstat_workspace_p) gsl_movstat_free(w->movstat_workspace_p); free(w); } /* gsl_filter_impulse() Apply an impulse detection filter to an input vector. The filter output is y_i = { x_i, |x_i - m_i| <= t * S_i { m_i, |x_i - m_i| > t * S_i where m_i is the median of the window W_i^H and S_i is the scale estimate (MAD, IQR, S_n, Q_n) Inputs: endtype - how to handle signal end points scale_type - which statistic to use for scale estimate (MAD, IQR, etc) t - number of standard deviations required to identity outliers (>= 0) x - input vector, size n y - (output) filtered vector, size n xmedian - (output) vector of median values of x, size n xmedian_i = median of window centered on x_i xsigma - (output) vector of estimated local standard deviations of x, size n xsigma_i = sigma for i-th window: scale*MAD noutlier - (output) number of outliers detected ioutlier - (output) boolean array indicating outliers identified, size n; may be NULL ioutlier_i = 1 if outlier detected, 0 if not w - workspace Notes: */ int gsl_filter_impulse(const gsl_filter_end_t endtype, const gsl_filter_scale_t scale_type, const double t, const gsl_vector * x, gsl_vector * y, gsl_vector * xmedian, gsl_vector * xsigma, size_t * noutlier, gsl_vector_int * ioutlier, gsl_filter_impulse_workspace * w) { const size_t n = x->size; if (n != y->size) { GSL_ERROR("input and output vectors must have same length", GSL_EBADLEN); } else if (xmedian->size != n) { GSL_ERROR("xmedian vector must match input size", GSL_EBADLEN); } else if (xsigma->size != n) { GSL_ERROR("xsigma vector must match input size", GSL_EBADLEN); } else if ((ioutlier != NULL) && (ioutlier->size != n)) { GSL_ERROR("ioutlier vector must match input size", GSL_EBADLEN); } else if (t < 0.0) { GSL_ERROR("t must be non-negative", GSL_EDOM); } else { int status; double scale = 1.0; switch (scale_type) { case GSL_FILTER_SCALE_MAD: { /* compute window medians and MADs */ gsl_movstat_mad(endtype, x, xmedian, xsigma, w->movstat_workspace_p); break; } case GSL_FILTER_SCALE_IQR: { /* multiplication factor for IQR to estimate stddev for Gaussian signal */ scale = 0.741301109252801; /* calculate the window medians */ gsl_movstat_median(endtype, x, xmedian, w->movstat_workspace_p); /* calculate window IQRs */ gsl_movstat_qqr(endtype, x, 0.25, xsigma, w->movstat_workspace_p); break; } case GSL_FILTER_SCALE_SN: { /* calculate the window medians */ gsl_movstat_median(endtype, x, xmedian, w->movstat_workspace_p); /* calculate window S_n values */ gsl_movstat_Sn(endtype, x, xsigma, w->movstat_workspace_p); break; } case GSL_FILTER_SCALE_QN: { /* calculate the window medians */ gsl_movstat_median(endtype, x, xmedian, w->movstat_workspace_p); /* calculate window Q_n values */ gsl_movstat_Qn(endtype, x, xsigma, w->movstat_workspace_p); break; } default: GSL_ERROR("unknown scale type", GSL_EDOM); break; } /* apply impulse detecting filter using previously computed scale estimate */ status = filter_impulse(scale, 0.0, t, x, xmedian, y, xsigma, noutlier, ioutlier); return status; } } /* filter_impulse() Apply an impulse detection filter to an input vector. The filter output is y_i = { x_i, |x_i - m_i| <= t * S_i OR S_i < epsilon { m_i, |x_i - m_i| > t * S_i where m_i is the median of the window W_i^H and S_i is the scale estimate (MAD, IQR, etc) Inputs: scale - scale factor to multiply xsigma to get unbiased estimate of stddev for Gaussian data epsilon - minimum allowed scale estimate for identifying outliers t - number of standard deviations required to identity outliers (>= 0) x - input vector, size n xmedian - vector of median values of x, size n xmedian_i = median of window centered on x_i y - (output) filtered vector, size n xsigma - (output) vector of estimated local standard deviations of x, size n xsigma_i = S_n for i-th window noutlier - (output) number of outliers detected ioutlier - (output) boolean array indicating outliers identified, size n; may be NULL ioutlier_i = 1 if outlier detected, 0 if not Notes: 1) If S_i = 0 or is very small for a particular sample, then the filter may erroneously flag the sample as an outlier, since it will act as a standard median filter. To avoid this scenario, the parameter epsilon specifies the minimum value of S_i which can be used in the filter test. Any samples for which S_i < epsilon are passed through unchanged. */ static int filter_impulse(const double scale, const double epsilon, const double t, const gsl_vector * x, const gsl_vector * xmedian, gsl_vector * y, gsl_vector * xsigma, size_t * noutlier, gsl_vector_int * ioutlier) { const size_t n = x->size; if (n != y->size) { GSL_ERROR("input and output vectors must have same length", GSL_EBADLEN); } else if (xmedian->size != n) { GSL_ERROR("xmedian vector must match input size", GSL_EBADLEN); } else if (xsigma->size != n) { GSL_ERROR("xsigma vector must match input size", GSL_EBADLEN); } else if ((ioutlier != NULL) && (ioutlier->size != n)) { GSL_ERROR("ioutlier vector must match input size", GSL_EBADLEN); } else if (t < 0.0) { GSL_ERROR("t must be non-negative", GSL_EDOM); } else { size_t i; *noutlier = 0; /* build output vector */ for (i = 0; i < n; ++i) { double xi = gsl_vector_get(x, i); double xmedi = gsl_vector_get(xmedian, i); double absdevi = fabs(xi - xmedi); /* absolute deviation for this sample */ double *xsigmai = gsl_vector_ptr(xsigma, i); /* multiply by scale factor to get estimate of standard deviation */ *xsigmai *= scale; /* * If the absolute deviation for this sample is more than t stddevs * for this window (and S_i is sufficiently large to avoid scale implosion), * set the output value to the window median, otherwise use the original sample */ if ((*xsigmai >= epsilon) && (absdevi > t * (*xsigmai))) { gsl_vector_set(y, i, xmedi); ++(*noutlier); if (ioutlier) gsl_vector_int_set(ioutlier, i, 1); } else { gsl_vector_set(y, i, xi); if (ioutlier) gsl_vector_int_set(ioutlier, i, 0); } } return GSL_SUCCESS; } }