Blame multifit/multirobust.c

Packit 67cb25
/* multirobust.c
Packit 67cb25
 * 
Packit 67cb25
 * Copyright (C) 2013 Patrick Alken
Packit 67cb25
 * 
Packit 67cb25
 * This program is free software; you can redistribute it and/or modify
Packit 67cb25
 * it under the terms of the GNU General Public License as published by
Packit 67cb25
 * the Free Software Foundation; either version 3 of the License, or (at
Packit 67cb25
 * your option) any later version.
Packit 67cb25
 * 
Packit 67cb25
 * This program is distributed in the hope that it will be useful, but
Packit 67cb25
 * WITHOUT ANY WARRANTY; without even the implied warranty of
Packit 67cb25
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
Packit 67cb25
 * General Public License for more details.
Packit 67cb25
 * 
Packit 67cb25
 * You should have received a copy of the GNU General Public License
Packit 67cb25
 * along with this program; if not, write to the Free Software
Packit 67cb25
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
Packit 67cb25
 *
Packit 67cb25
 * This module contains routines related to robust linear least squares. The
Packit 67cb25
 * algorithm used closely follows the publications:
Packit 67cb25
 *
Packit 67cb25
 * [1] DuMouchel, W. and F. O'Brien (1989), "Integrating a robust
Packit 67cb25
 * option into a multiple regression computing environment,"
Packit 67cb25
 * Computer Science and Statistics:  Proceedings of the 21st
Packit 67cb25
 * Symposium on the Interface, American Statistical Association
Packit 67cb25
 *
Packit 67cb25
 * [2] Street, J.O., R.J. Carroll, and D. Ruppert (1988), "A note on
Packit 67cb25
 * computing robust regression estimates via iteratively
Packit 67cb25
 * reweighted least squares," The American Statistician, v. 42, 
Packit 67cb25
 * pp. 152-154.
Packit 67cb25
 */
Packit 67cb25
Packit 67cb25
#include <config.h>
Packit 67cb25
#include <gsl/gsl_math.h>
Packit 67cb25
#include <gsl/gsl_multifit.h>
Packit 67cb25
#include <gsl/gsl_vector.h>
Packit 67cb25
#include <gsl/gsl_matrix.h>
Packit 67cb25
#include <gsl/gsl_statistics.h>
Packit 67cb25
#include <gsl/gsl_sort.h>
Packit 67cb25
#include <gsl/gsl_sort_vector.h>
Packit 67cb25
#include <gsl/gsl_blas.h>
Packit 67cb25
#include <gsl/gsl_linalg.h>
Packit 67cb25
Packit 67cb25
static int robust_test_convergence(const gsl_vector *c_prev, const gsl_vector *c,
Packit 67cb25
                                   const double tol);
Packit 67cb25
static double robust_madsigma(const gsl_vector *r, const size_t p, gsl_vector *workn);
Packit 67cb25
static double robust_robsigma(const gsl_vector *r, const double s,
Packit 67cb25
                              const double tune, gsl_multifit_robust_workspace *w);
Packit 67cb25
static double robust_sigma(const double s_ols, const double s_rob,
Packit 67cb25
                           gsl_multifit_robust_workspace *w);
Packit 67cb25
static int robust_covariance(const double sigma, gsl_matrix *cov,
Packit 67cb25
                             gsl_multifit_robust_workspace *w);
Packit 67cb25
Packit 67cb25
/*
Packit 67cb25
gsl_multifit_robust_alloc
Packit 67cb25
  Allocate a robust workspace
Packit 67cb25
Packit 67cb25
Inputs: T - robust weighting algorithm
Packit 67cb25
        n - number of observations
Packit 67cb25
        p - number of model parameters
Packit 67cb25
Packit 67cb25
Return: pointer to workspace
Packit 67cb25
*/
Packit 67cb25
Packit 67cb25
gsl_multifit_robust_workspace *
Packit 67cb25
gsl_multifit_robust_alloc(const gsl_multifit_robust_type *T,
Packit 67cb25
                          const size_t n, const size_t p)
Packit 67cb25
{
Packit 67cb25
  gsl_multifit_robust_workspace *w;
Packit 67cb25
Packit 67cb25
  if (n < p)
Packit 67cb25
    {
Packit 67cb25
      GSL_ERROR_VAL("observations n must be >= p", GSL_EINVAL, 0);
Packit 67cb25
    }
Packit 67cb25
Packit 67cb25
  w = calloc(1, sizeof(gsl_multifit_robust_workspace));
Packit 67cb25
  if (w == 0)
Packit 67cb25
    {
Packit 67cb25
      GSL_ERROR_VAL("failed to allocate space for multifit_robust struct",
Packit 67cb25
                    GSL_ENOMEM, 0);
Packit 67cb25
    }
Packit 67cb25
Packit 67cb25
  w->n = n;
Packit 67cb25
  w->p = p;
Packit 67cb25
  w->type = T;
Packit 67cb25
  w->maxiter = 100; /* maximum iterations */
Packit 67cb25
  w->tune = w->type->tuning_default;
Packit 67cb25
Packit 67cb25
  w->multifit_p = gsl_multifit_linear_alloc(n, p);
Packit 67cb25
  if (w->multifit_p == 0)
Packit 67cb25
    {
Packit 67cb25
      gsl_multifit_robust_free(w);
Packit 67cb25
      GSL_ERROR_VAL("failed to allocate space for multifit_linear struct",
Packit 67cb25
                    GSL_ENOMEM, 0);
Packit 67cb25
    }
Packit 67cb25
Packit 67cb25
  w->r = gsl_vector_alloc(n);
Packit 67cb25
  if (w->r == 0)
Packit 67cb25
    {
Packit 67cb25
      gsl_multifit_robust_free(w);
Packit 67cb25
      GSL_ERROR_VAL("failed to allocate space for residuals",
Packit 67cb25
                    GSL_ENOMEM, 0);
Packit 67cb25
    }
Packit 67cb25
Packit 67cb25
  w->weights = gsl_vector_alloc(n);
Packit 67cb25
  if (w->weights == 0)
Packit 67cb25
    {
Packit 67cb25
      gsl_multifit_robust_free(w);
Packit 67cb25
      GSL_ERROR_VAL("failed to allocate space for weights", GSL_ENOMEM, 0);
Packit 67cb25
    }
Packit 67cb25
Packit 67cb25
  w->c_prev = gsl_vector_alloc(p);
Packit 67cb25
  if (w->c_prev == 0)
Packit 67cb25
    {
Packit 67cb25
      gsl_multifit_robust_free(w);
Packit 67cb25
      GSL_ERROR_VAL("failed to allocate space for c_prev", GSL_ENOMEM, 0);
Packit 67cb25
    }
Packit 67cb25
Packit 67cb25
  w->resfac = gsl_vector_alloc(n);
Packit 67cb25
  if (w->resfac == 0)
Packit 67cb25
    {
Packit 67cb25
      gsl_multifit_robust_free(w);
Packit 67cb25
      GSL_ERROR_VAL("failed to allocate space for residual factors",
Packit 67cb25
                    GSL_ENOMEM, 0);
Packit 67cb25
    }
Packit 67cb25
Packit 67cb25
  w->psi = gsl_vector_alloc(n);
Packit 67cb25
  if (w->psi == 0)
Packit 67cb25
    {
Packit 67cb25
      gsl_multifit_robust_free(w);
Packit 67cb25
      GSL_ERROR_VAL("failed to allocate space for psi", GSL_ENOMEM, 0);
Packit 67cb25
    }
Packit 67cb25
Packit 67cb25
  w->dpsi = gsl_vector_alloc(n);
Packit 67cb25
  if (w->dpsi == 0)
Packit 67cb25
    {
Packit 67cb25
      gsl_multifit_robust_free(w);
Packit 67cb25
      GSL_ERROR_VAL("failed to allocate space for dpsi", GSL_ENOMEM, 0);
Packit 67cb25
    }
Packit 67cb25
Packit 67cb25
  w->QSI = gsl_matrix_alloc(p, p);
Packit 67cb25
  if (w->QSI == 0)
Packit 67cb25
    {
Packit 67cb25
      gsl_multifit_robust_free(w);
Packit 67cb25
      GSL_ERROR_VAL("failed to allocate space for QSI", GSL_ENOMEM, 0);
Packit 67cb25
    }
Packit 67cb25
Packit 67cb25
  w->D = gsl_vector_alloc(p);
Packit 67cb25
  if (w->D == 0)
Packit 67cb25
    {
Packit 67cb25
      gsl_multifit_robust_free(w);
Packit 67cb25
      GSL_ERROR_VAL("failed to allocate space for D", GSL_ENOMEM, 0);
Packit 67cb25
    }
Packit 67cb25
Packit 67cb25
  w->workn = gsl_vector_alloc(n);
Packit 67cb25
  if (w->workn == 0)
Packit 67cb25
    {
Packit 67cb25
      gsl_multifit_robust_free(w);
Packit 67cb25
      GSL_ERROR_VAL("failed to allocate space for workn", GSL_ENOMEM, 0);
Packit 67cb25
    }
Packit 67cb25
Packit 67cb25
  w->stats.sigma_ols = 0.0;
Packit 67cb25
  w->stats.sigma_mad = 0.0;
Packit 67cb25
  w->stats.sigma_rob = 0.0;
Packit 67cb25
  w->stats.sigma = 0.0;
Packit 67cb25
  w->stats.Rsq = 0.0;
Packit 67cb25
  w->stats.adj_Rsq = 0.0;
Packit 67cb25
  w->stats.rmse = 0.0;
Packit 67cb25
  w->stats.sse = 0.0;
Packit 67cb25
  w->stats.dof = n - p;
Packit 67cb25
  w->stats.weights = w->weights;
Packit 67cb25
  w->stats.r = w->r;
Packit 67cb25
Packit 67cb25
  return w;
Packit 67cb25
} /* gsl_multifit_robust_alloc() */
Packit 67cb25
Packit 67cb25
/*
Packit 67cb25
gsl_multifit_robust_free()
Packit 67cb25
  Free memory associated with robust workspace
Packit 67cb25
*/
Packit 67cb25
Packit 67cb25
void
Packit 67cb25
gsl_multifit_robust_free(gsl_multifit_robust_workspace *w)
Packit 67cb25
{
Packit 67cb25
  if (w->multifit_p)
Packit 67cb25
    gsl_multifit_linear_free(w->multifit_p);
Packit 67cb25
Packit 67cb25
  if (w->r)
Packit 67cb25
    gsl_vector_free(w->r);
Packit 67cb25
Packit 67cb25
  if (w->weights)
Packit 67cb25
    gsl_vector_free(w->weights);
Packit 67cb25
Packit 67cb25
  if (w->c_prev)
Packit 67cb25
    gsl_vector_free(w->c_prev);
Packit 67cb25
Packit 67cb25
  if (w->resfac)
Packit 67cb25
    gsl_vector_free(w->resfac);
Packit 67cb25
Packit 67cb25
  if (w->psi)
Packit 67cb25
    gsl_vector_free(w->psi);
Packit 67cb25
Packit 67cb25
  if (w->dpsi)
Packit 67cb25
    gsl_vector_free(w->dpsi);
Packit 67cb25
Packit 67cb25
  if (w->QSI)
Packit 67cb25
    gsl_matrix_free(w->QSI);
Packit 67cb25
Packit 67cb25
  if (w->D)
Packit 67cb25
    gsl_vector_free(w->D);
Packit 67cb25
Packit 67cb25
  if (w->workn)
Packit 67cb25
    gsl_vector_free(w->workn);
Packit 67cb25
Packit 67cb25
  free(w);
Packit 67cb25
} /* gsl_multifit_robust_free() */
Packit 67cb25
Packit 67cb25
int
Packit 67cb25
gsl_multifit_robust_tune(const double tune, gsl_multifit_robust_workspace *w)
Packit 67cb25
{
Packit 67cb25
  w->tune = tune;
Packit 67cb25
  return GSL_SUCCESS;
Packit 67cb25
}
Packit 67cb25
Packit 67cb25
int
Packit 67cb25
gsl_multifit_robust_maxiter(const size_t maxiter,
Packit 67cb25
                            gsl_multifit_robust_workspace *w)
Packit 67cb25
{
Packit 67cb25
  if (w->maxiter == 0)
Packit 67cb25
    {
Packit 67cb25
      GSL_ERROR("maxiter must be greater than 0", GSL_EINVAL);
Packit 67cb25
    }
Packit 67cb25
  else
Packit 67cb25
    {
Packit 67cb25
      w->maxiter = maxiter;
Packit 67cb25
      return GSL_SUCCESS;
Packit 67cb25
    }
Packit 67cb25
}
Packit 67cb25
Packit 67cb25
const char *
Packit 67cb25
gsl_multifit_robust_name(const gsl_multifit_robust_workspace *w)
Packit 67cb25
{
Packit 67cb25
  return w->type->name;
Packit 67cb25
}
Packit 67cb25
Packit 67cb25
gsl_multifit_robust_stats
Packit 67cb25
gsl_multifit_robust_statistics(const gsl_multifit_robust_workspace *w)
Packit 67cb25
{
Packit 67cb25
  return w->stats;
Packit 67cb25
}
Packit 67cb25
Packit 67cb25
/*
Packit 67cb25
gsl_multifit_robust_weights()
Packit 67cb25
  Compute iterative weights for given residuals
Packit 67cb25
Packit 67cb25
Inputs: r   - residuals
Packit 67cb25
        wts - (output) where to store weights
Packit 67cb25
              w_i = r_i / (sigma_mad * tune)
Packit 67cb25
        w   - workspace
Packit 67cb25
Packit 67cb25
Return: success/error
Packit 67cb25
Packit 67cb25
Notes:
Packit 67cb25
1) Sizes of r and wts must be equal
Packit 67cb25
2) Size of r/wts may be less than or equal to w->n, to allow
Packit 67cb25
for computing weights of a subset of data
Packit 67cb25
*/
Packit 67cb25
Packit 67cb25
int
Packit 67cb25
gsl_multifit_robust_weights(const gsl_vector *r, gsl_vector *wts,
Packit 67cb25
                            gsl_multifit_robust_workspace *w)
Packit 67cb25
{
Packit 67cb25
  if (r->size != wts->size)
Packit 67cb25
    {
Packit 67cb25
      GSL_ERROR("residual vector does not match weight vector size", GSL_EBADLEN);
Packit 67cb25
    }
Packit 67cb25
  else
Packit 67cb25
    {
Packit 67cb25
      int s;
Packit 67cb25
      double sigma;
Packit 67cb25
Packit 67cb25
      sigma = robust_madsigma(r, w->p, wts);
Packit 67cb25
Packit 67cb25
      /* scale residuals by sigma and tuning factor */
Packit 67cb25
      gsl_vector_memcpy(wts, r);
Packit 67cb25
      gsl_vector_scale(wts, 1.0 / (sigma * w->tune));
Packit 67cb25
Packit 67cb25
      /* compute weights in-place */
Packit 67cb25
      s = w->type->wfun(wts, wts);
Packit 67cb25
Packit 67cb25
      return s;
Packit 67cb25
    }
Packit 67cb25
} /* gsl_multifit_robust_weights() */
Packit 67cb25
Packit 67cb25
/*
Packit 67cb25
gsl_multifit_robust()
Packit 67cb25
  Perform robust iteratively reweighted linear least squares
Packit 67cb25
fit
Packit 67cb25
Packit 67cb25
Inputs: X     - design matrix of basis functions
Packit 67cb25
        y     - right hand side vector
Packit 67cb25
        c     - (output) model coefficients
Packit 67cb25
        cov   - (output) covariance matrix
Packit 67cb25
        w     - workspace
Packit 67cb25
*/
Packit 67cb25
Packit 67cb25
int
Packit 67cb25
gsl_multifit_robust(const gsl_matrix * X,
Packit 67cb25
                    const gsl_vector * y,
Packit 67cb25
                    gsl_vector * c,
Packit 67cb25
                    gsl_matrix * cov,
Packit 67cb25
                    gsl_multifit_robust_workspace *w)
Packit 67cb25
{
Packit 67cb25
  /* check matrix and vector sizes */
Packit 67cb25
  if (X->size1 != y->size)
Packit 67cb25
    {
Packit 67cb25
      GSL_ERROR
Packit 67cb25
        ("number of observations in y does not match rows of matrix X",
Packit 67cb25
         GSL_EBADLEN);
Packit 67cb25
    }
Packit 67cb25
  else if (X->size2 != c->size)
Packit 67cb25
    {
Packit 67cb25
      GSL_ERROR ("number of parameters c does not match columns of matrix X",
Packit 67cb25
                 GSL_EBADLEN);
Packit 67cb25
    }
Packit 67cb25
  else if (cov->size1 != cov->size2)
Packit 67cb25
    {   
Packit 67cb25
      GSL_ERROR ("covariance matrix is not square", GSL_ENOTSQR);
Packit 67cb25
    }   
Packit 67cb25
  else if (c->size != cov->size1)
Packit 67cb25
    {   
Packit 67cb25
      GSL_ERROR
Packit 67cb25
        ("number of parameters does not match size of covariance matrix",
Packit 67cb25
         GSL_EBADLEN);
Packit 67cb25
    }
Packit 67cb25
  else if (X->size1 != w->n || X->size2 != w->p)
Packit 67cb25
    {
Packit 67cb25
      GSL_ERROR
Packit 67cb25
        ("size of workspace does not match size of observation matrix",
Packit 67cb25
         GSL_EBADLEN);
Packit 67cb25
    }
Packit 67cb25
  else
Packit 67cb25
    {
Packit 67cb25
      int s;
Packit 67cb25
      double chisq;
Packit 67cb25
      const double tol = GSL_SQRT_DBL_EPSILON;
Packit 67cb25
      int converged = 0;
Packit 67cb25
      size_t numit = 0;
Packit 67cb25
      const size_t n = y->size;
Packit 67cb25
      double sigy = gsl_stats_sd(y->data, y->stride, n);
Packit 67cb25
      double sig_lower;
Packit 67cb25
      size_t i;
Packit 67cb25
Packit 67cb25
      /*
Packit 67cb25
       * if the initial fit is very good, then finding outliers by comparing
Packit 67cb25
       * them to the residual standard deviation is difficult. Therefore we
Packit 67cb25
       * set a lower bound on the standard deviation estimate that is a small
Packit 67cb25
       * fraction of the standard deviation of the data values
Packit 67cb25
       */
Packit 67cb25
      sig_lower = 1.0e-6 * sigy;
Packit 67cb25
      if (sig_lower == 0.0)
Packit 67cb25
        sig_lower = 1.0;
Packit 67cb25
Packit 67cb25
      /* compute initial estimates using ordinary least squares */
Packit 67cb25
      s = gsl_multifit_linear(X, y, c, cov, &chisq, w->multifit_p);
Packit 67cb25
      if (s)
Packit 67cb25
        return s;
Packit 67cb25
Packit 67cb25
      /* save Q S^{-1} of original matrix */
Packit 67cb25
      gsl_matrix_memcpy(w->QSI, w->multifit_p->QSI);
Packit 67cb25
      gsl_vector_memcpy(w->D, w->multifit_p->D);
Packit 67cb25
Packit 67cb25
      /* compute statistical leverage of each data point */
Packit 67cb25
      s = gsl_linalg_SV_leverage(w->multifit_p->A, w->resfac);
Packit 67cb25
      if (s)
Packit 67cb25
        return s;
Packit 67cb25
Packit 67cb25
      /* correct residuals with factor 1 / sqrt(1 - h) */
Packit 67cb25
      for (i = 0; i < n; ++i)
Packit 67cb25
        {
Packit 67cb25
          double h = gsl_vector_get(w->resfac, i);
Packit 67cb25
Packit 67cb25
          if (h > 0.9999)
Packit 67cb25
            h = 0.9999;
Packit 67cb25
Packit 67cb25
          gsl_vector_set(w->resfac, i, 1.0 / sqrt(1.0 - h));
Packit 67cb25
        }
Packit 67cb25
Packit 67cb25
      /* compute residuals from OLS fit r = y - X c */
Packit 67cb25
      s = gsl_multifit_linear_residuals(X, y, c, w->r);
Packit 67cb25
      if (s)
Packit 67cb25
        return s;
Packit 67cb25
Packit 67cb25
      /* compute estimate of sigma from ordinary least squares */
Packit 67cb25
      w->stats.sigma_ols = gsl_blas_dnrm2(w->r) / sqrt((double) w->stats.dof);
Packit 67cb25
Packit 67cb25
      while (!converged && ++numit <= w->maxiter)
Packit 67cb25
        {
Packit 67cb25
          double sig;
Packit 67cb25
Packit 67cb25
          /* adjust residuals by statistical leverage (see DuMouchel and O'Brien) */
Packit 67cb25
          s = gsl_vector_mul(w->r, w->resfac);
Packit 67cb25
          if (s)
Packit 67cb25
            return s;
Packit 67cb25
Packit 67cb25
          /* compute estimate of standard deviation using MAD */
Packit 67cb25
          sig = robust_madsigma(w->r, w->p, w->workn);
Packit 67cb25
Packit 67cb25
          /* scale residuals by standard deviation and tuning parameter */
Packit 67cb25
          gsl_vector_scale(w->r, 1.0 / (GSL_MAX(sig, sig_lower) * w->tune));
Packit 67cb25
Packit 67cb25
          /* compute weights using these residuals */
Packit 67cb25
          s = w->type->wfun(w->r, w->weights);
Packit 67cb25
          if (s)
Packit 67cb25
            return s;
Packit 67cb25
Packit 67cb25
          gsl_vector_memcpy(w->c_prev, c);
Packit 67cb25
Packit 67cb25
          /* solve weighted least squares with new weights */
Packit 67cb25
          s = gsl_multifit_wlinear(X, w->weights, y, c, cov, &chisq, w->multifit_p);
Packit 67cb25
          if (s)
Packit 67cb25
            return s;
Packit 67cb25
Packit 67cb25
          /* compute new residuals r = y - X c */
Packit 67cb25
          s = gsl_multifit_linear_residuals(X, y, c, w->r);
Packit 67cb25
          if (s)
Packit 67cb25
            return s;
Packit 67cb25
Packit 67cb25
          converged = robust_test_convergence(w->c_prev, c, tol);
Packit 67cb25
        }
Packit 67cb25
Packit 67cb25
      /* compute final MAD sigma */
Packit 67cb25
      w->stats.sigma_mad = robust_madsigma(w->r, w->p, w->workn);
Packit 67cb25
Packit 67cb25
      /* compute robust estimate of sigma */
Packit 67cb25
      w->stats.sigma_rob = robust_robsigma(w->r, w->stats.sigma_mad, w->tune, w);
Packit 67cb25
Packit 67cb25
      /* compute final estimate of sigma */
Packit 67cb25
      w->stats.sigma = robust_sigma(w->stats.sigma_ols, w->stats.sigma_rob, w);
Packit 67cb25
Packit 67cb25
      /* store number of iterations */
Packit 67cb25
      w->stats.numit = numit;
Packit 67cb25
Packit 67cb25
      {
Packit 67cb25
        double dof = (double) w->stats.dof;
Packit 67cb25
        double rnorm = w->stats.sigma * sqrt(dof); /* see DuMouchel, sec 4.2 */
Packit 67cb25
        double ss_err = rnorm * rnorm;
Packit 67cb25
        double ss_tot = gsl_stats_tss(y->data, y->stride, n);
Packit 67cb25
Packit 67cb25
        /* compute R^2 */
Packit 67cb25
        w->stats.Rsq = 1.0 - ss_err / ss_tot;
Packit 67cb25
Packit 67cb25
        /* compute adjusted R^2 */
Packit 67cb25
        w->stats.adj_Rsq = 1.0 - (1.0 - w->stats.Rsq) * ((double)n - 1.0) / dof;
Packit 67cb25
Packit 67cb25
        /* compute rmse */
Packit 67cb25
        w->stats.rmse = sqrt(ss_err / dof);
Packit 67cb25
Packit 67cb25
        /* store SSE */
Packit 67cb25
        w->stats.sse = ss_err;
Packit 67cb25
      }
Packit 67cb25
Packit 67cb25
      /* calculate covariance matrix = sigma^2 (X^T X)^{-1} */
Packit 67cb25
      s = robust_covariance(w->stats.sigma, cov, w);
Packit 67cb25
      if (s)
Packit 67cb25
        return s;
Packit 67cb25
Packit 67cb25
      /* raise an error if not converged */
Packit 67cb25
      if (numit > w->maxiter)
Packit 67cb25
        {
Packit 67cb25
          GSL_ERROR("maximum iterations exceeded", GSL_EMAXITER);
Packit 67cb25
        }
Packit 67cb25
Packit 67cb25
      return s;
Packit 67cb25
    }
Packit 67cb25
} /* gsl_multifit_robust() */
Packit 67cb25
Packit 67cb25
/* Estimation of values for given x */
Packit 67cb25
int
Packit 67cb25
gsl_multifit_robust_est(const gsl_vector * x, const gsl_vector * c,
Packit 67cb25
                        const gsl_matrix * cov, double *y, double *y_err)
Packit 67cb25
{
Packit 67cb25
  int s = gsl_multifit_linear_est(x, c, cov, y, y_err);
Packit 67cb25
Packit 67cb25
  return s;
Packit 67cb25
}
Packit 67cb25
Packit 67cb25
/*
Packit 67cb25
gsl_multifit_robust_residuals()
Packit 67cb25
  Compute robust / studentized residuals from fit
Packit 67cb25
Packit 67cb25
r_i = (y_i - Y_i) / (sigma * sqrt(1 - h_i))
Packit 67cb25
Packit 67cb25
Inputs: X - design matrix
Packit 67cb25
        y - rhs vector
Packit 67cb25
        c - fit coefficients
Packit 67cb25
        r - (output) studentized residuals
Packit 67cb25
        w - workspace
Packit 67cb25
Packit 67cb25
Notes:
Packit 67cb25
1) gsl_multifit_robust() must first be called to compute the coefficients
Packit 67cb25
c, the leverage factors in w->resfac, and sigma in w->stats.sigma
Packit 67cb25
*/
Packit 67cb25
Packit 67cb25
int
Packit 67cb25
gsl_multifit_robust_residuals(const gsl_matrix * X, const gsl_vector * y,
Packit 67cb25
                              const gsl_vector * c, gsl_vector * r,
Packit 67cb25
                              gsl_multifit_robust_workspace * w)
Packit 67cb25
{
Packit 67cb25
  if (X->size1 != y->size)
Packit 67cb25
    {
Packit 67cb25
      GSL_ERROR
Packit 67cb25
        ("number of observations in y does not match rows of matrix X",
Packit 67cb25
         GSL_EBADLEN);
Packit 67cb25
    }
Packit 67cb25
  else if (X->size2 != c->size)
Packit 67cb25
    {
Packit 67cb25
      GSL_ERROR ("number of parameters c does not match columns of matrix X",
Packit 67cb25
                 GSL_EBADLEN);
Packit 67cb25
    }
Packit 67cb25
  else if (y->size != r->size)
Packit 67cb25
    {
Packit 67cb25
      GSL_ERROR ("number of observations in y does not match number of residuals",
Packit 67cb25
                 GSL_EBADLEN);
Packit 67cb25
    }
Packit 67cb25
  else
Packit 67cb25
    {
Packit 67cb25
      const double sigma = w->stats.sigma; /* previously calculated sigma */
Packit 67cb25
      int s;
Packit 67cb25
      size_t i;
Packit 67cb25
Packit 67cb25
      /* compute r = y - X c */
Packit 67cb25
      s = gsl_multifit_linear_residuals(X, y, c, r);
Packit 67cb25
      if (s)
Packit 67cb25
        return s;
Packit 67cb25
Packit 67cb25
      for (i = 0; i < r->size; ++i)
Packit 67cb25
        {
Packit 67cb25
          double hfac = gsl_vector_get(w->resfac, i); /* 1/sqrt(1 - h_i) */
Packit 67cb25
          double *ri = gsl_vector_ptr(r, i);
Packit 67cb25
Packit 67cb25
          /* multiply residual by 1 / (sigma * sqrt(1 - h_i)) */
Packit 67cb25
          *ri *= hfac / sigma;
Packit 67cb25
        }
Packit 67cb25
Packit 67cb25
      return s;
Packit 67cb25
    }
Packit 67cb25
} /* gsl_multifit_robust_residuals() */
Packit 67cb25
Packit 67cb25
/***********************************
Packit 67cb25
 * INTERNAL ROUTINES               *
Packit 67cb25
 ***********************************/
Packit 67cb25
Packit 67cb25
/*
Packit 67cb25
robust_test_convergence()
Packit 67cb25
  Test for convergence in robust least squares
Packit 67cb25
Packit 67cb25
Convergence criteria:
Packit 67cb25
Packit 67cb25
|c_i^(k) - c_i^(k-1)| <= tol * max(|c_i^(k)|, |c_i^(k-1)|)
Packit 67cb25
Packit 67cb25
for all i. k refers to iteration number.
Packit 67cb25
Packit 67cb25
Inputs: c_prev - coefficients from previous iteration
Packit 67cb25
        c      - coefficients from current iteration
Packit 67cb25
        tol    - tolerance
Packit 67cb25
Packit 67cb25
Return: 1 if converged, 0 if not
Packit 67cb25
*/
Packit 67cb25
Packit 67cb25
static int
Packit 67cb25
robust_test_convergence(const gsl_vector *c_prev, const gsl_vector *c,
Packit 67cb25
                        const double tol)
Packit 67cb25
{
Packit 67cb25
  size_t p = c->size;
Packit 67cb25
  size_t i;
Packit 67cb25
Packit 67cb25
  for (i = 0; i < p; ++i)
Packit 67cb25
    {
Packit 67cb25
      double ai = gsl_vector_get(c_prev, i);
Packit 67cb25
      double bi = gsl_vector_get(c, i);
Packit 67cb25
Packit 67cb25
      if (fabs(bi - ai) > tol * GSL_MAX(fabs(ai), fabs(bi)))
Packit 67cb25
        return 0; /* not yet converged */
Packit 67cb25
    }
Packit 67cb25
Packit 67cb25
  /* converged */
Packit 67cb25
  return 1;
Packit 67cb25
} /* robust_test_convergence() */
Packit 67cb25
Packit 67cb25
/*
Packit 67cb25
robust_madsigma()
Packit 67cb25
  Estimate the standard deviation of the residuals using
Packit 67cb25
the Median-Absolute-Deviation (MAD) of the residuals,
Packit 67cb25
throwing away the smallest p residuals.
Packit 67cb25
Packit 67cb25
See: Street et al, 1988
Packit 67cb25
Packit 67cb25
Inputs: r     - vector of residuals
Packit 67cb25
        p     - number of model coefficients (smallest p residuals are
Packit 67cb25
                ignored)
Packit 67cb25
        workn - workspace of size n = length(r)
Packit 67cb25
*/
Packit 67cb25
Packit 67cb25
static double
Packit 67cb25
robust_madsigma(const gsl_vector *r, const size_t p, gsl_vector *workn)
Packit 67cb25
{
Packit 67cb25
  size_t n = r->size;
Packit 67cb25
  double sigma;
Packit 67cb25
  size_t i;
Packit 67cb25
Packit 67cb25
  /* allow for the possibility that r->size < w->n */
Packit 67cb25
  gsl_vector_view v1 = gsl_vector_subvector(workn, 0, n);
Packit 67cb25
  gsl_vector_view v2;
Packit 67cb25
Packit 67cb25
  /* copy |r| into workn */
Packit 67cb25
  for (i = 0; i < n; ++i)
Packit 67cb25
    {
Packit 67cb25
      gsl_vector_set(&v1.vector, i, fabs(gsl_vector_get(r, i)));
Packit 67cb25
    }
Packit 67cb25
Packit 67cb25
  gsl_sort_vector(&v1.vector);
Packit 67cb25
Packit 67cb25
  /*
Packit 67cb25
   * ignore the smallest p residuals when computing the median
Packit 67cb25
   * (see Street et al 1988)
Packit 67cb25
   */
Packit 67cb25
  v2 = gsl_vector_subvector(&v1.vector, p - 1, n - p + 1);
Packit 67cb25
  sigma = gsl_stats_median_from_sorted_data(v2.vector.data, v2.vector.stride, v2.vector.size) / 0.6745;
Packit 67cb25
Packit 67cb25
  return sigma;
Packit 67cb25
} /* robust_madsigma() */
Packit 67cb25
Packit 67cb25
/*
Packit 67cb25
robust_robsigma()
Packit 67cb25
  Compute robust estimate of sigma so that
Packit 67cb25
sigma^2 * inv(X' * X) is a reasonable estimate of
Packit 67cb25
the covariance for robust regression. Based heavily
Packit 67cb25
on the equations of Street et al, 1988.
Packit 67cb25
Packit 67cb25
Inputs: r    - vector of residuals y - X c
Packit 67cb25
        s    - sigma estimate using MAD
Packit 67cb25
        tune - tuning constant
Packit 67cb25
        w    - workspace
Packit 67cb25
*/
Packit 67cb25
Packit 67cb25
static double
Packit 67cb25
robust_robsigma(const gsl_vector *r, const double s,
Packit 67cb25
                const double tune, gsl_multifit_robust_workspace *w)
Packit 67cb25
{
Packit 67cb25
  double sigma;
Packit 67cb25
  size_t i;
Packit 67cb25
  const size_t n = w->n;
Packit 67cb25
  const size_t p = w->p;
Packit 67cb25
  const double st = s * tune;
Packit 67cb25
  double a, b, lambda;
Packit 67cb25
Packit 67cb25
  /* compute u = r / sqrt(1 - h) / st */
Packit 67cb25
  gsl_vector_memcpy(w->workn, r);
Packit 67cb25
  gsl_vector_mul(w->workn, w->resfac);
Packit 67cb25
  gsl_vector_scale(w->workn, 1.0 / st);
Packit 67cb25
Packit 67cb25
  /* compute w(u) and psi'(u) */
Packit 67cb25
  w->type->wfun(w->workn, w->psi);
Packit 67cb25
  w->type->psi_deriv(w->workn, w->dpsi);
Packit 67cb25
Packit 67cb25
  /* compute psi(u) = u*w(u) */
Packit 67cb25
  gsl_vector_mul(w->psi, w->workn);
Packit 67cb25
Packit 67cb25
  /* Street et al, Eq (3) */
Packit 67cb25
  a = gsl_stats_mean(w->dpsi->data, w->dpsi->stride, n);
Packit 67cb25
Packit 67cb25
  /* Street et al, Eq (5) */
Packit 67cb25
  b = 0.0;
Packit 67cb25
  for (i = 0; i < n; ++i)
Packit 67cb25
    {
Packit 67cb25
      double psi_i = gsl_vector_get(w->psi, i);
Packit 67cb25
      double resfac = gsl_vector_get(w->resfac, i);
Packit 67cb25
      double fac = 1.0 / (resfac*resfac); /* 1 - h */
Packit 67cb25
Packit 67cb25
      b += fac * psi_i * psi_i;
Packit 67cb25
    }
Packit 67cb25
  b /= (double) (n - p);
Packit 67cb25
Packit 67cb25
  /* Street et al, Eq (5) */
Packit 67cb25
  lambda = 1.0 + ((double)p)/((double)n) * (1.0 - a) / a;
Packit 67cb25
Packit 67cb25
  sigma = lambda * sqrt(b) * st / a;
Packit 67cb25
Packit 67cb25
  return sigma;
Packit 67cb25
} /* robust_robsigma() */
Packit 67cb25
Packit 67cb25
/*
Packit 67cb25
robust_sigma()
Packit 67cb25
  Compute final estimate of residual standard deviation, using
Packit 67cb25
the OLS and robust sigma estimates.
Packit 67cb25
Packit 67cb25
This equation is taken from DuMouchel and O'Brien, sec 4.1:
Packit 67cb25
\hat{\sigma_R}
Packit 67cb25
Packit 67cb25
Inputs: s_ols - OLS sigma
Packit 67cb25
        s_rob - robust sigma
Packit 67cb25
        w     - workspace
Packit 67cb25
Packit 67cb25
Return: final estimate of sigma
Packit 67cb25
*/
Packit 67cb25
Packit 67cb25
static double
Packit 67cb25
robust_sigma(const double s_ols, const double s_rob,
Packit 67cb25
             gsl_multifit_robust_workspace *w)
Packit 67cb25
{
Packit 67cb25
  double sigma;
Packit 67cb25
  const double p = (double) w->p;
Packit 67cb25
  const double n = (double) w->n;
Packit 67cb25
Packit 67cb25
  /* see DuMouchel and O'Brien, sec 4.1 */
Packit 67cb25
  sigma = GSL_MAX(s_rob,
Packit 67cb25
                  sqrt((s_ols*s_ols*p*p + s_rob*s_rob*n) /
Packit 67cb25
                       (p*p + n)));
Packit 67cb25
Packit 67cb25
  return sigma;
Packit 67cb25
} /* robust_sigma() */
Packit 67cb25
Packit 67cb25
/*
Packit 67cb25
robust_covariance()
Packit 67cb25
  Calculate final covariance matrix, defined as:
Packit 67cb25
Packit 67cb25
  sigma * (X^T X)^{-1}
Packit 67cb25
Packit 67cb25
Inputs: sigma - residual standard deviation
Packit 67cb25
        cov   - (output) covariance matrix
Packit 67cb25
        w     - workspace
Packit 67cb25
*/
Packit 67cb25
Packit 67cb25
static int
Packit 67cb25
robust_covariance(const double sigma, gsl_matrix *cov,
Packit 67cb25
                  gsl_multifit_robust_workspace *w)
Packit 67cb25
{
Packit 67cb25
  int status = 0;
Packit 67cb25
  const size_t p = w->p;
Packit 67cb25
  const double s2 = sigma * sigma;
Packit 67cb25
  size_t i, j;
Packit 67cb25
  gsl_matrix *QSI = w->QSI;
Packit 67cb25
  gsl_vector *D = w->D;
Packit 67cb25
Packit 67cb25
  /* Form variance-covariance matrix cov = s2 * (Q S^-1) (Q S^-1)^T */
Packit 67cb25
Packit 67cb25
  for (i = 0; i < p; i++)
Packit 67cb25
    {
Packit 67cb25
      gsl_vector_view row_i = gsl_matrix_row (QSI, i);
Packit 67cb25
      double d_i = gsl_vector_get (D, i);
Packit 67cb25
Packit 67cb25
      for (j = i; j < p; j++)
Packit 67cb25
        {
Packit 67cb25
          gsl_vector_view row_j = gsl_matrix_row (QSI, j);
Packit 67cb25
          double d_j = gsl_vector_get (D, j);
Packit 67cb25
          double s;
Packit 67cb25
Packit 67cb25
          gsl_blas_ddot (&row_i.vector, &row_j.vector, &s);
Packit 67cb25
Packit 67cb25
          gsl_matrix_set (cov, i, j, s * s2 / (d_i * d_j));
Packit 67cb25
          gsl_matrix_set (cov, j, i, s * s2 / (d_i * d_j));
Packit 67cb25
        }
Packit 67cb25
    }
Packit 67cb25
Packit 67cb25
  return status;
Packit 67cb25
} /* robust_covariance() */