Blame multilarge_nlinear/cgst.c

Packit 67cb25
/* multilarge_nlinear/cgst.c
Packit 67cb25
 * 
Packit 67cb25
 * Copyright (C) 2016 Patrick Alken
Packit 67cb25
 * 
Packit 67cb25
 * This program is free software; you can redistribute it and/or modify
Packit 67cb25
 * it under the terms of the GNU General Public License as published by
Packit 67cb25
 * the Free Software Foundation; either version 3 of the License, or (at
Packit 67cb25
 * your option) any later version.
Packit 67cb25
 * 
Packit 67cb25
 * This program is distributed in the hope that it will be useful, but
Packit 67cb25
 * WITHOUT ANY WARRANTY; without even the implied warranty of
Packit 67cb25
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
Packit 67cb25
 * General Public License for more details.
Packit 67cb25
 * 
Packit 67cb25
 * You should have received a copy of the GNU General Public License
Packit 67cb25
 * along with this program; if not, write to the Free Software
Packit 67cb25
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
Packit 67cb25
 */
Packit 67cb25
Packit 67cb25
#include <config.h>
Packit 67cb25
#include <gsl/gsl_math.h>
Packit 67cb25
#include <gsl/gsl_vector.h>
Packit 67cb25
#include <gsl/gsl_matrix.h>
Packit 67cb25
#include <gsl/gsl_linalg.h>
Packit 67cb25
#include <gsl/gsl_multilarge_nlinear.h>
Packit 67cb25
#include <gsl/gsl_errno.h>
Packit 67cb25
#include <gsl/gsl_blas.h>
Packit 67cb25
Packit 67cb25
/*
Packit 67cb25
 * This module contains an implementation of the Steihaug-Toint
Packit 67cb25
 * conjugate gradient algorithm for nonlinear optimization problems.
Packit 67cb25
 * This implementation closely follows the following works:
Packit 67cb25
 *
Packit 67cb25
 * [1] T. Steihaug, The conjugate gradient method and trust regions
Packit 67cb25
 *     in large scale optimization, SIAM J. Num. Anal., 20(3) 1983.
Packit 67cb25
 *
Packit 67cb25
 * In the below algorithm, the Jacobian and gradient are scaled
Packit 67cb25
 * according to:
Packit 67cb25
 *
Packit 67cb25
 * J~ = J D^{-1}
Packit 67cb25
 * g~ = D^{-1}
Packit 67cb25
 *
Packit 67cb25
 * prior to any calculations which results in better numerical
Packit 67cb25
 * stability when solving for the Gauss-Newton step. The resulting
Packit 67cb25
 * step vector is then backtransformed as:
Packit 67cb25
 *
Packit 67cb25
 * dx = D^{-1} dx~
Packit 67cb25
 */
Packit 67cb25
Packit 67cb25
typedef struct
Packit 67cb25
{
Packit 67cb25
  size_t n;                  /* number of observations */
Packit 67cb25
  size_t p;                  /* number of parameters */
Packit 67cb25
  gsl_vector *z;             /* Gauss-Newton step, size p */
Packit 67cb25
  gsl_vector *r;             /* steepest descent step, size p */
Packit 67cb25
  gsl_vector *d;             /* steepest descent step, size p */
Packit 67cb25
  gsl_vector *workp;         /* workspace, length p */
Packit 67cb25
  gsl_vector *workn;         /* workspace, length n */
Packit 67cb25
  double norm_g;             /* || g~ || */
Packit 67cb25
Packit 67cb25
  double cgtol;              /* tolerance for CG solution */
Packit 67cb25
  size_t cgmaxit;            /* maximum CG iterations */
Packit 67cb25
} cgst_state_t;
Packit 67cb25
Packit 67cb25
#include "common.c"
Packit 67cb25
Packit 67cb25
static void * cgst_alloc (const void * params, const size_t n, const size_t p);
Packit 67cb25
static void cgst_free(void *vstate);
Packit 67cb25
static int cgst_init(const void *vtrust_state, void *vstate);
Packit 67cb25
static int cgst_preloop(const void * vtrust_state, void * vstate);
Packit 67cb25
static int cgst_step(const void * vtrust_state, const double delta,
Packit 67cb25
                     gsl_vector * dx, void * vstate);
Packit 67cb25
static int cgst_preduction(const void * vtrust_state, const gsl_vector * dx,
Packit 67cb25
                           double * pred, void * vstate);
Packit 67cb25
static double cgst_calc_tau(const gsl_vector * p, const gsl_vector * d,
Packit 67cb25
                            const double delta);
Packit 67cb25
Packit 67cb25
static void *
Packit 67cb25
cgst_alloc (const void * params, const size_t n, const size_t p)
Packit 67cb25
{
Packit 67cb25
  const gsl_multilarge_nlinear_parameters *par = (const gsl_multilarge_nlinear_parameters *) params;
Packit 67cb25
  cgst_state_t *state;
Packit 67cb25
  
Packit 67cb25
  state = calloc(1, sizeof(cgst_state_t));
Packit 67cb25
  if (state == NULL)
Packit 67cb25
    {
Packit 67cb25
      GSL_ERROR_NULL ("failed to allocate st state", GSL_ENOMEM);
Packit 67cb25
    }
Packit 67cb25
Packit 67cb25
  state->z = gsl_vector_alloc(p);
Packit 67cb25
  if (state->z == NULL)
Packit 67cb25
    {
Packit 67cb25
      GSL_ERROR_NULL ("failed to allocate space for z", GSL_ENOMEM);
Packit 67cb25
    }
Packit 67cb25
Packit 67cb25
  state->r = gsl_vector_alloc(p);
Packit 67cb25
  if (state->r == NULL)
Packit 67cb25
    {
Packit 67cb25
      GSL_ERROR_NULL ("failed to allocate space for r", GSL_ENOMEM);
Packit 67cb25
    }
Packit 67cb25
Packit 67cb25
  state->d = gsl_vector_alloc(p);
Packit 67cb25
  if (state->d == NULL)
Packit 67cb25
    {
Packit 67cb25
      GSL_ERROR_NULL ("failed to allocate space for d", GSL_ENOMEM);
Packit 67cb25
    }
Packit 67cb25
Packit 67cb25
  state->workp = gsl_vector_alloc(p);
Packit 67cb25
  if (state->workp == NULL)
Packit 67cb25
    {
Packit 67cb25
      GSL_ERROR_NULL ("failed to allocate space for workp", GSL_ENOMEM);
Packit 67cb25
    }
Packit 67cb25
Packit 67cb25
  state->workn = gsl_vector_alloc(n);
Packit 67cb25
  if (state->workn == NULL)
Packit 67cb25
    {
Packit 67cb25
      GSL_ERROR_NULL ("failed to allocate space for workn", GSL_ENOMEM);
Packit 67cb25
    }
Packit 67cb25
Packit 67cb25
  state->n = n;
Packit 67cb25
  state->p = p;
Packit 67cb25
Packit 67cb25
  state->cgmaxit = par->max_iter;
Packit 67cb25
  if (state->cgmaxit == 0)
Packit 67cb25
    state->cgmaxit = n;
Packit 67cb25
Packit 67cb25
  state->cgtol = par->tol;
Packit 67cb25
Packit 67cb25
  return state;
Packit 67cb25
}
Packit 67cb25
Packit 67cb25
static void
Packit 67cb25
cgst_free(void *vstate)
Packit 67cb25
{
Packit 67cb25
  cgst_state_t *state = (cgst_state_t *) vstate;
Packit 67cb25
Packit 67cb25
  if (state->z)
Packit 67cb25
    gsl_vector_free(state->z);
Packit 67cb25
Packit 67cb25
  if (state->r)
Packit 67cb25
    gsl_vector_free(state->r);
Packit 67cb25
Packit 67cb25
  if (state->d)
Packit 67cb25
    gsl_vector_free(state->d);
Packit 67cb25
Packit 67cb25
  if (state->workp)
Packit 67cb25
    gsl_vector_free(state->workp);
Packit 67cb25
Packit 67cb25
  if (state->workn)
Packit 67cb25
    gsl_vector_free(state->workn);
Packit 67cb25
Packit 67cb25
  free(state);
Packit 67cb25
}
Packit 67cb25
Packit 67cb25
/*
Packit 67cb25
cgst_init()
Packit 67cb25
  Initialize cgst solver
Packit 67cb25
Packit 67cb25
Inputs: vtrust_state - trust state
Packit 67cb25
        vstate       - workspace
Packit 67cb25
Packit 67cb25
Return: success/error
Packit 67cb25
*/
Packit 67cb25
Packit 67cb25
static int
Packit 67cb25
cgst_init(const void *vtrust_state, void *vstate)
Packit 67cb25
{
Packit 67cb25
  /* nothing to do */
Packit 67cb25
Packit 67cb25
  (void)vtrust_state;
Packit 67cb25
  (void)vstate;
Packit 67cb25
Packit 67cb25
  return GSL_SUCCESS;
Packit 67cb25
}
Packit 67cb25
Packit 67cb25
static int
Packit 67cb25
cgst_preloop(const void * vtrust_state, void * vstate)
Packit 67cb25
{
Packit 67cb25
  /* nothing to do */
Packit 67cb25
Packit 67cb25
  (void)vtrust_state;
Packit 67cb25
  (void)vstate;
Packit 67cb25
Packit 67cb25
  return GSL_SUCCESS;
Packit 67cb25
}
Packit 67cb25
Packit 67cb25
/*
Packit 67cb25
cgst_step()
Packit 67cb25
  Calculate a new step vector
Packit 67cb25
Packit 67cb25
Return:
Packit 67cb25
GSL_SUCCESS if CG solution found
Packit 67cb25
GSL_EMAXITER if no solution found
Packit 67cb25
*/
Packit 67cb25
Packit 67cb25
static int
Packit 67cb25
cgst_step(const void * vtrust_state, const double delta,
Packit 67cb25
          gsl_vector * dx, void * vstate)
Packit 67cb25
{
Packit 67cb25
  int status;
Packit 67cb25
  const gsl_multilarge_nlinear_trust_state *trust_state =
Packit 67cb25
    (const gsl_multilarge_nlinear_trust_state *) vtrust_state;
Packit 67cb25
  cgst_state_t *state = (cgst_state_t *) vstate;
Packit 67cb25
  const gsl_vector * x = trust_state->x;
Packit 67cb25
  const gsl_vector * f = trust_state->f;
Packit 67cb25
  const gsl_vector * swts = trust_state->sqrt_wts;
Packit 67cb25
  const gsl_vector * diag = trust_state->diag;
Packit 67cb25
  const gsl_multilarge_nlinear_parameters * params = trust_state->params;
Packit 67cb25
  gsl_multilarge_nlinear_fdf * fdf = trust_state->fdf;
Packit 67cb25
  double alpha, beta, u;
Packit 67cb25
  double norm_Jd;   /* || J D^{-1} d_i || */
Packit 67cb25
  double norm_r;    /* || r_i || */
Packit 67cb25
  double norm_rp1;  /* || r_{i+1} || */
Packit 67cb25
  size_t i;
Packit 67cb25
Packit 67cb25
  /* Step 1 of [1], section 2; scale gradient as
Packit 67cb25
   *
Packit 67cb25
   * g~ = D^{-1} g
Packit 67cb25
   *
Packit 67cb25
   * for better numerical stability
Packit 67cb25
   */
Packit 67cb25
Packit 67cb25
  for (i = 0; i < state->p; ++i)
Packit 67cb25
    {
Packit 67cb25
      double gi = gsl_vector_get(trust_state->g, i);
Packit 67cb25
      double di = gsl_vector_get(trust_state->diag, i);
Packit 67cb25
Packit 67cb25
      gsl_vector_set(state->z, i, 0.0);
Packit 67cb25
      gsl_vector_set(state->r, i, -gi / di);
Packit 67cb25
      gsl_vector_set(state->d, i, -gi / di);
Packit 67cb25
      gsl_vector_set(state->workp, i, gi / di);
Packit 67cb25
    }
Packit 67cb25
Packit 67cb25
  /* compute || g~ || */
Packit 67cb25
  state->norm_g = gsl_blas_dnrm2(state->workp);
Packit 67cb25
Packit 67cb25
  for (i = 0; i < state->cgmaxit; ++i)
Packit 67cb25
    {
Packit 67cb25
      /* workp := D^{-1} d_i */
Packit 67cb25
      gsl_vector_memcpy(state->workp, state->d);
Packit 67cb25
      gsl_vector_div(state->workp, trust_state->diag);
Packit 67cb25
Packit 67cb25
      /* workn := J D^{-1} d_i */
Packit 67cb25
      status = gsl_multilarge_nlinear_eval_df(CblasNoTrans, x, f, state->workp,
Packit 67cb25
                                              swts, params->h_df, params->fdtype,
Packit 67cb25
                                              fdf, state->workn, NULL, NULL);
Packit 67cb25
      if (status)
Packit 67cb25
        return status;
Packit 67cb25
Packit 67cb25
      /* compute || J D^{-1} d_i || */
Packit 67cb25
      norm_Jd = gsl_blas_dnrm2(state->workn);
Packit 67cb25
Packit 67cb25
      /* Step 2 of [1], section 2 */
Packit 67cb25
      if (norm_Jd == 0.0)
Packit 67cb25
        {
Packit 67cb25
          double tau = cgst_calc_tau(state->z, state->d, delta);
Packit 67cb25
Packit 67cb25
          /* dx = z_i + tau*d_i */
Packit 67cb25
          scaled_addition(1.0, state->z, tau, state->d, dx);
Packit 67cb25
          gsl_vector_div(dx, diag);
Packit 67cb25
Packit 67cb25
          return GSL_SUCCESS;
Packit 67cb25
        }
Packit 67cb25
Packit 67cb25
      /* Step 3 of [1], section 2 */
Packit 67cb25
Packit 67cb25
      norm_r = gsl_blas_dnrm2(state->r);
Packit 67cb25
      u = norm_r / norm_Jd;
Packit 67cb25
      alpha = u * u;
Packit 67cb25
Packit 67cb25
      /* workp <= z_{i+1} = z_i + alpha_i*d_i */
Packit 67cb25
      scaled_addition(1.0, state->z, alpha, state->d, state->workp);
Packit 67cb25
Packit 67cb25
      u = gsl_blas_dnrm2(state->workp);
Packit 67cb25
      if (u >= delta)
Packit 67cb25
        {
Packit 67cb25
          double tau = cgst_calc_tau(state->z, state->d, delta);
Packit 67cb25
Packit 67cb25
          /* dx = z_i + tau*d_i */
Packit 67cb25
          scaled_addition(1.0, state->z, tau, state->d, dx);
Packit 67cb25
          gsl_vector_div(dx, diag);
Packit 67cb25
Packit 67cb25
          return GSL_SUCCESS;
Packit 67cb25
        }
Packit 67cb25
Packit 67cb25
      /* store z_{i+1} */
Packit 67cb25
      gsl_vector_memcpy(state->z, state->workp);
Packit 67cb25
Packit 67cb25
      /* Step 4 of [1], section 2 */
Packit 67cb25
Packit 67cb25
      /* compute: workp := alpha B d_i = alpha D^{-1} J^T J D^{-1} d_i,
Packit 67cb25
       * where J D^{-1} d_i is already stored in workn */
Packit 67cb25
      status = gsl_multilarge_nlinear_eval_df(CblasTrans, x, f, state->workn,
Packit 67cb25
                                              swts, params->h_df, params->fdtype,
Packit 67cb25
                                              fdf, state->workp, NULL, NULL);
Packit 67cb25
      if (status)
Packit 67cb25
        return status;
Packit 67cb25
Packit 67cb25
      gsl_vector_div(state->workp, trust_state->diag);
Packit 67cb25
      gsl_vector_scale(state->workp, alpha);
Packit 67cb25
Packit 67cb25
      /* r_{i+1} = r_i - alpha*B*d_i */
Packit 67cb25
      gsl_vector_sub(state->r, state->workp);
Packit 67cb25
      norm_rp1 = gsl_blas_dnrm2(state->r);
Packit 67cb25
Packit 67cb25
      u = norm_rp1 / state->norm_g;
Packit 67cb25
      if (u < state->cgtol)
Packit 67cb25
        {
Packit 67cb25
          gsl_vector_memcpy(dx, state->z);
Packit 67cb25
          gsl_vector_div(dx, diag);
Packit 67cb25
          return GSL_SUCCESS;
Packit 67cb25
        }
Packit 67cb25
Packit 67cb25
      /* Step 5 of [1], section 2 */
Packit 67cb25
Packit 67cb25
      /* compute u = ||r_{i+1}|| / || r_i|| */
Packit 67cb25
      u = norm_rp1 / norm_r;
Packit 67cb25
      beta = u * u;
Packit 67cb25
Packit 67cb25
      /* compute: d_{i+1} = rt_{i+1} + beta*d_i */
Packit 67cb25
      scaled_addition(1.0, state->r, beta, state->d, state->d);
Packit 67cb25
    }
Packit 67cb25
Packit 67cb25
  /* failed to converge, return current estimate */
Packit 67cb25
  gsl_vector_memcpy(dx, state->z);
Packit 67cb25
  gsl_vector_div(dx, diag);
Packit 67cb25
Packit 67cb25
  return GSL_EMAXITER;
Packit 67cb25
}
Packit 67cb25
Packit 67cb25
static int
Packit 67cb25
cgst_preduction(const void * vtrust_state, const gsl_vector * dx,
Packit 67cb25
                double * pred, void * vstate)
Packit 67cb25
{
Packit 67cb25
  const gsl_multilarge_nlinear_trust_state *trust_state =
Packit 67cb25
    (const gsl_multilarge_nlinear_trust_state *) vtrust_state;
Packit 67cb25
  cgst_state_t *state = (cgst_state_t *) vstate;
Packit 67cb25
Packit 67cb25
  *pred = quadratic_preduction(trust_state, dx, state->workn);
Packit 67cb25
Packit 67cb25
  return GSL_SUCCESS;
Packit 67cb25
}
Packit 67cb25
Packit 67cb25
/*
Packit 67cb25
cgst_calc_tau()
Packit 67cb25
  Compute tau > 0 such that:
Packit 67cb25
Packit 67cb25
|| p + tau*d || = delta
Packit 67cb25
*/
Packit 67cb25
Packit 67cb25
static double
Packit 67cb25
cgst_calc_tau(const gsl_vector * p, const gsl_vector * d,
Packit 67cb25
              const double delta)
Packit 67cb25
{
Packit 67cb25
  double norm_p, norm_d, u;
Packit 67cb25
  double t1, t2, tau;
Packit 67cb25
Packit 67cb25
  norm_p = gsl_blas_dnrm2(p);
Packit 67cb25
  norm_d = gsl_blas_dnrm2(d);
Packit 67cb25
Packit 67cb25
  /* compute (p, d) */
Packit 67cb25
  gsl_blas_ddot(p, d, &u);
Packit 67cb25
Packit 67cb25
  t1 = u / (norm_d * norm_d);
Packit 67cb25
  t2 = t1*u + (delta + norm_p) * (delta - norm_p);
Packit 67cb25
  tau = -t1 + sqrt(t2) / norm_d;
Packit 67cb25
Packit 67cb25
  return tau;
Packit 67cb25
}
Packit 67cb25
Packit 67cb25
static const gsl_multilarge_nlinear_trs cgst_type =
Packit 67cb25
{
Packit 67cb25
  "steihaug-toint",
Packit 67cb25
  cgst_alloc,
Packit 67cb25
  cgst_init,
Packit 67cb25
  cgst_preloop,
Packit 67cb25
  cgst_step,
Packit 67cb25
  cgst_preduction,
Packit 67cb25
  cgst_free
Packit 67cb25
};
Packit 67cb25
Packit 67cb25
const gsl_multilarge_nlinear_trs *gsl_multilarge_nlinear_trs_cgst = &cgst_type;