/* multifit/lmniel.c
 * 
 * Copyright (C) 2014 Patrick Alken
 * 
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or (at
 * your option) any later version.
 * 
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 * 
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 */

#include <config.h>
#include <gsl/gsl_math.h>
#include <gsl/gsl_vector.h>
#include <gsl/gsl_matrix.h>
#include <gsl/gsl_linalg.h>
#include <gsl/gsl_multifit_nlin.h>
#include <gsl/gsl_errno.h>
#include <gsl/gsl_blas.h>

#define SCALE 0

/*
 * This module contains an implementation of the Levenberg-Marquardt
 * algorithm for nonlinear optimization problems. This implementation
 * closely follows the following works:
 *
 * [1] H. B. Nielsen, K. Madsen, Introduction to Optimization and
 *     Data Fitting, Informatics and Mathematical Modeling,
 *     Technical University of Denmark (DTU), 2010.
 */

typedef struct
{
  gsl_matrix *A;             /* J^T J */
  gsl_matrix *A_copy;        /* copy of J^T J */
  gsl_matrix *J;             /* Jacobian J(x) */
  gsl_vector *diag;          /* D = diag(J^T J) */
  gsl_vector *rhs;           /* rhs vector = -g = -J^T f */
  gsl_vector *x_trial;       /* trial parameter vector */
  gsl_vector *f_trial;       /* trial function vector */
  gsl_vector *work;          /* workspace length p */
  long nu;                   /* nu */
  double mu;                 /* LM damping parameter mu */
  double tau;                /* initial scale factor for mu */
} lmniel_state_t;

#include "lmmisc.c"

#define LM_ONE_THIRD         (0.333333333333333)

static int lmniel_alloc (void *vstate, const size_t n, const size_t p);
static void lmniel_free(void *vstate);
static int lmniel_set(void *vstate, const gsl_vector * swts,
                      gsl_multifit_function_fdf *fdf,
                      gsl_vector *x, gsl_vector *f, gsl_vector *dx);
static int lmniel_iterate(void *vstate, const gsl_vector *swts,
                          gsl_multifit_function_fdf *fdf,
                          gsl_vector *x, gsl_vector *f, gsl_vector *dx);

static int
lmniel_alloc (void *vstate, const size_t n, const size_t p)
{
  lmniel_state_t *state = (lmniel_state_t *) vstate;

  state->A = gsl_matrix_alloc(p, p);
  if (state->A == NULL)
    {
      GSL_ERROR ("failed to allocate space for A", GSL_ENOMEM);
    }

  state->J = gsl_matrix_alloc(n, p);
  if (state->J == NULL)
    {
      GSL_ERROR ("failed to allocate space for J", GSL_ENOMEM);
    }

  state->diag = gsl_vector_alloc(p);
  if (state->diag == NULL)
    {
      GSL_ERROR ("failed to allocate space for diag", GSL_ENOMEM);
    }

  state->rhs = gsl_vector_alloc(p);
  if (state->rhs == NULL)
    {
      GSL_ERROR ("failed to allocate space for rhs", GSL_ENOMEM);
    }

  state->work = gsl_vector_alloc(p);
  if (state->work == NULL)
    {
      GSL_ERROR ("failed to allocate space for work", GSL_ENOMEM);
    }

  state->A_copy = gsl_matrix_alloc(p, p);
  if (state->A_copy == NULL)
    {
      GSL_ERROR ("failed to allocate space for A_copy", GSL_ENOMEM);
    }

  state->x_trial = gsl_vector_alloc(p);
  if (state->x_trial == NULL)
    {
      GSL_ERROR ("failed to allocate space for x_trial", GSL_ENOMEM);
    }

  state->f_trial = gsl_vector_alloc(n);
  if (state->f_trial == NULL)
    {
      GSL_ERROR ("failed to allocate space for f_trial", GSL_ENOMEM);
    }

  state->tau = 1.0e-3;

  return GSL_SUCCESS;
} /* lmniel_alloc() */

static void
lmniel_free(void *vstate)
{
  lmniel_state_t *state = (lmniel_state_t *) vstate;

  if (state->A)
    gsl_matrix_free(state->A);

  if (state->J)
    gsl_matrix_free(state->J);

  if (state->diag)
    gsl_vector_free(state->diag);

  if (state->rhs)
    gsl_vector_free(state->rhs);

  if (state->work)
    gsl_vector_free(state->work);

  if (state->A_copy)
    gsl_matrix_free(state->A_copy);

  if (state->x_trial)
    gsl_vector_free(state->x_trial);

  if (state->f_trial)
    gsl_vector_free(state->f_trial);
} /* lmniel_free() */

static int
lmniel_set(void *vstate, const gsl_vector *swts,
           gsl_multifit_function_fdf *fdf, gsl_vector *x,
           gsl_vector *f, gsl_vector *dx)
{
  int status;
  lmniel_state_t *state = (lmniel_state_t *) vstate;
  const size_t p = x->size;
  size_t i;

  /* initialize counters for function and Jacobian evaluations */
  fdf->nevalf = 0;
  fdf->nevaldf = 0;

  /* evaluate function and Jacobian at x and apply weight transform */
  status = gsl_multifit_eval_wf(fdf, x, swts, f);
  if (status)
   return status;

  if (fdf->df)
    status = gsl_multifit_eval_wdf(fdf, x, swts, state->J);
  else
    status = gsl_multifit_fdfsolver_dif_df(x, swts, fdf, f, state->J);
  if (status)
    return status;

  /* compute rhs = -J^T f */
  gsl_blas_dgemv(CblasTrans, -1.0, state->J, f, 0.0, state->rhs);

#if SCALE
  gsl_vector_set_zero(state->diag);
#else
  gsl_vector_set_all(state->diag, 1.0);
#endif

  /* set default parameters */
  state->nu = 2;

#if SCALE
  state->mu = state->tau;
#else
  /* compute mu_0 = tau * max(diag(J^T J)) */
  state->mu = -1.0;
  for (i = 0; i < p; ++i)
    {
      gsl_vector_view c = gsl_matrix_column(state->J, i);
      double result; /* (J^T J)_{ii} */

      gsl_blas_ddot(&c.vector, &c.vector, &result);
      state->mu = GSL_MAX(state->mu, result);
    }

  state->mu *= state->tau;
#endif

  return GSL_SUCCESS;
} /* lmniel_set() */

/*
lmniel_iterate()
  This function performs 1 iteration of the LM algorithm 6.18
from [1]. The algorithm is slightly modified to loop until we
find an acceptable step dx, in order to guarantee that each
function call contains a new input vector x.

Args: vstate - lm workspace
      swts   - data weights (NULL if unweighted)
      fdf    - function and Jacobian pointers
      x      - on input, current parameter vector
               on output, new parameter vector x + dx
      f      - on input, f(x)
               on output, f(x + dx)
      dx     - (output only) parameter step vector

Notes:
1) On input, the following must be initialized in state:
nu, mu, rhs, J

2) On output, the following are updated with the current iterates:
nu, mu, rhs, J

rhs needs to be set on each output, so that lmniel_gradient supplies
the correct g = J^T f
*/

static int
lmniel_iterate(void *vstate, const gsl_vector *swts,
               gsl_multifit_function_fdf *fdf, gsl_vector *x,
               gsl_vector *f, gsl_vector *dx)
{
  int status;
  lmniel_state_t *state = (lmniel_state_t *) vstate;
  gsl_matrix *J = state->J;                   /* Jacobian J(x) */
  gsl_matrix *A = state->A;                   /* J^T J */
  gsl_vector *rhs = state->rhs;               /* -g = -J^T f */
  gsl_vector *x_trial = state->x_trial;       /* trial x + dx */
  gsl_vector *f_trial = state->f_trial;       /* trial f(x + dx) */
  gsl_vector *diag = state->diag;             /* diag(D) */
  double dF;                                  /* F(x) - F(x + dx) */
  double dL;                                  /* L(0) - L(dx) */
  int foundstep = 0;                          /* found step dx */

  /* compute A = J^T J */
  status = gsl_blas_dsyrk(CblasLower, CblasTrans, 1.0, J, 0.0, A);
  if (status)
    return status;

  /* copy lower triangle to upper */
  gsl_matrix_transpose_tricpy('L', 0, A, A);

#if SCALE
  lmniel_update_diag(J, diag);
#endif

  /* loop until we find an acceptable step dx */
  while (!foundstep)
    {
      /* solve (A + mu*I) dx = g */
      status = lmniel_calc_dx(state->mu, A, rhs, dx, state);
      if (status)
        return status;

      /* compute x_trial = x + dx */
      lmniel_trial_step(x, dx, x_trial);

      /* compute f(x + dx) */
      status = gsl_multifit_eval_wf(fdf, x_trial, swts, f_trial);
      if (status)
       return status;

      /* compute dF = F(x) - F(x + dx) */
      dF = lmniel_calc_dF(f, f_trial);

      /* compute dL = L(0) - L(dx) = dx^T (mu*dx - g) */
      dL = lmniel_calc_dL(state->mu, diag, dx, rhs);

      /* check that rho = dF/dL > 0 */
      if ((dL > 0.0) && (dF >= 0.0))
        {
          /* reduction in error, step acceptable */

          double tmp;

          /* update LM parameter mu */
          tmp = 2.0 * (dF / dL) - 1.0;
          tmp = 1.0 - tmp*tmp*tmp;
          state->mu *= GSL_MAX(LM_ONE_THIRD, tmp);
          state->nu = 2;

          /* compute J <- J(x + dx) */
          if (fdf->df)
            status = gsl_multifit_eval_wdf(fdf, x_trial, swts, J);
          else
            status = gsl_multifit_fdfsolver_dif_df(x_trial, swts, fdf, f_trial, J);
          if (status)
            return status;

          /* update x <- x + dx */
          gsl_vector_memcpy(x, x_trial);

          /* update f <- f(x + dx) */
          gsl_vector_memcpy(f, f_trial);

          /* compute new rhs = -J^T f */
          gsl_blas_dgemv(CblasTrans, -1.0, J, f, 0.0, rhs);

          foundstep = 1;
        }
      else
        {
          long nu2;

          /* step did not reduce error, reject step */
          state->mu *= (double) state->nu;
          nu2 = state->nu << 1; /* 2*nu */
          if (nu2 <= state->nu)
            {
              gsl_vector_view d = gsl_matrix_diagonal(A);

              /*
               * nu has wrapped around / overflown, reset mu and nu
               * to original values and break to force another iteration
               */
              /*GSL_ERROR("nu parameter has overflown", GSL_EOVRFLW);*/
              state->nu = 2;
              state->mu = state->tau * gsl_vector_max(&d.vector);
              break;
            }
          state->nu = nu2;
        }
    } /* while (!foundstep) */

  return GSL_SUCCESS;
} /* lmniel_iterate() */

static int
lmniel_gradient(void *vstate, gsl_vector * g)
{
  lmniel_state_t *state = (lmniel_state_t *) vstate;
  gsl_vector_memcpy(g, state->rhs);
  gsl_vector_scale(g, -1.0);
  return GSL_SUCCESS;
}

static int
lmniel_jac(void *vstate, gsl_matrix * J)
{
  lmniel_state_t *state = (lmniel_state_t *) vstate;
  int s = gsl_matrix_memcpy(J, state->J);

  return s;
}

static const gsl_multifit_fdfsolver_type lmniel_type =
{
  "lmniel",
  sizeof(lmniel_state_t),
  &lmniel_alloc,
  &lmniel_set,
  &lmniel_iterate,
  &lmniel_gradient,
  &lmniel_jac,
  &lmniel_free
};

const gsl_multifit_fdfsolver_type *gsl_multifit_fdfsolver_lmniel = &lmniel_type;