/* multifit_nlinear/subspace2D.c * * Copyright (C) 2016 Patrick Alken * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 3 of the License, or (at * your option) any later version. * * This program is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ #include #include #include #include #include #include #include #include #include /* * This module implements a 2D subspace trust region subproblem method, * as outlined in * * [1] G. A. Shultz, R. B. Schnabel, and R. H. Byrd * A Family of Trust-Region-Based Algorithms for Unconstrained * Minimization with Strong Global Convergence Properties, * SIAM Journal on Numerical Analysis 1985 22:1, 47-67 * * [2] R. H. Byrd, R. B. Schnabel, G. A. Shultz, * Approximate solution of the trust region problem by * minimization over two-dimensional subspaces, * Mathematical Programming, January 1988, Volume 40, * Issue 1, pp 247-263 * * The idea is to solve: * * min_{dx} g^T dx + 1/2 dx^T B dx * * with constraints: * * ||D dx|| <= delta * dx \in span{dx_sd, dx_gn} * * where B is the Hessian matrix, B = J^T J * * The steps are as follows: * * 1. preloop: * a. Compute Gauss-Newton and steepest descent vectors, * dx_gn, dx_sd * b. Compute an orthonormal basis for span(D dx_sd, D dx_gn) by * constructing W = [ D dx_sd, D dx_gn ] and performing a QR * decomposition of W. The 2 columns of the Q matrix * will then span the column space of W. W should have rank 2 * unless D*dx_sd and D*dx_gn are parallel, in which case it will * have rank 1. * c. Precompute various quantities needed for the step calculation * * 2. step: * a. If the Gauss-Newton step is inside the trust region, use it * b. if W has rank 1, we cannot form a 2D subspace, so in this case * follow the steepest descent direction to the trust region boundary * and use that as the step. * c. In the full rank 2 case, if the GN point is outside the trust region, * then the minimizer of the objective function lies on the trust * region boundary. Therefore the minimization problem becomes: * * min_{dx} g^T dx + 1/2 dx^T B dx, with ||dx|| = delta, dx = Q * x * * where x is a 2-vector to be determined and the columns of Q are * the orthonormal basis vectors of the subspace. Note the equality * constraint now instead of <=. In terms of the new variable x, * the minimization problem becomes: * * min_x subg^T x + 1/2 x^T subB x, with ||Q*x|| = ||x|| = delta * * where: * subg = Q^T g (2-by-1) * subB = Q^T B Q (2-by-2) * * This equality constrained 2D minimization problem can be solved * with a Lagrangian multiplier, which results in a 4th degree polynomial * equation to be solved. The equation is: * * lambda^4 1 * + lambda^3 2 tr(B) * + lambda^2 (tr(B)^2 + 2 det(B) - g^T g / delta^2) * + lambda^1 (2 det(B) tr(B) - 2 g^T adj(B)^T g / delta^2) * + lambda^0 (det(B)^2 - g^T adj(B)^T adj(B) g / delta^2) * * where adj(B) is the adjugate matrix of B. * * We then check each of the 4 solutions for lambda to determine which * lambda results in the smallest objective function value. This x * is then used to construct the final step: dx = Q*x */ typedef struct { size_t n; /* number of observations */ size_t p; /* number of parameters */ gsl_vector *dx_gn; /* Gauss-Newton step, size p */ gsl_vector *dx_sd; /* steepest descent step, size p */ double norm_Dgn; /* || D dx_gn || */ double norm_Dsd; /* || D dx_sd || */ gsl_vector *workp; /* workspace, length p */ gsl_vector *workn; /* workspace, length n */ gsl_matrix *W; /* orthonormal basis for 2D subspace, p-by-2 */ gsl_matrix *JQ; /* J * Q, n-by-p */ gsl_vector *tau; /* Householder scalars */ gsl_vector *subg; /* subspace gradient = W^T g, 2-by-1 */ gsl_matrix *subB; /* subspace Hessian = W^T B W, 2-by-2 */ gsl_permutation *perm; /* permutation matrix */ double trB; /* Tr(subB) */ double detB; /* det(subB) */ double normg; /* || subg || */ double term0; /* g^T adj(B)^T adj(B) g */ double term1; /* g^T adj(B)^T g */ size_t rank; /* rank of [ dx_sd, dx_gn ] matrix */ gsl_poly_complex_workspace *poly_p; /* tunable parameters */ gsl_multifit_nlinear_parameters params; } subspace2D_state_t; #include "common.c" static void * subspace2D_alloc (const void * params, const size_t n, const size_t p); static void subspace2D_free(void *vstate); static int subspace2D_init(const void *vtrust_state, void *vstate); static int subspace2D_preloop(const void * vtrust_state, void * vstate); static int subspace2D_step(const void * vtrust_state, const double delta, gsl_vector * dx, void * vstate); static int subspace2D_preduction(const void * vtrust_state, const gsl_vector * dx, double * pred, void * vstate); static int subspace2D_solution(const double lambda, gsl_vector * x, subspace2D_state_t * state); static double subspace2D_objective(const gsl_vector * x, subspace2D_state_t * state); static int subspace2D_calc_gn(const gsl_multifit_nlinear_trust_state * trust_state, gsl_vector * dx); static int subspace2D_calc_sd(const gsl_multifit_nlinear_trust_state * trust_state, gsl_vector * dx, subspace2D_state_t * state); static void * subspace2D_alloc (const void * params, const size_t n, const size_t p) { const gsl_multifit_nlinear_parameters *par = (const gsl_multifit_nlinear_parameters *) params; subspace2D_state_t *state; state = calloc(1, sizeof(subspace2D_state_t)); if (state == NULL) { GSL_ERROR_NULL ("failed to allocate subspace2D state", GSL_ENOMEM); } state->dx_gn = gsl_vector_alloc(p); if (state->dx_gn == NULL) { GSL_ERROR_NULL ("failed to allocate space for dx_gn", GSL_ENOMEM); } state->dx_sd = gsl_vector_alloc(p); if (state->dx_sd == NULL) { GSL_ERROR_NULL ("failed to allocate space for dx_sd", GSL_ENOMEM); } state->workp = gsl_vector_alloc(p); if (state->workp == NULL) { GSL_ERROR_NULL ("failed to allocate space for workp", GSL_ENOMEM); } state->workn = gsl_vector_alloc(n); if (state->workn == NULL) { GSL_ERROR_NULL ("failed to allocate space for workn", GSL_ENOMEM); } state->W = gsl_matrix_alloc(p, 2); if (state->W == NULL) { GSL_ERROR_NULL ("failed to allocate space for W", GSL_ENOMEM); } state->JQ = gsl_matrix_alloc(n, p); if (state->JQ == NULL) { GSL_ERROR_NULL ("failed to allocate space for JQ", GSL_ENOMEM); } state->tau = gsl_vector_alloc(2); if (state->tau == NULL) { GSL_ERROR_NULL ("failed to allocate space for tau", GSL_ENOMEM); } state->subg = gsl_vector_alloc(2); if (state->subg == NULL) { GSL_ERROR_NULL ("failed to allocate space for subg", GSL_ENOMEM); } state->subB = gsl_matrix_alloc(2, 2); if (state->subB == NULL) { GSL_ERROR_NULL ("failed to allocate space for subB", GSL_ENOMEM); } state->perm = gsl_permutation_alloc(2); if (state->perm == NULL) { GSL_ERROR_NULL ("failed to allocate space for perm", GSL_ENOMEM); } state->poly_p = gsl_poly_complex_workspace_alloc(5); if (state->poly_p == NULL) { GSL_ERROR_NULL ("failed to allocate space for poly workspace", GSL_ENOMEM); } state->n = n; state->p = p; state->rank = 0; state->params = *par; return state; } static void subspace2D_free(void *vstate) { subspace2D_state_t *state = (subspace2D_state_t *) vstate; if (state->dx_gn) gsl_vector_free(state->dx_gn); if (state->dx_sd) gsl_vector_free(state->dx_sd); if (state->workp) gsl_vector_free(state->workp); if (state->workn) gsl_vector_free(state->workn); if (state->W) gsl_matrix_free(state->W); if (state->JQ) gsl_matrix_free(state->JQ); if (state->tau) gsl_vector_free(state->tau); if (state->subg) gsl_vector_free(state->subg); if (state->subB) gsl_matrix_free(state->subB); if (state->perm) gsl_permutation_free(state->perm); if (state->poly_p) gsl_poly_complex_workspace_free(state->poly_p); free(state); } /* subspace2D_init() Initialize subspace2D solver Inputs: vtrust_state - trust state vstate - workspace Return: success/error */ static int subspace2D_init(const void *vtrust_state, void *vstate) { (void)vtrust_state; (void)vstate; return GSL_SUCCESS; } /* subspace2D_preloop() Initialize subspace2D method prior to iteration loop. This involves computing the Gauss-Newton step and steepest descent step Notes: on output, 1) state->dx_gn contains Gauss-Newton step 2) state->dx_sd contains steepest descent step 3) state->rank contains the rank([dx_sd, dx_gn]) 4) if full rank subspace (rank = 2), then: state->trB = Tr(subB) state->detB = det(subB) state->normg = || subg || */ static int subspace2D_preloop(const void * vtrust_state, void * vstate) { int status; const gsl_multifit_nlinear_trust_state *trust_state = (const gsl_multifit_nlinear_trust_state *) vtrust_state; subspace2D_state_t *state = (subspace2D_state_t *) vstate; gsl_vector_view v; double work_data[2]; gsl_vector_view work = gsl_vector_view_array(work_data, 2); int signum; /* calculate Gauss-Newton step */ status = subspace2D_calc_gn(trust_state, state->dx_gn); if (status) return status; /* now calculate the steepest descent step */ status = subspace2D_calc_sd(trust_state, state->dx_sd, state); if (status) return status; /* store norms */ state->norm_Dgn = scaled_enorm(trust_state->diag, state->dx_gn); state->norm_Dsd = scaled_enorm(trust_state->diag, state->dx_sd); /* * now compute orthonormal basis for span(D dx_sd, D dx_gn) using * QR decomposition; set W = [ D dx_sd, D dx_gn ] and normalize each * column to unit magnitude. Then the Q matrix will form a basis for Col(W) */ v = gsl_matrix_column(state->W, 0); gsl_vector_memcpy(&v.vector, state->dx_sd); gsl_vector_mul(&v.vector, trust_state->diag); if (state->norm_Dsd != 0) gsl_vector_scale(&v.vector, 1.0 / state->norm_Dsd); v = gsl_matrix_column(state->W, 1); gsl_vector_memcpy(&v.vector, state->dx_gn); gsl_vector_mul(&v.vector, trust_state->diag); if (state->norm_Dgn != 0) gsl_vector_scale(&v.vector, 1.0 / state->norm_Dgn); /* use a rank revealing QR decomposition in case dx_sd and dx_gn * are parallel */ gsl_linalg_QRPT_decomp(state->W, state->tau, state->perm, &signum, &work.vector); /* check for parallel dx_sd, dx_gn, in which case rank will be 1 */ state->rank = gsl_linalg_QRPT_rank(state->W, -1.0); if (state->rank == 2) { /* * full rank subspace, compute: * subg = Q^T D^{-1} g * subB = Q^T D^{-1} B D^{-1} Q where B = J^T J */ const size_t p = state->p; size_t i; gsl_matrix_view JQ = gsl_matrix_submatrix(state->JQ, 0, 0, state->n, GSL_MIN(2, p)); double B00, B10, B11, g0, g1; /* compute subg */ gsl_vector_memcpy(state->workp, trust_state->g); gsl_vector_div(state->workp, trust_state->diag); gsl_linalg_QR_QTvec(state->W, state->tau, state->workp); g0 = gsl_vector_get(state->workp, 0); g1 = gsl_vector_get(state->workp, 1); gsl_vector_set(state->subg, 0, g0); gsl_vector_set(state->subg, 1, g1); /* compute subB */ /* compute J D^{-1} */ gsl_matrix_memcpy(state->JQ, trust_state->J); for (i = 0; i < p; ++i) { gsl_vector_view c = gsl_matrix_column(state->JQ, i); double di = gsl_vector_get(trust_state->diag, i); gsl_vector_scale(&c.vector, 1.0 / di); } /* compute J D^{-1} Q */ gsl_linalg_QR_matQ(state->W, state->tau, state->JQ); /* compute subB = Q^T D^{-1} J^T J D^{-1} Q */ gsl_blas_dsyrk(CblasLower, CblasTrans, 1.0, &JQ.matrix, 0.0, state->subB); B00 = gsl_matrix_get(state->subB, 0, 0); B10 = gsl_matrix_get(state->subB, 1, 0); B11 = gsl_matrix_get(state->subB, 1, 1); state->trB = B00 + B11; state->detB = B00*B11 - B10*B10; state->normg = gsl_blas_dnrm2(state->subg); /* g^T adj(B)^T adj(B) g */ state->term0 = (B10*B10 + B11*B11)*g0*g0 - 2*B10*(B00 + B11)*g0*g1 + (B00*B00 + B10*B10)*g1*g1; /* g^T adj(B)^T g */ state->term1 = B11 * g0 * g0 + g1 * (B00*g1 - 2*B10*g0); } return GSL_SUCCESS; } /* subspace2D_step() Calculate a new step with 2D subspace method. Based on [1]. We seek a vector dx in span{dx_gn, dx_sd} which minimizes the model function subject to ||dx|| <= delta */ static int subspace2D_step(const void * vtrust_state, const double delta, gsl_vector * dx, void * vstate) { const gsl_multifit_nlinear_trust_state *trust_state = (const gsl_multifit_nlinear_trust_state *) vtrust_state; subspace2D_state_t *state = (subspace2D_state_t *) vstate; if (state->norm_Dgn <= delta) { /* Gauss-Newton step is inside trust region, use it as final step * since it is the global minimizer of the quadratic model function */ gsl_vector_memcpy(dx, state->dx_gn); } else if (state->rank < 2) { /* rank of [dx_sd, dx_gn] is 1, meaning dx_sd and dx_gn * are parallel so we can't form a 2D subspace. Follow the steepest * descent direction to the trust region boundary as our step */ gsl_vector_memcpy(dx, state->dx_sd); gsl_vector_scale(dx, delta / state->norm_Dsd); } else { int status; const double delta_sq = delta * delta; double u = state->normg / delta; double a[5]; double z[8]; #if 1 a[0] = state->detB * state->detB - state->term0 / delta_sq; a[1] = 2 * state->detB * state->trB - 2 * state->term1 / delta_sq; a[2] = state->trB * state->trB + 2 * state->detB - u * u; a[3] = 2 * state->trB; a[4] = 1.0; #else double TrB_D = state->trB * delta; double detB_D = state->detB * delta; double normg_sq = state->normg * state->normg; a[0] = detB_D * detB_D - state->term0; a[1] = 2 * state->detB * state->trB * delta_sq - 2 * state->term1; a[2] = TrB_D * TrB_D + 2 * state->detB * delta_sq - normg_sq; a[3] = 2 * state->trB * delta_sq; a[4] = delta_sq; #endif status = gsl_poly_complex_solve(a, 5, state->poly_p, z); if (status == GSL_SUCCESS) { size_t i; double min = 0.0; int mini = -1; double x_data[2]; gsl_vector_view x = gsl_vector_view_array(x_data, 2); /* * loop through all four values of the Lagrange multiplier * lambda. For each lambda, evaluate the objective function * with Re(lambda) to determine which lambda minimizes the * function */ for (i = 0; i < 4; ++i) { double cost, normx; /*fprintf(stderr, "root: %.12e + %.12e i\n", z[2*i], z[2*i+1]);*/ status = subspace2D_solution(z[2*i], &x.vector, state); if (status != GSL_SUCCESS) continue; /* singular matrix system */ /* ensure ||x|| = delta */ normx = gsl_blas_dnrm2(&x.vector); if (normx == 0.0) continue; gsl_vector_scale(&x.vector, delta / normx); /* evaluate objective function to determine minimizer */ cost = subspace2D_objective(&x.vector, state); if (mini < 0 || cost < min) { mini = (int) i; min = cost; } } if (mini < 0) { /* did not find minimizer - should not get here */ return GSL_FAILURE; } else { /* compute x which minimizes objective function */ subspace2D_solution(z[2*mini], &x.vector, state); /* dx = Q * x */ gsl_vector_set_zero(dx); gsl_vector_set(dx, 0, gsl_vector_get(&x.vector, 0)); gsl_vector_set(dx, 1, gsl_vector_get(&x.vector, 1)); gsl_linalg_QR_Qvec(state->W, state->tau, dx); /* compute final dx by multiplying by D^{-1} */ gsl_vector_div(dx, trust_state->diag); } } else { GSL_ERROR ("gsl_poly_complex_solve failed", status); } } return GSL_SUCCESS; } static int subspace2D_preduction(const void * vtrust_state, const gsl_vector * dx, double * pred, void * vstate) { const gsl_multifit_nlinear_trust_state *trust_state = (const gsl_multifit_nlinear_trust_state *) vtrust_state; subspace2D_state_t *state = (subspace2D_state_t *) vstate; *pred = quadratic_preduction(trust_state->f, trust_state->J, dx, state->workn); return GSL_SUCCESS; } /* solve 2D subspace problem: (B + lambda*I) x = -g */ static int subspace2D_solution(const double lambda, gsl_vector * x, subspace2D_state_t * state) { int status = GSL_SUCCESS; double C_data[4]; gsl_matrix_view C = gsl_matrix_view_array(C_data, 2, 2); double B00 = gsl_matrix_get(state->subB, 0, 0); double B10 = gsl_matrix_get(state->subB, 1, 0); double B11 = gsl_matrix_get(state->subB, 1, 1); /* construct C = B + lambda*I */ gsl_matrix_set(&C.matrix, 0, 0, B00 + lambda); gsl_matrix_set(&C.matrix, 1, 0, B10); gsl_matrix_set(&C.matrix, 0, 1, B10); gsl_matrix_set(&C.matrix, 1, 1, B11 + lambda); /* use modified Cholesky in case C is not positive definite */ gsl_linalg_mcholesky_decomp(&C.matrix, state->perm, NULL); gsl_linalg_mcholesky_solve(&C.matrix, state->perm, state->subg, x); gsl_vector_scale(x, -1.0); return status; } /* evaluate 2D objective function: f(x) = g^T x + 1/2 x^T B x */ static double subspace2D_objective(const gsl_vector * x, subspace2D_state_t * state) { double f; double y_data[2]; gsl_vector_view y = gsl_vector_view_array(y_data, 2); /* compute: y = g + 1/2 B x */ gsl_vector_memcpy(&y.vector, state->subg); gsl_blas_dsymv(CblasLower, 0.5, state->subB, x, 1.0, &y.vector); /* compute: f = x^T ( g + 1/2 B x ) */ gsl_blas_ddot(x, &y.vector, &f); return f; } /* subspace2D_calc_gn() Calculate Gauss-Newton step which satisfies: J dx_gn = -f Inputs: trust_state - trust state variables dx - (output) Gauss-Newton step Return: success/error */ static int subspace2D_calc_gn(const gsl_multifit_nlinear_trust_state * trust_state, gsl_vector * dx) { int status; const gsl_multifit_nlinear_parameters *params = trust_state->params; /* initialize linear least squares solver */ status = (params->solver->init)(trust_state, trust_state->solver_state); if (status) return status; /* prepare the linear solver to compute Gauss-Newton step */ status = (params->solver->presolve)(0.0, trust_state, trust_state->solver_state); if (status) return status; /* solve: J dx_gn = -f for Gauss-Newton step */ status = (params->solver->solve)(trust_state->f, dx, trust_state, trust_state->solver_state); if (status) return status; return GSL_SUCCESS; } /* subspace2D_calc_sd() Calculate steepest descent step, dx_sd = - || D^{-1} g ||^2 / || J D^{-2} g ||^2 D^{-2} g Inputs: trust_state - trust state variables dx - (output) steepest descent vector state - workspace Return: success/error */ static int subspace2D_calc_sd(const gsl_multifit_nlinear_trust_state * trust_state, gsl_vector * dx, subspace2D_state_t * state) { double norm_Dinvg; /* || D^{-1} g || */ double norm_JDinv2g; /* || J D^{-2} g || */ double alpha; /* || D^{-1} g ||^2 / || J D^{-2} g ||^2 */ double u; /* compute workp = D^{-1} g and its norm */ gsl_vector_memcpy(state->workp, trust_state->g); gsl_vector_div(state->workp, trust_state->diag); norm_Dinvg = gsl_blas_dnrm2(state->workp); /* compute workp = D^{-2} g */ gsl_vector_div(state->workp, trust_state->diag); /* compute: workn = J D^{-2} g */ gsl_blas_dgemv(CblasNoTrans, 1.0, trust_state->J, state->workp, 0.0, state->workn); norm_JDinv2g = gsl_blas_dnrm2(state->workn); u = norm_Dinvg / norm_JDinv2g; alpha = u * u; /* dx_sd = -alpha D^{-2} g */ gsl_vector_memcpy(dx, state->workp); gsl_vector_scale(dx, -alpha); return GSL_SUCCESS; } static const gsl_multifit_nlinear_trs subspace2D_type = { "2D-subspace", subspace2D_alloc, subspace2D_init, subspace2D_preloop, subspace2D_step, subspace2D_preduction, subspace2D_free }; const gsl_multifit_nlinear_trs *gsl_multifit_nlinear_trs_subspace2D = &subspace2D_type;