/* linalg/tridiag.c
*
* Copyright (C) 1996, 1997, 1998, 1999, 2000, 2002, 2004, 2007 Gerard Jungman, Brian Gough, David Necas
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or (at
* your option) any later version.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
/* Author: G. Jungman */
#include <config.h>
#include <stdlib.h>
#include <math.h>
#include <gsl/gsl_errno.h>
#include "tridiag.h"
#include <gsl/gsl_linalg.h>
/* for description of method see [Engeln-Mullges + Uhlig, p. 92]
*
* diag[0] offdiag[0] 0 .....
* offdiag[0] diag[1] offdiag[1] .....
* 0 offdiag[1] diag[2]
* 0 0 offdiag[2] .....
*/
static
int
solve_tridiag(
const double diag[], size_t d_stride,
const double offdiag[], size_t o_stride,
const double b[], size_t b_stride,
double x[], size_t x_stride,
size_t N)
{
int status = GSL_SUCCESS;
double *gamma = (double *) malloc (N * sizeof (double));
double *alpha = (double *) malloc (N * sizeof (double));
double *c = (double *) malloc (N * sizeof (double));
double *z = (double *) malloc (N * sizeof (double));
if (gamma == 0 || alpha == 0 || c == 0 || z == 0)
{
GSL_ERROR("failed to allocate working space", GSL_ENOMEM);
}
else
{
size_t i, j;
/* Cholesky decomposition
A = L.D.L^t
lower_diag(L) = gamma
diag(D) = alpha
*/
alpha[0] = diag[0];
gamma[0] = offdiag[0] / alpha[0];
if (alpha[0] == 0) {
status = GSL_EZERODIV;
}
for (i = 1; i < N - 1; i++)
{
alpha[i] = diag[d_stride * i] - offdiag[o_stride*(i - 1)] * gamma[i - 1];
gamma[i] = offdiag[o_stride * i] / alpha[i];
if (alpha[i] == 0) {
status = GSL_EZERODIV;
}
}
if (N > 1)
{
alpha[N - 1] = diag[d_stride * (N - 1)] - offdiag[o_stride*(N - 2)] * gamma[N - 2];
}
/* update RHS */
z[0] = b[0];
for (i = 1; i < N; i++)
{
z[i] = b[b_stride * i] - gamma[i - 1] * z[i - 1];
}
for (i = 0; i < N; i++)
{
c[i] = z[i] / alpha[i];
}
/* backsubstitution */
x[x_stride * (N - 1)] = c[N - 1];
if (N >= 2)
{
for (i = N - 2, j = 0; j <= N - 2; j++, i--)
{
x[x_stride * i] = c[i] - gamma[i] * x[x_stride * (i + 1)];
}
}
}
if (z != 0)
free (z);
if (c != 0)
free (c);
if (alpha != 0)
free (alpha);
if (gamma != 0)
free (gamma);
if (status == GSL_EZERODIV) {
GSL_ERROR ("matrix must be positive definite", status);
}
return status;
}
/* plain gauss elimination, only not bothering with the zeroes
*
* diag[0] abovediag[0] 0 .....
* belowdiag[0] diag[1] abovediag[1] .....
* 0 belowdiag[1] diag[2]
* 0 0 belowdiag[2] .....
*/
static
int
solve_tridiag_nonsym(
const double diag[], size_t d_stride,
const double abovediag[], size_t a_stride,
const double belowdiag[], size_t b_stride,
const double rhs[], size_t r_stride,
double x[], size_t x_stride,
size_t N)
{
int status = GSL_SUCCESS;
double *alpha = (double *) malloc (N * sizeof (double));
double *z = (double *) malloc (N * sizeof (double));
if (alpha == 0 || z == 0)
{
GSL_ERROR("failed to allocate working space", GSL_ENOMEM);
}
else
{
size_t i, j;
/* Bidiagonalization (eliminating belowdiag)
& rhs update
diag' = alpha
rhs' = z
*/
alpha[0] = diag[0];
z[0] = rhs[0];
if (alpha[0] == 0) {
status = GSL_EZERODIV;
}
for (i = 1; i < N; i++)
{
const double t = belowdiag[b_stride*(i - 1)]/alpha[i-1];
alpha[i] = diag[d_stride*i] - t*abovediag[a_stride*(i - 1)];
z[i] = rhs[r_stride*i] - t*z[i-1];
if (alpha[i] == 0) {
status = GSL_EZERODIV;
}
}
/* backsubstitution */
x[x_stride * (N - 1)] = z[N - 1]/alpha[N - 1];
if (N >= 2)
{
for (i = N - 2, j = 0; j <= N - 2; j++, i--)
{
x[x_stride * i] = (z[i] - abovediag[a_stride*i] * x[x_stride * (i + 1)])/alpha[i];
}
}
}
if (z != 0)
free (z);
if (alpha != 0)
free (alpha);
if (status == GSL_EZERODIV) {
GSL_ERROR ("matrix must be positive definite", status);
}
return status;
}
/* for description of method see [Engeln-Mullges + Uhlig, p. 96]
*
* diag[0] offdiag[0] 0 ..... offdiag[N-1]
* offdiag[0] diag[1] offdiag[1] .....
* 0 offdiag[1] diag[2]
* 0 0 offdiag[2] .....
* ... ...
* offdiag[N-1] ...
*
*/
static
int
solve_cyc_tridiag(
const double diag[], size_t d_stride,
const double offdiag[], size_t o_stride,
const double b[], size_t b_stride,
double x[], size_t x_stride,
size_t N)
{
int status = GSL_SUCCESS;
double * delta = (double *) malloc (N * sizeof (double));
double * gamma = (double *) malloc (N * sizeof (double));
double * alpha = (double *) malloc (N * sizeof (double));
double * c = (double *) malloc (N * sizeof (double));
double * z = (double *) malloc (N * sizeof (double));
if (delta == 0 || gamma == 0 || alpha == 0 || c == 0 || z == 0)
{
GSL_ERROR("failed to allocate working space", GSL_ENOMEM);
}
else
{
size_t i, j;
double sum = 0.0;
/* factor */
if (N == 1)
{
x[0] = b[0] / diag[0];
free(delta);
free(gamma);
free(alpha);
free(c);
free(z);
return GSL_SUCCESS;
}
alpha[0] = diag[0];
gamma[0] = offdiag[0] / alpha[0];
delta[0] = offdiag[o_stride * (N-1)] / alpha[0];
if (alpha[0] == 0) {
status = GSL_EZERODIV;
}
for (i = 1; i < N - 2; i++)
{
alpha[i] = diag[d_stride * i] - offdiag[o_stride * (i-1)] * gamma[i - 1];
gamma[i] = offdiag[o_stride * i] / alpha[i];
delta[i] = -delta[i - 1] * offdiag[o_stride * (i-1)] / alpha[i];
if (alpha[i] == 0) {
status = GSL_EZERODIV;
}
}
for (i = 0; i < N - 2; i++)
{
sum += alpha[i] * delta[i] * delta[i];
}
alpha[N - 2] = diag[d_stride * (N - 2)] - offdiag[o_stride * (N - 3)] * gamma[N - 3];
gamma[N - 2] = (offdiag[o_stride * (N - 2)] - offdiag[o_stride * (N - 3)] * delta[N - 3]) / alpha[N - 2];
alpha[N - 1] = diag[d_stride * (N - 1)] - sum - alpha[(N - 2)] * gamma[N - 2] * gamma[N - 2];
/* update */
z[0] = b[0];
for (i = 1; i < N - 1; i++)
{
z[i] = b[b_stride * i] - z[i - 1] * gamma[i - 1];
}
sum = 0.0;
for (i = 0; i < N - 2; i++)
{
sum += delta[i] * z[i];
}
z[N - 1] = b[b_stride * (N - 1)] - sum - gamma[N - 2] * z[N - 2];
for (i = 0; i < N; i++)
{
c[i] = z[i] / alpha[i];
}
/* backsubstitution */
x[x_stride * (N - 1)] = c[N - 1];
x[x_stride * (N - 2)] = c[N - 2] - gamma[N - 2] * x[x_stride * (N - 1)];
if (N >= 3)
{
for (i = N - 3, j = 0; j <= N - 3; j++, i--)
{
x[x_stride * i] = c[i] - gamma[i] * x[x_stride * (i + 1)] - delta[i] * x[x_stride * (N - 1)];
}
}
}
if (z != 0)
free (z);
if (c != 0)
free (c);
if (alpha != 0)
free (alpha);
if (gamma != 0)
free (gamma);
if (delta != 0)
free (delta);
if (status == GSL_EZERODIV) {
GSL_ERROR ("matrix must be positive definite", status);
}
return status;
}
/* solve following system w/o the corner elements and then use
* Sherman-Morrison formula to compensate for them
*
* diag[0] abovediag[0] 0 ..... belowdiag[N-1]
* belowdiag[0] diag[1] abovediag[1] .....
* 0 belowdiag[1] diag[2]
* 0 0 belowdiag[2] .....
* ... ...
* abovediag[N-1] ...
*/
static
int solve_cyc_tridiag_nonsym(
const double diag[], size_t d_stride,
const double abovediag[], size_t a_stride,
const double belowdiag[], size_t b_stride,
const double rhs[], size_t r_stride,
double x[], size_t x_stride,
size_t N)
{
int status = GSL_SUCCESS;
double *alpha = (double *) malloc (N * sizeof (double));
double *zb = (double *) malloc (N * sizeof (double));
double *zu = (double *) malloc (N * sizeof (double));
double *w = (double *) malloc (N * sizeof (double));
if (alpha == 0 || zb == 0 || zu == 0 || w == 0)
{
GSL_ERROR("failed to allocate working space", GSL_ENOMEM);
}
else
{
double beta;
/* Bidiagonalization (eliminating belowdiag)
& rhs update
diag' = alpha
rhs' = zb
rhs' for Aq=u is zu
*/
zb[0] = rhs[0];
if (diag[0] != 0) beta = -diag[0]; else beta = 1;
{
const double q = 1 - abovediag[0]*belowdiag[0]/(diag[0]*diag[d_stride]);
if (fabs(q/beta) > 0.5 && fabs(q/beta) < 2) {
beta *= (fabs(q/beta) < 1) ? 0.5 : 2;
}
}
zu[0] = beta;
alpha[0] = diag[0] - beta;
if (alpha[0] == 0) {
status = GSL_EZERODIV;
}
{
size_t i;
for (i = 1; i+1 < N; i++)
{
const double t = belowdiag[b_stride*(i - 1)]/alpha[i-1];
alpha[i] = diag[d_stride*i] - t*abovediag[a_stride*(i - 1)];
zb[i] = rhs[r_stride*i] - t*zb[i-1];
zu[i] = -t*zu[i-1];
/* FIXME!!! */
if (alpha[i] == 0) {
status = GSL_EZERODIV;
}
}
}
{
const size_t i = N-1;
const double t = belowdiag[b_stride*(i - 1)]/alpha[i-1];
alpha[i] = diag[d_stride*i]
- abovediag[a_stride*i]*belowdiag[b_stride*i]/beta
- t*abovediag[a_stride*(i - 1)];
zb[i] = rhs[r_stride*i] - t*zb[i-1];
zu[i] = abovediag[a_stride*i] - t*zu[i-1];
/* FIXME!!! */
if (alpha[i] == 0) {
status = GSL_EZERODIV;
}
}
/* backsubstitution */
{
size_t i, j;
w[N-1] = zu[N-1]/alpha[N-1];
x[x_stride*(N-1)] = zb[N-1]/alpha[N-1];
for (i = N - 2, j = 0; j <= N - 2; j++, i--)
{
w[i] = (zu[i] - abovediag[a_stride*i] * w[i+1])/alpha[i];
x[i*x_stride] = (zb[i] - abovediag[a_stride*i] * x[x_stride*(i + 1)])/alpha[i];
}
}
/* Sherman-Morrison */
{
const double vw = w[0] + belowdiag[b_stride*(N - 1)]/beta * w[N-1];
const double vx = x[0] + belowdiag[b_stride*(N - 1)]/beta * x[x_stride*(N - 1)];
/* FIXME!!! */
if (vw + 1 == 0) {
status = GSL_EZERODIV;
}
{
size_t i;
for (i = 0; i < N; i++)
x[i*x_stride] -= vx/(1 + vw)*w[i];
}
}
}
if (zb != 0)
free (zb);
if (zu != 0)
free (zu);
if (w != 0)
free (w);
if (alpha != 0)
free (alpha);
if (status == GSL_EZERODIV) {
GSL_ERROR ("matrix must be positive definite", status);
}
return status;
}
int
gsl_linalg_solve_symm_tridiag(
const gsl_vector * diag,
const gsl_vector * offdiag,
const gsl_vector * rhs,
gsl_vector * solution)
{
if(diag->size != rhs->size)
{
GSL_ERROR ("size of diag must match rhs", GSL_EBADLEN);
}
else if (offdiag->size != rhs->size-1)
{
GSL_ERROR ("size of offdiag must match rhs-1", GSL_EBADLEN);
}
else if (solution->size != rhs->size)
{
GSL_ERROR ("size of solution must match rhs", GSL_EBADLEN);
}
else
{
return solve_tridiag(diag->data, diag->stride,
offdiag->data, offdiag->stride,
rhs->data, rhs->stride,
solution->data, solution->stride,
diag->size);
}
}
int
gsl_linalg_solve_tridiag(
const gsl_vector * diag,
const gsl_vector * abovediag,
const gsl_vector * belowdiag,
const gsl_vector * rhs,
gsl_vector * solution)
{
if(diag->size != rhs->size)
{
GSL_ERROR ("size of diag must match rhs", GSL_EBADLEN);
}
else if (abovediag->size != rhs->size-1)
{
GSL_ERROR ("size of abovediag must match rhs-1", GSL_EBADLEN);
}
else if (belowdiag->size != rhs->size-1)
{
GSL_ERROR ("size of belowdiag must match rhs-1", GSL_EBADLEN);
}
else if (solution->size != rhs->size)
{
GSL_ERROR ("size of solution must match rhs", GSL_EBADLEN);
}
else
{
return solve_tridiag_nonsym(diag->data, diag->stride,
abovediag->data, abovediag->stride,
belowdiag->data, belowdiag->stride,
rhs->data, rhs->stride,
solution->data, solution->stride,
diag->size);
}
}
int
gsl_linalg_solve_symm_cyc_tridiag(
const gsl_vector * diag,
const gsl_vector * offdiag,
const gsl_vector * rhs,
gsl_vector * solution)
{
if(diag->size != rhs->size)
{
GSL_ERROR ("size of diag must match rhs", GSL_EBADLEN);
}
else if (offdiag->size != rhs->size)
{
GSL_ERROR ("size of offdiag must match rhs", GSL_EBADLEN);
}
else if (solution->size != rhs->size)
{
GSL_ERROR ("size of solution must match rhs", GSL_EBADLEN);
}
else if (diag->size < 3)
{
GSL_ERROR ("size of cyclic system must be 3 or more", GSL_EBADLEN);
}
else
{
return solve_cyc_tridiag(diag->data, diag->stride,
offdiag->data, offdiag->stride,
rhs->data, rhs->stride,
solution->data, solution->stride,
diag->size);
}
}
int
gsl_linalg_solve_cyc_tridiag(
const gsl_vector * diag,
const gsl_vector * abovediag,
const gsl_vector * belowdiag,
const gsl_vector * rhs,
gsl_vector * solution)
{
if(diag->size != rhs->size)
{
GSL_ERROR ("size of diag must match rhs", GSL_EBADLEN);
}
else if (abovediag->size != rhs->size)
{
GSL_ERROR ("size of abovediag must match rhs", GSL_EBADLEN);
}
else if (belowdiag->size != rhs->size)
{
GSL_ERROR ("size of belowdiag must match rhs", GSL_EBADLEN);
}
else if (solution->size != rhs->size)
{
GSL_ERROR ("size of solution must match rhs", GSL_EBADLEN);
}
else if (diag->size < 3)
{
GSL_ERROR ("size of cyclic system must be 3 or more", GSL_EBADLEN);
}
else
{
return solve_cyc_tridiag_nonsym(diag->data, diag->stride,
abovediag->data, abovediag->stride,
belowdiag->data, belowdiag->stride,
rhs->data, rhs->stride,
solution->data, solution->stride,
diag->size);
}
}