Blame internal/ceres/schur_eliminator.h

Packit ea1746
// Ceres Solver - A fast non-linear least squares minimizer
Packit ea1746
// Copyright 2015 Google Inc. All rights reserved.
Packit ea1746
// http://ceres-solver.org/
Packit ea1746
//
Packit ea1746
// Redistribution and use in source and binary forms, with or without
Packit ea1746
// modification, are permitted provided that the following conditions are met:
Packit ea1746
//
Packit ea1746
// * Redistributions of source code must retain the above copyright notice,
Packit ea1746
//   this list of conditions and the following disclaimer.
Packit ea1746
// * Redistributions in binary form must reproduce the above copyright notice,
Packit ea1746
//   this list of conditions and the following disclaimer in the documentation
Packit ea1746
//   and/or other materials provided with the distribution.
Packit ea1746
// * Neither the name of Google Inc. nor the names of its contributors may be
Packit ea1746
//   used to endorse or promote products derived from this software without
Packit ea1746
//   specific prior written permission.
Packit ea1746
//
Packit ea1746
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
Packit ea1746
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
Packit ea1746
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
Packit ea1746
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
Packit ea1746
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
Packit ea1746
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
Packit ea1746
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
Packit ea1746
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
Packit ea1746
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
Packit ea1746
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
Packit ea1746
// POSSIBILITY OF SUCH DAMAGE.
Packit ea1746
//
Packit ea1746
// Author: sameeragarwal@google.com (Sameer Agarwal)
Packit ea1746
Packit ea1746
#ifndef CERES_INTERNAL_SCHUR_ELIMINATOR_H_
Packit ea1746
#define CERES_INTERNAL_SCHUR_ELIMINATOR_H_
Packit ea1746
Packit ea1746
#include <map>
Packit ea1746
#include <vector>
Packit ea1746
#include "ceres/mutex.h"
Packit ea1746
#include "ceres/block_random_access_matrix.h"
Packit ea1746
#include "ceres/block_sparse_matrix.h"
Packit ea1746
#include "ceres/block_structure.h"
Packit ea1746
#include "ceres/linear_solver.h"
Packit ea1746
#include "ceres/internal/eigen.h"
Packit ea1746
#include "ceres/internal/scoped_ptr.h"
Packit ea1746
Packit ea1746
namespace ceres {
Packit ea1746
namespace internal {
Packit ea1746
Packit ea1746
// Classes implementing the SchurEliminatorBase interface implement
Packit ea1746
// variable elimination for linear least squares problems. Assuming
Packit ea1746
// that the input linear system Ax = b can be partitioned into
Packit ea1746
//
Packit ea1746
//  E y + F z = b
Packit ea1746
//
Packit ea1746
// Where x = [y;z] is a partition of the variables.  The partitioning
Packit ea1746
// of the variables is such that, E'E is a block diagonal matrix. Or
Packit ea1746
// in other words, the parameter blocks in E form an independent set
Packit ea1746
// of the graph implied by the block matrix A'A. Then, this
Packit ea1746
// class provides the functionality to compute the Schur complement
Packit ea1746
// system
Packit ea1746
//
Packit ea1746
//   S z = r
Packit ea1746
//
Packit ea1746
// where
Packit ea1746
//
Packit ea1746
//   S = F'F - F'E (E'E)^(-1) E'F and r = F'b - F'E (E'E)^(-1) E'b
Packit ea1746
//
Packit ea1746
// This is the Eliminate operation, i.e., construct the linear system
Packit ea1746
// obtained by eliminating the variables in E.
Packit ea1746
//
Packit ea1746
// The eliminator also provides the reverse functionality, i.e. given
Packit ea1746
// values for z it can back substitute for the values of y, by solving the
Packit ea1746
// linear system
Packit ea1746
//
Packit ea1746
//  Ey = b - F z
Packit ea1746
//
Packit ea1746
// which is done by observing that
Packit ea1746
//
Packit ea1746
//  y = (E'E)^(-1) [E'b - E'F z]
Packit ea1746
//
Packit ea1746
// The eliminator has a number of requirements.
Packit ea1746
//
Packit ea1746
// The rows of A are ordered so that for every variable block in y,
Packit ea1746
// all the rows containing that variable block occur as a vertically
Packit ea1746
// contiguous block, i.e., the matrix A looks like
Packit ea1746
//
Packit ea1746
//              E                 F                   chunk
Packit ea1746
//  A = [ y1   0   0   0 |  z1    0    0   0    z5]     1
Packit ea1746
//      [ y1   0   0   0 |  z1   z2    0   0     0]     1
Packit ea1746
//      [  0  y2   0   0 |   0    0   z3   0     0]     2
Packit ea1746
//      [  0   0  y3   0 |  z1   z2   z3  z4    z5]     3
Packit ea1746
//      [  0   0  y3   0 |  z1    0    0   0    z5]     3
Packit ea1746
//      [  0   0   0  y4 |   0    0    0   0    z5]     4
Packit ea1746
//      [  0   0   0  y4 |   0   z2    0   0     0]     4
Packit ea1746
//      [  0   0   0  y4 |   0    0    0   0     0]     4
Packit ea1746
//      [  0   0   0   0 |  z1    0    0   0     0] non chunk blocks
Packit ea1746
//      [  0   0   0   0 |   0    0   z3  z4    z5] non chunk blocks
Packit ea1746
//
Packit ea1746
// This structure should be reflected in the corresponding
Packit ea1746
// CompressedRowBlockStructure object associated with A. The linear
Packit ea1746
// system Ax = b should either be well posed or the array D below
Packit ea1746
// should be non-null and the diagonal matrix corresponding to it
Packit ea1746
// should be non-singular. For simplicity of exposition only the case
Packit ea1746
// with a null D is described.
Packit ea1746
//
Packit ea1746
// The usual way to do the elimination is as follows. Starting with
Packit ea1746
//
Packit ea1746
//  E y + F z = b
Packit ea1746
//
Packit ea1746
// we can form the normal equations,
Packit ea1746
//
Packit ea1746
//  E'E y + E'F z = E'b
Packit ea1746
//  F'E y + F'F z = F'b
Packit ea1746
//
Packit ea1746
// multiplying both sides of the first equation by (E'E)^(-1) and then
Packit ea1746
// by F'E we get
Packit ea1746
//
Packit ea1746
//  F'E y + F'E (E'E)^(-1) E'F z =  F'E (E'E)^(-1) E'b
Packit ea1746
//  F'E y +                F'F z =  F'b
Packit ea1746
//
Packit ea1746
// now subtracting the two equations we get
Packit ea1746
//
Packit ea1746
// [F'F - F'E (E'E)^(-1) E'F] z = F'b - F'E (E'E)^(-1) E'b
Packit ea1746
//
Packit ea1746
// Instead of forming the normal equations and operating on them as
Packit ea1746
// general sparse matrices, the algorithm here deals with one
Packit ea1746
// parameter block in y at a time. The rows corresponding to a single
Packit ea1746
// parameter block yi are known as a chunk, and the algorithm operates
Packit ea1746
// on one chunk at a time. The mathematics remains the same since the
Packit ea1746
// reduced linear system can be shown to be the sum of the reduced
Packit ea1746
// linear systems for each chunk. This can be seen by observing two
Packit ea1746
// things.
Packit ea1746
//
Packit ea1746
//  1. E'E is a block diagonal matrix.
Packit ea1746
//
Packit ea1746
//  2. When E'F is computed, only the terms within a single chunk
Packit ea1746
//  interact, i.e., for y1 column blocks when transposed and multiplied
Packit ea1746
//  with F, the only non-zero contribution comes from the blocks in
Packit ea1746
//  chunk1.
Packit ea1746
//
Packit ea1746
// Thus, the reduced linear system
Packit ea1746
//
Packit ea1746
//  F'F - F'E (E'E)^(-1) E'F
Packit ea1746
//
Packit ea1746
// can be re-written as
Packit ea1746
//
Packit ea1746
//  sum_k F_k'F_k - F_k'E_k (E_k'E_k)^(-1) E_k'F_k
Packit ea1746
//
Packit ea1746
// Where the sum is over chunks and E_k'E_k is a dense matrix of size y1
Packit ea1746
// x y1.
Packit ea1746
//
Packit ea1746
// Advanced usage. Until now it has been assumed that the user would
Packit ea1746
// be interested in all of the Schur Complement S. However, it is also
Packit ea1746
// possible to use this eliminator to obtain an arbitrary submatrix of
Packit ea1746
// the full Schur complement. When the eliminator is generating the
Packit ea1746
// blocks of S, it asks the BlockRandomAccessMatrix instance passed to
Packit ea1746
// it if it has storage for that block. If it does, the eliminator
Packit ea1746
// computes/updates it, if not it is skipped. This is useful when one
Packit ea1746
// is interested in constructing a preconditioner based on the Schur
Packit ea1746
// Complement, e.g., computing the block diagonal of S so that it can
Packit ea1746
// be used as a preconditioner for an Iterative Substructuring based
Packit ea1746
// solver [See Agarwal et al, Bundle Adjustment in the Large, ECCV
Packit ea1746
// 2008 for an example of such use].
Packit ea1746
//
Packit ea1746
// Example usage: Please see schur_complement_solver.cc
Packit ea1746
class SchurEliminatorBase {
Packit ea1746
 public:
Packit ea1746
  virtual ~SchurEliminatorBase() {}
Packit ea1746
Packit ea1746
  // Initialize the eliminator. It is the user's responsibilty to call
Packit ea1746
  // this function before calling Eliminate or BackSubstitute. It is
Packit ea1746
  // also the caller's responsibilty to ensure that the
Packit ea1746
  // CompressedRowBlockStructure object passed to this method is the
Packit ea1746
  // same one (or is equivalent to) the one associated with the
Packit ea1746
  // BlockSparseMatrix objects below.
Packit ea1746
  //
Packit ea1746
  // assume_full_rank_ete controls how the eliminator inverts with the
Packit ea1746
  // diagonal blocks corresponding to e blocks in A'A. If
Packit ea1746
  // assume_full_rank_ete is true, then a Cholesky factorization is
Packit ea1746
  // used to compute the inverse, otherwise a singular value
Packit ea1746
  // decomposition is used to compute the pseudo inverse.
Packit ea1746
  virtual void Init(int num_eliminate_blocks,
Packit ea1746
                    bool assume_full_rank_ete,
Packit ea1746
                    const CompressedRowBlockStructure* bs) = 0;
Packit ea1746
Packit ea1746
  // Compute the Schur complement system from the augmented linear
Packit ea1746
  // least squares problem [A;D] x = [b;0]. The left hand side and the
Packit ea1746
  // right hand side of the reduced linear system are returned in lhs
Packit ea1746
  // and rhs respectively.
Packit ea1746
  //
Packit ea1746
  // It is the caller's responsibility to construct and initialize
Packit ea1746
  // lhs. Depending upon the structure of the lhs object passed here,
Packit ea1746
  // the full or a submatrix of the Schur complement will be computed.
Packit ea1746
  //
Packit ea1746
  // Since the Schur complement is a symmetric matrix, only the upper
Packit ea1746
  // triangular part of the Schur complement is computed.
Packit ea1746
  virtual void Eliminate(const BlockSparseMatrix* A,
Packit ea1746
                         const double* b,
Packit ea1746
                         const double* D,
Packit ea1746
                         BlockRandomAccessMatrix* lhs,
Packit ea1746
                         double* rhs) = 0;
Packit ea1746
Packit ea1746
  // Given values for the variables z in the F block of A, solve for
Packit ea1746
  // the optimal values of the variables y corresponding to the E
Packit ea1746
  // block in A.
Packit ea1746
  virtual void BackSubstitute(const BlockSparseMatrix* A,
Packit ea1746
                              const double* b,
Packit ea1746
                              const double* D,
Packit ea1746
                              const double* z,
Packit ea1746
                              double* y) = 0;
Packit ea1746
  // Factory
Packit ea1746
  static SchurEliminatorBase* Create(const LinearSolver::Options& options);
Packit ea1746
};
Packit ea1746
Packit ea1746
// Templated implementation of the SchurEliminatorBase interface. The
Packit ea1746
// templating is on the sizes of the row, e and f blocks sizes in the
Packit ea1746
// input matrix. In many problems, the sizes of one or more of these
Packit ea1746
// blocks are constant, in that case, it's worth passing these
Packit ea1746
// parameters as template arguments so that they are visible to the
Packit ea1746
// compiler and can be used for compile time optimization of the low
Packit ea1746
// level linear algebra routines.
Packit ea1746
//
Packit ea1746
// This implementation is multithreaded using OpenMP. The level of
Packit ea1746
// parallelism is controlled by LinearSolver::Options::num_threads.
Packit ea1746
template 
Packit ea1746
          int kEBlockSize = Eigen::Dynamic,
Packit ea1746
          int kFBlockSize = Eigen::Dynamic >
Packit ea1746
class SchurEliminator : public SchurEliminatorBase {
Packit ea1746
 public:
Packit ea1746
  explicit SchurEliminator(const LinearSolver::Options& options)
Packit ea1746
      : num_threads_(options.num_threads) {
Packit ea1746
  }
Packit ea1746
Packit ea1746
  // SchurEliminatorBase Interface
Packit ea1746
  virtual ~SchurEliminator();
Packit ea1746
  virtual void Init(int num_eliminate_blocks,
Packit ea1746
                    bool assume_full_rank_ete,
Packit ea1746
                    const CompressedRowBlockStructure* bs);
Packit ea1746
  virtual void Eliminate(const BlockSparseMatrix* A,
Packit ea1746
                         const double* b,
Packit ea1746
                         const double* D,
Packit ea1746
                         BlockRandomAccessMatrix* lhs,
Packit ea1746
                         double* rhs);
Packit ea1746
  virtual void BackSubstitute(const BlockSparseMatrix* A,
Packit ea1746
                              const double* b,
Packit ea1746
                              const double* D,
Packit ea1746
                              const double* z,
Packit ea1746
                              double* y);
Packit ea1746
Packit ea1746
 private:
Packit ea1746
  // Chunk objects store combinatorial information needed to
Packit ea1746
  // efficiently eliminate a whole chunk out of the least squares
Packit ea1746
  // problem. Consider the first chunk in the example matrix above.
Packit ea1746
  //
Packit ea1746
  //      [ y1   0   0   0 |  z1    0    0   0    z5]
Packit ea1746
  //      [ y1   0   0   0 |  z1   z2    0   0     0]
Packit ea1746
  //
Packit ea1746
  // One of the intermediate quantities that needs to be calculated is
Packit ea1746
  // for each row the product of the y block transposed with the
Packit ea1746
  // non-zero z block, and the sum of these blocks across rows. A
Packit ea1746
  // temporary array "buffer_" is used for computing and storing them
Packit ea1746
  // and the buffer_layout maps the indices of the z-blocks to
Packit ea1746
  // position in the buffer_ array.  The size of the chunk is the
Packit ea1746
  // number of row blocks/residual blocks for the particular y block
Packit ea1746
  // being considered.
Packit ea1746
  //
Packit ea1746
  // For the example chunk shown above,
Packit ea1746
  //
Packit ea1746
  // size = 2
Packit ea1746
  //
Packit ea1746
  // The entries of buffer_layout will be filled in the following order.
Packit ea1746
  //
Packit ea1746
  // buffer_layout[z1] = 0
Packit ea1746
  // buffer_layout[z5] = y1 * z1
Packit ea1746
  // buffer_layout[z2] = y1 * z1 + y1 * z5
Packit ea1746
  typedef std::map<int, int> BufferLayoutType;
Packit ea1746
  struct Chunk {
Packit ea1746
    Chunk() : size(0) {}
Packit ea1746
    int size;
Packit ea1746
    int start;
Packit ea1746
    BufferLayoutType buffer_layout;
Packit ea1746
  };
Packit ea1746
Packit ea1746
  void ChunkDiagonalBlockAndGradient(
Packit ea1746
      const Chunk& chunk,
Packit ea1746
      const BlockSparseMatrix* A,
Packit ea1746
      const double* b,
Packit ea1746
      int row_block_counter,
Packit ea1746
      typename EigenTypes<kEBlockSize, kEBlockSize>::Matrix* eet,
Packit ea1746
      double* g,
Packit ea1746
      double* buffer,
Packit ea1746
      BlockRandomAccessMatrix* lhs);
Packit ea1746
Packit ea1746
  void UpdateRhs(const Chunk& chunk,
Packit ea1746
                 const BlockSparseMatrix* A,
Packit ea1746
                 const double* b,
Packit ea1746
                 int row_block_counter,
Packit ea1746
                 const double* inverse_ete_g,
Packit ea1746
                 double* rhs);
Packit ea1746
Packit ea1746
  void ChunkOuterProduct(const CompressedRowBlockStructure* bs,
Packit ea1746
                         const Matrix& inverse_eet,
Packit ea1746
                         const double* buffer,
Packit ea1746
                         const BufferLayoutType& buffer_layout,
Packit ea1746
                         BlockRandomAccessMatrix* lhs);
Packit ea1746
  void EBlockRowOuterProduct(const BlockSparseMatrix* A,
Packit ea1746
                             int row_block_index,
Packit ea1746
                             BlockRandomAccessMatrix* lhs);
Packit ea1746
Packit ea1746
Packit ea1746
  void NoEBlockRowsUpdate(const BlockSparseMatrix* A,
Packit ea1746
                             const double* b,
Packit ea1746
                             int row_block_counter,
Packit ea1746
                             BlockRandomAccessMatrix* lhs,
Packit ea1746
                             double* rhs);
Packit ea1746
Packit ea1746
  void NoEBlockRowOuterProduct(const BlockSparseMatrix* A,
Packit ea1746
                               int row_block_index,
Packit ea1746
                               BlockRandomAccessMatrix* lhs);
Packit ea1746
Packit ea1746
  int num_threads_;
Packit ea1746
  int num_eliminate_blocks_;
Packit ea1746
  bool assume_full_rank_ete_;
Packit ea1746
Packit ea1746
  // Block layout of the columns of the reduced linear system. Since
Packit ea1746
  // the f blocks can be of varying size, this vector stores the
Packit ea1746
  // position of each f block in the row/col of the reduced linear
Packit ea1746
  // system. Thus lhs_row_layout_[i] is the row/col position of the
Packit ea1746
  // i^th f block.
Packit ea1746
  std::vector<int> lhs_row_layout_;
Packit ea1746
Packit ea1746
  // Combinatorial structure of the chunks in A. For more information
Packit ea1746
  // see the documentation of the Chunk object above.
Packit ea1746
  std::vector<Chunk> chunks_;
Packit ea1746
Packit ea1746
  // TODO(sameeragarwal): The following two arrays contain per-thread
Packit ea1746
  // storage. They should be refactored into a per thread struct.
Packit ea1746
Packit ea1746
  // Buffer to store the products of the y and z blocks generated
Packit ea1746
  // during the elimination phase. buffer_ is of size num_threads *
Packit ea1746
  // buffer_size_. Each thread accesses the chunk
Packit ea1746
  //
Packit ea1746
  //   [thread_id * buffer_size_ , (thread_id + 1) * buffer_size_]
Packit ea1746
  //
Packit ea1746
  scoped_array<double> buffer_;
Packit ea1746
Packit ea1746
  // Buffer to store per thread matrix matrix products used by
Packit ea1746
  // ChunkOuterProduct. Like buffer_ it is of size num_threads *
Packit ea1746
  // buffer_size_. Each thread accesses the chunk
Packit ea1746
  //
Packit ea1746
  //   [thread_id * buffer_size_ , (thread_id + 1) * buffer_size_ -1]
Packit ea1746
  //
Packit ea1746
  scoped_array<double> chunk_outer_product_buffer_;
Packit ea1746
Packit ea1746
  int buffer_size_;
Packit ea1746
  int uneliminated_row_begins_;
Packit ea1746
Packit ea1746
  // Locks for the blocks in the right hand side of the reduced linear
Packit ea1746
  // system.
Packit ea1746
  std::vector<Mutex*> rhs_locks_;
Packit ea1746
};
Packit ea1746
Packit ea1746
}  // namespace internal
Packit ea1746
}  // namespace ceres
Packit ea1746
Packit ea1746
#endif  // CERES_INTERNAL_SCHUR_ELIMINATOR_H_