|
Packit |
ea1746 |
// Ceres Solver - A fast non-linear least squares minimizer
|
|
Packit |
ea1746 |
// Copyright 2017 Google Inc. All rights reserved.
|
|
Packit |
ea1746 |
// http://ceres-solver.org/
|
|
Packit |
ea1746 |
//
|
|
Packit |
ea1746 |
// Redistribution and use in source and binary forms, with or without
|
|
Packit |
ea1746 |
// modification, are permitted provided that the following conditions are met:
|
|
Packit |
ea1746 |
//
|
|
Packit |
ea1746 |
// * Redistributions of source code must retain the above copyright notice,
|
|
Packit |
ea1746 |
// this list of conditions and the following disclaimer.
|
|
Packit |
ea1746 |
// * Redistributions in binary form must reproduce the above copyright notice,
|
|
Packit |
ea1746 |
// this list of conditions and the following disclaimer in the documentation
|
|
Packit |
ea1746 |
// and/or other materials provided with the distribution.
|
|
Packit |
ea1746 |
// * Neither the name of Google Inc. nor the names of its contributors may be
|
|
Packit |
ea1746 |
// used to endorse or promote products derived from this software without
|
|
Packit |
ea1746 |
// specific prior written permission.
|
|
Packit |
ea1746 |
//
|
|
Packit |
ea1746 |
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
|
Packit |
ea1746 |
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
Packit |
ea1746 |
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
Packit |
ea1746 |
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
|
Packit |
ea1746 |
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
|
Packit |
ea1746 |
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
|
Packit |
ea1746 |
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
|
Packit |
ea1746 |
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
|
Packit |
ea1746 |
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
|
Packit |
ea1746 |
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
|
Packit |
ea1746 |
// POSSIBILITY OF SUCH DAMAGE.
|
|
Packit |
ea1746 |
//
|
|
Packit |
ea1746 |
// Author: sameeragarwal@google.com (Sameer Agarwal)
|
|
Packit |
ea1746 |
|
|
Packit |
ea1746 |
#include "ceres/inner_product_computer.h"
|
|
Packit |
ea1746 |
|
|
Packit |
ea1746 |
#include <algorithm>
|
|
Packit |
ea1746 |
|
|
Packit |
ea1746 |
namespace ceres {
|
|
Packit |
ea1746 |
namespace internal {
|
|
Packit |
ea1746 |
namespace {
|
|
Packit |
ea1746 |
|
|
Packit |
ea1746 |
// Compute the product (in MATLAB notation)
|
|
Packit |
ea1746 |
//
|
|
Packit |
ea1746 |
// c(0:a_cols, 0:b_cols) = a' * b
|
|
Packit |
ea1746 |
//
|
|
Packit |
ea1746 |
// Where:
|
|
Packit |
ea1746 |
//
|
|
Packit |
ea1746 |
// a is ab_rows x a_cols
|
|
Packit |
ea1746 |
// b is ab_rows x b_cols
|
|
Packit |
ea1746 |
// c is a_cos x c_col_stride
|
|
Packit |
ea1746 |
//
|
|
Packit |
ea1746 |
// a, b and c are row-major matrices.
|
|
Packit |
ea1746 |
//
|
|
Packit |
ea1746 |
// Performance note:
|
|
Packit |
ea1746 |
// ----------------
|
|
Packit |
ea1746 |
//
|
|
Packit |
ea1746 |
// Technically this function is a repeat of a similarly named function
|
|
Packit |
ea1746 |
// in small_blas.h but its performance is considerably better than
|
|
Packit |
ea1746 |
// that of the version there due to the way it accesses memory.
|
|
Packit |
ea1746 |
//
|
|
Packit |
ea1746 |
// TODO(sameeragarwal): Measure and tune the performance of
|
|
Packit |
ea1746 |
// small_blas.h based on the insights gained here.
|
|
Packit |
ea1746 |
EIGEN_STRONG_INLINE void MatrixTransposeMatrixMultiply(const int ab_rows,
|
|
Packit |
ea1746 |
const double* a,
|
|
Packit |
ea1746 |
const int a_cols,
|
|
Packit |
ea1746 |
const double* b,
|
|
Packit |
ea1746 |
const int b_cols,
|
|
Packit |
ea1746 |
double* c,
|
|
Packit |
ea1746 |
int c_cols) {
|
|
Packit |
ea1746 |
// Compute c as the sum of ab_rows, rank 1 outer products of the
|
|
Packit |
ea1746 |
// corresponding rows of a and b.
|
|
Packit |
ea1746 |
for (int r = 0; r < ab_rows; ++r) {
|
|
Packit |
ea1746 |
double* c_r = c;
|
|
Packit |
ea1746 |
for (int i1 = 0; i1 < a_cols; ++i1) {
|
|
Packit |
ea1746 |
const double a_v = a[i1];
|
|
Packit |
ea1746 |
for (int i2 = 0; i2 < b_cols; ++i2) {
|
|
Packit |
ea1746 |
c_r[i2] += a_v * b[i2];
|
|
Packit |
ea1746 |
}
|
|
Packit |
ea1746 |
c_r += c_cols;
|
|
Packit |
ea1746 |
}
|
|
Packit |
ea1746 |
a += a_cols;
|
|
Packit |
ea1746 |
b += b_cols;
|
|
Packit |
ea1746 |
}
|
|
Packit |
ea1746 |
}
|
|
Packit |
ea1746 |
|
|
Packit |
ea1746 |
} // namespace
|
|
Packit |
ea1746 |
|
|
Packit |
ea1746 |
// Create the CompressedRowSparseMatrix matrix that will contain the
|
|
Packit |
ea1746 |
// inner product.
|
|
Packit |
ea1746 |
//
|
|
Packit |
ea1746 |
// storage_type controls whether the result matrix contains the upper
|
|
Packit |
ea1746 |
// or the lower triangular part of the product.
|
|
Packit |
ea1746 |
//
|
|
Packit |
ea1746 |
// num_nonzeros is the number of non-zeros in the result matrix.
|
|
Packit |
ea1746 |
CompressedRowSparseMatrix* InnerProductComputer::CreateResultMatrix(
|
|
Packit |
ea1746 |
const CompressedRowSparseMatrix::StorageType storage_type,
|
|
Packit |
ea1746 |
const int num_nonzeros) {
|
|
Packit |
ea1746 |
CompressedRowSparseMatrix* matrix =
|
|
Packit |
ea1746 |
new CompressedRowSparseMatrix(m_.num_cols(), m_.num_cols(), num_nonzeros);
|
|
Packit |
ea1746 |
matrix->set_storage_type(storage_type);
|
|
Packit |
ea1746 |
|
|
Packit |
ea1746 |
const CompressedRowBlockStructure* bs = m_.block_structure();
|
|
Packit |
ea1746 |
const std::vector<Block>& blocks = bs->cols;
|
|
Packit |
ea1746 |
matrix->mutable_row_blocks()->resize(blocks.size());
|
|
Packit |
ea1746 |
matrix->mutable_col_blocks()->resize(blocks.size());
|
|
Packit |
ea1746 |
for (int i = 0; i < blocks.size(); ++i) {
|
|
Packit |
ea1746 |
(*(matrix->mutable_row_blocks()))[i] = blocks[i].size;
|
|
Packit |
ea1746 |
(*(matrix->mutable_col_blocks()))[i] = blocks[i].size;
|
|
Packit |
ea1746 |
}
|
|
Packit |
ea1746 |
|
|
Packit |
ea1746 |
return matrix;
|
|
Packit |
ea1746 |
}
|
|
Packit |
ea1746 |
|
|
Packit |
ea1746 |
// Given the set of product terms in the inner product, return the
|
|
Packit |
ea1746 |
// total number of non-zeros in the result and for each row block of
|
|
Packit |
ea1746 |
// the result matrix, compute the number of non-zeros in any one row
|
|
Packit |
ea1746 |
// of the row block.
|
|
Packit |
ea1746 |
int InnerProductComputer::ComputeNonzeros(
|
|
Packit |
ea1746 |
const std::vector<InnerProductComputer::ProductTerm>& product_terms,
|
|
Packit |
ea1746 |
std::vector<int>* row_nnz) {
|
|
Packit |
ea1746 |
const CompressedRowBlockStructure* bs = m_.block_structure();
|
|
Packit |
ea1746 |
const std::vector<Block>& blocks = bs->cols;
|
|
Packit |
ea1746 |
|
|
Packit |
ea1746 |
row_nnz->resize(blocks.size());
|
|
Packit |
ea1746 |
std::fill(row_nnz->begin(), row_nnz->end(), 0);
|
|
Packit |
ea1746 |
|
|
Packit |
ea1746 |
// First product term.
|
|
Packit |
ea1746 |
(*row_nnz)[product_terms[0].row] = blocks[product_terms[0].col].size;
|
|
Packit |
ea1746 |
int num_nonzeros =
|
|
Packit |
ea1746 |
blocks[product_terms[0].row].size * blocks[product_terms[0].col].size;
|
|
Packit |
ea1746 |
|
|
Packit |
ea1746 |
// Remaining product terms.
|
|
Packit |
ea1746 |
for (int i = 1; i < product_terms.size(); ++i) {
|
|
Packit |
ea1746 |
const ProductTerm& previous = product_terms[i - 1];
|
|
Packit |
ea1746 |
const ProductTerm& current = product_terms[i];
|
|
Packit |
ea1746 |
|
|
Packit |
ea1746 |
// Each (row, col) block counts only once.
|
|
Packit |
ea1746 |
// This check depends on product sorted on (row, col).
|
|
Packit |
ea1746 |
if (current.row != previous.row || current.col != previous.col) {
|
|
Packit |
ea1746 |
(*row_nnz)[current.row] += blocks[current.col].size;
|
|
Packit |
ea1746 |
num_nonzeros += blocks[current.row].size * blocks[current.col].size;
|
|
Packit |
ea1746 |
}
|
|
Packit |
ea1746 |
}
|
|
Packit |
ea1746 |
|
|
Packit |
ea1746 |
return num_nonzeros;
|
|
Packit |
ea1746 |
}
|
|
Packit |
ea1746 |
|
|
Packit |
ea1746 |
InnerProductComputer::InnerProductComputer(const BlockSparseMatrix& m,
|
|
Packit |
ea1746 |
const int start_row_block,
|
|
Packit |
ea1746 |
const int end_row_block)
|
|
Packit |
ea1746 |
: m_(m), start_row_block_(start_row_block), end_row_block_(end_row_block) {}
|
|
Packit |
ea1746 |
|
|
Packit |
ea1746 |
// Compute the sparsity structure of the product m.transpose() * m
|
|
Packit |
ea1746 |
// and create a CompressedRowSparseMatrix corresponding to it.
|
|
Packit |
ea1746 |
//
|
|
Packit |
ea1746 |
// Also compute the "program" vector, which for every term in the
|
|
Packit |
ea1746 |
// block outer product provides the information for the entry in the
|
|
Packit |
ea1746 |
// values array of the result matrix where it should be accumulated.
|
|
Packit |
ea1746 |
//
|
|
Packit |
ea1746 |
// Since the entries of the program are the same for rows with the
|
|
Packit |
ea1746 |
// same sparsity structure, the program only stores the result for one
|
|
Packit |
ea1746 |
// row per row block. The Compute function reuses this information for
|
|
Packit |
ea1746 |
// each row in the row block.
|
|
Packit |
ea1746 |
//
|
|
Packit |
ea1746 |
// product_storage_type controls the form of the output matrix. It
|
|
Packit |
ea1746 |
// can be LOWER_TRIANGULAR or UPPER_TRIANGULAR.
|
|
Packit |
ea1746 |
InnerProductComputer* InnerProductComputer::Create(
|
|
Packit |
ea1746 |
const BlockSparseMatrix& m,
|
|
Packit |
ea1746 |
CompressedRowSparseMatrix::StorageType product_storage_type) {
|
|
Packit |
ea1746 |
return InnerProductComputer::Create(
|
|
Packit |
ea1746 |
m, 0, m.block_structure()->rows.size(), product_storage_type);
|
|
Packit |
ea1746 |
}
|
|
Packit |
ea1746 |
|
|
Packit |
ea1746 |
InnerProductComputer* InnerProductComputer::Create(
|
|
Packit |
ea1746 |
const BlockSparseMatrix& m,
|
|
Packit |
ea1746 |
const int start_row_block,
|
|
Packit |
ea1746 |
const int end_row_block,
|
|
Packit |
ea1746 |
CompressedRowSparseMatrix::StorageType product_storage_type) {
|
|
Packit |
ea1746 |
CHECK(product_storage_type == CompressedRowSparseMatrix::LOWER_TRIANGULAR ||
|
|
Packit |
ea1746 |
product_storage_type == CompressedRowSparseMatrix::UPPER_TRIANGULAR);
|
|
Packit |
ea1746 |
CHECK_GT(m.num_nonzeros(), 0)
|
|
Packit |
ea1746 |
<< "Congratulations, you found a bug in Ceres. Please report it.";
|
|
Packit |
ea1746 |
InnerProductComputer* inner_product_computer =
|
|
Packit |
ea1746 |
new InnerProductComputer(m, start_row_block, end_row_block);
|
|
Packit |
ea1746 |
inner_product_computer->Init(product_storage_type);
|
|
Packit |
ea1746 |
return inner_product_computer;
|
|
Packit |
ea1746 |
}
|
|
Packit |
ea1746 |
|
|
Packit |
ea1746 |
void InnerProductComputer::Init(
|
|
Packit |
ea1746 |
const CompressedRowSparseMatrix::StorageType product_storage_type) {
|
|
Packit |
ea1746 |
std::vector<InnerProductComputer::ProductTerm> product_terms;
|
|
Packit |
ea1746 |
const CompressedRowBlockStructure* bs = m_.block_structure();
|
|
Packit |
ea1746 |
|
|
Packit |
ea1746 |
// Give input matrix m in Block Sparse format
|
|
Packit |
ea1746 |
// (row_block, col_block)
|
|
Packit |
ea1746 |
// represent each block multiplication
|
|
Packit |
ea1746 |
// (row_block, col_block1)' X (row_block, col_block2)
|
|
Packit |
ea1746 |
// by its product term:
|
|
Packit |
ea1746 |
// (col_block1, col_block2, index)
|
|
Packit |
ea1746 |
for (int row_block = start_row_block_; row_block < end_row_block_;
|
|
Packit |
ea1746 |
++row_block) {
|
|
Packit |
ea1746 |
const CompressedRow& row = bs->rows[row_block];
|
|
Packit |
ea1746 |
for (int c1 = 0; c1 < row.cells.size(); ++c1) {
|
|
Packit |
ea1746 |
const Cell& cell1 = row.cells[c1];
|
|
Packit |
ea1746 |
int c2_begin, c2_end;
|
|
Packit |
ea1746 |
if (product_storage_type == CompressedRowSparseMatrix::LOWER_TRIANGULAR) {
|
|
Packit |
ea1746 |
c2_begin = 0;
|
|
Packit |
ea1746 |
c2_end = c1 + 1;
|
|
Packit |
ea1746 |
} else {
|
|
Packit |
ea1746 |
c2_begin = c1;
|
|
Packit |
ea1746 |
c2_end = row.cells.size();
|
|
Packit |
ea1746 |
}
|
|
Packit |
ea1746 |
|
|
Packit |
ea1746 |
for (int c2 = c2_begin; c2 < c2_end; ++c2) {
|
|
Packit |
ea1746 |
const Cell& cell2 = row.cells[c2];
|
|
Packit |
ea1746 |
product_terms.push_back(InnerProductComputer::ProductTerm(
|
|
Packit |
ea1746 |
cell1.block_id, cell2.block_id, product_terms.size()));
|
|
Packit |
ea1746 |
}
|
|
Packit |
ea1746 |
}
|
|
Packit |
ea1746 |
}
|
|
Packit |
ea1746 |
|
|
Packit |
ea1746 |
std::sort(product_terms.begin(), product_terms.end());
|
|
Packit |
ea1746 |
ComputeOffsetsAndCreateResultMatrix(product_storage_type, product_terms);
|
|
Packit |
ea1746 |
}
|
|
Packit |
ea1746 |
|
|
Packit |
ea1746 |
void InnerProductComputer::ComputeOffsetsAndCreateResultMatrix(
|
|
Packit |
ea1746 |
const CompressedRowSparseMatrix::StorageType product_storage_type,
|
|
Packit |
ea1746 |
const std::vector<InnerProductComputer::ProductTerm>& product_terms) {
|
|
Packit |
ea1746 |
const std::vector<Block>& col_blocks = m_.block_structure()->cols;
|
|
Packit |
ea1746 |
|
|
Packit |
ea1746 |
std::vector<int> row_block_nnz;
|
|
Packit |
ea1746 |
const int num_nonzeros = ComputeNonzeros(product_terms, &row_block_nnz);
|
|
Packit |
ea1746 |
|
|
Packit |
ea1746 |
result_.reset(CreateResultMatrix(product_storage_type, num_nonzeros));
|
|
Packit |
ea1746 |
|
|
Packit |
ea1746 |
// Populate the row non-zero counts in the result matrix.
|
|
Packit |
ea1746 |
int* crsm_rows = result_->mutable_rows();
|
|
Packit |
ea1746 |
crsm_rows[0] = 0;
|
|
Packit |
ea1746 |
for (int i = 0; i < col_blocks.size(); ++i) {
|
|
Packit |
ea1746 |
for (int j = 0; j < col_blocks[i].size; ++j, ++crsm_rows) {
|
|
Packit |
ea1746 |
*(crsm_rows + 1) = *crsm_rows + row_block_nnz[i];
|
|
Packit |
ea1746 |
}
|
|
Packit |
ea1746 |
}
|
|
Packit |
ea1746 |
|
|
Packit |
ea1746 |
// The following macro FILL_CRSM_COL_BLOCK is key to understanding
|
|
Packit |
ea1746 |
// how this class works.
|
|
Packit |
ea1746 |
//
|
|
Packit |
ea1746 |
// It does two things.
|
|
Packit |
ea1746 |
//
|
|
Packit |
ea1746 |
// Sets the value for the current term in the result_offsets_ array
|
|
Packit |
ea1746 |
// and populates the cols array of the result matrix.
|
|
Packit |
ea1746 |
//
|
|
Packit |
ea1746 |
// row_block and col_block as the names imply, refer to the row and
|
|
Packit |
ea1746 |
// column blocks of the current term.
|
|
Packit |
ea1746 |
//
|
|
Packit |
ea1746 |
// row_nnz is the number of nonzeros in the result_matrix at the
|
|
Packit |
ea1746 |
// beginning of the first row of row_block.
|
|
Packit |
ea1746 |
//
|
|
Packit |
ea1746 |
// col_nnz is the number of nonzeros in the first row of the row
|
|
Packit |
ea1746 |
// block that occur before the current column block, i.e. this is
|
|
Packit |
ea1746 |
// sum of the sizes of all the column blocks in this row block that
|
|
Packit |
ea1746 |
// came before this column block.
|
|
Packit |
ea1746 |
//
|
|
Packit |
ea1746 |
// Given these two numbers and the total number of nonzeros in this
|
|
Packit |
ea1746 |
// row (nnz_in_row), we can now populate the cols array as follows:
|
|
Packit |
ea1746 |
//
|
|
Packit |
ea1746 |
// nnz + j * nnz_in_row is the beginning of the j^th row.
|
|
Packit |
ea1746 |
//
|
|
Packit |
ea1746 |
// nnz + j * nnz_in_row + col_nnz is the beginning of the column
|
|
Packit |
ea1746 |
// block in the j^th row.
|
|
Packit |
ea1746 |
//
|
|
Packit |
ea1746 |
// nnz + j * nnz_in_row + col_nnz + k is then the j^th row and the
|
|
Packit |
ea1746 |
// k^th column of the product block, whose value is
|
|
Packit |
ea1746 |
//
|
|
Packit |
ea1746 |
// col_blocks[col_block].position + k, which is the column number of
|
|
Packit |
ea1746 |
// the k^th column of the current column block.
|
|
Packit |
ea1746 |
#define FILL_CRSM_COL_BLOCK \
|
|
Packit |
ea1746 |
const int row_block = current->row; \
|
|
Packit |
ea1746 |
const int col_block = current->col; \
|
|
Packit |
ea1746 |
const int nnz_in_row = row_block_nnz[row_block]; \
|
|
Packit |
ea1746 |
int* crsm_cols = result_->mutable_cols(); \
|
|
Packit |
ea1746 |
result_offsets_[current->index] = nnz + col_nnz; \
|
|
Packit |
ea1746 |
for (int j = 0; j < col_blocks[row_block].size; ++j) { \
|
|
Packit |
ea1746 |
for (int k = 0; k < col_blocks[col_block].size; ++k) { \
|
|
Packit |
ea1746 |
crsm_cols[nnz + j * nnz_in_row + col_nnz + k] = \
|
|
Packit |
ea1746 |
col_blocks[col_block].position + k; \
|
|
Packit |
ea1746 |
} \
|
|
Packit |
ea1746 |
}
|
|
Packit |
ea1746 |
|
|
Packit |
ea1746 |
result_offsets_.resize(product_terms.size());
|
|
Packit |
ea1746 |
int col_nnz = 0;
|
|
Packit |
ea1746 |
int nnz = 0;
|
|
Packit |
ea1746 |
|
|
Packit |
ea1746 |
// Process the first term.
|
|
Packit |
ea1746 |
const InnerProductComputer::ProductTerm* current = &product_terms[0];
|
|
Packit |
ea1746 |
FILL_CRSM_COL_BLOCK;
|
|
Packit |
ea1746 |
|
|
Packit |
ea1746 |
// Process the rest of the terms.
|
|
Packit |
ea1746 |
for (int i = 1; i < product_terms.size(); ++i) {
|
|
Packit |
ea1746 |
current = &product_terms[i];
|
|
Packit |
ea1746 |
const InnerProductComputer::ProductTerm* previous = &product_terms[i - 1];
|
|
Packit |
ea1746 |
|
|
Packit |
ea1746 |
// If the current term is the same as the previous term, then it
|
|
Packit |
ea1746 |
// stores its product at the same location as the previous term.
|
|
Packit |
ea1746 |
if (previous->row == current->row && previous->col == current->col) {
|
|
Packit |
ea1746 |
result_offsets_[current->index] = result_offsets_[previous->index];
|
|
Packit |
ea1746 |
continue;
|
|
Packit |
ea1746 |
}
|
|
Packit |
ea1746 |
|
|
Packit |
ea1746 |
if (previous->row == current->row) {
|
|
Packit |
ea1746 |
// if the current and previous terms are in the same row block,
|
|
Packit |
ea1746 |
// then they differ in the column block, in which case advance
|
|
Packit |
ea1746 |
// col_nnz by the column size of the prevous term.
|
|
Packit |
ea1746 |
col_nnz += col_blocks[previous->col].size;
|
|
Packit |
ea1746 |
} else {
|
|
Packit |
ea1746 |
// If we have moved to a new row-block , then col_nnz is zero,
|
|
Packit |
ea1746 |
// and nnz is set to the beginning of the row block.
|
|
Packit |
ea1746 |
col_nnz = 0;
|
|
Packit |
ea1746 |
nnz += row_block_nnz[previous->row] * col_blocks[previous->row].size;
|
|
Packit |
ea1746 |
}
|
|
Packit |
ea1746 |
|
|
Packit |
ea1746 |
FILL_CRSM_COL_BLOCK;
|
|
Packit |
ea1746 |
}
|
|
Packit |
ea1746 |
}
|
|
Packit |
ea1746 |
|
|
Packit |
ea1746 |
// Use the results_offsets_ array to numerically compute the product
|
|
Packit |
ea1746 |
// m' * m and store it in result_.
|
|
Packit |
ea1746 |
//
|
|
Packit |
ea1746 |
// TODO(sameeragarwal): Multithreading support.
|
|
Packit |
ea1746 |
void InnerProductComputer::Compute() {
|
|
Packit |
ea1746 |
const double* m_values = m_.values();
|
|
Packit |
ea1746 |
const CompressedRowBlockStructure* bs = m_.block_structure();
|
|
Packit |
ea1746 |
|
|
Packit |
ea1746 |
const CompressedRowSparseMatrix::StorageType storage_type =
|
|
Packit |
ea1746 |
result_->storage_type();
|
|
Packit |
ea1746 |
result_->SetZero();
|
|
Packit |
ea1746 |
double* values = result_->mutable_values();
|
|
Packit |
ea1746 |
const int* rows = result_->rows();
|
|
Packit |
ea1746 |
int cursor = 0;
|
|
Packit |
ea1746 |
|
|
Packit |
ea1746 |
// Iterate row blocks.
|
|
Packit |
ea1746 |
for (int r = start_row_block_; r < end_row_block_; ++r) {
|
|
Packit |
ea1746 |
const CompressedRow& m_row = bs->rows[r];
|
|
Packit |
ea1746 |
for (int c1 = 0; c1 < m_row.cells.size(); ++c1) {
|
|
Packit |
ea1746 |
const Cell& cell1 = m_row.cells[c1];
|
|
Packit |
ea1746 |
const int c1_size = bs->cols[cell1.block_id].size;
|
|
Packit |
ea1746 |
const int row_nnz = rows[bs->cols[cell1.block_id].position + 1] -
|
|
Packit |
ea1746 |
rows[bs->cols[cell1.block_id].position];
|
|
Packit |
ea1746 |
|
|
Packit |
ea1746 |
int c2_begin, c2_end;
|
|
Packit |
ea1746 |
if (storage_type == CompressedRowSparseMatrix::LOWER_TRIANGULAR) {
|
|
Packit |
ea1746 |
c2_begin = 0;
|
|
Packit |
ea1746 |
c2_end = c1 + 1;
|
|
Packit |
ea1746 |
} else {
|
|
Packit |
ea1746 |
c2_begin = c1;
|
|
Packit |
ea1746 |
c2_end = m_row.cells.size();
|
|
Packit |
ea1746 |
}
|
|
Packit |
ea1746 |
|
|
Packit |
ea1746 |
for (int c2 = c2_begin; c2 < c2_end; ++c2, ++cursor) {
|
|
Packit |
ea1746 |
const Cell& cell2 = m_row.cells[c2];
|
|
Packit |
ea1746 |
const int c2_size = bs->cols[cell2.block_id].size;
|
|
Packit |
ea1746 |
MatrixTransposeMatrixMultiply(m_row.block.size,
|
|
Packit |
ea1746 |
m_values + cell1.position, c1_size,
|
|
Packit |
ea1746 |
m_values + cell2.position, c2_size,
|
|
Packit |
ea1746 |
values + result_offsets_[cursor],
|
|
Packit |
ea1746 |
row_nnz);
|
|
Packit |
ea1746 |
}
|
|
Packit |
ea1746 |
}
|
|
Packit |
ea1746 |
}
|
|
Packit |
ea1746 |
|
|
Packit |
ea1746 |
CHECK_EQ(cursor, result_offsets_.size());
|
|
Packit |
ea1746 |
}
|
|
Packit |
ea1746 |
|
|
Packit |
ea1746 |
} // namespace internal
|
|
Packit |
ea1746 |
} // namespace ceres
|