/*
* Copyright (c) 2020 Red Hat, Inc.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
* 02110-1301, USA.
*
* $Id: //eng/vdo-releases/aluminum/src/c++/vdo/base/referenceCountRebuild.c#6 $
*/
#include "referenceCountRebuild.h"
#include "logger.h"
#include "memoryAlloc.h"
#include "blockMap.h"
#include "blockMapInternals.h"
#include "blockMapPage.h"
#include "forest.h"
#include "constants.h"
#include "numUtils.h"
#include "refCounts.h"
#include "slabDepot.h"
#include "vdoInternal.h"
#include "vdoPageCache.h"
/**
* A reference count rebuild completion.
* Note that the page completions kept in this structure are not immediately
* freed, so the corresponding pages will be locked down in the page cache
* until the rebuild frees them.
*
* The structure is allocated with a trailing array of page completions (see
* makeRebuildCompletion()), so it must only be created and destroyed through
* the helpers in this file.
**/
typedef struct {
/**
* completion header; must remain the first field because
* asRebuildCompletion() converts by casting the header pointer
*/
VDOCompletion completion;
/**
* the sub-task completion, used first for the block map tree traversal and
* later for flushing the block map
*/
VDOCompletion subTaskCompletion;
/** the thread on which all block map operations must be done */
ThreadID logicalThreadID;
/** the admin thread, on which the final block map flush is launched */
ThreadID adminThreadID;
/** the block map */
BlockMap *blockMap;
/** the slab depot */
SlabDepot *depot;
/** whether this rebuild has been aborted due to an error */
bool aborted;
/** whether we are currently launching the initial round of requests */
bool launching;
/** caller-supplied counter of the logical blocks observed to be in use */
BlockCount *logicalBlocksUsed;
/** caller-supplied counter of the block map tree pages which were counted */
BlockCount *blockMapDataBlocks;
/** the index of the next leaf page to fetch */
PageCount pageToFetch;
/** the number of leaf pages in the block map */
PageCount leafPages;
/** the last slot of the block map (set once the tree pages are loaded) */
BlockMapSlot lastSlot;
/** number of pending (non-ready) requests */
PageCount outstanding;
/** number of page completions in the pageCompletions array */
PageCount pageCount;
/** array of requested, potentially ready page completions */
VDOPageCompletion pageCompletions[];
} RebuildCompletion;
/**
 * Obtain the RebuildCompletion which contains a given VDOCompletion header.
 *
 * The header is the first member of the RebuildCompletion (checked at compile
 * time), so the conversion is a plain cast once the completion type has been
 * asserted.
 *
 * @param completion  The completion header embedded in a RebuildCompletion
 *
 * @return The enclosing RebuildCompletion
 **/
__attribute__((warn_unused_result))
static inline RebuildCompletion *asRebuildCompletion(VDOCompletion *completion)
{
  STATIC_ASSERT(offsetof(RebuildCompletion, completion) == 0);
  assertCompletionType(completion->type, REFERENCE_COUNT_REBUILD_COMPLETION);

  RebuildCompletion *rebuild = (RebuildCompletion *) completion;
  return rebuild;
}
/**
 * Tear down and free a RebuildCompletion, then clear the caller's pointer.
 * Does nothing if the pointer is already NULL.
 *
 * @param completionPtr  The address of the pointer to the completion header
 *                       of the RebuildCompletion to free
 **/
static void freeRebuildCompletion(VDOCompletion **completionPtr)
{
  if (*completionPtr == NULL) {
    return;
  }

  RebuildCompletion *rebuild = asRebuildCompletion(*completionPtr);

  // Both embedded completions are enqueueable and must be destroyed before
  // the memory holding them is released.
  destroyEnqueueable(&rebuild->subTaskCompletion);
  destroyEnqueueable(&rebuild->completion);
  FREE(rebuild);
  *completionPtr = NULL;
}
/**
 * Finish the rebuild: release the RebuildCompletion and hand its result to
 * the parent completion. Registered by makeRebuildCompletion() as both the
 * callback and the error handler of the rebuild completion.
 *
 * @param completion  The completion header of the RebuildCompletion
 **/
static void finishRebuild(VDOCompletion *completion)
{
  // Capture everything needed from the completion before it is freed.
  VDOCompletion *parent = completion->parent;
  int            result = completion->result;

  freeRebuildCompletion(&completion);
  finishCompletion(parent, result);
}
/**
 * Allocate and initialize a rebuild completion, including its trailing array
 * of page completions.
 *
 * @param [in]  vdo                 The VDO
 * @param [in]  logicalBlocksUsed   Where the rebuild will record the number
 *                                  of logical blocks used
 * @param [in]  blockMapDataBlocks  Where the rebuild will record the number
 *                                  of block map data blocks
 * @param [in]  parent              The parent of the rebuild completion
 * @param [out] rebuildPtr          The new block map rebuild completion
 *
 * @return a success or error code
 **/
static int makeRebuildCompletion(VDO                *vdo,
                                 BlockCount         *logicalBlocksUsed,
                                 BlockCount         *blockMapDataBlocks,
                                 VDOCompletion      *parent,
                                 RebuildCompletion **rebuildPtr)
{
  BlockMap *map = getBlockMap(vdo);

  // Use at most half of the configured cache for simultaneous page reads.
  PageCount completionCount
    = minPageCount(getConfiguredCacheSize(vdo) >> 1,
                   MAXIMUM_SIMULTANEOUS_BLOCK_MAP_RESTORATION_READS);

  RebuildCompletion *rebuild;
  int result = ALLOCATE_EXTENDED(RebuildCompletion, completionCount,
                                 VDOPageCompletion, __func__, &rebuild);
  if (result != UDS_SUCCESS) {
    return result;
  }

  // Set up the main completion first, and the sub-task completion only if
  // that succeeded; either failure is cleaned up in the same way.
  result = initializeEnqueueableCompletion(&rebuild->completion,
                                           REFERENCE_COUNT_REBUILD_COMPLETION,
                                           vdo->layer);
  if (result == VDO_SUCCESS) {
    result = initializeEnqueueableCompletion(&rebuild->subTaskCompletion,
                                             SUB_TASK_COMPLETION, vdo->layer);
  }

  if (result != VDO_SUCCESS) {
    VDOCompletion *header = &rebuild->completion;
    freeRebuildCompletion(&header);
    return result;
  }

  rebuild->blockMap           = map;
  rebuild->depot              = vdo->depot;
  rebuild->logicalBlocksUsed  = logicalBlocksUsed;
  rebuild->blockMapDataBlocks = blockMapDataBlocks;
  rebuild->pageCount          = completionCount;
  rebuild->leafPages          = computeBlockMapPageCount(map->entryCount);

  const ThreadConfig *threadConfig = getThreadConfig(vdo);
  rebuild->logicalThreadID = getLogicalZoneThread(threadConfig, 0);
  rebuild->adminThreadID   = getAdminThread(threadConfig);

  ASSERT_LOG_ONLY((getCallbackThreadID() == rebuild->logicalThreadID),
                  "%s must be called on logical thread %u (not %u)", __func__,
                  rebuild->logicalThreadID, getCallbackThreadID());

  // finishRebuild() handles both success and failure of the whole rebuild.
  prepareCompletion(&rebuild->completion, finishRebuild, finishRebuild,
                    rebuild->logicalThreadID, parent);
  *rebuildPtr = rebuild;
  return VDO_SUCCESS;
}
/**
 * Drain the block map so that any changes made while rebuilding the reference
 * counts are flushed. When the drain finishes, the parent (the rebuild
 * completion) is finished. This callback is registered in finishIfDone().
 *
 * @param completion  The sub-task completion
 **/
static void flushBlockMapUpdates(VDOCompletion *completion)
{
  logInfo("Flushing block map changes");
  prepareToFinishParent(completion, completion->parent);

  RebuildCompletion *rebuild = asRebuildCompletion(completion->parent);
  drainBlockMap(rebuild->blockMap, ADMIN_STATE_RECOVERING, completion);
}
/**
 * Decide whether the leaf-page phase of the rebuild is over, and if so move
 * on: an aborted rebuild is completed immediately, while a successful one
 * proceeds to flush the block map on the admin thread.
 *
 * @param rebuild  The rebuild completion
 *
 * @return <code>true</code> if the rebuild is complete (in which case the
 *         rebuild completion must no longer be used by the caller)
 **/
static bool finishIfDone(RebuildCompletion *rebuild)
{
  // Nothing can be concluded while requests are being launched or in flight.
  bool busy = (rebuild->launching || (rebuild->outstanding > 0));
  if (busy) {
    return false;
  }

  if (rebuild->aborted) {
    completeCompletion(&rebuild->completion);
    return true;
  }

  if (rebuild->pageToFetch >= rebuild->leafPages) {
    // Every leaf page has been handled; flush the block map.
    prepareCompletion(&rebuild->subTaskCompletion, flushBlockMapUpdates,
                      finishParentCallback, rebuild->adminThreadID, rebuild);
    invokeCallback(&rebuild->subTaskCompletion);
    return true;
  }

  // There are still leaf pages which have not been requested.
  return false;
}
/**
 * Mark the rebuild as failed and save the error on the rebuild completion.
 *
 * @param rebuild  The rebuild completion
 * @param result   The error result to use, if one is not already saved
 **/
static void abortRebuild(RebuildCompletion *rebuild, int result)
{
  setCompletionResult(&rebuild->completion, result);
  rebuild->aborted = true;
}
/**
 * Error handler for a failed block map page load. Registered by fetchPage().
 * The failure aborts the rebuild; the rebuild finishes once no other requests
 * are pending.
 *
 * @param completion  The VDOPageCompletion whose load failed
 **/
static void handlePageLoadError(VDOCompletion *completion)
{
  RebuildCompletion *rebuild   = asRebuildCompletion(completion->parent);
  int                loadError = completion->result;

  rebuild->outstanding--;
  abortRebuild(rebuild, loadError);

  // The page completion lives inside the rebuild completion, so it must be
  // released before finishIfDone() gets a chance to finish the rebuild.
  releaseVDOPageCompletion(completion);
  (void) finishIfDone(rebuild);
}
/**
* Rebuild reference counts from a block map page.
*
* @param rebuild The rebuild completion
* @param completion The page completion holding the page
*
* @return VDO_SUCCESS or an error
**/
static int rebuildReferenceCountsFromPage(RebuildCompletion *rebuild,
VDOCompletion *completion)
{
BlockMapPage *page = dereferenceWritableVDOPage(completion);
int result = ASSERT(page != NULL, "page available");
if (result != VDO_SUCCESS) {
return result;
}
if (!isBlockMapPageInitialized(page)) {
return VDO_SUCCESS;
}
// Remove any bogus entries which exist beyond the end of the logical space.
if (getBlockMapPagePBN(page) == rebuild->lastSlot.pbn) {
for (SlotNumber slot = rebuild->lastSlot.slot;
slot < BLOCK_MAP_ENTRIES_PER_PAGE; slot++) {
DataLocation mapping = unpackBlockMapEntry(&page->entries[slot]);
if (isMappedLocation(&mapping)) {
page->entries[slot] = packPBN(ZERO_BLOCK, MAPPING_STATE_UNMAPPED);
requestVDOPageWrite(completion);
}
}
}
// Inform the slab depot of all entries on this page.
for (SlotNumber slot = 0; slot < BLOCK_MAP_ENTRIES_PER_PAGE; slot++) {
DataLocation mapping = unpackBlockMapEntry(&page->entries[slot]);
if (!isValidLocation(&mapping)) {
// This entry is invalid, so remove it from the page.
page->entries[slot] = packPBN(ZERO_BLOCK, MAPPING_STATE_UNMAPPED);
requestVDOPageWrite(completion);
continue;
}
if (!isMappedLocation(&mapping)) {
continue;
}
(*rebuild->logicalBlocksUsed)++;
if (mapping.pbn == ZERO_BLOCK) {
continue;
}
if (!isPhysicalDataBlock(rebuild->depot, mapping.pbn)) {
// This is a nonsense mapping. Remove it from the map so we're at least
// consistent and mark the page dirty.
page->entries[slot] = packPBN(ZERO_BLOCK, MAPPING_STATE_UNMAPPED);
requestVDOPageWrite(completion);
continue;
}
Slab *slab = getSlab(rebuild->depot, mapping.pbn);
int result = adjustReferenceCountForRebuild(slab->referenceCounts,
mapping.pbn, DATA_INCREMENT);
if (result != VDO_SUCCESS) {
logErrorWithStringError(result,
"Could not adjust reference count for PBN"
" %llu, slot %u mapped to PBN %llu",
getBlockMapPagePBN(page), slot, mapping.pbn);
page->entries[slot] = packPBN(ZERO_BLOCK, MAPPING_STATE_UNMAPPED);
requestVDOPageWrite(completion);
}
}
return VDO_SUCCESS;
}
/**********************************************************************/
// Forward declaration: pageLoaded() issues the next read through fetchPage(),
// and fetchPage() registers pageLoaded() as the callback for that read.
static void fetchPage(RebuildCompletion *rebuild, VDOCompletion *completion);
/**
 * Handle a block map page which has finished loading: rebuild reference
 * counts from it, release it, and then either finish the rebuild or reuse the
 * page completion to fetch another page. This callback is registered by
 * fetchPage().
 *
 * @param completion  The VDOPageCompletion for the fetched page
 **/
static void pageLoaded(VDOCompletion *completion)
{
  RebuildCompletion *rebuild = asRebuildCompletion(completion->parent);
  rebuild->outstanding--;

  int pageResult = rebuildReferenceCountsFromPage(rebuild, completion);
  if (pageResult != VDO_SUCCESS) {
    abortRebuild(rebuild, pageResult);
  }

  releaseVDOPageCompletion(completion);

  if (!finishIfDone(rebuild)) {
    // The rebuild is still running, so put this page completion back to work
    // on the next page which has not yet been requested.
    fetchPage(rebuild, completion);
  }
}
/**
 * Fetch the next leaf page of the block map which needs to be examined.
 *
 * Leaf pages whose PBN is ZERO_BLOCK are skipped. A PBN which is not a
 * physical data block aborts the rebuild. No new read is launched once the
 * rebuild has been aborted. If no read is launched (the rebuild was aborted
 * or no leaf pages remain), finishIfDone() is consulted so that the rebuild
 * cannot be left with nothing outstanding and nothing left to finish it.
 *
 * The rebuild completion may have been finished by the time this function
 * returns, so callers must not use it afterwards unless they know the rebuild
 * is still launching or still has outstanding requests.
 *
 * @param rebuild     the RebuildCompletion
 * @param completion  the page completion to use
 **/
static void fetchPage(RebuildCompletion *rebuild, VDOCompletion *completion)
{
  // Once the rebuild has failed, reading further pages is wasted work.
  while (!rebuild->aborted && (rebuild->pageToFetch < rebuild->leafPages)) {
    PhysicalBlockNumber pbn = findBlockMapPagePBN(rebuild->blockMap,
                                                  rebuild->pageToFetch++);
    if (pbn == ZERO_BLOCK) {
      continue;
    }

    if (!isPhysicalDataBlock(rebuild->depot, pbn)) {
      abortRebuild(rebuild, VDO_BAD_MAPPING);
      break;
    }

    initVDOPageCompletion(((VDOPageCompletion *) completion),
                          rebuild->blockMap->zones[0].pageCache,
                          pbn, true, &rebuild->completion,
                          pageLoaded, handlePageLoadError);
    rebuild->outstanding++;
    getVDOPageAsync(completion);
    return;
  }

  // Nothing was launched. This is a no-op while the initial launch is in
  // progress or other requests are still outstanding; otherwise it finishes
  // the rebuild instead of leaving it stalled.
  (void) finishIfDone(rebuild);
}
/**
 * Rebuild reference counts from the leaf block map pages now that reference
 * counts have been rebuilt from the interior tree pages (which have been
 * loaded in the process). This callback is registered in
 * rebuildReferenceCounts().
 *
 * @param completion  The sub-task completion
 **/
static void rebuildFromLeaves(VDOCompletion *completion)
{
  RebuildCompletion *rebuild = asRebuildCompletion(completion->parent);
  *rebuild->logicalBlocksUsed = 0;

  // The PBN calculation doesn't work until the tree pages have been loaded,
  // so we can't set this value at the start of rebuild.
  rebuild->lastSlot = (BlockMapSlot) {
    .slot = rebuild->blockMap->entryCount % BLOCK_MAP_ENTRIES_PER_PAGE,
    .pbn  = findBlockMapPagePBN(rebuild->blockMap, rebuild->leafPages - 1),
  };

  // lastSlot.slot is the first slot on the last leaf page which lies beyond
  // the logical space; entries from that slot onward get unmapped. When the
  // entry count is an exact multiple of the page size the remainder is 0,
  // but the last page is then completely in use, so the first out-of-range
  // slot is one past the end of the page rather than slot 0.
  if (rebuild->lastSlot.slot == 0) {
    rebuild->lastSlot.slot = BLOCK_MAP_ENTRIES_PER_PAGE;
  }

  // Prevent any page from being processed until all pages have been launched.
  rebuild->launching = true;
  for (PageCount i = 0; i < rebuild->pageCount; i++) {
    fetchPage(rebuild, &rebuild->pageCompletions[i].completion);
  }

  rebuild->launching = false;
  finishIfDone(rebuild);
}
/**
 * Account for one block map tree page found during the forest traversal by
 * adding a block map reference to the physical block which holds it.
 *
 * <p>Implements EntryCallback.
 *
 * @param pbn         A pbn which holds a block map tree page
 * @param completion  The parent completion of the traversal
 *
 * @return VDO_SUCCESS or an error
 **/
static int processEntry(PhysicalBlockNumber pbn, VDOCompletion *completion)
{
  RebuildCompletion *rebuild = asRebuildCompletion(completion->parent);

  bool inRange = ((pbn != ZERO_BLOCK)
                  && isPhysicalDataBlock(rebuild->depot, pbn));
  if (!inRange) {
    return logErrorWithStringError(VDO_BAD_CONFIGURATION,
                                   "PBN %llu out of range",
                                   pbn);
  }

  Slab *slab = getSlab(rebuild->depot, pbn);
  int adjustResult = adjustReferenceCountForRebuild(slab->referenceCounts,
                                                    pbn, BLOCK_MAP_INCREMENT);
  if (adjustResult != VDO_SUCCESS) {
    return logErrorWithStringError(adjustResult,
                                   "Could not adjust reference count for "
                                   "block map tree PBN %llu",
                                   pbn);
  }

  // Only tree pages whose reference was successfully recorded are counted.
  (*rebuild->blockMapDataBlocks)++;
  return VDO_SUCCESS;
}
/**
 * Rebuild the reference counts from the block map: first from the block map
 * tree pages, then (in rebuildFromLeaves()) from the leaf pages.
 *
 * @param vdo                 The VDO
 * @param parent              The completion to finish when the rebuild is
 *                            done or has failed
 * @param logicalBlocksUsed   Where to record the number of logical blocks
 *                            used
 * @param blockMapDataBlocks  Where to record the number of block map data
 *                            blocks
 **/
void rebuildReferenceCounts(VDO           *vdo,
                            VDOCompletion *parent,
                            BlockCount    *logicalBlocksUsed,
                            BlockCount    *blockMapDataBlocks)
{
  RebuildCompletion *rebuild;
  int result = makeRebuildCompletion(vdo, logicalBlocksUsed,
                                     blockMapDataBlocks, parent, &rebuild);
  if (result != VDO_SUCCESS) {
    finishCompletion(parent, result);
    return;
  }

  // Completion chaining from page cache hits can lead to stack overflow
  // during the rebuild, so clear out the cache before this rebuild phase.
  result = invalidateVDOPageCache(rebuild->blockMap->zones[0].pageCache);
  if (result != VDO_SUCCESS) {
    // Nothing has been launched with the rebuild completion yet, so nothing
    // else will free it; release it here before reporting the failure.
    VDOCompletion *rebuildHeader = &rebuild->completion;
    freeRebuildCompletion(&rebuildHeader);
    finishCompletion(parent, result);
    return;
  }

  // First traverse the block map trees.
  *rebuild->blockMapDataBlocks = 0;
  VDOCompletion *completion = &rebuild->subTaskCompletion;
  prepareCompletion(completion, rebuildFromLeaves, finishParentCallback,
                    rebuild->logicalThreadID, rebuild);
  traverseForest(rebuild->blockMap, processEntry, completion);
}