|
Packit Service |
310c69 |
/*
|
|
Packit Service |
310c69 |
* Copyright (c) 2020 Red Hat, Inc.
|
|
Packit Service |
310c69 |
*
|
|
Packit Service |
310c69 |
* This program is free software; you can redistribute it and/or
|
|
Packit Service |
310c69 |
* modify it under the terms of the GNU General Public License
|
|
Packit Service |
310c69 |
* as published by the Free Software Foundation; either version 2
|
|
Packit Service |
310c69 |
* of the License, or (at your option) any later version.
|
|
Packit Service |
310c69 |
*
|
|
Packit Service |
310c69 |
* This program is distributed in the hope that it will be useful,
|
|
Packit Service |
310c69 |
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
Packit Service |
310c69 |
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
Packit Service |
310c69 |
* GNU General Public License for more details.
|
|
Packit Service |
310c69 |
*
|
|
Packit Service |
310c69 |
* You should have received a copy of the GNU General Public License
|
|
Packit Service |
310c69 |
* along with this program; if not, write to the Free Software
|
|
Packit Service |
310c69 |
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
|
|
Packit Service |
310c69 |
* 02110-1301, USA.
|
|
Packit Service |
310c69 |
*
|
|
Packit Service |
310c69 |
* $Id: //eng/vdo-releases/aluminum/src/c++/vdo/base/vdoRecovery.c#16 $
|
|
Packit Service |
310c69 |
*/
|
|
Packit Service |
310c69 |
|
|
Packit Service |
310c69 |
#include "vdoRecoveryInternals.h"
|
|
Packit Service |
310c69 |
|
|
Packit Service |
310c69 |
#include "logger.h"
|
|
Packit Service |
310c69 |
#include "memoryAlloc.h"
|
|
Packit Service |
310c69 |
|
|
Packit Service |
310c69 |
#include "blockAllocator.h"
|
|
Packit Service |
310c69 |
#include "blockAllocatorInternals.h"
|
|
Packit Service |
310c69 |
#include "blockMapInternals.h"
|
|
Packit Service |
310c69 |
#include "blockMapPage.h"
|
|
Packit Service |
310c69 |
#include "blockMapRecovery.h"
|
|
Packit Service |
310c69 |
#include "completion.h"
|
|
Packit Service |
310c69 |
#include "numUtils.h"
|
|
Packit Service |
310c69 |
#include "packedRecoveryJournalBlock.h"
|
|
Packit Service |
310c69 |
#include "recoveryJournal.h"
|
|
Packit Service |
310c69 |
#include "recoveryUtils.h"
|
|
Packit Service |
310c69 |
#include "slab.h"
|
|
Packit Service |
310c69 |
#include "slabDepot.h"
|
|
Packit Service |
310c69 |
#include "slabJournal.h"
|
|
Packit Service |
310c69 |
#include "slabJournalInternals.h"
|
|
Packit Service |
310c69 |
#include "vdoInternal.h"
|
|
Packit Service |
310c69 |
#include "waitQueue.h"
|
|
Packit Service |
310c69 |
|
|
Packit Service |
310c69 |
enum {
|
|
Packit Service |
310c69 |
// The int map needs capacity of twice the number of VIOs in the system.
|
|
Packit Service |
310c69 |
INT_MAP_CAPACITY = MAXIMUM_USER_VIOS * 2,
|
|
Packit Service |
310c69 |
// There can be as many missing decrefs as there are VIOs in the system.
|
|
Packit Service |
310c69 |
MAXIMUM_SYNTHESIZED_DECREFS = MAXIMUM_USER_VIOS,
|
|
Packit Service |
310c69 |
};
|
|
Packit Service |
310c69 |
|
|
Packit Service |
310c69 |
typedef struct missingDecref {
|
|
Packit Service |
310c69 |
/** A waiter for queueing this object */
|
|
Packit Service |
310c69 |
Waiter waiter;
|
|
Packit Service |
310c69 |
/** The parent of this object */
|
|
Packit Service |
310c69 |
RecoveryCompletion *recovery;
|
|
Packit Service |
310c69 |
/** Whether this decref is complete */
|
|
Packit Service |
310c69 |
bool complete;
|
|
Packit Service |
310c69 |
/** The slot for which the last decref was lost */
|
|
Packit Service |
310c69 |
BlockMapSlot slot;
|
|
Packit Service |
310c69 |
/** The penultimate block map entry for this LBN */
|
|
Packit Service |
310c69 |
DataLocation penultimateMapping;
|
|
Packit Service |
310c69 |
/** The page completion used to fetch the block map page for this LBN */
|
|
Packit Service |
310c69 |
VDOPageCompletion pageCompletion;
|
|
Packit Service |
310c69 |
/** The journal point which will be used for this entry */
|
|
Packit Service |
310c69 |
JournalPoint journalPoint;
|
|
Packit Service |
310c69 |
/** The slab journal to which this entry will be applied */
|
|
Packit Service |
310c69 |
SlabJournal *slabJournal;
|
|
Packit Service |
310c69 |
} MissingDecref;
|
|
Packit Service |
310c69 |
|
|
Packit Service |
310c69 |
/**
|
|
Packit Service |
310c69 |
* Convert a Waiter to the missing decref of which it is a part.
|
|
Packit Service |
310c69 |
*
|
|
Packit Service |
310c69 |
* @param waiter The Waiter to convert
|
|
Packit Service |
310c69 |
*
|
|
Packit Service |
310c69 |
* @return The MissingDecref wrapping the Waiter
|
|
Packit Service |
310c69 |
**/
|
|
Packit Service |
310c69 |
__attribute__((warn_unused_result))
|
|
Packit Service |
310c69 |
static inline MissingDecref *asMissingDecref(Waiter *waiter)
|
|
Packit Service |
310c69 |
{
|
|
Packit Service |
310c69 |
STATIC_ASSERT(offsetof(MissingDecref, waiter) == 0);
|
|
Packit Service |
310c69 |
return (MissingDecref *) waiter;
|
|
Packit Service |
310c69 |
}
|
|
Packit Service |
310c69 |
|
|
Packit Service |
310c69 |
/**
|
|
Packit Service |
310c69 |
* Enqueue a MissingDecref. If the enqueue fails, enter read-only mode.
|
|
Packit Service |
310c69 |
*
|
|
Packit Service |
310c69 |
* @param queue The queue on which to enqueue the decref
|
|
Packit Service |
310c69 |
* @param decref The MissingDecref to enqueue
|
|
Packit Service |
310c69 |
*
|
|
Packit Service |
310c69 |
* @return VDO_SUCCESS or an error
|
|
Packit Service |
310c69 |
**/
|
|
Packit Service |
310c69 |
static int enqueueMissingDecref(WaitQueue *queue, MissingDecref *decref)
|
|
Packit Service |
310c69 |
{
|
|
Packit Service |
310c69 |
int result = enqueueWaiter(queue, &decref->waiter);
|
|
Packit Service |
310c69 |
if (result != VDO_SUCCESS) {
|
|
Packit Service |
310c69 |
enterReadOnlyMode(decref->recovery->vdo->readOnlyNotifier, result);
|
|
Packit Service |
310c69 |
setCompletionResult(&decref->recovery->completion, result);
|
|
Packit Service |
310c69 |
FREE(decref);
|
|
Packit Service |
310c69 |
}
|
|
Packit Service |
310c69 |
|
|
Packit Service |
310c69 |
return result;
|
|
Packit Service |
310c69 |
}
|
|
Packit Service |
310c69 |
|
|
Packit Service |
310c69 |
/**
|
|
Packit Service |
310c69 |
* Convert a BlockMapSlot into a unique uint64_t.
|
|
Packit Service |
310c69 |
*
|
|
Packit Service |
310c69 |
* @param slot The block map slot to convert.
|
|
Packit Service |
310c69 |
*
|
|
Packit Service |
310c69 |
* @return a one-to-one mappable uint64_t.
|
|
Packit Service |
310c69 |
**/
|
|
Packit Service |
310c69 |
static uint64_t slotAsNumber(BlockMapSlot slot)
|
|
Packit Service |
310c69 |
{
|
|
Packit Service |
310c69 |
return (((uint64_t) slot.pbn << 10) + slot.slot);
|
|
Packit Service |
310c69 |
}
|
|
Packit Service |
310c69 |
|
|
Packit Service |
310c69 |
/**
|
|
Packit Service |
310c69 |
* Create a MissingDecref and enqueue it to wait for a determination of its
|
|
Packit Service |
310c69 |
* penultimate mapping.
|
|
Packit Service |
310c69 |
*
|
|
Packit Service |
310c69 |
* @param [in] recovery The parent recovery completion
|
|
Packit Service |
310c69 |
* @param [in] entry The recovery journal entry for the increment which is
|
|
Packit Service |
310c69 |
* missing a decref
|
|
Packit Service |
310c69 |
* @param [out] decrefPtr A pointer to hold the new MissingDecref
|
|
Packit Service |
310c69 |
*
|
|
Packit Service |
310c69 |
* @return VDO_SUCCESS or an error code
|
|
Packit Service |
310c69 |
**/
|
|
Packit Service |
310c69 |
__attribute__((warn_unused_result))
|
|
Packit Service |
310c69 |
static int makeMissingDecref(RecoveryCompletion *recovery,
|
|
Packit Service |
310c69 |
RecoveryJournalEntry entry,
|
|
Packit Service |
310c69 |
MissingDecref **decrefPtr)
|
|
Packit Service |
310c69 |
{
|
|
Packit Service |
310c69 |
MissingDecref *decref;
|
|
Packit Service |
310c69 |
int result = ALLOCATE(1, MissingDecref, __func__, &decref);
|
|
Packit Service |
310c69 |
if (result != VDO_SUCCESS) {
|
|
Packit Service |
310c69 |
return result;
|
|
Packit Service |
310c69 |
}
|
|
Packit Service |
310c69 |
|
|
Packit Service |
310c69 |
decref->recovery = recovery;
|
|
Packit Service |
310c69 |
result = enqueueMissingDecref(&recovery->missingDecrefs[0], decref);
|
|
Packit Service |
310c69 |
if (result != VDO_SUCCESS) {
|
|
Packit Service |
310c69 |
return result;
|
|
Packit Service |
310c69 |
}
|
|
Packit Service |
310c69 |
|
|
Packit Service |
310c69 |
/*
|
|
Packit Service |
310c69 |
* Each synthsized decref needs a unique journal point. Otherwise, in the
|
|
Packit Service |
310c69 |
* event of a crash, we would be unable to tell which synthesized decrefs had
|
|
Packit Service |
310c69 |
* already been committed in the slab journals. Instead of using real
|
|
Packit Service |
310c69 |
* recovery journal space for this, we can use fake journal points between
|
|
Packit Service |
310c69 |
* the last currently valid entry in the tail block and the first journal
|
|
Packit Service |
310c69 |
* entry in the next block. We can't overflow the entry count since the
|
|
Packit Service |
310c69 |
* number of synthesized decrefs is bounded by the DataVIO limit.
|
|
Packit Service |
310c69 |
*
|
|
Packit Service |
310c69 |
* It is vital that any given missing decref always have the same fake
|
|
Packit Service |
310c69 |
* journal point since a failed recovery may be retried with a different
|
|
Packit Service |
310c69 |
* number of zones after having written out some slab journal blocks. Since
|
|
Packit Service |
310c69 |
* the missing decrefs are always read out of the journal in the same order,
|
|
Packit Service |
310c69 |
* we can assign them a journal point when they are read. Their subsequent
|
|
Packit Service |
310c69 |
* use will ensure that, for any given slab journal, they are applied in
|
|
Packit Service |
310c69 |
* the order dictated by these assigned journal points.
|
|
Packit Service |
310c69 |
*/
|
|
Packit Service |
310c69 |
decref->slot = entry.slot;
|
|
Packit Service |
310c69 |
decref->journalPoint = recovery->nextSynthesizedJournalPoint;
|
|
Packit Service |
310c69 |
recovery->nextSynthesizedJournalPoint.entryCount++;
|
|
Packit Service |
310c69 |
recovery->missingDecrefCount++;
|
|
Packit Service |
310c69 |
recovery->incompleteDecrefCount++;
|
|
Packit Service |
310c69 |
|
|
Packit Service |
310c69 |
*decrefPtr = decref;
|
|
Packit Service |
310c69 |
return VDO_SUCCESS;
|
|
Packit Service |
310c69 |
}
|
|
Packit Service |
310c69 |
|
|
Packit Service |
310c69 |
/**
|
|
Packit Service |
310c69 |
* Move the given recovery point forward by one entry.
|
|
Packit Service |
310c69 |
*
|
|
Packit Service |
310c69 |
* @param point The recovery point to alter
|
|
Packit Service |
310c69 |
**/
|
|
Packit Service |
310c69 |
static void incrementRecoveryPoint(RecoveryPoint *point)
|
|
Packit Service |
310c69 |
{
|
|
Packit Service |
310c69 |
point->entryCount++;
|
|
Packit Service |
310c69 |
if ((point->sectorCount == (SECTORS_PER_BLOCK - 1))
|
|
Packit Service |
310c69 |
&& (point->entryCount == RECOVERY_JOURNAL_ENTRIES_PER_LAST_SECTOR)) {
|
|
Packit Service |
310c69 |
point->sequenceNumber++;
|
|
Packit Service |
310c69 |
point->sectorCount = 1;
|
|
Packit Service |
310c69 |
point->entryCount = 0;
|
|
Packit Service |
310c69 |
}
|
|
Packit Service |
310c69 |
|
|
Packit Service |
310c69 |
if (point->entryCount == RECOVERY_JOURNAL_ENTRIES_PER_SECTOR) {
|
|
Packit Service |
310c69 |
point->sectorCount++;
|
|
Packit Service |
310c69 |
point->entryCount = 0;
|
|
Packit Service |
310c69 |
return;
|
|
Packit Service |
310c69 |
}
|
|
Packit Service |
310c69 |
}
|
|
Packit Service |
310c69 |
|
|
Packit Service |
310c69 |
/**
|
|
Packit Service |
310c69 |
* Move the given recovery point backwards by one entry.
|
|
Packit Service |
310c69 |
*
|
|
Packit Service |
310c69 |
* @param point The recovery point to alter
|
|
Packit Service |
310c69 |
**/
|
|
Packit Service |
310c69 |
static void decrementRecoveryPoint(RecoveryPoint *point)
|
|
Packit Service |
310c69 |
{
|
|
Packit Service |
310c69 |
STATIC_ASSERT(RECOVERY_JOURNAL_ENTRIES_PER_LAST_SECTOR > 0);
|
|
Packit Service |
310c69 |
|
|
Packit Service |
310c69 |
if ((point->sectorCount <= 1) && (point->entryCount == 0)) {
|
|
Packit Service |
310c69 |
point->sequenceNumber--;
|
|
Packit Service |
310c69 |
point->sectorCount = SECTORS_PER_BLOCK - 1;
|
|
Packit Service |
310c69 |
point->entryCount = RECOVERY_JOURNAL_ENTRIES_PER_LAST_SECTOR - 1;
|
|
Packit Service |
310c69 |
return;
|
|
Packit Service |
310c69 |
}
|
|
Packit Service |
310c69 |
|
|
Packit Service |
310c69 |
if (point->entryCount == 0) {
|
|
Packit Service |
310c69 |
point->sectorCount--;
|
|
Packit Service |
310c69 |
point->entryCount = RECOVERY_JOURNAL_ENTRIES_PER_SECTOR - 1;
|
|
Packit Service |
310c69 |
return;
|
|
Packit Service |
310c69 |
}
|
|
Packit Service |
310c69 |
|
|
Packit Service |
310c69 |
point->entryCount--;
|
|
Packit Service |
310c69 |
}
|
|
Packit Service |
310c69 |
|
|
Packit Service |
310c69 |
/**
|
|
Packit Service |
310c69 |
* Check whether the first point precedes the second point.
|
|
Packit Service |
310c69 |
*
|
|
Packit Service |
310c69 |
* @param first The first recovery point
|
|
Packit Service |
310c69 |
* @param second The second recovery point
|
|
Packit Service |
310c69 |
*
|
|
Packit Service |
310c69 |
* @return true if the first point precedes the second point
|
|
Packit Service |
310c69 |
**/
|
|
Packit Service |
310c69 |
__attribute__((warn_unused_result))
|
|
Packit Service |
310c69 |
static bool beforeRecoveryPoint(const RecoveryPoint *first,
|
|
Packit Service |
310c69 |
const RecoveryPoint *second)
|
|
Packit Service |
310c69 |
{
|
|
Packit Service |
310c69 |
if (first->sequenceNumber < second->sequenceNumber) {
|
|
Packit Service |
310c69 |
return true;
|
|
Packit Service |
310c69 |
}
|
|
Packit Service |
310c69 |
|
|
Packit Service |
310c69 |
if (first->sequenceNumber > second->sequenceNumber) {
|
|
Packit Service |
310c69 |
return false;
|
|
Packit Service |
310c69 |
}
|
|
Packit Service |
310c69 |
|
|
Packit Service |
310c69 |
if (first->sectorCount < second->sectorCount) {
|
|
Packit Service |
310c69 |
return true;
|
|
Packit Service |
310c69 |
}
|
|
Packit Service |
310c69 |
|
|
Packit Service |
310c69 |
return ((first->sectorCount == second->sectorCount)
|
|
Packit Service |
310c69 |
&& (first->entryCount < second->entryCount));
|
|
Packit Service |
310c69 |
}
|
|
Packit Service |
310c69 |
|
|
Packit Service |
310c69 |
/**
|
|
Packit Service |
310c69 |
* Prepare the sub-task completion.
|
|
Packit Service |
310c69 |
*
|
|
Packit Service |
310c69 |
* @param recovery The RecoveryCompletion whose sub-task completion is to
|
|
Packit Service |
310c69 |
* be prepared
|
|
Packit Service |
310c69 |
* @param callback The callback to register for the next sub-task
|
|
Packit Service |
310c69 |
* @param errorHandler The error handler for the next sub-task
|
|
Packit Service |
310c69 |
* @param zoneType The type of zone on which the callback or errorHandler
|
|
Packit Service |
310c69 |
* should run
|
|
Packit Service |
310c69 |
**/
|
|
Packit Service |
310c69 |
static void prepareSubTask(RecoveryCompletion *recovery,
|
|
Packit Service |
310c69 |
VDOAction callback,
|
|
Packit Service |
310c69 |
VDOAction errorHandler,
|
|
Packit Service |
310c69 |
ZoneType zoneType)
|
|
Packit Service |
310c69 |
{
|
|
Packit Service |
310c69 |
const ThreadConfig *threadConfig = getThreadConfig(recovery->vdo);
|
|
Packit Service |
310c69 |
ThreadID threadID;
|
|
Packit Service |
310c69 |
switch (zoneType) {
|
|
Packit Service |
310c69 |
case ZONE_TYPE_LOGICAL:
|
|
Packit Service |
310c69 |
// All blockmap access is done on single thread, so use logical zone 0.
|
|
Packit Service |
310c69 |
threadID = getLogicalZoneThread(threadConfig, 0);
|
|
Packit Service |
310c69 |
break;
|
|
Packit Service |
310c69 |
|
|
Packit Service |
310c69 |
case ZONE_TYPE_PHYSICAL:
|
|
Packit Service |
310c69 |
threadID = recovery->allocator->threadID;
|
|
Packit Service |
310c69 |
break;
|
|
Packit Service |
310c69 |
|
|
Packit Service |
310c69 |
case ZONE_TYPE_ADMIN:
|
|
Packit Service |
310c69 |
default:
|
|
Packit Service |
310c69 |
threadID = getAdminThread(threadConfig);
|
|
Packit Service |
310c69 |
}
|
|
Packit Service |
310c69 |
|
|
Packit Service |
310c69 |
prepareCompletion(&recovery->subTaskCompletion, callback, errorHandler,
|
|
Packit Service |
310c69 |
threadID, recovery);
|
|
Packit Service |
310c69 |
}
|
|
Packit Service |
310c69 |
|
|
Packit Service |
310c69 |
/**********************************************************************/
|
|
Packit Service |
310c69 |
int makeRecoveryCompletion(VDO *vdo, RecoveryCompletion **recoveryPtr)
|
|
Packit Service |
310c69 |
{
|
|
Packit Service |
310c69 |
const ThreadConfig *threadConfig = getThreadConfig(vdo);
|
|
Packit Service |
310c69 |
RecoveryCompletion *recovery;
|
|
Packit Service |
310c69 |
int result = ALLOCATE_EXTENDED(RecoveryCompletion,
|
|
Packit Service |
310c69 |
threadConfig->physicalZoneCount, RingNode,
|
|
Packit Service |
310c69 |
__func__, &recovery);
|
|
Packit Service |
310c69 |
if (result != VDO_SUCCESS) {
|
|
Packit Service |
310c69 |
return result;
|
|
Packit Service |
310c69 |
}
|
|
Packit Service |
310c69 |
|
|
Packit Service |
310c69 |
recovery->vdo = vdo;
|
|
Packit Service |
310c69 |
for (ZoneCount z = 0; z < threadConfig->physicalZoneCount; z++) {
|
|
Packit Service |
310c69 |
initializeWaitQueue(&recovery->missingDecrefs[z]);
|
|
Packit Service |
310c69 |
}
|
|
Packit Service |
310c69 |
|
|
Packit Service |
310c69 |
result = initializeEnqueueableCompletion(&recovery->completion,
|
|
Packit Service |
310c69 |
RECOVERY_COMPLETION, vdo->layer);
|
|
Packit Service |
310c69 |
if (result != VDO_SUCCESS) {
|
|
Packit Service |
310c69 |
freeRecoveryCompletion(&recovery);
|
|
Packit Service |
310c69 |
return result;
|
|
Packit Service |
310c69 |
}
|
|
Packit Service |
310c69 |
|
|
Packit Service |
310c69 |
result = initializeEnqueueableCompletion(&recovery->subTaskCompletion,
|
|
Packit Service |
310c69 |
SUB_TASK_COMPLETION, vdo->layer);
|
|
Packit Service |
310c69 |
if (result != VDO_SUCCESS) {
|
|
Packit Service |
310c69 |
freeRecoveryCompletion(&recovery);
|
|
Packit Service |
310c69 |
return result;
|
|
Packit Service |
310c69 |
}
|
|
Packit Service |
310c69 |
|
|
Packit Service |
310c69 |
result = makeIntMap(INT_MAP_CAPACITY, 0, &recovery->slotEntryMap);
|
|
Packit Service |
310c69 |
if (result != VDO_SUCCESS) {
|
|
Packit Service |
310c69 |
freeRecoveryCompletion(&recovery);
|
|
Packit Service |
310c69 |
return result;
|
|
Packit Service |
310c69 |
}
|
|
Packit Service |
310c69 |
|
|
Packit Service |
310c69 |
*recoveryPtr = recovery;
|
|
Packit Service |
310c69 |
return VDO_SUCCESS;
|
|
Packit Service |
310c69 |
}
|
|
Packit Service |
310c69 |
|
|
Packit Service |
310c69 |
/**
|
|
Packit Service |
310c69 |
* A waiter callback to free MissingDecrefs.
|
|
Packit Service |
310c69 |
*
|
|
Packit Service |
310c69 |
* Implements WaiterCallback.
|
|
Packit Service |
310c69 |
**/
|
|
Packit Service |
310c69 |
static void freeMissingDecref(Waiter *waiter,
|
|
Packit Service |
310c69 |
void *context __attribute__((unused)))
|
|
Packit Service |
310c69 |
{
|
|
Packit Service |
310c69 |
FREE(asMissingDecref(waiter));
|
|
Packit Service |
310c69 |
}
|
|
Packit Service |
310c69 |
|
|
Packit Service |
310c69 |
/**********************************************************************/
|
|
Packit Service |
310c69 |
void freeRecoveryCompletion(RecoveryCompletion **recoveryPtr)
|
|
Packit Service |
310c69 |
{
|
|
Packit Service |
310c69 |
RecoveryCompletion *recovery = *recoveryPtr;
|
|
Packit Service |
310c69 |
if (recovery == NULL) {
|
|
Packit Service |
310c69 |
return;
|
|
Packit Service |
310c69 |
}
|
|
Packit Service |
310c69 |
|
|
Packit Service |
310c69 |
freeIntMap(&recovery->slotEntryMap);
|
|
Packit Service |
310c69 |
const ThreadConfig *threadConfig = getThreadConfig(recovery->vdo);
|
|
Packit Service |
310c69 |
for (ZoneCount z = 0; z < threadConfig->physicalZoneCount; z++) {
|
|
Packit Service |
310c69 |
notifyAllWaiters(&recovery->missingDecrefs[z], freeMissingDecref, NULL);
|
|
Packit Service |
310c69 |
}
|
|
Packit Service |
310c69 |
|
|
Packit Service |
310c69 |
FREE(recovery->journalData);
|
|
Packit Service |
310c69 |
FREE(recovery->entries);
|
|
Packit Service |
310c69 |
destroyEnqueueable(&recovery->subTaskCompletion);
|
|
Packit Service |
310c69 |
destroyEnqueueable(&recovery->completion);
|
|
Packit Service |
310c69 |
FREE(recovery);
|
|
Packit Service |
310c69 |
*recoveryPtr = NULL;
|
|
Packit Service |
310c69 |
}
|
|
Packit Service |
310c69 |
|
|
Packit Service |
310c69 |
/**
|
|
Packit Service |
310c69 |
* Finish recovering, free the recovery completion and notify the parent.
|
|
Packit Service |
310c69 |
*
|
|
Packit Service |
310c69 |
* @param completion The recovery completion
|
|
Packit Service |
310c69 |
**/
|
|
Packit Service |
310c69 |
static void finishRecovery(VDOCompletion *completion)
|
|
Packit Service |
310c69 |
{
|
|
Packit Service |
310c69 |
VDOCompletion *parent = completion->parent;
|
|
Packit Service |
310c69 |
RecoveryCompletion *recovery = asRecoveryCompletion(completion);
|
|
Packit Service |
310c69 |
VDO *vdo = recovery->vdo;
|
|
Packit Service |
310c69 |
uint64_t recoveryCount = ++vdo->completeRecoveries;
|
|
Packit Service |
310c69 |
initializeRecoveryJournalPostRecovery(vdo->recoveryJournal,
|
|
Packit Service |
310c69 |
recoveryCount, recovery->highestTail);
|
|
Packit Service |
310c69 |
freeRecoveryCompletion(&recovery);
|
|
Packit Service |
310c69 |
logInfo("Rebuild complete.");
|
|
Packit Service |
310c69 |
|
|
Packit Service |
310c69 |
// Now that we've freed the recovery completion and its vast array of
|
|
Packit Service |
310c69 |
// journal entries, we can allocate refcounts.
|
|
Packit Service |
310c69 |
int result = allocateSlabRefCounts(vdo->depot);
|
|
Packit Service |
310c69 |
finishCompletion(parent, result);
|
|
Packit Service |
310c69 |
}
|
|
Packit Service |
310c69 |
|
|
Packit Service |
310c69 |
/**
|
|
Packit Service |
310c69 |
* Handle a recovery error.
|
|
Packit Service |
310c69 |
*
|
|
Packit Service |
310c69 |
* @param completion The recovery completion
|
|
Packit Service |
310c69 |
**/
|
|
Packit Service |
310c69 |
static void abortRecovery(VDOCompletion *completion)
|
|
Packit Service |
310c69 |
{
|
|
Packit Service |
310c69 |
VDOCompletion *parent = completion->parent;
|
|
Packit Service |
310c69 |
int result = completion->result;
|
|
Packit Service |
310c69 |
RecoveryCompletion *recovery = asRecoveryCompletion(completion);
|
|
Packit Service |
310c69 |
freeRecoveryCompletion(&recovery);
|
|
Packit Service |
310c69 |
logWarning("Recovery aborted");
|
|
Packit Service |
310c69 |
finishCompletion(parent, result);
|
|
Packit Service |
310c69 |
}
|
|
Packit Service |
310c69 |
|
|
Packit Service |
310c69 |
/**
|
|
Packit Service |
310c69 |
* Abort a recovery if there is an error.
|
|
Packit Service |
310c69 |
*
|
|
Packit Service |
310c69 |
* @param result The result to check
|
|
Packit Service |
310c69 |
* @param recovery The recovery completion
|
|
Packit Service |
310c69 |
*
|
|
Packit Service |
310c69 |
* @return true if the result was an error
|
|
Packit Service |
310c69 |
**/
|
|
Packit Service |
310c69 |
__attribute__((warn_unused_result))
|
|
Packit Service |
310c69 |
static bool abortRecoveryOnError(int result, RecoveryCompletion *recovery)
|
|
Packit Service |
310c69 |
{
|
|
Packit Service |
310c69 |
if (result == VDO_SUCCESS) {
|
|
Packit Service |
310c69 |
return false;
|
|
Packit Service |
310c69 |
}
|
|
Packit Service |
310c69 |
|
|
Packit Service |
310c69 |
finishCompletion(&recovery->completion, result);
|
|
Packit Service |
310c69 |
return true;
|
|
Packit Service |
310c69 |
}
|
|
Packit Service |
310c69 |
|
|
Packit Service |
310c69 |
/**
|
|
Packit Service |
310c69 |
* Unpack the recovery journal entry associated with the given recovery point.
|
|
Packit Service |
310c69 |
*
|
|
Packit Service |
310c69 |
* @param recovery The recovery completion
|
|
Packit Service |
310c69 |
* @param point The recovery point
|
|
Packit Service |
310c69 |
*
|
|
Packit Service |
310c69 |
* @return The unpacked contents of the matching recovery journal entry
|
|
Packit Service |
310c69 |
**/
|
|
Packit Service |
310c69 |
static RecoveryJournalEntry getEntry(const RecoveryCompletion *recovery,
|
|
Packit Service |
310c69 |
const RecoveryPoint *point)
|
|
Packit Service |
310c69 |
{
|
|
Packit Service |
310c69 |
RecoveryJournal *journal = recovery->vdo->recoveryJournal;
|
|
Packit Service |
310c69 |
PhysicalBlockNumber blockNumber
|
|
Packit Service |
310c69 |
= getRecoveryJournalBlockNumber(journal, point->sequenceNumber);
|
|
Packit Service |
310c69 |
off_t sectorOffset
|
|
Packit Service |
310c69 |
= (blockNumber * VDO_BLOCK_SIZE) + (point->sectorCount * VDO_SECTOR_SIZE);
|
|
Packit Service |
310c69 |
PackedJournalSector *sector
|
|
Packit Service |
310c69 |
= (PackedJournalSector *) &recovery->journalData[sectorOffset];
|
|
Packit Service |
310c69 |
return unpackRecoveryJournalEntry(§or->entries[point->entryCount]);
|
|
Packit Service |
310c69 |
}
|
|
Packit Service |
310c69 |
|
|
Packit Service |
310c69 |
/**
|
|
Packit Service |
310c69 |
* Create an array of all valid journal entries, in order, and store it in the
|
|
Packit Service |
310c69 |
* recovery completion.
|
|
Packit Service |
310c69 |
*
|
|
Packit Service |
310c69 |
* @param recovery The recovery completion
|
|
Packit Service |
310c69 |
*
|
|
Packit Service |
310c69 |
* @return VDO_SUCCESS or an error code
|
|
Packit Service |
310c69 |
**/
|
|
Packit Service |
310c69 |
static int extractJournalEntries(RecoveryCompletion *recovery)
|
|
Packit Service |
310c69 |
{
|
|
Packit Service |
310c69 |
// Allocate a NumberedBlockMapping array just large enough to transcribe
|
|
Packit Service |
310c69 |
// every increment PackedRecoveryJournalEntry from every valid journal block.
|
|
Packit Service |
310c69 |
int result = ALLOCATE(recovery->increfCount, NumberedBlockMapping, __func__,
|
|
Packit Service |
310c69 |
&recovery->entries);
|
|
Packit Service |
310c69 |
if (result != VDO_SUCCESS) {
|
|
Packit Service |
310c69 |
return result;
|
|
Packit Service |
310c69 |
}
|
|
Packit Service |
310c69 |
|
|
Packit Service |
310c69 |
RecoveryPoint recoveryPoint = {
|
|
Packit Service |
310c69 |
.sequenceNumber = recovery->blockMapHead,
|
|
Packit Service |
310c69 |
.sectorCount = 1,
|
|
Packit Service |
310c69 |
.entryCount = 0,
|
|
Packit Service |
310c69 |
};
|
|
Packit Service |
310c69 |
while (beforeRecoveryPoint(&recoveryPoint, &recovery->tailRecoveryPoint)) {
|
|
Packit Service |
310c69 |
RecoveryJournalEntry entry = getEntry(recovery, &recoveryPoint);
|
|
Packit Service |
310c69 |
result = validateRecoveryJournalEntry(recovery->vdo, &entry);
|
|
Packit Service |
310c69 |
if (result != VDO_SUCCESS) {
|
|
Packit Service |
310c69 |
enterReadOnlyMode(recovery->vdo->readOnlyNotifier, result);
|
|
Packit Service |
310c69 |
return result;
|
|
Packit Service |
310c69 |
}
|
|
Packit Service |
310c69 |
|
|
Packit Service |
310c69 |
if (isIncrementOperation(entry.operation)) {
|
|
Packit Service |
310c69 |
recovery->entries[recovery->entryCount] = (NumberedBlockMapping) {
|
|
Packit Service |
310c69 |
.blockMapSlot = entry.slot,
|
|
Packit Service |
310c69 |
.blockMapEntry = packPBN(entry.mapping.pbn, entry.mapping.state),
|
|
Packit Service |
310c69 |
.number = recovery->entryCount,
|
|
Packit Service |
310c69 |
};
|
|
Packit Service |
310c69 |
recovery->entryCount++;
|
|
Packit Service |
310c69 |
}
|
|
Packit Service |
310c69 |
|
|
Packit Service |
310c69 |
incrementRecoveryPoint(&recoveryPoint);
|
|
Packit Service |
310c69 |
}
|
|
Packit Service |
310c69 |
|
|
Packit Service |
310c69 |
result = ASSERT((recovery->entryCount <= recovery->increfCount),
|
|
Packit Service |
310c69 |
"approximate incref count is an upper bound");
|
|
Packit Service |
310c69 |
if (result != VDO_SUCCESS) {
|
|
Packit Service |
310c69 |
enterReadOnlyMode(recovery->vdo->readOnlyNotifier, result);
|
|
Packit Service |
310c69 |
}
|
|
Packit Service |
310c69 |
|
|
Packit Service |
310c69 |
return result;
|
|
Packit Service |
310c69 |
}
|
|
Packit Service |
310c69 |
|
|
Packit Service |
310c69 |
/**
|
|
Packit Service |
310c69 |
* Extract journal entries and recover the block map. This callback is
|
|
Packit Service |
310c69 |
* registered in startSuperBlockSave().
|
|
Packit Service |
310c69 |
*
|
|
Packit Service |
310c69 |
* @param completion The sub-task completion
|
|
Packit Service |
310c69 |
**/
|
|
Packit Service |
310c69 |
static void launchBlockMapRecovery(VDOCompletion *completion)
|
|
Packit Service |
310c69 |
{
|
|
Packit Service |
310c69 |
RecoveryCompletion *recovery = asRecoveryCompletion(completion->parent);
|
|
Packit Service |
310c69 |
VDO *vdo = recovery->vdo;
|
|
Packit Service |
310c69 |
assertOnLogicalZoneThread(vdo, 0, __func__);
|
|
Packit Service |
310c69 |
|
|
Packit Service |
310c69 |
// Extract the journal entries for the block map recovery.
|
|
Packit Service |
310c69 |
int result = extractJournalEntries(recovery);
|
|
Packit Service |
310c69 |
if (abortRecoveryOnError(result, recovery)) {
|
|
Packit Service |
310c69 |
return;
|
|
Packit Service |
310c69 |
}
|
|
Packit Service |
310c69 |
|
|
Packit Service |
310c69 |
prepareToFinishParent(completion, &recovery->completion);
|
|
Packit Service |
310c69 |
recoverBlockMap(vdo, recovery->entryCount, recovery->entries, completion);
|
|
Packit Service |
310c69 |
}
|
|
Packit Service |
310c69 |
|
|
Packit Service |
310c69 |
/**
|
|
Packit Service |
310c69 |
* Finish flushing all slab journals and start a write of the super block.
|
|
Packit Service |
310c69 |
* This callback is registered in addSynthesizedEntries().
|
|
Packit Service |
310c69 |
*
|
|
Packit Service |
310c69 |
* @param completion The sub-task completion
|
|
Packit Service |
310c69 |
**/
|
|
Packit Service |
310c69 |
static void startSuperBlockSave(VDOCompletion *completion)
|
|
Packit Service |
310c69 |
{
|
|
Packit Service |
310c69 |
RecoveryCompletion *recovery = asRecoveryCompletion(completion->parent);
|
|
Packit Service |
310c69 |
VDO *vdo = recovery->vdo;
|
|
Packit Service |
310c69 |
assertOnAdminThread(vdo, __func__);
|
|
Packit Service |
310c69 |
|
|
Packit Service |
310c69 |
logInfo("Saving recovery progress");
|
|
Packit Service |
310c69 |
vdo->state = VDO_REPLAYING;
|
|
Packit Service |
310c69 |
|
|
Packit Service |
310c69 |
// The block map access which follows the super block save must be done
|
|
Packit Service |
310c69 |
// on a logical thread.
|
|
Packit Service |
310c69 |
prepareSubTask(recovery, launchBlockMapRecovery, finishParentCallback,
|
|
Packit Service |
310c69 |
ZONE_TYPE_LOGICAL);
|
|
Packit Service |
310c69 |
saveVDOComponentsAsync(vdo, completion);
|
|
Packit Service |
310c69 |
}
|
|
Packit Service |
310c69 |
|
|
Packit Service |
310c69 |
/**
|
|
Packit Service |
310c69 |
* The callback from loading the slab depot. It will update the logical blocks
|
|
Packit Service |
310c69 |
* and block map data blocks counts in the recovery journal and then drain the
|
|
Packit Service |
310c69 |
* slab depot in order to commit the recovered slab journals. It is registered
|
|
Packit Service |
310c69 |
* in applyToDepot().
|
|
Packit Service |
310c69 |
*
|
|
Packit Service |
310c69 |
* @param completion The sub-task completion
|
|
Packit Service |
310c69 |
**/
|
|
Packit Service |
310c69 |
static void finishRecoveringDepot(VDOCompletion *completion)
|
|
Packit Service |
310c69 |
{
|
|
Packit Service |
310c69 |
RecoveryCompletion *recovery = asRecoveryCompletion(completion->parent);
|
|
Packit Service |
310c69 |
VDO *vdo = recovery->vdo;
|
|
Packit Service |
310c69 |
assertOnAdminThread(vdo, __func__);
|
|
Packit Service |
310c69 |
|
|
Packit Service |
310c69 |
logInfo("Replayed %zu journal entries into slab journals",
|
|
Packit Service |
310c69 |
recovery->entriesAddedToSlabJournals);
|
|
Packit Service |
310c69 |
logInfo("Synthesized %zu missing journal entries",
|
|
Packit Service |
310c69 |
recovery->missingDecrefCount);
|
|
Packit Service |
310c69 |
vdo->recoveryJournal->logicalBlocksUsed = recovery->logicalBlocksUsed;
|
|
Packit Service |
310c69 |
vdo->recoveryJournal->blockMapDataBlocks = recovery->blockMapDataBlocks;
|
|
Packit Service |
310c69 |
|
|
Packit Service |
310c69 |
prepareSubTask(recovery, startSuperBlockSave, finishParentCallback,
|
|
Packit Service |
310c69 |
ZONE_TYPE_ADMIN);
|
|
Packit Service |
310c69 |
drainSlabDepot(vdo->depot, ADMIN_STATE_RECOVERING, completion);
|
|
Packit Service |
310c69 |
}
|
|
Packit Service |
310c69 |
|
|
Packit Service |
310c69 |
/**
|
|
Packit Service |
310c69 |
* The error handler for recovering slab journals. It will skip any remaining
|
|
Packit Service |
310c69 |
* recovery on the current zone and propagate the error. It is registered in
|
|
Packit Service |
310c69 |
* addSlabJournalEntries() and addSynthesizedEntries().
|
|
Packit Service |
310c69 |
*
|
|
Packit Service |
310c69 |
* @param completion The completion of the block allocator being recovered
|
|
Packit Service |
310c69 |
**/
|
|
Packit Service |
310c69 |
static void handleAddSlabJournalEntryError(VDOCompletion *completion)
|
|
Packit Service |
310c69 |
{
|
|
Packit Service |
310c69 |
RecoveryCompletion *recovery = asRecoveryCompletion(completion->parent);
|
|
Packit Service |
310c69 |
notifySlabJournalsAreRecovered(recovery->allocator, completion->result);
|
|
Packit Service |
310c69 |
}
|
|
Packit Service |
310c69 |
|
|
Packit Service |
310c69 |
/**
|
|
Packit Service |
310c69 |
* Add synthesized entries into slab journals, waiting when necessary.
|
|
Packit Service |
310c69 |
*
|
|
Packit Service |
310c69 |
* @param completion The allocator completion
|
|
Packit Service |
310c69 |
**/
|
|
Packit Service |
310c69 |
static void addSynthesizedEntries(VDOCompletion *completion)
|
|
Packit Service |
310c69 |
{
|
|
Packit Service |
310c69 |
RecoveryCompletion *recovery = asRecoveryCompletion(completion->parent);
|
|
Packit Service |
310c69 |
|
|
Packit Service |
310c69 |
// Get ready in case we need to enqueue again
|
|
Packit Service |
310c69 |
prepareCompletion(completion, addSynthesizedEntries,
|
|
Packit Service |
310c69 |
handleAddSlabJournalEntryError,
|
|
Packit Service |
310c69 |
completion->callbackThreadID, recovery);
|
|
Packit Service |
310c69 |
WaitQueue *missingDecrefs
|
|
Packit Service |
310c69 |
= &recovery->missingDecrefs[recovery->allocator->zoneNumber];
|
|
Packit Service |
310c69 |
while (hasWaiters(missingDecrefs)) {
|
|
Packit Service |
310c69 |
MissingDecref *decref = asMissingDecref(getFirstWaiter(missingDecrefs));
|
|
Packit Service |
310c69 |
if (!attemptReplayIntoSlabJournal(decref->slabJournal,
|
|
Packit Service |
310c69 |
decref->penultimateMapping.pbn,
|
|
Packit Service |
310c69 |
DATA_DECREMENT, &decref->journalPoint,
|
|
Packit Service |
310c69 |
completion)) {
|
|
Packit Service |
310c69 |
return;
|
|
Packit Service |
310c69 |
}
|
|
Packit Service |
310c69 |
|
|
Packit Service |
310c69 |
dequeueNextWaiter(missingDecrefs);
|
|
Packit Service |
310c69 |
FREE(decref);
|
|
Packit Service |
310c69 |
}
|
|
Packit Service |
310c69 |
|
|
Packit Service |
310c69 |
notifySlabJournalsAreRecovered(recovery->allocator, VDO_SUCCESS);
|
|
Packit Service |
310c69 |
}
|
|
Packit Service |
310c69 |
|
|
Packit Service |
310c69 |
/**
|
|
Packit Service |
310c69 |
* Determine the LBNs used count as of the end of the journal (but
|
|
Packit Service |
310c69 |
* not including any changes to that count from entries that will be
|
|
Packit Service |
310c69 |
* synthesized later).
|
|
Packit Service |
310c69 |
*
|
|
Packit Service |
310c69 |
* @param recovery The recovery completion
|
|
Packit Service |
310c69 |
*
|
|
Packit Service |
310c69 |
* @return VDO_SUCCESS or an error
|
|
Packit Service |
310c69 |
**/
|
|
Packit Service |
310c69 |
static int computeUsages(RecoveryCompletion *recovery)
|
|
Packit Service |
310c69 |
{
|
|
Packit Service |
310c69 |
RecoveryJournal *journal = recovery->vdo->recoveryJournal;
|
|
Packit Service |
310c69 |
PackedJournalHeader *tailHeader
|
|
Packit Service |
310c69 |
= getJournalBlockHeader(journal, recovery->journalData, recovery->tail);
|
|
Packit Service |
310c69 |
|
|
Packit Service |
310c69 |
RecoveryBlockHeader unpacked;
|
|
Packit Service |
310c69 |
unpackRecoveryBlockHeader(tailHeader, &unpacked);
|
|
Packit Service |
310c69 |
recovery->logicalBlocksUsed = unpacked.logicalBlocksUsed;
|
|
Packit Service |
310c69 |
recovery->blockMapDataBlocks = unpacked.blockMapDataBlocks;
|
|
Packit Service |
310c69 |
|
|
Packit Service |
310c69 |
RecoveryPoint recoveryPoint = {
|
|
Packit Service |
310c69 |
.sequenceNumber = recovery->tail,
|
|
Packit Service |
310c69 |
.sectorCount = 1,
|
|
Packit Service |
310c69 |
.entryCount = 0,
|
|
Packit Service |
310c69 |
};
|
|
Packit Service |
310c69 |
while (beforeRecoveryPoint(&recoveryPoint, &recovery->tailRecoveryPoint)) {
|
|
Packit Service |
310c69 |
RecoveryJournalEntry entry = getEntry(recovery, &recoveryPoint);
|
|
Packit Service |
310c69 |
if (isMappedLocation(&entry.mapping)) {
|
|
Packit Service |
310c69 |
switch (entry.operation) {
|
|
Packit Service |
310c69 |
case DATA_INCREMENT:
|
|
Packit Service |
310c69 |
recovery->logicalBlocksUsed++;
|
|
Packit Service |
310c69 |
break;
|
|
Packit Service |
310c69 |
|
|
Packit Service |
310c69 |
case DATA_DECREMENT:
|
|
Packit Service |
310c69 |
recovery->logicalBlocksUsed--;
|
|
Packit Service |
310c69 |
break;
|
|
Packit Service |
310c69 |
|
|
Packit Service |
310c69 |
case BLOCK_MAP_INCREMENT:
|
|
Packit Service |
310c69 |
recovery->blockMapDataBlocks++;
|
|
Packit Service |
310c69 |
break;
|
|
Packit Service |
310c69 |
|
|
Packit Service |
310c69 |
default:
|
|
Packit Service |
310c69 |
return logErrorWithStringError(VDO_CORRUPT_JOURNAL,
|
|
Packit Service |
310c69 |
"Recovery journal entry at "
|
|
Packit Service |
310c69 |
"sequence number %" PRIu64
|
|
Packit Service |
310c69 |
", sector %u, entry %u had invalid "
|
|
Packit Service |
310c69 |
"operation %u",
|
|
Packit Service |
310c69 |
recoveryPoint.sequenceNumber,
|
|
Packit Service |
310c69 |
recoveryPoint.sectorCount,
|
|
Packit Service |
310c69 |
recoveryPoint.entryCount,
|
|
Packit Service |
310c69 |
entry.operation);
|
|
Packit Service |
310c69 |
}
|
|
Packit Service |
310c69 |
}
|
|
Packit Service |
310c69 |
|
|
Packit Service |
310c69 |
incrementRecoveryPoint(&recoveryPoint);
|
|
Packit Service |
310c69 |
}
|
|
Packit Service |
310c69 |
|
|
Packit Service |
310c69 |
return VDO_SUCCESS;
|
|
Packit Service |
310c69 |
}
|
|
Packit Service |
310c69 |
|
|
Packit Service |
310c69 |
/**
|
|
Packit Service |
310c69 |
* Advance the current recovery and journal points.
|
|
Packit Service |
310c69 |
*
|
|
Packit Service |
310c69 |
* @param recovery The RecoveryCompletion whose points are to be
|
|
Packit Service |
310c69 |
* advanced
|
|
Packit Service |
310c69 |
* @param entriesPerBlock The number of entries in a recovery journal block
|
|
Packit Service |
310c69 |
**/
|
|
Packit Service |
310c69 |
static void advancePoints(RecoveryCompletion *recovery,
|
|
Packit Service |
310c69 |
JournalEntryCount entriesPerBlock)
|
|
Packit Service |
310c69 |
{
|
|
Packit Service |
310c69 |
incrementRecoveryPoint(&recovery->nextRecoveryPoint);
|
|
Packit Service |
310c69 |
advanceJournalPoint(&recovery->nextJournalPoint, entriesPerBlock);
|
|
Packit Service |
310c69 |
}
|
|
Packit Service |
310c69 |
|
|
Packit Service |
310c69 |
/**
|
|
Packit Service |
310c69 |
* Replay recovery journal entries into the slab journals of the allocator
|
|
Packit Service |
310c69 |
* currently being recovered, waiting for slab journal tailblock space when
|
|
Packit Service |
310c69 |
* necessary. This method is its own callback.
|
|
Packit Service |
310c69 |
*
|
|
Packit Service |
310c69 |
* @param completion The allocator completion
|
|
Packit Service |
310c69 |
**/
|
|
Packit Service |
310c69 |
static void addSlabJournalEntries(VDOCompletion *completion)
|
|
Packit Service |
310c69 |
{
|
|
Packit Service |
310c69 |
RecoveryCompletion *recovery = asRecoveryCompletion(completion->parent);
|
|
Packit Service |
310c69 |
VDO *vdo = recovery->vdo;
|
|
Packit Service |
310c69 |
RecoveryJournal *journal = vdo->recoveryJournal;
|
|
Packit Service |
310c69 |
|
|
Packit Service |
310c69 |
// Get ready in case we need to enqueue again.
|
|
Packit Service |
310c69 |
prepareCompletion(completion, addSlabJournalEntries,
|
|
Packit Service |
310c69 |
handleAddSlabJournalEntryError,
|
|
Packit Service |
310c69 |
completion->callbackThreadID, recovery);
|
|
Packit Service |
310c69 |
for (RecoveryPoint *recoveryPoint = &recovery->nextRecoveryPoint;
|
|
Packit Service |
310c69 |
beforeRecoveryPoint(recoveryPoint, &recovery->tailRecoveryPoint);
|
|
Packit Service |
310c69 |
advancePoints(recovery, journal->entriesPerBlock)) {
|
|
Packit Service |
310c69 |
RecoveryJournalEntry entry = getEntry(recovery, recoveryPoint);
|
|
Packit Service |
310c69 |
int result = validateRecoveryJournalEntry(vdo, &entry);
|
|
Packit Service |
310c69 |
if (result != VDO_SUCCESS) {
|
|
Packit Service |
310c69 |
enterReadOnlyMode(journal->readOnlyNotifier, result);
|
|
Packit Service |
310c69 |
finishCompletion(completion, result);
|
|
Packit Service |
310c69 |
return;
|
|
Packit Service |
310c69 |
}
|
|
Packit Service |
310c69 |
|
|
Packit Service |
310c69 |
if (entry.mapping.pbn == ZERO_BLOCK) {
|
|
Packit Service |
310c69 |
continue;
|
|
Packit Service |
310c69 |
}
|
|
Packit Service |
310c69 |
|
|
Packit Service |
310c69 |
Slab *slab = getSlab(vdo->depot, entry.mapping.pbn);
|
|
Packit Service |
310c69 |
if (slab->allocator != recovery->allocator) {
|
|
Packit Service |
310c69 |
continue;
|
|
Packit Service |
310c69 |
}
|
|
Packit Service |
310c69 |
|
|
Packit Service |
310c69 |
if (!attemptReplayIntoSlabJournal(slab->journal, entry.mapping.pbn,
|
|
Packit Service |
310c69 |
entry.operation,
|
|
Packit Service |
310c69 |
&recovery->nextJournalPoint,
|
|
Packit Service |
310c69 |
completion)) {
|
|
Packit Service |
310c69 |
return;
|
|
Packit Service |
310c69 |
}
|
|
Packit Service |
310c69 |
|
|
Packit Service |
310c69 |
recovery->entriesAddedToSlabJournals++;
|
|
Packit Service |
310c69 |
}
|
|
Packit Service |
310c69 |
|
|
Packit Service |
310c69 |
logInfo("Recreating missing journal entries for zone %u",
|
|
Packit Service |
310c69 |
recovery->allocator->zoneNumber);
|
|
Packit Service |
310c69 |
addSynthesizedEntries(completion);
|
|
Packit Service |
310c69 |
}
|
|
Packit Service |
310c69 |
|
|
Packit Service |
310c69 |
/**********************************************************************/
|
|
Packit Service |
310c69 |
void replayIntoSlabJournals(BlockAllocator *allocator,
|
|
Packit Service |
310c69 |
VDOCompletion *completion,
|
|
Packit Service |
310c69 |
void *context)
|
|
Packit Service |
310c69 |
{
|
|
Packit Service |
310c69 |
RecoveryCompletion *recovery = context;
|
|
Packit Service |
310c69 |
assertOnPhysicalZoneThread(recovery->vdo, allocator->zoneNumber, __func__);
|
|
Packit Service |
310c69 |
if ((recovery->journalData == NULL) || isReplaying(recovery->vdo)) {
|
|
Packit Service |
310c69 |
// there's nothing to replay
|
|
Packit Service |
310c69 |
notifySlabJournalsAreRecovered(allocator, VDO_SUCCESS);
|
|
Packit Service |
310c69 |
return;
|
|
Packit Service |
310c69 |
}
|
|
Packit Service |
310c69 |
|
|
Packit Service |
310c69 |
recovery->allocator = allocator;
|
|
Packit Service |
310c69 |
recovery->nextRecoveryPoint = (RecoveryPoint) {
|
|
Packit Service |
310c69 |
.sequenceNumber = recovery->slabJournalHead,
|
|
Packit Service |
310c69 |
.sectorCount = 1,
|
|
Packit Service |
310c69 |
.entryCount = 0,
|
|
Packit Service |
310c69 |
};
|
|
Packit Service |
310c69 |
|
|
Packit Service |
310c69 |
recovery->nextJournalPoint = (JournalPoint) {
|
|
Packit Service |
310c69 |
.sequenceNumber = recovery->slabJournalHead,
|
|
Packit Service |
310c69 |
.entryCount = 0,
|
|
Packit Service |
310c69 |
};
|
|
Packit Service |
310c69 |
|
|
Packit Service |
310c69 |
logInfo("Replaying entries into slab journals for zone %u",
|
|
Packit Service |
310c69 |
allocator->zoneNumber);
|
|
Packit Service |
310c69 |
completion->parent = recovery;
|
|
Packit Service |
310c69 |
addSlabJournalEntries(completion);
|
|
Packit Service |
310c69 |
}
|
|
Packit Service |
310c69 |
|
|
Packit Service |
310c69 |
/**
|
|
Packit Service |
310c69 |
* A waiter callback to enqueue a MissingDecref on the queue for the physical
|
|
Packit Service |
310c69 |
* zone in which it will be applied.
|
|
Packit Service |
310c69 |
*
|
|
Packit Service |
310c69 |
* Implements WaiterCallback.
|
|
Packit Service |
310c69 |
**/
|
|
Packit Service |
310c69 |
static void queueOnPhysicalZone(Waiter *waiter, void *context)
|
|
Packit Service |
310c69 |
{
|
|
Packit Service |
310c69 |
MissingDecref *decref = asMissingDecref(waiter);
|
|
Packit Service |
310c69 |
DataLocation mapping = decref->penultimateMapping;
|
|
Packit Service |
310c69 |
if (isMappedLocation(&mapping)) {
|
|
Packit Service |
310c69 |
decref->recovery->logicalBlocksUsed--;
|
|
Packit Service |
310c69 |
}
|
|
Packit Service |
310c69 |
|
|
Packit Service |
310c69 |
if (mapping.pbn == ZERO_BLOCK) {
|
|
Packit Service |
310c69 |
// Decrefs of zero are not applied to slab journals.
|
|
Packit Service |
310c69 |
FREE(decref);
|
|
Packit Service |
310c69 |
return;
|
|
Packit Service |
310c69 |
}
|
|
Packit Service |
310c69 |
|
|
Packit Service |
310c69 |
decref->slabJournal = getSlabJournal((SlabDepot *) context, mapping.pbn);
|
|
Packit Service |
310c69 |
ZoneCount zoneNumber = decref->slabJournal->slab->allocator->zoneNumber;
|
|
Packit Service |
310c69 |
enqueueMissingDecref(&decref->recovery->missingDecrefs[zoneNumber], decref);
|
|
Packit Service |
310c69 |
}
|
|
Packit Service |
310c69 |
|
|
Packit Service |
310c69 |
/**
|
|
Packit Service |
310c69 |
* Queue each missing decref on the slab journal to which it is to be applied
|
|
Packit Service |
310c69 |
* then load the slab depot. This callback is registered in
|
|
Packit Service |
310c69 |
* findSlabJournalEntries().
|
|
Packit Service |
310c69 |
*
|
|
Packit Service |
310c69 |
* @param completion The sub-task completion
|
|
Packit Service |
310c69 |
**/
|
|
Packit Service |
310c69 |
static void applyToDepot(VDOCompletion *completion)
|
|
Packit Service |
310c69 |
{
|
|
Packit Service |
310c69 |
RecoveryCompletion *recovery = asRecoveryCompletion(completion->parent);
|
|
Packit Service |
310c69 |
assertOnAdminThread(recovery->vdo, __func__);
|
|
Packit Service |
310c69 |
prepareSubTask(recovery, finishRecoveringDepot, finishParentCallback,
|
|
Packit Service |
310c69 |
ZONE_TYPE_ADMIN);
|
|
Packit Service |
310c69 |
|
|
Packit Service |
310c69 |
SlabDepot *depot = getSlabDepot(recovery->vdo);
|
|
Packit Service |
310c69 |
notifyAllWaiters(&recovery->missingDecrefs[0], queueOnPhysicalZone, depot);
|
|
Packit Service |
310c69 |
if (abortRecoveryOnError(recovery->completion.result, recovery)) {
|
|
Packit Service |
310c69 |
return;
|
|
Packit Service |
310c69 |
}
|
|
Packit Service |
310c69 |
|
|
Packit Service |
310c69 |
loadSlabDepot(depot, ADMIN_STATE_LOADING_FOR_RECOVERY, completion, recovery);
|
|
Packit Service |
310c69 |
}
|
|
Packit Service |
310c69 |
|
|
Packit Service |
310c69 |
/**
|
|
Packit Service |
310c69 |
* Validate the location of the penultimate mapping for a MissingDecref. If it
|
|
Packit Service |
310c69 |
* is valid, enqueue it for the appropriate physical zone or account for it.
|
|
Packit Service |
310c69 |
* Otherwise, dispose of it and signal an error.
|
|
Packit Service |
310c69 |
*
|
|
Packit Service |
310c69 |
* @param decref The decref whose penultimate mapping has just been found
|
|
Packit Service |
310c69 |
* @param location The penultimate mapping
|
|
Packit Service |
310c69 |
* @param errorCode The error code to use if the location is invalid
|
|
Packit Service |
310c69 |
**/
|
|
Packit Service |
310c69 |
static int recordMissingDecref(MissingDecref *decref,
|
|
Packit Service |
310c69 |
DataLocation location,
|
|
Packit Service |
310c69 |
int errorCode)
|
|
Packit Service |
310c69 |
{
|
|
Packit Service |
310c69 |
RecoveryCompletion *recovery = decref->recovery;
|
|
Packit Service |
310c69 |
recovery->incompleteDecrefCount--;
|
|
Packit Service |
310c69 |
if (isValidLocation(&location)
|
|
Packit Service |
310c69 |
&& isPhysicalDataBlock(recovery->vdo->depot, location.pbn)) {
|
|
Packit Service |
310c69 |
decref->penultimateMapping = location;
|
|
Packit Service |
310c69 |
decref->complete = true;
|
|
Packit Service |
310c69 |
return VDO_SUCCESS;
|
|
Packit Service |
310c69 |
}
|
|
Packit Service |
310c69 |
|
|
Packit Service |
310c69 |
// The location was invalid
|
|
Packit Service |
310c69 |
enterReadOnlyMode(recovery->vdo->readOnlyNotifier, errorCode);
|
|
Packit Service |
310c69 |
setCompletionResult(&recovery->completion, errorCode);
|
|
Packit Service |
310c69 |
logErrorWithStringError(errorCode,
|
|
Packit Service |
310c69 |
"Invalid mapping for pbn %llu with state %u",
|
|
Packit Service |
310c69 |
location.pbn, location.state);
|
|
Packit Service |
310c69 |
return errorCode;
|
|
Packit Service |
310c69 |
}
|
|
Packit Service |
310c69 |
|
|
Packit Service |
310c69 |
/**
|
|
Packit Service |
310c69 |
* Find the block map slots with missing decrefs.
|
|
Packit Service |
310c69 |
*
|
|
Packit Service |
310c69 |
* To find the slots missing decrefs, we iterate through the journal in reverse
|
|
Packit Service |
310c69 |
* so we see decrefs before increfs; if we see an incref before its paired
|
|
Packit Service |
310c69 |
* decref, we instantly know this incref is missing its decref.
|
|
Packit Service |
310c69 |
*
|
|
Packit Service |
310c69 |
* Simultaneously, we attempt to determine the missing decref. If there is a
|
|
Packit Service |
310c69 |
* missing decref, and at least two increfs for that slot, we know we should
|
|
Packit Service |
310c69 |
* decref the PBN from the penultimate incref. Otherwise, there is only one
|
|
Packit Service |
310c69 |
* incref for that slot: we must synthesize the decref out of the block map
|
|
Packit Service |
310c69 |
* instead of the recovery journal.
|
|
Packit Service |
310c69 |
*
|
|
Packit Service |
310c69 |
* @param recovery The recovery completion
|
|
Packit Service |
310c69 |
*
|
|
Packit Service |
310c69 |
* @return VDO_SUCCESS or an error code
|
|
Packit Service |
310c69 |
**/
|
|
Packit Service |
310c69 |
__attribute__((warn_unused_result))
|
|
Packit Service |
310c69 |
static int findMissingDecrefs(RecoveryCompletion *recovery)
|
|
Packit Service |
310c69 |
{
|
|
Packit Service |
310c69 |
IntMap *slotEntryMap = recovery->slotEntryMap;
|
|
Packit Service |
310c69 |
// This placeholder decref is used to mark lbns for which we have observed a
|
|
Packit Service |
310c69 |
// decref but not the paired incref (going backwards through the journal).
|
|
Packit Service |
310c69 |
MissingDecref foundDecref;
|
|
Packit Service |
310c69 |
|
|
Packit Service |
310c69 |
// A buffer is allocated based on the number of incRef entries found, so use
|
|
Packit Service |
310c69 |
// the earliest head.
|
|
Packit Service |
310c69 |
SequenceNumber head = minSequenceNumber(recovery->blockMapHead,
|
|
Packit Service |
310c69 |
recovery->slabJournalHead);
|
|
Packit Service |
310c69 |
RecoveryPoint headPoint = {
|
|
Packit Service |
310c69 |
.sequenceNumber = head,
|
|
Packit Service |
310c69 |
.sectorCount = 1,
|
|
Packit Service |
310c69 |
.entryCount = 0,
|
|
Packit Service |
310c69 |
};
|
|
Packit Service |
310c69 |
|
|
Packit Service |
310c69 |
// Set up for the first fake journal point that will be used for a
|
|
Packit Service |
310c69 |
// synthesized entry.
|
|
Packit Service |
310c69 |
recovery->nextSynthesizedJournalPoint = (JournalPoint) {
|
|
Packit Service |
310c69 |
.sequenceNumber = recovery->tail,
|
|
Packit Service |
310c69 |
.entryCount = recovery->vdo->recoveryJournal->entriesPerBlock,
|
|
Packit Service |
310c69 |
};
|
|
Packit Service |
310c69 |
|
|
Packit Service |
310c69 |
RecoveryPoint recoveryPoint = recovery->tailRecoveryPoint;
|
|
Packit Service |
310c69 |
while (beforeRecoveryPoint(&headPoint, &recoveryPoint)) {
|
|
Packit Service |
310c69 |
decrementRecoveryPoint(&recoveryPoint);
|
|
Packit Service |
310c69 |
RecoveryJournalEntry entry = getEntry(recovery, &recoveryPoint);
|
|
Packit Service |
310c69 |
|
|
Packit Service |
310c69 |
if (!isIncrementOperation(entry.operation)) {
|
|
Packit Service |
310c69 |
// Observe that we've seen a decref before its incref, but only if
|
|
Packit Service |
310c69 |
// the IntMap does not contain an unpaired incref for this lbn.
|
|
Packit Service |
310c69 |
int result = intMapPut(slotEntryMap, slotAsNumber(entry.slot),
|
|
Packit Service |
310c69 |
&foundDecref, false, NULL);
|
|
Packit Service |
310c69 |
if (result != VDO_SUCCESS) {
|
|
Packit Service |
310c69 |
return result;
|
|
Packit Service |
310c69 |
}
|
|
Packit Service |
310c69 |
|
|
Packit Service |
310c69 |
continue;
|
|
Packit Service |
310c69 |
}
|
|
Packit Service |
310c69 |
|
|
Packit Service |
310c69 |
recovery->increfCount++;
|
|
Packit Service |
310c69 |
|
|
Packit Service |
310c69 |
MissingDecref *decref
|
|
Packit Service |
310c69 |
= intMapRemove(slotEntryMap, slotAsNumber(entry.slot));
|
|
Packit Service |
310c69 |
if (entry.operation == BLOCK_MAP_INCREMENT) {
|
|
Packit Service |
310c69 |
if (decref != NULL) {
|
|
Packit Service |
310c69 |
return logErrorWithStringError(VDO_CORRUPT_JOURNAL,
|
|
Packit Service |
310c69 |
"decref found for block map block %"
|
|
Packit Service |
310c69 |
PRIu64 " with state %u",
|
|
Packit Service |
310c69 |
entry.mapping.pbn, entry.mapping.state);
|
|
Packit Service |
310c69 |
}
|
|
Packit Service |
310c69 |
|
|
Packit Service |
310c69 |
// There are no decrefs for block map pages, so they can't be missing.
|
|
Packit Service |
310c69 |
continue;
|
|
Packit Service |
310c69 |
}
|
|
Packit Service |
310c69 |
|
|
Packit Service |
310c69 |
if (decref == &foundDecref) {
|
|
Packit Service |
310c69 |
// This incref already had a decref in the intmap, so we know it is
|
|
Packit Service |
310c69 |
// not missing its decref.
|
|
Packit Service |
310c69 |
continue;
|
|
Packit Service |
310c69 |
}
|
|
Packit Service |
310c69 |
|
|
Packit Service |
310c69 |
if (decref == NULL) {
|
|
Packit Service |
310c69 |
// This incref is missing a decref. Add a missing decref object.
|
|
Packit Service |
310c69 |
int result = makeMissingDecref(recovery, entry, &decref);
|
|
Packit Service |
310c69 |
if (result != VDO_SUCCESS) {
|
|
Packit Service |
310c69 |
return result;
|
|
Packit Service |
310c69 |
}
|
|
Packit Service |
310c69 |
|
|
Packit Service |
310c69 |
result = intMapPut(slotEntryMap, slotAsNumber(entry.slot), decref,
|
|
Packit Service |
310c69 |
false, NULL);
|
|
Packit Service |
310c69 |
if (result != VDO_SUCCESS) {
|
|
Packit Service |
310c69 |
return result;
|
|
Packit Service |
310c69 |
}
|
|
Packit Service |
310c69 |
|
|
Packit Service |
310c69 |
continue;
|
|
Packit Service |
310c69 |
}
|
|
Packit Service |
310c69 |
|
|
Packit Service |
310c69 |
/*
|
|
Packit Service |
310c69 |
* This MissingDecref was left here by an incref without a decref.
|
|
Packit Service |
310c69 |
* We now know what its penultimate mapping is, and all entries
|
|
Packit Service |
310c69 |
* before here in the journal are paired, decref before incref, so
|
|
Packit Service |
310c69 |
* we needn't remember it in the intmap any longer.
|
|
Packit Service |
310c69 |
*/
|
|
Packit Service |
310c69 |
int result = recordMissingDecref(decref, entry.mapping,
|
|
Packit Service |
310c69 |
VDO_CORRUPT_JOURNAL);
|
|
Packit Service |
310c69 |
if (result != VDO_SUCCESS) {
|
|
Packit Service |
310c69 |
return result;
|
|
Packit Service |
310c69 |
}
|
|
Packit Service |
310c69 |
}
|
|
Packit Service |
310c69 |
|
|
Packit Service |
310c69 |
return VDO_SUCCESS;
|
|
Packit Service |
310c69 |
}
|
|
Packit Service |
310c69 |
|
|
Packit Service |
310c69 |
/**
|
|
Packit Service |
310c69 |
* Process a fetched block map page for a missing decref. This callback is
|
|
Packit Service |
310c69 |
* registered in findSlabJournalEntries().
|
|
Packit Service |
310c69 |
*
|
|
Packit Service |
310c69 |
* @param completion The page completion which has just finished loading
|
|
Packit Service |
310c69 |
**/
|
|
Packit Service |
310c69 |
static void processFetchedPage(VDOCompletion *completion)
|
|
Packit Service |
310c69 |
{
|
|
Packit Service |
310c69 |
MissingDecref *currentDecref = completion->parent;
|
|
Packit Service |
310c69 |
RecoveryCompletion *recovery = currentDecref->recovery;
|
|
Packit Service |
310c69 |
assertOnLogicalZoneThread(recovery->vdo, 0, __func__);
|
|
Packit Service |
310c69 |
|
|
Packit Service |
310c69 |
const BlockMapPage *page = dereferenceReadableVDOPage(completion);
|
|
Packit Service |
310c69 |
DataLocation location
|
|
Packit Service |
310c69 |
= unpackBlockMapEntry(&page->entries[currentDecref->slot.slot]);
|
|
Packit Service |
310c69 |
releaseVDOPageCompletion(completion);
|
|
Packit Service |
310c69 |
recordMissingDecref(currentDecref, location, VDO_BAD_MAPPING);
|
|
Packit Service |
310c69 |
if (recovery->incompleteDecrefCount == 0) {
|
|
Packit Service |
310c69 |
completeCompletion(&recovery->subTaskCompletion);
|
|
Packit Service |
310c69 |
}
|
|
Packit Service |
310c69 |
}
|
|
Packit Service |
310c69 |
|
|
Packit Service |
310c69 |
/**
|
|
Packit Service |
310c69 |
* Handle an error fetching a block map page for a missing decref.
|
|
Packit Service |
310c69 |
* This error handler is registered in findSlabJournalEntries().
|
|
Packit Service |
310c69 |
*
|
|
Packit Service |
310c69 |
* @param completion The page completion which has just finished loading
|
|
Packit Service |
310c69 |
**/
|
|
Packit Service |
310c69 |
static void handleFetchError(VDOCompletion *completion)
|
|
Packit Service |
310c69 |
{
|
|
Packit Service |
310c69 |
MissingDecref *decref = completion->parent;
|
|
Packit Service |
310c69 |
RecoveryCompletion *recovery = decref->recovery;
|
|
Packit Service |
310c69 |
assertOnLogicalZoneThread(recovery->vdo, 0, __func__);
|
|
Packit Service |
310c69 |
|
|
Packit Service |
310c69 |
// If we got a VDO_OUT_OF_RANGE error, it is because the pbn we read from
|
|
Packit Service |
310c69 |
// the journal was bad, so convert the error code
|
|
Packit Service |
310c69 |
setCompletionResult(&recovery->subTaskCompletion,
|
|
Packit Service |
310c69 |
((completion->result == VDO_OUT_OF_RANGE)
|
|
Packit Service |
310c69 |
? VDO_CORRUPT_JOURNAL : completion->result));
|
|
Packit Service |
310c69 |
releaseVDOPageCompletion(completion);
|
|
Packit Service |
310c69 |
if (--recovery->incompleteDecrefCount == 0) {
|
|
Packit Service |
310c69 |
completeCompletion(&recovery->subTaskCompletion);
|
|
Packit Service |
310c69 |
}
|
|
Packit Service |
310c69 |
}
|
|
Packit Service |
310c69 |
|
|
Packit Service |
310c69 |
/**
|
|
Packit Service |
310c69 |
* The waiter callback to requeue a missing decref and launch its page fetch.
|
|
Packit Service |
310c69 |
*
|
|
Packit Service |
310c69 |
* Implements WaiterCallback.
|
|
Packit Service |
310c69 |
**/
|
|
Packit Service |
310c69 |
static void launchFetch(Waiter *waiter, void *context)
|
|
Packit Service |
310c69 |
{
|
|
Packit Service |
310c69 |
MissingDecref *decref = asMissingDecref(waiter);
|
|
Packit Service |
310c69 |
RecoveryCompletion *recovery = decref->recovery;
|
|
Packit Service |
310c69 |
if (enqueueMissingDecref(&recovery->missingDecrefs[0], decref)
|
|
Packit Service |
310c69 |
!= VDO_SUCCESS) {
|
|
Packit Service |
310c69 |
return;
|
|
Packit Service |
310c69 |
}
|
|
Packit Service |
310c69 |
|
|
Packit Service |
310c69 |
if (decref->complete) {
|
|
Packit Service |
310c69 |
// We've already found the mapping for this decref, no fetch needed.
|
|
Packit Service |
310c69 |
return;
|
|
Packit Service |
310c69 |
}
|
|
Packit Service |
310c69 |
|
|
Packit Service |
310c69 |
BlockMapZone *zone = context;
|
|
Packit Service |
310c69 |
initVDOPageCompletion(&decref->pageCompletion, zone->pageCache,
|
|
Packit Service |
310c69 |
decref->slot.pbn, false, decref, processFetchedPage,
|
|
Packit Service |
310c69 |
handleFetchError);
|
|
Packit Service |
310c69 |
getVDOPageAsync(&decref->pageCompletion.completion);
|
|
Packit Service |
310c69 |
}
|
|
Packit Service |
310c69 |
|
|
Packit Service |
310c69 |
/**
|
|
Packit Service |
310c69 |
* Find all entries which need to be replayed into the slab journals.
|
|
Packit Service |
310c69 |
*
|
|
Packit Service |
310c69 |
* @param completion The sub-task completion
|
|
Packit Service |
310c69 |
**/
|
|
Packit Service |
310c69 |
static void findSlabJournalEntries(VDOCompletion *completion)
|
|
Packit Service |
310c69 |
{
|
|
Packit Service |
310c69 |
RecoveryCompletion *recovery = asRecoveryCompletion(completion->parent);
|
|
Packit Service |
310c69 |
VDO *vdo = recovery->vdo;
|
|
Packit Service |
310c69 |
|
|
Packit Service |
310c69 |
// We need to be on logical zone 0's thread since we are going to use its
|
|
Packit Service |
310c69 |
// page cache.
|
|
Packit Service |
310c69 |
assertOnLogicalZoneThread(vdo, 0, __func__);
|
|
Packit Service |
310c69 |
int result = findMissingDecrefs(recovery);
|
|
Packit Service |
310c69 |
if (abortRecoveryOnError(result, recovery)) {
|
|
Packit Service |
310c69 |
return;
|
|
Packit Service |
310c69 |
}
|
|
Packit Service |
310c69 |
|
|
Packit Service |
310c69 |
prepareSubTask(recovery, applyToDepot, finishParentCallback,
|
|
Packit Service |
310c69 |
ZONE_TYPE_ADMIN);
|
|
Packit Service |
310c69 |
|
|
Packit Service |
310c69 |
/*
|
|
Packit Service |
310c69 |
* Increment the incompleteDecrefCount so that the fetch callback can't
|
|
Packit Service |
310c69 |
* complete the sub-task while we are still processing the queue of missing
|
|
Packit Service |
310c69 |
* decrefs.
|
|
Packit Service |
310c69 |
*/
|
|
Packit Service |
310c69 |
if (recovery->incompleteDecrefCount++ > 0) {
|
|
Packit Service |
310c69 |
// Fetch block map pages to fill in the incomplete missing decrefs.
|
|
Packit Service |
310c69 |
notifyAllWaiters(&recovery->missingDecrefs[0], launchFetch,
|
|
Packit Service |
310c69 |
getBlockMapZone(getBlockMap(vdo), 0));
|
|
Packit Service |
310c69 |
}
|
|
Packit Service |
310c69 |
|
|
Packit Service |
310c69 |
if (--recovery->incompleteDecrefCount == 0) {
|
|
Packit Service |
310c69 |
completeCompletion(completion);
|
|
Packit Service |
310c69 |
}
|
|
Packit Service |
310c69 |
}
|
|
Packit Service |
310c69 |
|
|
Packit Service |
310c69 |
/**
|
|
Packit Service |
310c69 |
* Find the contiguous range of journal blocks.
|
|
Packit Service |
310c69 |
*
|
|
Packit Service |
310c69 |
* @param recovery The recovery completion
|
|
Packit Service |
310c69 |
*
|
|
Packit Service |
310c69 |
* @return true if there were valid journal blocks
|
|
Packit Service |
310c69 |
**/
|
|
Packit Service |
310c69 |
static bool findContiguousRange(RecoveryCompletion *recovery)
|
|
Packit Service |
310c69 |
{
|
|
Packit Service |
310c69 |
RecoveryJournal *journal = recovery->vdo->recoveryJournal;
|
|
Packit Service |
310c69 |
SequenceNumber head
|
|
Packit Service |
310c69 |
= minSequenceNumber(recovery->blockMapHead, recovery->slabJournalHead);
|
|
Packit Service |
310c69 |
|
|
Packit Service |
310c69 |
bool foundEntries = false;
|
|
Packit Service |
310c69 |
for (SequenceNumber i = head; i <= recovery->highestTail; i++) {
|
|
Packit Service |
310c69 |
recovery->tail = i;
|
|
Packit Service |
310c69 |
recovery->tailRecoveryPoint = (RecoveryPoint) {
|
|
Packit Service |
310c69 |
.sequenceNumber = i,
|
|
Packit Service |
310c69 |
.sectorCount = 0,
|
|
Packit Service |
310c69 |
.entryCount = 0,
|
|
Packit Service |
310c69 |
};
|
|
Packit Service |
310c69 |
|
|
Packit Service |
310c69 |
PackedJournalHeader *packedHeader
|
|
Packit Service |
310c69 |
= getJournalBlockHeader(journal, recovery->journalData, i);
|
|
Packit Service |
310c69 |
RecoveryBlockHeader header;
|
|
Packit Service |
310c69 |
unpackRecoveryBlockHeader(packedHeader, &header);
|
|
Packit Service |
310c69 |
|
|
Packit Service |
310c69 |
if (!isExactRecoveryJournalBlock(journal, &header, i)
|
|
Packit Service |
310c69 |
|| (header.entryCount > journal->entriesPerBlock)) {
|
|
Packit Service |
310c69 |
// A bad block header was found so this must be the end of the journal.
|
|
Packit Service |
310c69 |
break;
|
|
Packit Service |
310c69 |
}
|
|
Packit Service |
310c69 |
|
|
Packit Service |
310c69 |
JournalEntryCount blockEntries = header.entryCount;
|
|
Packit Service |
310c69 |
// Examine each sector in turn to determine the last valid sector.
|
|
Packit Service |
310c69 |
for (uint8_t j = 1; j < SECTORS_PER_BLOCK; j++) {
|
|
Packit Service |
310c69 |
PackedJournalSector *sector = getJournalBlockSector(packedHeader, j);
|
|
Packit Service |
310c69 |
|
|
Packit Service |
310c69 |
// A bad sector means that this block was torn.
|
|
Packit Service |
310c69 |
if (!isValidRecoveryJournalSector(&header, sector)) {
|
|
Packit Service |
310c69 |
break;
|
|
Packit Service |
310c69 |
}
|
|
Packit Service |
310c69 |
|
|
Packit Service |
310c69 |
JournalEntryCount sectorEntries = minBlock(sector->entryCount,
|
|
Packit Service |
310c69 |
blockEntries);
|
|
Packit Service |
310c69 |
if (sectorEntries > 0) {
|
|
Packit Service |
310c69 |
foundEntries = true;
|
|
Packit Service |
310c69 |
recovery->tailRecoveryPoint.sectorCount++;
|
|
Packit Service |
310c69 |
recovery->tailRecoveryPoint.entryCount = sectorEntries;
|
|
Packit Service |
310c69 |
blockEntries -= sectorEntries;
|
|
Packit Service |
310c69 |
}
|
|
Packit Service |
310c69 |
|
|
Packit Service |
310c69 |
// If this sector is short, the later sectors can't matter.
|
|
Packit Service |
310c69 |
if ((sectorEntries < RECOVERY_JOURNAL_ENTRIES_PER_SECTOR)
|
|
Packit Service |
310c69 |
|| (blockEntries == 0)) {
|
|
Packit Service |
310c69 |
break;
|
|
Packit Service |
310c69 |
}
|
|
Packit Service |
310c69 |
}
|
|
Packit Service |
310c69 |
|
|
Packit Service |
310c69 |
// If this block was not filled, or if it tore, no later block can matter.
|
|
Packit Service |
310c69 |
if ((header.entryCount != journal->entriesPerBlock)
|
|
Packit Service |
310c69 |
|| (blockEntries > 0)) {
|
|
Packit Service |
310c69 |
break;
|
|
Packit Service |
310c69 |
}
|
|
Packit Service |
310c69 |
}
|
|
Packit Service |
310c69 |
|
|
Packit Service |
310c69 |
// Set the tail to the last valid tail block, if there is one.
|
|
Packit Service |
310c69 |
if (foundEntries && (recovery->tailRecoveryPoint.sectorCount == 0)) {
|
|
Packit Service |
310c69 |
recovery->tail--;
|
|
Packit Service |
310c69 |
}
|
|
Packit Service |
310c69 |
|
|
Packit Service |
310c69 |
return foundEntries;
|
|
Packit Service |
310c69 |
}
|
|
Packit Service |
310c69 |
|
|
Packit Service |
310c69 |
/**
|
|
Packit Service |
310c69 |
* Count the number of increment entries in the journal.
|
|
Packit Service |
310c69 |
*
|
|
Packit Service |
310c69 |
* @param recovery The recovery completion
|
|
Packit Service |
310c69 |
**/
|
|
Packit Service |
310c69 |
static int countIncrementEntries(RecoveryCompletion *recovery)
|
|
Packit Service |
310c69 |
{
|
|
Packit Service |
310c69 |
RecoveryPoint recoveryPoint = {
|
|
Packit Service |
310c69 |
.sequenceNumber = recovery->blockMapHead,
|
|
Packit Service |
310c69 |
.sectorCount = 1,
|
|
Packit Service |
310c69 |
.entryCount = 0,
|
|
Packit Service |
310c69 |
};
|
|
Packit Service |
310c69 |
while (beforeRecoveryPoint(&recoveryPoint, &recovery->tailRecoveryPoint)) {
|
|
Packit Service |
310c69 |
RecoveryJournalEntry entry = getEntry(recovery, &recoveryPoint);
|
|
Packit Service |
310c69 |
int result = validateRecoveryJournalEntry(recovery->vdo, &entry);
|
|
Packit Service |
310c69 |
if (result != VDO_SUCCESS) {
|
|
Packit Service |
310c69 |
enterReadOnlyMode(recovery->vdo->readOnlyNotifier, result);
|
|
Packit Service |
310c69 |
return result;
|
|
Packit Service |
310c69 |
}
|
|
Packit Service |
310c69 |
if (isIncrementOperation(entry.operation)) {
|
|
Packit Service |
310c69 |
recovery->increfCount++;
|
|
Packit Service |
310c69 |
}
|
|
Packit Service |
310c69 |
incrementRecoveryPoint(&recoveryPoint);
|
|
Packit Service |
310c69 |
}
|
|
Packit Service |
310c69 |
|
|
Packit Service |
310c69 |
return VDO_SUCCESS;
|
|
Packit Service |
310c69 |
}
|
|
Packit Service |
310c69 |
|
|
Packit Service |
310c69 |
/**
|
|
Packit Service |
310c69 |
* Determine the limits of the valid recovery journal and prepare to replay
|
|
Packit Service |
310c69 |
* into the slab journals and block map.
|
|
Packit Service |
310c69 |
*
|
|
Packit Service |
310c69 |
* @param completion The sub-task completion
|
|
Packit Service |
310c69 |
**/
|
|
Packit Service |
310c69 |
static void prepareToApplyJournalEntries(VDOCompletion *completion)
|
|
Packit Service |
310c69 |
{
|
|
Packit Service |
310c69 |
RecoveryCompletion *recovery = asRecoveryCompletion(completion->parent);
|
|
Packit Service |
310c69 |
VDO *vdo = recovery->vdo;
|
|
Packit Service |
310c69 |
RecoveryJournal *journal = vdo->recoveryJournal;
|
|
Packit Service |
310c69 |
logInfo("Finished reading recovery journal");
|
|
Packit Service |
310c69 |
bool foundEntries = findHeadAndTail(journal, recovery->journalData,
|
|
Packit Service |
310c69 |
&recovery->highestTail,
|
|
Packit Service |
310c69 |
&recovery->blockMapHead,
|
|
Packit Service |
310c69 |
&recovery->slabJournalHead);
|
|
Packit Service |
310c69 |
if (foundEntries) {
|
|
Packit Service |
310c69 |
foundEntries = findContiguousRange(recovery);
|
|
Packit Service |
310c69 |
}
|
|
Packit Service |
310c69 |
|
|
Packit Service |
310c69 |
// Both reap heads must be behind the tail.
|
|
Packit Service |
310c69 |
if ((recovery->blockMapHead > recovery->tail)
|
|
Packit Service |
310c69 |
|| (recovery->slabJournalHead > recovery->tail)) {
|
|
Packit Service |
310c69 |
int result = logErrorWithStringError(VDO_CORRUPT_JOURNAL,
|
|
Packit Service |
310c69 |
"Journal tail too early. "
|
|
Packit Service |
310c69 |
"block map head: %" PRIu64
|
|
Packit Service |
310c69 |
", slab journal head: %" PRIu64
|
|
Packit Service |
310c69 |
", tail: %llu",
|
|
Packit Service |
310c69 |
recovery->blockMapHead,
|
|
Packit Service |
310c69 |
recovery->slabJournalHead,
|
|
Packit Service |
310c69 |
recovery->tail);
|
|
Packit Service |
310c69 |
finishCompletion(&recovery->completion, result);
|
|
Packit Service |
310c69 |
return;
|
|
Packit Service |
310c69 |
}
|
|
Packit Service |
310c69 |
|
|
Packit Service |
310c69 |
if (!foundEntries) {
|
|
Packit Service |
310c69 |
// This message must be recognizable by VDOTest::RebuildBase.
|
|
Packit Service |
310c69 |
logInfo("Replaying 0 recovery entries into block map");
|
|
Packit Service |
310c69 |
// We still need to load the SlabDepot.
|
|
Packit Service |
310c69 |
FREE(recovery->journalData);
|
|
Packit Service |
310c69 |
recovery->journalData = NULL;
|
|
Packit Service |
310c69 |
prepareSubTask(recovery, finishParentCallback, finishParentCallback,
|
|
Packit Service |
310c69 |
ZONE_TYPE_ADMIN);
|
|
Packit Service |
310c69 |
loadSlabDepot(getSlabDepot(vdo), ADMIN_STATE_LOADING_FOR_RECOVERY,
|
|
Packit Service |
310c69 |
completion, recovery);
|
|
Packit Service |
310c69 |
return;
|
|
Packit Service |
310c69 |
}
|
|
Packit Service |
310c69 |
|
|
Packit Service |
310c69 |
logInfo("Highest-numbered recovery journal block has sequence number"
|
|
Packit Service |
310c69 |
" %llu, and the highest-numbered usable block is %"
|
|
Packit Service |
310c69 |
PRIu64, recovery->highestTail, recovery->tail);
|
|
Packit Service |
310c69 |
|
|
Packit Service |
310c69 |
if (isReplaying(vdo)) {
|
|
Packit Service |
310c69 |
// We need to know how many entries the block map rebuild completion will
|
|
Packit Service |
310c69 |
// need to hold.
|
|
Packit Service |
310c69 |
int result = countIncrementEntries(recovery);
|
|
Packit Service |
310c69 |
if (result != VDO_SUCCESS) {
|
|
Packit Service |
310c69 |
finishCompletion(&recovery->completion, result);
|
|
Packit Service |
310c69 |
return;
|
|
Packit Service |
310c69 |
}
|
|
Packit Service |
310c69 |
|
|
Packit Service |
310c69 |
// We need to access the block map from a logical zone.
|
|
Packit Service |
310c69 |
prepareSubTask(recovery, launchBlockMapRecovery, finishParentCallback,
|
|
Packit Service |
310c69 |
ZONE_TYPE_LOGICAL);
|
|
Packit Service |
310c69 |
loadSlabDepot(vdo->depot, ADMIN_STATE_LOADING_FOR_RECOVERY, completion,
|
|
Packit Service |
310c69 |
recovery);
|
|
Packit Service |
310c69 |
return;
|
|
Packit Service |
310c69 |
}
|
|
Packit Service |
310c69 |
|
|
Packit Service |
310c69 |
int result = computeUsages(recovery);
|
|
Packit Service |
310c69 |
if (abortRecoveryOnError(result, recovery)) {
|
|
Packit Service |
310c69 |
return;
|
|
Packit Service |
310c69 |
}
|
|
Packit Service |
310c69 |
|
|
Packit Service |
310c69 |
prepareSubTask(recovery, findSlabJournalEntries, finishParentCallback,
|
|
Packit Service |
310c69 |
ZONE_TYPE_LOGICAL);
|
|
Packit Service |
310c69 |
invokeCallback(completion);
|
|
Packit Service |
310c69 |
}
|
|
Packit Service |
310c69 |
|
|
Packit Service |
310c69 |
/**********************************************************************/
|
|
Packit Service |
310c69 |
void launchRecovery(VDO *vdo, VDOCompletion *parent)
|
|
Packit Service |
310c69 |
{
|
|
Packit Service |
310c69 |
// Note: This message must be recognizable by Permabit::VDODeviceBase.
|
|
Packit Service |
310c69 |
logWarning("Device was dirty, rebuilding reference counts");
|
|
Packit Service |
310c69 |
|
|
Packit Service |
310c69 |
RecoveryCompletion *recovery;
|
|
Packit Service |
310c69 |
int result = makeRecoveryCompletion(vdo, &recovery);
|
|
Packit Service |
310c69 |
if (result != VDO_SUCCESS) {
|
|
Packit Service |
310c69 |
finishCompletion(parent, result);
|
|
Packit Service |
310c69 |
return;
|
|
Packit Service |
310c69 |
}
|
|
Packit Service |
310c69 |
|
|
Packit Service |
310c69 |
VDOCompletion *completion = &recovery->completion;
|
|
Packit Service |
310c69 |
prepareCompletion(completion, finishRecovery, abortRecovery,
|
|
Packit Service |
310c69 |
parent->callbackThreadID, parent);
|
|
Packit Service |
310c69 |
prepareSubTask(recovery, prepareToApplyJournalEntries, finishParentCallback,
|
|
Packit Service |
310c69 |
ZONE_TYPE_ADMIN);
|
|
Packit Service |
310c69 |
loadJournalAsync(vdo->recoveryJournal, &recovery->subTaskCompletion,
|
|
Packit Service |
310c69 |
&recovery->journalData);
|
|
Packit Service |
310c69 |
}
|