/*
* Copyright (c) 2020 Red Hat, Inc.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
* 02110-1301, USA.
*
* $Id: //eng/vdo-releases/aluminum/src/c++/vdo/base/slab.c#9 $
*/
#include "slab.h"
#include "logger.h"
#include "memoryAlloc.h"
#include "adminState.h"
#include "blockAllocatorInternals.h"
#include "completion.h"
#include "constants.h"
#include "numUtils.h"
#include "pbnLock.h"
#include "recoveryJournal.h"
#include "refCounts.h"
#include "slabDepot.h"
#include "slabJournal.h"
#include "slabJournalInternals.h"
#include "slabSummary.h"
/**********************************************************************/
/**
 * Compute the block layout and slab journal thresholds for slabs of a given
 * size, and record them in a SlabConfig.
 *
 * @param slabSize           The total number of blocks in each slab
 * @param slabJournalBlocks  The number of blocks reserved for the slab journal
 * @param slabConfig         The configuration to fill in; it is only written
 *                           when the function succeeds
 *
 * @return VDO_SUCCESS, or VDO_BAD_CONFIGURATION if the journal and reference
 *         count blocks would leave no room for data blocks
 **/
int configureSlab(BlockCount  slabSize,
                  BlockCount  slabJournalBlocks,
                  SlabConfig *slabConfig)
{
  if (slabSize <= slabJournalBlocks) {
    return VDO_BAD_CONFIGURATION;
  }

  /*
   * Strictly speaking this should be computed as a recurrence, but the total
   * number of metadata blocks is currently less than a single block of
   * refCounts, so iterating further would gain at most one data block per
   * slab.
   */
  BlockCount nonJournalBlocks = slabSize - slabJournalBlocks;
  BlockCount refBlocks        = getSavedReferenceCountSize(nonJournalBlocks);
  BlockCount metadataBlocks   = slabJournalBlocks + refBlocks;

  // Guard against test code configuring slabs that are too small.
  if (slabSize <= metadataBlocks) {
    return VDO_BAD_CONFIGURATION;
  }

  /*
   * A very small slab is assumed to belong to a unit test. In that case the
   * number of data blocks is rounded down to a power of two (wasting blocks
   * in the slab). Many tests need their dataBlocks fields to be the exact
   * capacity of the configured volume, which used to fall out naturally when
   * the slab size was a power of two and every block in a slab was a data
   * block.
   *
   * XXX Try to figure out some way of structuring testParameters and unit
   * tests so this hack isn't needed without having to edit several unit tests
   * every time the metadata size changes by one block.
   */
  BlockCount dataBlocks = slabSize - metadataBlocks;
  if ((slabSize < 1024) && !isPowerOfTwo(dataBlocks)) {
    dataBlocks = ((BlockCount) 1 << logBaseTwo(dataBlocks));
  }

  /*
   * Slab journal thresholds. Production uses a flush threshold of 168 out of
   * 224 blocks, i.e. 3/4ths, so the same ratio (rounded up) is applied to
   * every journal size.
   */
  BlockCount flushingThreshold = ((slabJournalBlocks * 3) + 3) / 4;

  /*
   * The blocking threshold should be far enough from the flushing threshold
   * to not produce delays, but far enough from the end of the journal to
   * allow multiple successive recovery failures: 5/7ths of the way through
   * the space left after the flushing threshold.
   */
  BlockCount afterFlushing     = slabJournalBlocks - flushingThreshold;
  BlockCount blockingThreshold = flushingThreshold + ((afterFlushing * 5) / 7);

  /*
   * The scrubbing threshold should be at least 2048 entries before the end of
   * the journal. When the journal is too small to leave that much room, the
   * (not yet clamped) blocking threshold is used instead.
   */
  BlockCount minimalExtraSpace
    = 1 + (MAXIMUM_USER_VIOS / SLAB_JOURNAL_FULL_ENTRIES_PER_BLOCK);
  BlockCount scrubbingThreshold
    = ((slabJournalBlocks > minimalExtraSpace)
       ? (slabJournalBlocks - minimalExtraSpace)
       : blockingThreshold);

  // Never block later than the point at which scrubbing is required.
  if (scrubbingThreshold < blockingThreshold) {
    blockingThreshold = scrubbingThreshold;
  }

  SlabConfig computed = {
    .slabBlocks                    = slabSize,
    .dataBlocks                    = dataBlocks,
    .referenceCountBlocks          = refBlocks,
    .slabJournalBlocks             = slabJournalBlocks,
    .slabJournalFlushingThreshold  = flushingThreshold,
    .slabJournalBlockingThreshold  = blockingThreshold,
    .slabJournalScrubbingThreshold = scrubbingThreshold
  };
  *slabConfig = computed;
  return VDO_SUCCESS;
}
/**********************************************************************/
/**
 * Compute the block number at which a slab's journal begins: the journal
 * follows the slab's data blocks and reference count blocks.
 *
 * @param slabConfig  The slab configuration supplying the block counts
 * @param origin      The block number of the start of the slab
 *
 * @return origin advanced past the data blocks and reference count blocks
 **/
PhysicalBlockNumber getSlabJournalStartBlock(const SlabConfig    *slabConfig,
                                             PhysicalBlockNumber  origin)
{
  PhysicalBlockNumber afterDataBlocks = origin + slabConfig->dataBlocks;
  return afterDataBlocks + slabConfig->referenceCountBlocks;
}
/**********************************************************************/
/**
 * Allocate and initialize a slab, including its slab journal and, for a new
 * slab, its reference counts.
 *
 * @param slabOrigin       The block number of the first block of the slab
 * @param allocator        The block allocator the slab is attached to
 * @param translation      The offset added to the in-slab metadata positions
 *                         to obtain refCountsOrigin and journalOrigin
 * @param recoveryJournal  The recovery journal handed to the slab journal
 * @param slabNumber       The number of the slab
 * @param isNew            If true, the slab is put in ADMIN_STATE_NEW and its
 *                         reference counts are allocated immediately
 * @param slabPtr          Receives the new slab; untouched on failure
 *
 * @return VDO_SUCCESS or the error from whichever allocation failed, in which
 *         case the partially constructed slab has already been freed
 **/
int makeSlab(PhysicalBlockNumber   slabOrigin,
             BlockAllocator       *allocator,
             PhysicalBlockNumber   translation,
             RecoveryJournal      *recoveryJournal,
             SlabCount             slabNumber,
             bool                  isNew,
             Slab                **slabPtr)
{
  Slab *newSlab;
  int result = ALLOCATE(1, Slab, __func__, &newSlab);
  if (result != VDO_SUCCESS) {
    return result;
  }

  const SlabConfig *config = getSlabConfig(allocator->depot);

  newSlab->allocator  = allocator;
  newSlab->start      = slabOrigin;
  newSlab->end        = newSlab->start + config->slabBlocks;
  newSlab->slabNumber = slabNumber;
  initializeRing(&newSlab->ringNode);

  // Reference counts follow the data blocks; the journal follows those.
  newSlab->refCountsOrigin = slabOrigin + config->dataBlocks + translation;
  newSlab->journalOrigin
    = getSlabJournalStartBlock(config, slabOrigin) + translation;

  result = makeSlabJournal(allocator, newSlab, recoveryJournal,
                           &newSlab->journal);
  if ((result == VDO_SUCCESS) && isNew) {
    newSlab->state.state = ADMIN_STATE_NEW;
    result = allocateRefCountsForSlab(newSlab);
  }

  // Any failure after the slab itself was allocated tears it down again.
  if (result != VDO_SUCCESS) {
    freeSlab(&newSlab);
    return result;
  }

  *slabPtr = newSlab;
  return VDO_SUCCESS;
}
/**********************************************************************/
/**
 * Allocate the reference counts for a slab which does not have any yet.
 *
 * @param slab  The slab whose reference counts are to be allocated
 *
 * @return VDO_SUCCESS, the assertion's error if the slab already has
 *         reference counts, or the error from makeRefCounts()
 **/
int allocateRefCountsForSlab(Slab *slab)
{
  BlockAllocator   *owner  = slab->allocator;
  const SlabConfig *config = getSlabConfig(owner->depot);

  int result = ASSERT(slab->referenceCounts == NULL,
                      "Slab %u doesn't allocate refcounts twice",
                      slab->slabNumber);
  if (result == VDO_SUCCESS) {
    result = makeRefCounts(config->dataBlocks, slab, slab->refCountsOrigin,
                           owner->readOnlyNotifier, &slab->referenceCounts);
  }

  return result;
}
/**********************************************************************/
/**
 * Destroy a slab and null out the reference to it. Does nothing if the
 * reference is already NULL.
 *
 * @param slabPtr  The reference to the slab to destroy
 **/
void freeSlab(Slab **slabPtr)
{
  Slab *doomed = *slabPtr;
  if (doomed != NULL) {
    // Detach from whatever ring the slab is on before releasing its parts.
    unspliceRingNode(&doomed->ringNode);
    freeSlabJournal(&doomed->journal);
    freeRefCounts(&doomed->referenceCounts);
    FREE(doomed);
    *slabPtr = NULL;
  }
}
/**********************************************************************/
/**
 * Get the zone number recorded in a slab's block allocator.
 *
 * @param slab  The slab to query
 *
 * @return the zoneNumber of the slab's allocator
 **/
ZoneCount getSlabZoneNumber(Slab *slab)
{
  const BlockAllocator *allocator = slab->allocator;
  return allocator->zoneNumber;
}
/**********************************************************************/
/**
 * Mark a slab as replaying, but only if it is currently marked as rebuilt;
 * a slab in any other status is left unchanged.
 *
 * @param slab  The slab to mark
 **/
void markSlabReplaying(Slab *slab)
{
  if (slab->status != SLAB_REBUILT) {
    return;
  }

  slab->status = SLAB_REPLAYING;
}
/**********************************************************************/
/**
 * Unconditionally mark a slab as requiring scrubbing.
 *
 * @param slab  The slab to mark
 **/
void markSlabUnrecovered(Slab *slab)
{
  // Whatever status the slab had before is discarded.
  slab->status = SLAB_REQUIRES_SCRUBBING;
}
/**********************************************************************/
/**
 * Get the number of unreferenced blocks reported by a slab's reference
 * counts.
 *
 * @param slab  The slab to query
 *
 * @return the unreferenced block count of the slab's reference counts
 **/
BlockCount getSlabFreeBlockCount(const Slab *slab)
{
  BlockCount freeBlocks = getUnreferencedBlockCount(slab->referenceCounts);
  return freeBlocks;
}
/**********************************************************************/
/**
 * Apply a reference count operation to a slab.
 *
 * @param slab          The slab to modify; a NULL slab is silently accepted
 * @param journalPoint  The journal point associated with the operation
 * @param operation     The reference operation to apply
 *
 * @return VDO_SUCCESS or the error from adjustReferenceCount()
 **/
int modifySlabReferenceCount(Slab               *slab,
                             const JournalPoint *journalPoint,
                             ReferenceOperation  operation)
{
  if (slab == NULL) {
    return VDO_SUCCESS;
  }

  if (isUnrecoveredSlab(slab)) {
    /*
     * Leave the refCount state of an unrecovered slab alone; scrubbing will
     * correct it. The slab journal has already captured all refCount
     * updates, so only the slab journal block reference for this entry is
     * adjusted.
     */
    SequenceNumber lockNumber = journalPoint->sequenceNumber;
    adjustSlabJournalBlockReference(slab->journal, lockNumber, -1);
    return VDO_SUCCESS;
  }

  bool freeStatusChanged;
  int result = adjustReferenceCount(slab->referenceCounts, operation,
                                    journalPoint, &freeStatusChanged);
  if ((result == VDO_SUCCESS) && freeStatusChanged) {
    // The flag passed is true for any operation which is not an increment.
    adjustFreeBlockCount(slab, !isIncrementOperation(operation.type));
  }

  return result;
}
/**********************************************************************/
/**
 * Make sure a PBN lock holds a provisional reference on a block, acquiring
 * one from the slab's reference counts if the lock does not have one yet.
 *
 * @param slab  The slab containing the block
 * @param pbn   The physical block number to reference
 * @param lock  The lock which should hold the provisional reference
 *
 * @return VDO_SUCCESS or the error from provisionallyReferenceBlock()
 **/
int acquireProvisionalReference(Slab                *slab,
                                PhysicalBlockNumber  pbn,
                                PBNLock             *lock)
{
  if (hasProvisionalReference(lock)) {
    // Nothing to do; the lock already holds a provisional reference.
    return VDO_SUCCESS;
  }

  int result = provisionallyReferenceBlock(slab->referenceCounts, pbn, lock);
  if ((result == VDO_SUCCESS) && hasProvisionalReference(lock)) {
    // The lock only now gained a reference, so update the free block count.
    adjustFreeBlockCount(slab, false);
  }

  return result;
}
/**********************************************************************/
/**
 * Convert a physical block number into the offset of that block within a
 * slab's data blocks.
 *
 * @param slab                 The slab expected to contain the block
 * @param physicalBlockNumber  The physical block number to convert
 * @param slabBlockNumberPtr   Receives the offset; untouched on failure
 *
 * @return VDO_SUCCESS, or VDO_OUT_OF_RANGE if the block lies before the start
 *         of the slab or at or beyond the end of its data blocks
 **/
int slabBlockNumberFromPBN(Slab                *slab,
                           PhysicalBlockNumber  physicalBlockNumber,
                           SlabBlockNumber     *slabBlockNumberPtr)
{
  // Reject blocks below the slab first so the subtraction cannot wrap.
  if (physicalBlockNumber < slab->start) {
    return VDO_OUT_OF_RANGE;
  }

  uint64_t offset = physicalBlockNumber - slab->start;
  const SlabConfig *config = getSlabConfig(slab->allocator->depot);
  if (offset >= config->dataBlocks) {
    return VDO_OUT_OF_RANGE;
  }

  *slabBlockNumberPtr = offset;
  return VDO_SUCCESS;
}
/**********************************************************************/
/**
 * Decide whether the reference counts of a fully built slab need to be
 * written out.
 *
 * @param slab  The slab to check
 *
 * @return true if the slab summary says the refCounts must be loaded, if any
 *         data block is not free, or if the slab journal is not blank
 **/
bool shouldSaveFullyBuiltSlab(const Slab *slab)
{
  BlockCount dataBlocks = getSlabConfig(slab->allocator->depot)->dataBlocks;

  // The slab has written its refCounts before.
  if (mustLoadRefCounts(slab->allocator->summary, slab->slabNumber)) {
    return true;
  }

  // The slab has at least one non-zero reference count.
  if (getSlabFreeBlockCount(slab) != dataBlocks) {
    return true;
  }

  // Otherwise it only matters whether there are any slab journal blocks.
  return !isSlabJournalBlank(slab->journal);
}
/**
 * Initiate a slab action: a drain, a load, or a resume, depending on the
 * admin state. Any other kind of operation is finished immediately with
 * VDO_INVALID_ADMIN_STATE.
 *
 * Implements AdminInitiator.
 *
 * @param state  The admin state embedded in the slab being operated on
 **/
static void initiateSlabAction(AdminState *state)
{
  Slab *slab = container_of(state, Slab, state);

  if (isDraining(state)) {
    if (state->state == ADMIN_STATE_SCRUBBING) {
      slab->status = SLAB_REBUILDING;
    }

    drainSlabJournal(slab->journal);

    // A slab may not have reference counts allocated.
    if (slab->referenceCounts != NULL) {
      drainRefCounts(slab->referenceCounts);
    }

    checkIfSlabDrained(slab);
  } else if (isLoading(state)) {
    decodeSlabJournal(slab->journal);
  } else if (isResuming(state)) {
    queueSlab(slab);
    finishResuming(state);
  } else {
    finishOperationWithResult(state, VDO_INVALID_ADMIN_STATE);
  }
}
/**********************************************************************/
/**
 * Start an administrative operation on a slab, using initiateSlabAction()
 * as the initiator.
 *
 * @param slab       The slab to operate on
 * @param operation  The admin state code of the operation to start
 * @param parent     The completion passed to startOperationWithWaiter() as
 *                   the waiter
 **/
void startSlabAction(Slab           *slab,
                     AdminStateCode  operation,
                     VDOCompletion  *parent)
{
  AdminState *slabState = &slab->state;
  startOperationWithWaiter(slabState, operation, parent, initiateSlabAction);
}
/**********************************************************************/
/**
 * Finish loading a slab once its journal has been loaded.
 *
 * @param slab    The slab whose journal has been loaded
 * @param result  The result of loading the journal
 **/
void notifySlabJournalIsLoaded(Slab *slab, int result)
{
  if ((result != VDO_SUCCESS) || !isCleanLoad(&slab->state)) {
    finishLoadingWithResult(&slab->state, result);
    return;
  }

  // Since this is a normal or new load, we don't need the memory to read and
  // process the recovery journal, so the reference counts can be allocated
  // now; the load finishes with the result of that allocation.
  finishLoadingWithResult(&slab->state, allocateRefCountsForSlab(slab));
}
/**********************************************************************/
/**
 * Check whether a slab is open, i.e. its admin state is neither quiescing
 * nor quiescent.
 *
 * @param slab  The slab to check
 *
 * @return true if the slab is neither quiescing nor quiescent
 **/
bool isSlabOpen(Slab *slab)
{
  return !(isQuiescing(&slab->state) || isQuiescent(&slab->state));
}
/**********************************************************************/
/**
 * Check whether a slab's admin state is draining.
 *
 * @param slab  The slab to check
 *
 * @return the result of isDraining() on the slab's admin state
 **/
bool isSlabDraining(Slab *slab)
{
  AdminState *slabState = &slab->state;
  return isDraining(slabState);
}
/**********************************************************************/
/**
 * Finish draining a slab if it is draining and neither its journal nor its
 * reference counts (if it has any) are still active. The drain finishes with
 * VDO_READ_ONLY if the allocator's read-only notifier reports read-only
 * mode, and with VDO_SUCCESS otherwise.
 *
 * @param slab  The slab to check
 **/
void checkIfSlabDrained(Slab *slab)
{
  if (!isDraining(&slab->state) || isSlabJournalActive(slab->journal)) {
    return;
  }

  if ((slab->referenceCounts != NULL)
      && areRefCountsActive(slab->referenceCounts)) {
    return;
  }

  int drainResult = (isReadOnly(slab->allocator->readOnlyNotifier)
                     ? VDO_READ_ONLY : VDO_SUCCESS);
  finishDrainingWithResult(&slab->state, drainResult);
}
/**********************************************************************/
/**
 * Continue draining a slab once its journal has finished draining.
 *
 * @param slab    The slab whose journal has drained
 * @param result  The result of draining the journal
 **/
void notifySlabJournalIsDrained(Slab *slab, int result)
{
  if (slab->referenceCounts != NULL) {
    // Record the journal's result, then move on to the reference counts.
    setOperationResult(&slab->state, result);
    drainRefCounts(slab->referenceCounts);
    return;
  }

  // There are no reference counts to drain. This can happen when shutting
  // down a VDO that was in read-only mode when loaded.
  notifyRefCountsAreDrained(slab, result);
}
/**********************************************************************/
/**
 * Finish draining a slab with the given result.
 *
 * @param slab    The slab whose drain is to be finished
 * @param result  The result to finish the drain with
 **/
void notifyRefCountsAreDrained(Slab *slab, int result)
{
  AdminState *slabState = &slab->state;
  finishDrainingWithResult(slabState, result);
}
/**********************************************************************/
/**
 * Check whether a slab's admin state is resuming.
 *
 * @param slab  The slab to check
 *
 * @return the result of isResuming() on the slab's admin state
 **/
bool isSlabResuming(Slab *slab)
{
  AdminState *slabState = &slab->state;
  return isResuming(slabState);
}
/**********************************************************************/
/**
 * Complete the scrubbing of a slab: mark it as rebuilt, queue it, and reopen
 * its slab journal.
 *
 * @param slab  The slab which has been scrubbed
 **/
void finishScrubbingSlab(Slab *slab)
{
// The status is updated before the slab is queued and its journal reopened.
slab->status = SLAB_REBUILT;
queueSlab(slab);
reopenSlabJournal(slab->journal);
}
/**********************************************************************/
/**
 * Get a printable name for a slab rebuild status.
 *
 * @param status  The status to describe
 *
 * @return a constant string naming the status, or "UNKNOWN" for any value
 *         which is not one of the recognized statuses
 **/
static const char *statusToString(SlabRebuildStatus status)
{
  const char *name = "UNKNOWN";

  switch (status) {
  case SLAB_REBUILT:
    name = "REBUILT";
    break;

  case SLAB_REQUIRES_SCRUBBING:
    name = "SCRUBBING";
    break;

  case SLAB_REQUIRES_HIGH_PRIORITY_SCRUBBING:
    name = "PRIORITY_SCRUBBING";
    break;

  case SLAB_REBUILDING:
    name = "REBUILDING";
    break;

  case SLAB_REPLAYING:
    name = "REPLAYING";
    break;

  default:
    break;
  }

  return name;
}
/**********************************************************************/
/**
 * Log a description of a slab, followed by dumps of its slab journal and,
 * when present, its reference counts.
 *
 * @param slab  The slab to dump
 **/
void dumpSlab(const Slab *slab)
{
  if (slab->referenceCounts != NULL) {
    // Terse because there are a lot of slabs to dump and syslog is lossy.
    // The free count is cast explicitly so that the argument matches %llu
    // no matter which integer type BlockCount is defined as.
    logInfo("slab %u: P%u, %llu free",
            slab->slabNumber, slab->priority,
            (unsigned long long) getSlabFreeBlockCount(slab));
  } else {
    // Without reference counts there is no free count to report, so log the
    // rebuild status instead.
    logInfo("slab %u: status %s", slab->slabNumber,
            statusToString(slab->status));
  }

  dumpSlabJournal(slab->journal);

  if (slab->referenceCounts != NULL) {
    dumpRefCounts(slab->referenceCounts);
  } else {
    logInfo("refCounts is null");
  }
}