/* * Copyright (c) 2020 Red Hat, Inc. * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version 2 * of the License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA * 02110-1301, USA. * * $Id: //eng/vdo-releases/aluminum/src/c++/vdo/base/slabScrubber.c#6 $ */ #include "slabScrubberInternals.h" #include "logger.h" #include "memoryAlloc.h" #include "adminState.h" #include "blockAllocator.h" #include "constants.h" #include "readOnlyNotifier.h" #include "recoveryJournal.h" #include "refCounts.h" #include "refCountsInternals.h" #include "slab.h" #include "slabJournalInternals.h" /** * Allocate the buffer and extent used for reading the slab journal when * scrubbing a slab. * * @param scrubber The slab scrubber for which to allocate * @param layer The physical layer on which the scrubber resides * @param slabJournalSize The size of a slab journal * * @return VDO_SUCCESS or an error **/ __attribute__((warn_unused_result)) static int allocateExtentAndBuffer(SlabScrubber *scrubber, PhysicalLayer *layer, BlockCount slabJournalSize) { size_t bufferSize = VDO_BLOCK_SIZE * slabJournalSize; int result = ALLOCATE(bufferSize, char, __func__, &scrubber->journalData); if (result != VDO_SUCCESS) { return result; } return createExtent(layer, VIO_TYPE_SLAB_JOURNAL, VIO_PRIORITY_METADATA, slabJournalSize, scrubber->journalData, &scrubber->extent); } /**********************************************************************/ int makeSlabScrubber(PhysicalLayer *layer, BlockCount slabJournalSize, ReadOnlyNotifier *readOnlyNotifier, SlabScrubber **scrubberPtr) { SlabScrubber *scrubber; int result = ALLOCATE(1, SlabScrubber, __func__, &scrubber); if (result != VDO_SUCCESS) { return result; } result = allocateExtentAndBuffer(scrubber, layer, slabJournalSize); if (result != VDO_SUCCESS) { freeSlabScrubber(&scrubber); return result; } initializeCompletion(&scrubber->completion, SLAB_SCRUBBER_COMPLETION, layer); initializeRing(&scrubber->highPrioritySlabs); initializeRing(&scrubber->slabs); scrubber->readOnlyNotifier = readOnlyNotifier; scrubber->adminState.state = ADMIN_STATE_SUSPENDED; *scrubberPtr = scrubber; return VDO_SUCCESS; } /** * Free the extent and buffer used for reading slab journals. * * @param scrubber The scrubber **/ static void freeExtentAndBuffer(SlabScrubber *scrubber) { freeExtent(&scrubber->extent); if (scrubber->journalData != NULL) { FREE(scrubber->journalData); scrubber->journalData = NULL; } } /**********************************************************************/ void freeSlabScrubber(SlabScrubber **scrubberPtr) { if (*scrubberPtr == NULL) { return; } SlabScrubber *scrubber = *scrubberPtr; freeExtentAndBuffer(scrubber); FREE(scrubber); *scrubberPtr = NULL; } /** * Get the next slab to scrub. * * @param scrubber The slab scrubber * * @return The next slab to scrub or NULL if there are none **/ static Slab *getNextSlab(SlabScrubber *scrubber) { if (!isRingEmpty(&scrubber->highPrioritySlabs)) { return slabFromRingNode(scrubber->highPrioritySlabs.next); } if (!isRingEmpty(&scrubber->slabs)) { return slabFromRingNode(scrubber->slabs.next); } return NULL; } /**********************************************************************/ bool hasSlabsToScrub(SlabScrubber *scrubber) { return (getNextSlab(scrubber) != NULL); } /**********************************************************************/ SlabCount getScrubberSlabCount(const SlabScrubber *scrubber) { return relaxedLoad64(&scrubber->slabCount); } /**********************************************************************/ void registerSlabForScrubbing(SlabScrubber *scrubber, Slab *slab, bool highPriority) { ASSERT_LOG_ONLY((slab->status != SLAB_REBUILT), "slab to be scrubbed is unrecovered"); if (slab->status != SLAB_REQUIRES_SCRUBBING) { return; } unspliceRingNode(&slab->ringNode); if (!slab->wasQueuedForScrubbing) { relaxedAdd64(&scrubber->slabCount, 1); slab->wasQueuedForScrubbing = true; } if (highPriority) { slab->status = SLAB_REQUIRES_HIGH_PRIORITY_SCRUBBING; pushRingNode(&scrubber->highPrioritySlabs, &slab->ringNode); return; } pushRingNode(&scrubber->slabs, &slab->ringNode); } /** * Stop scrubbing, either because there are no more slabs to scrub or because * there's been an error. * * @param scrubber The scrubber **/ static void finishScrubbing(SlabScrubber *scrubber) { if (!hasSlabsToScrub(scrubber)) { freeExtentAndBuffer(scrubber); } // Inform whoever is waiting that scrubbing has completed. completeCompletion(&scrubber->completion); bool notify = hasWaiters(&scrubber->waiters); // Note that the scrubber has stopped, and inform anyone who might be waiting // for that to happen. if (!finishDraining(&scrubber->adminState)) { scrubber->adminState.state = ADMIN_STATE_SUSPENDED; } /* * We can't notify waiters until after we've finished draining or they'll * just requeue. Fortunately if there were waiters, we can't have been freed * yet. */ if (notify) { notifyAllWaiters(&scrubber->waiters, NULL, NULL); } } /**********************************************************************/ static void scrubNextSlab(SlabScrubber *scrubber); /** * Notify the scrubber that a slab has been scrubbed. This callback is * registered in applyJournalEntries(). * * @param completion The slab rebuild completion **/ static void slabScrubbed(VDOCompletion *completion) { SlabScrubber *scrubber = completion->parent; finishScrubbingSlab(scrubber->slab); relaxedAdd64(&scrubber->slabCount, -1); scrubNextSlab(scrubber); } /** * Abort scrubbing due to an error. * * @param scrubber The slab scrubber * @param result The error **/ static void abortScrubbing(SlabScrubber *scrubber, int result) { enterReadOnlyMode(scrubber->readOnlyNotifier, result); setCompletionResult(&scrubber->completion, result); scrubNextSlab(scrubber); } /** * Handle errors while rebuilding a slab. * * @param completion The slab rebuild completion **/ static void handleScrubberError(VDOCompletion *completion) { abortScrubbing(completion->parent, completion->result); } /** * Apply all the entries in a block to the reference counts. * * @param block A block with entries to apply * @param entryCount The number of entries to apply * @param blockNumber The sequence number of the block * @param slab The slab to apply the entries to * * @return VDO_SUCCESS or an error code **/ static int applyBlockEntries(PackedSlabJournalBlock *block, JournalEntryCount entryCount, SequenceNumber blockNumber, Slab *slab) { JournalPoint entryPoint = { .sequenceNumber = blockNumber, .entryCount = 0, }; SlabBlockNumber maxSBN = slab->end - slab->start; while (entryPoint.entryCount < entryCount) { SlabJournalEntry entry = decodeSlabJournalEntry(block, entryPoint.entryCount); if (entry.sbn > maxSBN) { // This entry is out of bounds. return logErrorWithStringError(VDO_CORRUPT_JOURNAL, "Slab journal entry" " (%llu, %u) had invalid offset" " %u in slab (size %u blocks)", blockNumber, entryPoint.entryCount, entry.sbn, maxSBN); } int result = replayReferenceCountChange(slab->referenceCounts, &entryPoint, entry); if (result != VDO_SUCCESS) { logErrorWithStringError(result, "Slab journal entry (%llu, %u)" " (%s of offset %" PRIu32 ") could not be" " applied in slab %u", blockNumber, entryPoint.entryCount, getJournalOperationName(entry.operation), entry.sbn, slab->slabNumber); return result; } entryPoint.entryCount++; } return VDO_SUCCESS; } /** * Find the relevant extent of the slab journal and apply all valid entries. * This is a callback registered in startScrubbing(). * * @param completion The metadata read extent completion **/ static void applyJournalEntries(VDOCompletion *completion) { SlabScrubber *scrubber = completion->parent; Slab *slab = scrubber->slab; SlabJournal *journal = slab->journal; RefCounts *referenceCounts = slab->referenceCounts; // Find the boundaries of the useful part of the journal. SequenceNumber tail = journal->tail; TailBlockOffset endIndex = getSlabJournalBlockOffset(journal, tail - 1); char *endData = scrubber->journalData + (endIndex * VDO_BLOCK_SIZE); PackedSlabJournalBlock *endBlock = (PackedSlabJournalBlock *) endData; SequenceNumber head = getUInt64LE(endBlock->header.fields.head); TailBlockOffset headIndex = getSlabJournalBlockOffset(journal, head); BlockCount index = headIndex; JournalPoint refCountsPoint = referenceCounts->slabJournalPoint; JournalPoint lastEntryApplied = refCountsPoint; for (SequenceNumber sequence = head; sequence < tail; sequence++) { char *blockData = scrubber->journalData + (index * VDO_BLOCK_SIZE); PackedSlabJournalBlock *block = (PackedSlabJournalBlock *) blockData; SlabJournalBlockHeader header; unpackSlabJournalBlockHeader(&block->header, &header); if ((header.nonce != slab->allocator->nonce) || (header.metadataType != VDO_METADATA_SLAB_JOURNAL) || (header.sequenceNumber != sequence) || (header.entryCount > journal->entriesPerBlock) || (header.hasBlockMapIncrements && (header.entryCount > journal->fullEntriesPerBlock))) { // The block is not what we expect it to be. logError("Slab journal block for slab %u was invalid", slab->slabNumber); abortScrubbing(scrubber, VDO_CORRUPT_JOURNAL); return; } int result = applyBlockEntries(block, header.entryCount, sequence, slab); if (result != VDO_SUCCESS) { abortScrubbing(scrubber, result); return; } lastEntryApplied.sequenceNumber = sequence; lastEntryApplied.entryCount = header.entryCount - 1; index++; if (index == journal->size) { index = 0; } } // At the end of rebuild, the refCounts should be accurate to the end // of the journal we just applied. int result = ASSERT(!beforeJournalPoint(&lastEntryApplied, &refCountsPoint), "Refcounts are not more accurate than the slab journal"); if (result != VDO_SUCCESS) { abortScrubbing(scrubber, result); return; } // Save out the rebuilt reference blocks. prepareCompletion(completion, slabScrubbed, handleScrubberError, completion->callbackThreadID, scrubber); startSlabAction(slab, ADMIN_STATE_SAVE_FOR_SCRUBBING, completion); } /** * Read the current slab's journal from disk now that it has been flushed. * This callback is registered in scrubNextSlab(). * * @param completion The scrubber's extent completion **/ static void startScrubbing(VDOCompletion *completion) { SlabScrubber *scrubber = completion->parent; Slab *slab = scrubber->slab; if (getSummarizedCleanliness(slab->allocator->summary, slab->slabNumber)) { slabScrubbed(completion); return; } prepareCompletion(&scrubber->extent->completion, applyJournalEntries, handleScrubberError, completion->callbackThreadID, completion->parent); readMetadataExtent(scrubber->extent, slab->journalOrigin); } /** * Scrub the next slab if there is one. * * @param scrubber The scrubber **/ static void scrubNextSlab(SlabScrubber *scrubber) { // Note: this notify call is always safe only because scrubbing can only // be started when the VDO is quiescent. notifyAllWaiters(&scrubber->waiters, NULL, NULL); if (isReadOnly(scrubber->readOnlyNotifier)) { setCompletionResult(&scrubber->completion, VDO_READ_ONLY); finishScrubbing(scrubber); return; } Slab *slab = getNextSlab(scrubber); if ((slab == NULL) || (scrubber->highPriorityOnly && isRingEmpty(&scrubber->highPrioritySlabs))) { scrubber->highPriorityOnly = false; finishScrubbing(scrubber); return; } if (finishDraining(&scrubber->adminState)) { return; } unspliceRingNode(&slab->ringNode); scrubber->slab = slab; VDOCompletion *completion = extentAsCompletion(scrubber->extent); prepareCompletion(completion, startScrubbing, handleScrubberError, scrubber->completion.callbackThreadID, scrubber); startSlabAction(slab, ADMIN_STATE_SCRUBBING, completion); } /**********************************************************************/ void scrubSlabs(SlabScrubber *scrubber, void *parent, VDOAction *callback, VDOAction *errorHandler) { resumeIfQuiescent(&scrubber->adminState); ThreadID threadID = getCallbackThreadID(); prepareCompletion(&scrubber->completion, callback, errorHandler, threadID, parent); if (!hasSlabsToScrub(scrubber)) { finishScrubbing(scrubber); return; } scrubNextSlab(scrubber); } /**********************************************************************/ void scrubHighPrioritySlabs(SlabScrubber *scrubber, bool scrubAtLeastOne, VDOCompletion *parent, VDOAction *callback, VDOAction *errorHandler) { if (scrubAtLeastOne && isRingEmpty(&scrubber->highPrioritySlabs)) { Slab *slab = getNextSlab(scrubber); if (slab != NULL) { registerSlabForScrubbing(scrubber, slab, true); } } scrubber->highPriorityOnly = true; scrubSlabs(scrubber, parent, callback, errorHandler); } /**********************************************************************/ void stopScrubbing(SlabScrubber *scrubber, VDOCompletion *parent) { if (isQuiescent(&scrubber->adminState)) { completeCompletion(parent); } else { startDraining(&scrubber->adminState, ADMIN_STATE_SUSPENDING, parent, NULL); } } /**********************************************************************/ void resumeScrubbing(SlabScrubber *scrubber, VDOCompletion *parent) { if (!hasSlabsToScrub(scrubber)) { completeCompletion(parent); return; } int result = resumeIfQuiescent(&scrubber->adminState); if (result != VDO_SUCCESS) { finishCompletion(parent, result); return; } scrubNextSlab(scrubber); completeCompletion(parent); } /**********************************************************************/ int enqueueCleanSlabWaiter(SlabScrubber *scrubber, Waiter *waiter) { if (isReadOnly(scrubber->readOnlyNotifier)) { return VDO_READ_ONLY; } if (isQuiescent(&scrubber->adminState)) { return VDO_NO_SPACE; } return enqueueWaiter(&scrubber->waiters, waiter); } /**********************************************************************/ void dumpSlabScrubber(const SlabScrubber *scrubber) { logInfo("slabScrubber slabCount %u waiters %zu %s%s", getScrubberSlabCount(scrubber), countWaiters(&scrubber->waiters), getAdminStateName(&scrubber->adminState), scrubber->highPriorityOnly ? ", highPriorityOnly " : ""); }