/*
 * Copyright (c) 2020 Red Hat, Inc.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
 * 02110-1301, USA.
 *
 * $Id: //eng/vdo-releases/aluminum/src/c++/vdo/base/blockAllocator.c#22 $
 */

#include "blockAllocatorInternals.h"

#include "logger.h"
#include "memoryAlloc.h"

#include "adminState.h"
#include "heap.h"
#include "numUtils.h"
#include "priorityTable.h"
#include "readOnlyNotifier.h"
#include "refCounts.h"
#include "slab.h"
#include "slabDepotInternals.h"
#include "slabIterator.h"
#include "slabJournalEraser.h"
#include "slabJournalInternals.h"
#include "slabScrubber.h"
#include "slabSummary.h"
#include "vdoRecovery.h"
#include "vio.h"
#include "vioPool.h"

/**
 * Assert that a block allocator function was called from the correct thread.
 *
 * @param threadID      The allocator's thread id
 * @param functionName  The name of the function
 **/
static inline void assertOnAllocatorThread(ThreadID    threadID,
                                           const char *functionName)
{
  ASSERT_LOG_ONLY((getCallbackThreadID() == threadID),
                  "%s called on correct thread", functionName);
}

/**
 * Get the priority for a slab in the allocator's slab queue. Slabs are
 * essentially prioritized by an approximation of the number of free blocks
 * in the slab, so slabs with many free blocks will be opened for allocation
 * before slabs that have few free blocks.
 *
 * @param slab  The slab whose queue priority is desired
 *
 * @return the queue priority of the slab
 **/
static unsigned int calculateSlabPriority(Slab *slab)
{
  BlockCount freeBlocks = getSlabFreeBlockCount(slab);

  // Slabs that are completely full must be the only ones with the lowest
  // priority: zero.
  if (freeBlocks == 0) {
    return 0;
  }

  /*
   * Slabs that have never been opened (empty, newly initialized, and never
   * written to) have lower priority than previously opened slabs that have
   * a significant number of free blocks. This ranking causes VDO to avoid
   * writing physical blocks for the first time until there are very few
   * free blocks that have been previously written to. That policy makes
   * VDO a better client of any underlying storage that is
   * thinly-provisioned [VDOSTORY-123].
   */
  unsigned int unopenedSlabPriority = slab->allocator->unopenedSlabPriority;
  if (isSlabJournalBlank(slab->journal)) {
    return unopenedSlabPriority;
  }

  /*
   * For all other slabs, the priority is derived from the logarithm of the
   * number of free blocks. Slabs with the same order of magnitude of free
   * blocks have the same priority. With 2^23 blocks, the priority will
   * range from 1 to 25. The reserved unopenedSlabPriority divides the
   * range and is skipped by the logarithmic mapping.
   */
  unsigned int priority = (1 + logBaseTwo(freeBlocks));
  return ((priority < unopenedSlabPriority) ? priority : priority + 1);
}
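/*
 * For illustration only (not part of the original source): suppose
 * unopenedSlabPriority is 10. A slab with 100 free blocks maps to
 * 1 + logBaseTwo(100) = 1 + 6 = 7, below the reserved slot, so its
 * priority is 7. A slab with 100,000 free blocks maps to 1 + 16 = 17,
 * at or above the reserved slot, so it is bumped to 18. A blank slab
 * gets exactly 10, and a completely full slab gets 0.
 */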
/**
 * Add a slab to the priority queue of slabs available for allocation.
 *
 * @param slab  The slab to prioritize
 **/
static void prioritizeSlab(Slab *slab)
{
  ASSERT_LOG_ONLY(isRingEmpty(&slab->ringNode),
                  "a slab must not already be on a ring when prioritizing");
  slab->priority = calculateSlabPriority(slab);
  priorityTableEnqueue(slab->allocator->prioritizedSlabs, slab->priority,
                       &slab->ringNode);
}

/**********************************************************************/
void registerSlabWithAllocator(BlockAllocator *allocator, Slab *slab)
{
  allocator->slabCount++;
  allocator->lastSlab = slab->slabNumber;
}

/**
 * Get an iterator over all the slabs in the allocator.
 *
 * @param allocator  The allocator
 *
 * @return An iterator over the allocator's slabs
 **/
static SlabIterator getSlabIterator(const BlockAllocator *allocator)
{
  return iterateSlabs(allocator->depot->slabs, allocator->lastSlab,
                      allocator->zoneNumber, allocator->depot->zoneCount);
}

/**
 * Notify a block allocator that the VDO has entered read-only mode.
 *
 * Implements ReadOnlyNotification.
 *
 * @param listener  The block allocator
 * @param parent    The completion to notify in order to acknowledge the
 *                  notification
 **/
static void notifyBlockAllocatorOfReadOnlyMode(void          *listener,
                                               VDOCompletion *parent)
{
  BlockAllocator *allocator = listener;
  assertOnAllocatorThread(allocator->threadID, __func__);
  SlabIterator iterator = getSlabIterator(allocator);
  while (hasNextSlab(&iterator)) {
    Slab *slab = nextSlab(&iterator);
    abortSlabJournalWaiters(slab->journal);
  }

  completeCompletion(parent);
}

/**********************************************************************/
int makeAllocatorPoolVIOs(PhysicalLayer  *layer,
                          void           *parent,
                          void           *buffer,
                          VIO           **vioPtr)
{
  return createVIO(layer, VIO_TYPE_SLAB_JOURNAL, VIO_PRIORITY_METADATA,
                   parent, buffer, vioPtr);
}

/**
 * Allocate those components of the block allocator which are needed only at
 * load time, not at format time.
 *
 * @param allocator    The allocator
 * @param layer        The physical layer below this allocator
 * @param vioPoolSize  The VIO pool size
 *
 * @return VDO_SUCCESS or an error
 **/
static int allocateComponents(BlockAllocator *allocator,
                              PhysicalLayer  *layer,
                              BlockCount      vioPoolSize)
{
  /*
   * If createMetadataVIO is NULL, the block allocator is only being used to
   * format or audit the VDO. These only require the SuperBlock component,
   * so we can just skip allocating all the memory needed for runtime
   * components.
   */
  if (layer->createMetadataVIO == NULL) {
    return VDO_SUCCESS;
  }

  int result = registerReadOnlyListener(allocator->readOnlyNotifier,
                                        allocator,
                                        notifyBlockAllocatorOfReadOnlyMode,
                                        allocator->threadID);
  if (result != VDO_SUCCESS) {
    return result;
  }

  SlabDepot *depot = allocator->depot;
  result = initializeEnqueueableCompletion(&allocator->completion,
                                           BLOCK_ALLOCATOR_COMPLETION,
                                           layer);
  if (result != VDO_SUCCESS) {
    return result;
  }

  allocator->summary = getSlabSummaryForZone(depot, allocator->zoneNumber);

  result = makeVIOPool(layer, vioPoolSize, allocator->threadID,
                       makeAllocatorPoolVIOs, NULL, &allocator->vioPool);
  if (result != VDO_SUCCESS) {
    return result;
  }

  BlockCount slabJournalSize = depot->slabConfig.slabJournalBlocks;
  result = makeSlabScrubber(layer, slabJournalSize,
                            allocator->readOnlyNotifier,
                            &allocator->slabScrubber);
  if (result != VDO_SUCCESS) {
    return result;
  }

  // The number of data blocks is the maximum number of free blocks that
  // could be used in calculateSlabPriority().
  BlockCount   maxFreeBlocks = depot->slabConfig.dataBlocks;
  unsigned int maxPriority   = (2 + logBaseTwo(maxFreeBlocks));
  result = makePriorityTable(maxPriority, &allocator->prioritizedSlabs);
  if (result != VDO_SUCCESS) {
    return result;
  }

  /*
   * VDOSTORY-123 requires that we try to open slabs that already have
   * allocated blocks in preference to slabs that have never been opened.
   * For reasons we have not been able to fully understand, performance
   * tests on SSD harvards have been very sensitive (50% reduction in test
   * throughput) to very slight differences in the timing and locality of
   * block allocation. Assigning a low priority to unopened slabs
   * (maxPriority/2, say) would be ideal for the story, but anything less
   * than a very high threshold (maxPriority - 1) hurts PMI results.
   *
   * This sets the free block threshold for preferring to open an unopened
   * slab to the binary floor of 3/4ths the total number of data blocks in
   * a slab, which will generally evaluate to about half the slab size, but
   * avoids degenerate behavior in unit tests where the number of data
   * blocks is artificially constrained to a power of two.
   */
  allocator->unopenedSlabPriority = (1 + logBaseTwo((maxFreeBlocks * 3) / 4));

  return VDO_SUCCESS;
}
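/*
 * For illustration only (arithmetic implied by the comments above, and
 * assuming makePriorityTable() accepts priorities up through its
 * maxPriority argument): with 2^23 data blocks per slab, maxPriority is
 * 2 + 23 = 25, exactly the largest value calculateSlabPriority() can
 * produce (1 + 23, bumped past the reserved slot). The unopened-slab
 * threshold is 1 + logBaseTwo((2^23 * 3) / 4) = 1 + 22 = 23, so an
 * unopened slab outranks any previously opened slab with fewer than
 * 2^22 free blocks.
 */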
/**********************************************************************/
int makeBlockAllocator(SlabDepot         *depot,
                       ZoneCount          zoneNumber,
                       ThreadID           threadID,
                       Nonce              nonce,
                       BlockCount         vioPoolSize,
                       PhysicalLayer     *layer,
                       ReadOnlyNotifier  *readOnlyNotifier,
                       BlockAllocator   **allocatorPtr)
{
  BlockAllocator *allocator;
  int result = ALLOCATE(1, BlockAllocator, __func__, &allocator);
  if (result != VDO_SUCCESS) {
    return result;
  }

  allocator->depot            = depot;
  allocator->zoneNumber       = zoneNumber;
  allocator->threadID         = threadID;
  allocator->nonce            = nonce;
  allocator->readOnlyNotifier = readOnlyNotifier;
  initializeRing(&allocator->dirtySlabJournals);

  result = allocateComponents(allocator, layer, vioPoolSize);
  if (result != VDO_SUCCESS) {
    freeBlockAllocator(&allocator);
    return result;
  }

  *allocatorPtr = allocator;
  return VDO_SUCCESS;
}

/**********************************************************************/
void freeBlockAllocator(BlockAllocator **blockAllocatorPtr)
{
  BlockAllocator *allocator = *blockAllocatorPtr;
  if (allocator == NULL) {
    return;
  }

  freeSlabScrubber(&allocator->slabScrubber);
  freeVIOPool(&allocator->vioPool);
  freePriorityTable(&allocator->prioritizedSlabs);
  destroyEnqueueable(&allocator->completion);
  FREE(allocator);
  *blockAllocatorPtr = NULL;
}
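/*
 * For illustration only: a hypothetical caller would pair these entry
 * points roughly as follows (the depot, layer, and notifier here are
 * assumed to already exist; the pool size is arbitrary):
 *
 *   BlockAllocator *allocator;
 *   int result = makeBlockAllocator(depot, zone, threadID, nonce, 2,
 *                                   layer, notifier, &allocator);
 *   if (result != VDO_SUCCESS) {
 *     return result;
 *   }
 *   // ... use the allocator ...
 *   freeBlockAllocator(&allocator);  // frees and nulls the pointer
 */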
/**********************************************************************/
int replaceVIOPool(BlockAllocator *allocator,
                   size_t          size,
                   PhysicalLayer  *layer)
{
  freeVIOPool(&allocator->vioPool);
  return makeVIOPool(layer, size, allocator->threadID,
                     makeAllocatorPoolVIOs, NULL, &allocator->vioPool);
}

/**
 * Get the maximum number of data blocks that can be allocated.
 *
 * @param allocator  The block allocator to query
 *
 * @return The number of data blocks that can be allocated
 **/
__attribute__((warn_unused_result))
static inline BlockCount getDataBlockCount(const BlockAllocator *allocator)
{
  return (allocator->slabCount * allocator->depot->slabConfig.dataBlocks);
}

/**********************************************************************/
BlockCount getAllocatedBlocks(const BlockAllocator *allocator)
{
  return relaxedLoad64(&allocator->statistics.allocatedBlocks);
}

/**********************************************************************/
BlockCount getUnrecoveredSlabCount(const BlockAllocator *allocator)
{
  return getScrubberSlabCount(allocator->slabScrubber);
}

/**********************************************************************/
void queueSlab(Slab *slab)
{
  ASSERT_LOG_ONLY(isRingEmpty(&slab->ringNode),
                  "a requeued slab must not already be on a ring");
  BlockAllocator *allocator  = slab->allocator;
  BlockCount      freeBlocks = getSlabFreeBlockCount(slab);
  int result = ASSERT((freeBlocks <= allocator->depot->slabConfig.dataBlocks),
                      "rebuilt slab %u must have a valid free block count"
                      " (has %llu, expected maximum %llu)",
                      slab->slabNumber, freeBlocks,
                      allocator->depot->slabConfig.dataBlocks);
  if (result != VDO_SUCCESS) {
    enterReadOnlyMode(allocator->readOnlyNotifier, result);
    return;
  }

  if (isUnrecoveredSlab(slab)) {
    registerSlabForScrubbing(allocator->slabScrubber, slab, false);
    return;
  }

  if (!isSlabResuming(slab)) {
    // If the slab is resuming, we've already accounted for it here, so
    // don't do it again.
    relaxedAdd64(&allocator->statistics.allocatedBlocks, -freeBlocks);
    if (!isSlabJournalBlank(slab->journal)) {
      relaxedAdd64(&allocator->statistics.slabsOpened, 1);
    }
  }

  // All slabs are kept in a priority queue for allocation.
  prioritizeSlab(slab);
}

/**********************************************************************/
void adjustFreeBlockCount(Slab *slab, bool increment)
{
  BlockAllocator *allocator = slab->allocator;
  // The sense of increment is reversed since allocations are being counted.
  relaxedAdd64(&allocator->statistics.allocatedBlocks,
               (increment ? -1 : 1));

  // The open slab doesn't need to be reprioritized until it is closed.
  if (slab == allocator->openSlab) {
    return;
  }

  // The slab priority rarely changes; if no change, then don't requeue it.
  if (slab->priority == calculateSlabPriority(slab)) {
    return;
  }

  // Reprioritize the slab to reflect the new free block count by removing
  // it from the table and re-enqueuing it with the new priority.
  priorityTableRemove(allocator->prioritizedSlabs, &slab->ringNode);
  prioritizeSlab(slab);
}
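/*
 * For illustration only: statistics.allocatedBlocks counts allocated
 * blocks, not free ones. prepareSlabsForAllocation() seeds it with
 * getDataBlockCount(), and queueSlab() then subtracts each queued slab's
 * free count, so a slab with 1,000 of 8,192 data blocks free contributes
 * 8192 - 1000 = 7192. Freeing a block (increment == true above) must
 * therefore subtract one from the counter, hence the reversed sense.
 */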
/**
 * Allocate the next free physical block in a slab.
 *
 * The block allocated will have a provisional reference and the reference
 * must be either confirmed with a subsequent call to
 * incrementReferenceCount() or vacated with a subsequent call to
 * decrementReferenceCount().
 *
 * @param [in]  slab            The slab
 * @param [out] blockNumberPtr  A pointer to receive the allocated block
 *                              number
 *
 * @return VDO_SUCCESS or an error code
 **/
static int allocateSlabBlock(Slab *slab, PhysicalBlockNumber *blockNumberPtr)
{
  PhysicalBlockNumber pbn;
  int result = allocateUnreferencedBlock(slab->referenceCounts, &pbn);
  if (result != VDO_SUCCESS) {
    return result;
  }

  adjustFreeBlockCount(slab, false);

  *blockNumberPtr = pbn;
  return VDO_SUCCESS;
}

/**********************************************************************/
int allocateBlock(BlockAllocator      *allocator,
                  PhysicalBlockNumber *blockNumberPtr)
{
  if (allocator->openSlab != NULL) {
    // Try to allocate the next block in the currently open slab.
    int result = allocateSlabBlock(allocator->openSlab, blockNumberPtr);
    if ((result == VDO_SUCCESS) || (result != VDO_NO_SPACE)) {
      return result;
    }

    // Put the exhausted open slab back into the priority table.
    prioritizeSlab(allocator->openSlab);
  }

  // Remove the highest priority slab from the priority table and make it
  // the open slab.
  allocator->openSlab
    = slabFromRingNode(priorityTableDequeue(allocator->prioritizedSlabs));

  if (isSlabJournalBlank(allocator->openSlab->journal)) {
    relaxedAdd64(&allocator->statistics.slabsOpened, 1);
    dirtyAllReferenceBlocks(allocator->openSlab->referenceCounts);
  } else {
    relaxedAdd64(&allocator->statistics.slabsReopened, 1);
  }

  // Try allocating again. If we're out of space immediately after opening
  // a slab, then every slab must be fully allocated.
  return allocateSlabBlock(allocator->openSlab, blockNumberPtr);
}

/**********************************************************************/
void releaseBlockReference(BlockAllocator      *allocator,
                           PhysicalBlockNumber  pbn,
                           const char          *why)
{
  if (pbn == ZERO_BLOCK) {
    return;
  }

  Slab *slab = getSlab(allocator->depot, pbn);
  ReferenceOperation operation = {
    .type = DATA_DECREMENT,
    .pbn  = pbn,
  };
  int result = modifySlabReferenceCount(slab, NULL, operation);
  if (result != VDO_SUCCESS) {
    logErrorWithStringError(result,
                            "Failed to release reference to %s "
                            "physical block %llu",
                            why, pbn);
  }
}

/**
 * This is a HeapComparator function that orders SlabStatuses using the
 * 'isClean' field as the primary key and the 'emptiness' field as the
 * secondary key.
 *
 * Slabs need to be pushed onto the rings in the same order they are to be
 * popped off. Popping should always get the most empty first, so pushing
 * should be from most empty to least empty. Thus, the comparator order is
 * the usual sense since Heap returns larger elements before smaller ones.
 *
 * @param item1  The first item to compare
 * @param item2  The second item to compare
 *
 * @return 1 if the first item is cleaner or emptier than the second;
 *         -1 otherwise (ties on cleanliness and emptiness are broken by
 *         slab number, so no two distinct statuses compare as equal)
 **/
static int compareSlabStatuses(const void *item1, const void *item2)
{
  const SlabStatus *info1 = (const SlabStatus *) item1;
  const SlabStatus *info2 = (const SlabStatus *) item2;

  if (info1->isClean != info2->isClean) {
    return (info1->isClean ? 1 : -1);
  }

  if (info1->emptiness != info2->emptiness) {
    return ((info1->emptiness > info2->emptiness) ? 1 : -1);
  }

  return ((info1->slabNumber < info2->slabNumber) ? 1 : -1);
}
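/*
 * For illustration only: given hypothetical statuses
 *   A = { .isClean = true,  .emptiness = 100, .slabNumber = 4 }
 *   B = { .isClean = true,  .emptiness = 7,   .slabNumber = 2 }
 *   C = { .isClean = false, .emptiness = 900, .slabNumber = 1 }
 * the heap pops A before B (equally clean, A is emptier) and B before C
 * (clean beats dirty regardless of emptiness); equal keys fall through
 * to the slab number comparison, making the order total.
 */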
/**
 * Swap two SlabStatus structures. Implements HeapSwapper.
 **/
static void swapSlabStatuses(void *item1, void *item2)
{
  SlabStatus *info1 = item1;
  SlabStatus *info2 = item2;
  SlabStatus  temp  = *info1;
  *info1 = *info2;
  *info2 = temp;
}

/**
 * Inform the allocator that a slab action has finished on some slab. This
 * callback is registered in applyToSlabs().
 *
 * @param completion  The allocator completion
 **/
static void slabActionCallback(VDOCompletion *completion)
{
  BlockAllocator *allocator = container_of(completion, BlockAllocator,
                                           completion);
  SlabActor *actor = &allocator->slabActor;
  if (--actor->slabActionCount == 0) {
    actor->callback(completion);
    return;
  }

  resetCompletion(completion);
}

/**
 * Preserve the error from part of an administrative action and continue.
 *
 * @param completion  The allocator completion
 **/
static void handleOperationError(VDOCompletion *completion)
{
  BlockAllocator *allocator = (BlockAllocator *) completion;
  setOperationResult(&allocator->state, completion->result);
  completion->callback(completion);
}

/**
 * Perform an administrative action on each of an allocator's slabs in
 * parallel.
 *
 * @param allocator  The allocator
 * @param callback   The method to call when the action is complete on
 *                   every slab
 **/
static void applyToSlabs(BlockAllocator *allocator, VDOAction *callback)
{
  prepareCompletion(&allocator->completion, slabActionCallback,
                    handleOperationError, allocator->threadID, NULL);
  allocator->completion.requeue = false;

  // Since we are going to dequeue all of the slabs, the open slab will
  // become invalid, so clear it.
  allocator->openSlab = NULL;

  // Ensure that we don't finish before we're done starting.
  allocator->slabActor = (SlabActor) {
    .slabActionCount = 1,
    .callback        = callback,
  };

  SlabIterator iterator = getSlabIterator(allocator);
  while (hasNextSlab(&iterator)) {
    Slab *slab = nextSlab(&iterator);
    unspliceRingNode(&slab->ringNode);
    allocator->slabActor.slabActionCount++;
    startSlabAction(slab, allocator->state.state, &allocator->completion);
  }

  slabActionCallback(&allocator->completion);
}
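/*
 * For illustration only: slabActionCount is seeded with 1 so the count
 * cannot reach zero while the loop above is still starting actions. With
 * three slabs it rises to 4; each completed slab action decrements it
 * once, and the explicit slabActionCallback() call after the loop drops
 * the initial reference, so the registered callback fires exactly once,
 * after the last decrement.
 */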
/**
 * Inform the allocator that all load I/O has finished.
 *
 * @param completion  The allocator completion
 **/
static void finishLoadingAllocator(VDOCompletion *completion)
{
  BlockAllocator *allocator = (BlockAllocator *) completion;
  if (allocator->state.state == ADMIN_STATE_LOADING_FOR_RECOVERY) {
    void *context = getCurrentActionContext(allocator->depot->actionManager);
    replayIntoSlabJournals(allocator, completion, context);
    return;
  }

  finishLoading(&allocator->state);
}

/**
 * Initiate a load.
 *
 * Implements AdminInitiator.
 **/
static void initiateLoad(AdminState *state)
{
  BlockAllocator *allocator = container_of(state, BlockAllocator, state);
  if (state->state == ADMIN_STATE_LOADING_FOR_REBUILD) {
    prepareCompletion(&allocator->completion, finishLoadingAllocator,
                      handleOperationError, allocator->threadID, NULL);
    eraseSlabJournals(allocator->depot, getSlabIterator(allocator),
                      &allocator->completion);
    return;
  }

  applyToSlabs(allocator, finishLoadingAllocator);
}

/**********************************************************************/
void loadBlockAllocator(void          *context,
                        ZoneCount      zoneNumber,
                        VDOCompletion *parent)
{
  BlockAllocator *allocator = getBlockAllocatorForZone(context, zoneNumber);
  startLoading(&allocator->state,
               getCurrentManagerOperation(allocator->depot->actionManager),
               parent, initiateLoad);
}

/**********************************************************************/
void notifySlabJournalsAreRecovered(BlockAllocator *allocator, int result)
{
  finishLoadingWithResult(&allocator->state, result);
}

/**********************************************************************/
int prepareSlabsForAllocation(BlockAllocator *allocator)
{
  relaxedStore64(&allocator->statistics.allocatedBlocks,
                 getDataBlockCount(allocator));

  SlabDepot *depot     = allocator->depot;
  SlabCount  slabCount = depot->slabCount;

  SlabStatus *slabStatuses;
  int result = ALLOCATE(slabCount, SlabStatus, __func__, &slabStatuses);
  if (result != VDO_SUCCESS) {
    return result;
  }

  getSummarizedSlabStatuses(allocator->summary, slabCount, slabStatuses);

  // Sort the slabs by cleanliness, then by emptiness hint.
  Heap heap;
  initializeHeap(&heap, compareSlabStatuses, swapSlabStatuses,
                 slabStatuses, slabCount, sizeof(SlabStatus));
  buildHeap(&heap, slabCount);

  SlabStatus currentSlabStatus;
  while (popMaxHeapElement(&heap, &currentSlabStatus)) {
    Slab *slab = depot->slabs[currentSlabStatus.slabNumber];
    if (slab->allocator != allocator) {
      continue;
    }

    if ((depot->loadType == REBUILD_LOAD)
        || (!mustLoadRefCounts(allocator->summary, slab->slabNumber)
            && currentSlabStatus.isClean)) {
      queueSlab(slab);
      continue;
    }

    markSlabUnrecovered(slab);
    bool highPriority
      = ((currentSlabStatus.isClean && (depot->loadType == NORMAL_LOAD))
         || requiresScrubbing(slab->journal));
    registerSlabForScrubbing(allocator->slabScrubber, slab, highPriority);
  }
  FREE(slabStatuses);

  return VDO_SUCCESS;
}
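/*
 * For illustration only: the triage above sends each slab down one of
 * two paths. A slab that is summarized as clean and whose reference
 * counts need not be reloaded (or any slab during a rebuild load) goes
 * straight onto the priority table via queueSlab(); every other slab is
 * marked unrecovered and registered with the scrubber, at high priority
 * when it was summarized as clean during a normal load or when its
 * journal requires scrubbing.
 */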
/**********************************************************************/
void prepareAllocatorToAllocate(void          *context,
                                ZoneCount      zoneNumber,
                                VDOCompletion *parent)
{
  BlockAllocator *allocator = getBlockAllocatorForZone(context, zoneNumber);
  int result = prepareSlabsForAllocation(allocator);
  if (result != VDO_SUCCESS) {
    finishCompletion(parent, result);
    return;
  }

  scrubHighPrioritySlabs(allocator->slabScrubber,
                         isPriorityTableEmpty(allocator->prioritizedSlabs),
                         parent, finishParentCallback, finishParentCallback);
}

/**********************************************************************/
void registerNewSlabsForAllocator(void          *context,
                                  ZoneCount      zoneNumber,
                                  VDOCompletion *parent)
{
  BlockAllocator *allocator = getBlockAllocatorForZone(context, zoneNumber);
  SlabDepot *depot = allocator->depot;
  for (SlabCount i = depot->slabCount; i < depot->newSlabCount; i++) {
    Slab *slab = depot->newSlabs[i];
    if (slab->allocator == allocator) {
      registerSlabWithAllocator(allocator, slab);
    }
  }
  completeCompletion(parent);
}

/**
 * Perform a step in draining the allocator. This method is its own
 * callback.
 *
 * @param completion  The allocator's completion
 **/
static void doDrainStep(VDOCompletion *completion)
{
  BlockAllocator *allocator = (BlockAllocator *) completion;
  prepareForRequeue(&allocator->completion, doDrainStep,
                    handleOperationError, allocator->threadID, NULL);
  switch (++allocator->drainStep) {
  case DRAIN_ALLOCATOR_STEP_SCRUBBER:
    stopScrubbing(allocator->slabScrubber, completion);
    return;

  case DRAIN_ALLOCATOR_STEP_SLABS:
    applyToSlabs(allocator, doDrainStep);
    return;

  case DRAIN_ALLOCATOR_STEP_SUMMARY:
    drainSlabSummaryZone(allocator->summary, allocator->state.state,
                         completion);
    return;

  case DRAIN_ALLOCATOR_STEP_FINISHED:
    ASSERT_LOG_ONLY(!isVIOPoolBusy(allocator->vioPool), "VIO Pool not busy");
    finishDrainingWithResult(&allocator->state, completion->result);
    return;

  default:
    finishDrainingWithResult(&allocator->state, UDS_BAD_STATE);
  }
}

/**
 * Initiate a drain.
 *
 * Implements AdminInitiator.
 **/
static void initiateDrain(AdminState *state)
{
  BlockAllocator *allocator = container_of(state, BlockAllocator, state);
  allocator->drainStep = DRAIN_ALLOCATOR_START;
  doDrainStep(&allocator->completion);
}

/**********************************************************************/
void drainBlockAllocator(void          *context,
                         ZoneCount      zoneNumber,
                         VDOCompletion *parent)
{
  BlockAllocator *allocator = getBlockAllocatorForZone(context, zoneNumber);
  startDraining(&allocator->state,
                getCurrentManagerOperation(allocator->depot->actionManager),
                parent, initiateDrain);
}

/**
 * Perform a step in resuming a quiescent allocator. This method is its own
 * callback.
 *
 * @param completion  The allocator's completion
 **/
static void doResumeStep(VDOCompletion *completion)
{
  BlockAllocator *allocator = (BlockAllocator *) completion;
  prepareForRequeue(&allocator->completion, doResumeStep,
                    handleOperationError, allocator->threadID, NULL);
  switch (--allocator->drainStep) {
  case DRAIN_ALLOCATOR_STEP_SUMMARY:
    resumeSlabSummaryZone(allocator->summary, completion);
    return;

  case DRAIN_ALLOCATOR_STEP_SLABS:
    applyToSlabs(allocator, doResumeStep);
    return;

  case DRAIN_ALLOCATOR_STEP_SCRUBBER:
    resumeScrubbing(allocator->slabScrubber, completion);
    return;

  case DRAIN_ALLOCATOR_START:
    finishResumingWithResult(&allocator->state, completion->result);
    return;

  default:
    finishResumingWithResult(&allocator->state, UDS_BAD_STATE);
  }
}
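/*
 * For illustration only: drain and resume walk the same step enum in
 * opposite directions. doDrainStep() pre-increments through
 *   START -> SCRUBBER -> SLABS -> SUMMARY -> FINISHED
 * while doResumeStep() pre-decrements from FINISHED back to START, so
 * components resume in the reverse of the order in which they drained.
 */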
/**
 * Initiate a resume.
 *
 * Implements AdminInitiator.
 **/
static void initiateResume(AdminState *state)
{
  BlockAllocator *allocator = container_of(state, BlockAllocator, state);
  allocator->drainStep = DRAIN_ALLOCATOR_STEP_FINISHED;
  doResumeStep(&allocator->completion);
}

/**********************************************************************/
void resumeBlockAllocator(void          *context,
                          ZoneCount      zoneNumber,
                          VDOCompletion *parent)
{
  BlockAllocator *allocator = getBlockAllocatorForZone(context, zoneNumber);
  startResuming(&allocator->state,
                getCurrentManagerOperation(allocator->depot->actionManager),
                parent, initiateResume);
}

/**********************************************************************/
void releaseTailBlockLocks(void          *context,
                           ZoneCount      zoneNumber,
                           VDOCompletion *parent)
{
  BlockAllocator *allocator = getBlockAllocatorForZone(context, zoneNumber);
  RingNode *ring = &allocator->dirtySlabJournals;
  while (!isRingEmpty(ring)) {
    if (!releaseRecoveryJournalLock(slabJournalFromDirtyNode(ring->next),
                                    allocator->depot->activeReleaseRequest)) {
      break;
    }
  }
  completeCompletion(parent);
}
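/*
 * For illustration only: the loop above stops at the first slab journal
 * that declines to release its recovery journal lock, which relies on
 * the dirty ring being kept in oldest-lock-first order; no journal later
 * on the ring can hold an older lock than one that refused.
 */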
/**********************************************************************/
SlabSummaryZone *getSlabSummaryZone(const BlockAllocator *allocator)
{
  return allocator->summary;
}

/**********************************************************************/
int acquireVIO(BlockAllocator *allocator, Waiter *waiter)
{
  return acquireVIOFromPool(allocator->vioPool, waiter);
}

/**********************************************************************/
void returnVIO(BlockAllocator *allocator, VIOPoolEntry *entry)
{
  returnVIOToPool(allocator->vioPool, entry);
}

/**********************************************************************/
void scrubAllUnrecoveredSlabsInZone(void          *context,
                                    ZoneCount      zoneNumber,
                                    VDOCompletion *parent)
{
  BlockAllocator *allocator = getBlockAllocatorForZone(context, zoneNumber);
  scrubSlabs(allocator->slabScrubber, allocator->depot,
             notifyZoneFinishedScrubbing, noopCallback);
  completeCompletion(parent);
}

/**********************************************************************/
int enqueueForCleanSlab(BlockAllocator *allocator, Waiter *waiter)
{
  return enqueueCleanSlabWaiter(allocator->slabScrubber, waiter);
}

/**********************************************************************/
void increaseScrubbingPriority(Slab *slab)
{
  registerSlabForScrubbing(slab->allocator->slabScrubber, slab, true);
}

/**********************************************************************/
void allocateFromAllocatorLastSlab(BlockAllocator *allocator)
{
  ASSERT_LOG_ONLY(allocator->openSlab == NULL, "mustn't have an open slab");
  Slab *lastSlab = allocator->depot->slabs[allocator->lastSlab];
  priorityTableRemove(allocator->prioritizedSlabs, &lastSlab->ringNode);
  allocator->openSlab = lastSlab;
}

/**********************************************************************/
BlockAllocatorStatistics
getBlockAllocatorStatistics(const BlockAllocator *allocator)
{
  const AtomicAllocatorStatistics *atoms = &allocator->statistics;
  return (BlockAllocatorStatistics) {
    .slabCount     = allocator->slabCount,
    .slabsOpened   = relaxedLoad64(&atoms->slabsOpened),
    .slabsReopened = relaxedLoad64(&atoms->slabsReopened),
  };
}

/**********************************************************************/
SlabJournalStatistics
getSlabJournalStatistics(const BlockAllocator *allocator)
{
  const AtomicSlabJournalStatistics *atoms
    = &allocator->slabJournalStatistics;
  return (SlabJournalStatistics) {
    .diskFullCount = atomicLoad64(&atoms->diskFullCount),
    .flushCount    = atomicLoad64(&atoms->flushCount),
    .blockedCount  = atomicLoad64(&atoms->blockedCount),
    .blocksWritten = atomicLoad64(&atoms->blocksWritten),
    .tailBusyCount = atomicLoad64(&atoms->tailBusyCount),
  };
}

/**********************************************************************/
RefCountsStatistics getRefCountsStatistics(const BlockAllocator *allocator)
{
  const AtomicRefCountStatistics *atoms = &allocator->refCountStatistics;
  return (RefCountsStatistics) {
    .blocksWritten = atomicLoad64(&atoms->blocksWritten),
  };
}

/**********************************************************************/
void dumpBlockAllocator(const BlockAllocator *allocator)
{
  unsigned int pauseCounter = 0;
  logInfo("BlockAllocator zone %u", allocator->zoneNumber);
  SlabIterator iterator = getSlabIterator(allocator);
  while (hasNextSlab(&iterator)) {
    dumpSlab(nextSlab(&iterator));

    // Wait for a while after each batch of 32 slabs dumped, allowing the
    // kernel log a chance to be flushed instead of being overrun.
    if (pauseCounter++ == 31) {
      pauseCounter = 0;
      pauseForLogger();
    }
  }

  dumpSlabScrubber(allocator->slabScrubber);
}