/*
 * Copyright (c) 2020 Red Hat, Inc.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
 * 02110-1301, USA.
 *
 * $Id: //eng/vdo-releases/aluminum/src/c++/vdo/base/slabSummary.c#7 $
 */

#include "slabSummary.h"

#include "memoryAlloc.h"

#include "adminState.h"
#include "constants.h"
#include "extent.h"
#include "readOnlyNotifier.h"
#include "slabSummaryInternals.h"
#include "threadConfig.h"
#include "types.h"

// SIZING

/**
 * Compute the number of blocks needed to hold one zone's worth of slab
 * summary entries (MAX_SLABS of them).
 *
 * Both divisions truncate, so the answer is only exact when the block size
 * is a whole multiple of the entry size.
 *
 * @param blockSize  The size of a block, in bytes
 *
 * @return The number of blocks occupied by a single zone's entries
 **/
static BlockCount getSlabSummaryZoneSize(BlockSize blockSize)
{
  SlabCount perBlock = blockSize / sizeof(SlabSummaryEntry);
  return MAX_SLABS / perBlock;
}

/**
 * Compute the number of blocks occupied by the whole slab summary, i.e. the
 * per-zone size multiplied by MAX_PHYSICAL_ZONES.
 *
 * @param blockSize  The size of a block, in bytes
 *
 * @return The total number of blocks in the slab summary
 **/
BlockCount getSlabSummarySize(BlockSize blockSize)
{
  BlockCount perZone = getSlabSummaryZoneSize(blockSize);
  return perZone * MAX_PHYSICAL_ZONES;
}

// FULLNESS HINT COMPUTATION

/**
 * Translate a slab's free block count into a 'fullness hint' that can be
 * stored in a SlabSummaryEntry's 7 bits that are dedicated to its free count.
 *
 * Note: the number of free blocks must be strictly less than 2^23 blocks,
 * even though theoretically slabs could contain precisely 2^23 blocks; there
 * is an assumption that at least one block is used by metadata.
This * assumption is necessary; otherwise, the fullness hint might overflow. * The fullness hint formula is roughly (fullness >> 16) & 0x7f, but * ((1 << 23) >> 16) & 0x7f is the same as (0 >> 16) & 0x7f, namely 0, which * is clearly a bad hint if it could indicate both 2^23 free blocks or 0 free * blocks. * * @param summary The summary which is being updated * @param freeBlocks The number of free blocks * * @return A fullness hint, which can be stored in 7 bits. **/ __attribute__((warn_unused_result)) static uint8_t computeFullnessHint(SlabSummary *summary, BlockCount freeBlocks) { ASSERT_LOG_ONLY((freeBlocks < (1 << 23)), "free blocks must be less than 2^23"); if (freeBlocks == 0) { return 0; } BlockCount hint = freeBlocks >> summary->hintShift; return ((hint == 0) ? 1 : hint); } /** * Translate a slab's free block hint into an approximate count, such that * computeFullnessHint() is the inverse function of getApproximateFreeBlocks() * (i.e. computeFullnessHint(getApproximateFreeBlocks(x)) == x). * * @param summary The summary from which the hint was obtained * @param freeBlockHint The hint read from the summary * * @return An approximation to the free block count **/ __attribute__((warn_unused_result)) static BlockCount getApproximateFreeBlocks(SlabSummary *summary, uint8_t freeBlockHint) { return ((BlockCount) freeBlockHint) << summary->hintShift; } // MAKE/FREE FUNCTIONS /**********************************************************************/ static void launchWrite(SlabSummaryBlock *summaryBlock); /** * Initialize a SlabSummaryBlock. 
* * @param layer The backing layer * @param summaryZone The parent SlabSummaryZone * @param threadID The ID of the thread of physical zone of this block * @param entries The entries this block manages * @param index The index of this block in its zone's summary * @param slabSummaryBlock The block to intialize * * @return VDO_SUCCESS or an error **/ static int initializeSlabSummaryBlock(PhysicalLayer *layer, SlabSummaryZone *summaryZone, ThreadID threadID, SlabSummaryEntry *entries, BlockCount index, SlabSummaryBlock *slabSummaryBlock) { int result = ALLOCATE(VDO_BLOCK_SIZE, char, __func__, &slabSummaryBlock->outgoingEntries); if (result != VDO_SUCCESS) { return result; } result = createVIO(layer, VIO_TYPE_SLAB_SUMMARY, VIO_PRIORITY_METADATA, slabSummaryBlock, slabSummaryBlock->outgoingEntries, &slabSummaryBlock->vio); if (result != VDO_SUCCESS) { return result; } slabSummaryBlock->vio->completion.callbackThreadID = threadID; slabSummaryBlock->zone = summaryZone; slabSummaryBlock->entries = entries; slabSummaryBlock->index = index; return VDO_SUCCESS; } /** * Create a new, empty SlabSummaryZone object. 
* * @param summary The summary to which the new zone will belong * @param layer The layer * @param zoneNumber The zone this is * @param threadID The ID of the thread for this zone * @param entries The buffer to hold the entries in this zone * * @return VDO_SUCCESS or an error **/ static int makeSlabSummaryZone(SlabSummary *summary, PhysicalLayer *layer, ZoneCount zoneNumber, ThreadID threadID, SlabSummaryEntry *entries) { int result = ALLOCATE_EXTENDED(SlabSummaryZone, summary->blocksPerZone, SlabSummaryBlock, __func__, &summary->zones[zoneNumber]); if (result != VDO_SUCCESS) { return result; } SlabSummaryZone *summaryZone = summary->zones[zoneNumber]; summaryZone->summary = summary; summaryZone->zoneNumber = zoneNumber; summaryZone->entries = entries; if (layer->createMetadataVIO == NULL) { // Blocks are only used for writing, and without a createVIO() call, // we'll never be writing anything. return VDO_SUCCESS; } // Initialize each block. for (BlockCount i = 0; i < summary->blocksPerZone; i++) { result = initializeSlabSummaryBlock(layer, summaryZone, threadID, entries, i, &summaryZone->summaryBlocks[i]); if (result != VDO_SUCCESS) { return result; } entries += summary->entriesPerBlock; } return VDO_SUCCESS; } /**********************************************************************/ int makeSlabSummary(PhysicalLayer *layer, Partition *partition, const ThreadConfig *threadConfig, unsigned int slabSizeShift, BlockCount maximumFreeBlocksPerSlab, ReadOnlyNotifier *readOnlyNotifier, SlabSummary **slabSummaryPtr) { BlockCount blocksPerZone = getSlabSummaryZoneSize(VDO_BLOCK_SIZE); SlabCount entriesPerBlock = MAX_SLABS / blocksPerZone; int result = ASSERT((entriesPerBlock * blocksPerZone) == MAX_SLABS, "block size must be a multiple of entry size"); if (result != VDO_SUCCESS) { return result; } if (partition == NULL) { // Don't make a slab summary for the formatter since it doesn't need it. 
return VDO_SUCCESS; } SlabSummary *summary; result = ALLOCATE_EXTENDED(SlabSummary, threadConfig->physicalZoneCount, SlabSummaryZone *, __func__, &summary); if (result != VDO_SUCCESS) { return result; } summary->zoneCount = threadConfig->physicalZoneCount; summary->readOnlyNotifier = readOnlyNotifier; summary->hintShift = (slabSizeShift > 6) ? (slabSizeShift - 6) : 0; summary->blocksPerZone = blocksPerZone; summary->entriesPerBlock = entriesPerBlock; size_t totalEntries = MAX_SLABS * MAX_PHYSICAL_ZONES; size_t entryBytes = totalEntries * sizeof(SlabSummaryEntry); result = layer->allocateIOBuffer(layer, entryBytes, "summary entries", (char **) &summary->entries); if (result != VDO_SUCCESS) { freeSlabSummary(&summary); return result; } // Initialize all the entries. uint8_t hint = computeFullnessHint(summary, maximumFreeBlocksPerSlab); for (size_t i = 0; i < totalEntries; i++) { // This default tail block offset must be reflected in // slabJournal.c::readSlabJournalTail(). summary->entries[i] = (SlabSummaryEntry) { .tailBlockOffset = 0, .fullnessHint = hint, .loadRefCounts = false, .isDirty = false, }; } setSlabSummaryOrigin(summary, partition); for (ZoneCount zone = 0; zone < summary->zoneCount; zone++) { result = makeSlabSummaryZone(summary, layer, zone, getPhysicalZoneThread(threadConfig, zone), summary->entries + (MAX_SLABS * zone)); if (result != VDO_SUCCESS) { freeSlabSummary(&summary); return result; } } *slabSummaryPtr = summary; return VDO_SUCCESS; } /**********************************************************************/ void freeSlabSummary(SlabSummary **slabSummaryPtr) { if (*slabSummaryPtr == NULL) { return; } SlabSummary *summary = *slabSummaryPtr; for (ZoneCount zone = 0; zone < summary->zoneCount; zone++) { SlabSummaryZone *summaryZone = summary->zones[zone]; if (summaryZone != NULL) { for (BlockCount i = 0; i < summary->blocksPerZone; i++) { freeVIO(&summaryZone->summaryBlocks[i].vio); FREE(summaryZone->summaryBlocks[i].outgoingEntries); } 
FREE(summaryZone); } } FREE(summary->entries); FREE(summary); *slabSummaryPtr = NULL; } /**********************************************************************/ SlabSummaryZone *getSummaryForZone(SlabSummary *summary, ZoneCount zone) { return summary->zones[zone]; } // WRITING FUNCTIONALITY /** * Check whether a summary zone has finished draining. * * @param summaryZone The zone to check **/ static void checkForDrainComplete(SlabSummaryZone *summaryZone) { if (!isDraining(&summaryZone->state) || (summaryZone->writeCount > 0)) { return; } finishOperationWithResult(&summaryZone->state, (isReadOnly(summaryZone->summary->readOnlyNotifier) ? VDO_READ_ONLY : VDO_SUCCESS)); } /** * Wake all the waiters in a given queue. If the VDO is in read-only mode they * will be given a VDO_READ_ONLY error code as their context, otherwise they * will be given VDO_SUCCESS. * * @param summaryZone The slab summary which owns the queue * @param queue The queue to notify **/ static void notifyWaiters(SlabSummaryZone *summaryZone, WaitQueue *queue) { int result = (isReadOnly(summaryZone->summary->readOnlyNotifier) ? VDO_READ_ONLY : VDO_SUCCESS); notifyAllWaiters(queue, NULL, &result); } /** * Finish processing a block which attempted to write, whether or not the * attempt succeeded. * * @param block The block **/ static void finishUpdatingSlabSummaryBlock(SlabSummaryBlock *block) { notifyWaiters(block->zone, &block->currentUpdateWaiters); block->writing = false; block->zone->writeCount--; if (hasWaiters(&block->nextUpdateWaiters)) { launchWrite(block); } else { checkForDrainComplete(block->zone); } } /** * This is the callback for a successful block write. * * @param completion The write VIO **/ static void finishUpdate(VDOCompletion *completion) { SlabSummaryBlock *block = completion->parent; atomicAdd64(&block->zone->summary->statistics.blocksWritten, 1); finishUpdatingSlabSummaryBlock(block); } /** * Handle an error writing a slab summary block. 
* * @param completion The write VIO **/ static void handleWriteError(VDOCompletion *completion) { SlabSummaryBlock *block = completion->parent; enterReadOnlyMode(block->zone->summary->readOnlyNotifier, completion->result); finishUpdatingSlabSummaryBlock(block); } /** * Write a slab summary block unless it is currently out for writing. * * @param [in] block The block that needs to be committed **/ static void launchWrite(SlabSummaryBlock *block) { if (block->writing) { return; } SlabSummaryZone *zone = block->zone; zone->writeCount++; transferAllWaiters(&block->nextUpdateWaiters, &block->currentUpdateWaiters); block->writing = true; SlabSummary *summary = zone->summary; if (isReadOnly(summary->readOnlyNotifier)) { finishUpdatingSlabSummaryBlock(block); return; } memcpy(block->outgoingEntries, block->entries, sizeof(SlabSummaryEntry) * summary->entriesPerBlock); // Flush before writing to ensure that the slab journal tail blocks and // reference updates covered by this summary update are stable (VDO-2332). PhysicalBlockNumber pbn = (summary->origin + (summary->blocksPerZone * zone->zoneNumber) + block->index); launchWriteMetadataVIOWithFlush(block->vio, pbn, finishUpdate, handleWriteError, true, false); } /** * Initiate a drain. * * Implements AdminInitiator. **/ static void initiateDrain(AdminState *state) { checkForDrainComplete(container_of(state, SlabSummaryZone, state)); } /**********************************************************************/ void drainSlabSummaryZone(SlabSummaryZone *summaryZone, AdminStateCode operation, VDOCompletion *parent) { startDraining(&summaryZone->state, operation, parent, initiateDrain); } /**********************************************************************/ void resumeSlabSummaryZone(SlabSummaryZone *summaryZone, VDOCompletion *parent) { finishCompletion(parent, resumeIfQuiescent(&summaryZone->state)); } // READ/UPDATE FUNCTIONS /** * Get the summary block, and offset into it, for storing the summary for a * slab. 
* * @param summaryZone The SlabSummaryZone being queried * @param slabNumber The slab whose summary location is sought * * @return A pointer to the SlabSummaryEntryBlock containing this * SlabSummaryEntry **/ static SlabSummaryBlock *getSummaryBlockForSlab(SlabSummaryZone *summaryZone, SlabCount slabNumber) { SlabCount entriesPerBlock = summaryZone->summary->entriesPerBlock; return &summaryZone->summaryBlocks[slabNumber / entriesPerBlock]; } /**********************************************************************/ void updateSlabSummaryEntry(SlabSummaryZone *summaryZone, Waiter *waiter, SlabCount slabNumber, TailBlockOffset tailBlockOffset, bool loadRefCounts, bool isClean, BlockCount freeBlocks) { SlabSummaryBlock *block = getSummaryBlockForSlab(summaryZone, slabNumber); int result; if (isReadOnly(summaryZone->summary->readOnlyNotifier)) { result = VDO_READ_ONLY; } else if (isDraining(&summaryZone->state) || isQuiescent(&summaryZone->state)) { result = VDO_INVALID_ADMIN_STATE; } else { uint8_t hint = computeFullnessHint(summaryZone->summary, freeBlocks); SlabSummaryEntry *entry = &summaryZone->entries[slabNumber]; *entry = (SlabSummaryEntry) { .tailBlockOffset = tailBlockOffset, .loadRefCounts = (entry->loadRefCounts || loadRefCounts), .isDirty = !isClean, .fullnessHint = hint, }; result = enqueueWaiter(&block->nextUpdateWaiters, waiter); } if (result != VDO_SUCCESS) { waiter->callback(waiter, &result); return; } launchWrite(block); } /**********************************************************************/ TailBlockOffset getSummarizedTailBlockOffset(SlabSummaryZone *summaryZone, SlabCount slabNumber) { return summaryZone->entries[slabNumber].tailBlockOffset; } /**********************************************************************/ bool mustLoadRefCounts(SlabSummaryZone *summaryZone, SlabCount slabNumber) { return summaryZone->entries[slabNumber].loadRefCounts; } /**********************************************************************/ bool 
getSummarizedCleanliness(SlabSummaryZone *summaryZone, SlabCount slabNumber) { return !summaryZone->entries[slabNumber].isDirty; } /**********************************************************************/ BlockCount getSummarizedFreeBlockCount(SlabSummaryZone *summaryZone, SlabCount slabNumber) { SlabSummaryEntry *entry = &summaryZone->entries[slabNumber]; return getApproximateFreeBlocks(summaryZone->summary, entry->fullnessHint); } /**********************************************************************/ void getSummarizedRefCountsState(SlabSummaryZone *summaryZone, SlabCount slabNumber, size_t *freeBlockHint, bool *isClean) { SlabSummaryEntry *entry = &summaryZone->entries[slabNumber]; *freeBlockHint = entry->fullnessHint; *isClean = !entry->isDirty; } /**********************************************************************/ void getSummarizedSlabStatuses(SlabSummaryZone *summaryZone, SlabCount slabCount, SlabStatus *statuses) { for (SlabCount i = 0; i < slabCount; i++) { statuses[i] = (SlabStatus) { .slabNumber = i, .isClean = !summaryZone->entries[i].isDirty, .emptiness = summaryZone->entries[i].fullnessHint }; } } // RESIZE FUNCTIONS /**********************************************************************/ void setSlabSummaryOrigin(SlabSummary *summary, Partition *partition) { summary->origin = getFixedLayoutPartitionOffset(partition); } // COMBINING FUNCTIONS (LOAD) /** * Clean up after saving out the combined slab summary. This callback is * registered in finishLoadingSummary() and loadSlabSummary(). 
* * @param completion The extent which was used to write the summary data **/ static void finishCombiningZones(VDOCompletion *completion) { SlabSummary *summary = completion->parent; int result = completion->result; VDOExtent *extent = asVDOExtent(completion); freeExtent(&extent); finishLoadingWithResult(&summary->zones[0]->state, result); } /**********************************************************************/ void combineZones(SlabSummary *summary) { // Combine all the old summary data into the portion of the buffer // corresponding to the first zone. ZoneCount zone = 0; if (summary->zonesToCombine > 1) { for (SlabCount entryNumber = 0; entryNumber < MAX_SLABS; entryNumber++) { if (zone != 0) { memcpy(summary->entries + entryNumber, summary->entries + (zone * MAX_SLABS) + entryNumber, sizeof(SlabSummaryEntry)); } zone++; if (zone == summary->zonesToCombine) { zone = 0; } } } // Copy the combined data to each zones's region of the buffer. for (zone = 1; zone < MAX_PHYSICAL_ZONES; zone++) { memcpy(summary->entries + (zone * MAX_SLABS), summary->entries, MAX_SLABS * sizeof(SlabSummaryEntry)); } } /** * Combine the slab summary data from all the previously written zones * and copy the combined summary to each partition's data region. Then write * the combined summary back out to disk. This callback is registered in * loadSlabSummary(). * * @param completion The extent which was used to read the summary data **/ static void finishLoadingSummary(VDOCompletion *completion) { SlabSummary *summary = completion->parent; VDOExtent *extent = asVDOExtent(completion); // Combine the zones so each zone is correct for all slabs. combineZones(summary); // Write the combined summary back out. 
extent->completion.callback = finishCombiningZones; writeMetadataExtent(extent, summary->origin); } /**********************************************************************/ void loadSlabSummary(SlabSummary *summary, AdminStateCode operation, ZoneCount zonesToCombine, VDOCompletion *parent) { SlabSummaryZone *zone = summary->zones[0]; if (!startLoading(&zone->state, operation, parent, NULL)) { return; } VDOExtent *extent; BlockCount blocks = summary->blocksPerZone * MAX_PHYSICAL_ZONES; int result = createExtent(parent->layer, VIO_TYPE_SLAB_SUMMARY, VIO_PRIORITY_METADATA, blocks, (char *) summary->entries, &extent); if (result != VDO_SUCCESS) { finishLoadingWithResult(&zone->state, result); return; } if ((operation == ADMIN_STATE_FORMATTING) || (operation == ADMIN_STATE_LOADING_FOR_REBUILD)) { prepareCompletion(&extent->completion, finishCombiningZones, finishCombiningZones, 0, summary); writeMetadataExtent(extent, summary->origin); return; } summary->zonesToCombine = zonesToCombine; prepareCompletion(&extent->completion, finishLoadingSummary, finishCombiningZones, 0, summary); readMetadataExtent(extent, summary->origin); } /**********************************************************************/ SlabSummaryStatistics getSlabSummaryStatistics(const SlabSummary *summary) { const AtomicSlabSummaryStatistics *atoms = &summary->statistics; return (SlabSummaryStatistics) { .blocksWritten = atomicLoad64(&atoms->blocksWritten), }; }