/*
 * Copyright (c) 2020 Red Hat, Inc.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 * 
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 * 
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
 * 02110-1301, USA. 
 *
 * $Id: //eng/vdo-releases/aluminum/src/c++/vdo/base/slabScrubber.c#6 $
 */

#include "slabScrubberInternals.h"

#include "logger.h"
#include "memoryAlloc.h"

#include "adminState.h"
#include "blockAllocator.h"
#include "constants.h"
#include "readOnlyNotifier.h"
#include "recoveryJournal.h"
#include "refCounts.h"
#include "refCountsInternals.h"
#include "slab.h"
#include "slabJournalInternals.h"

/**
 * Set up the resources used to read a slab journal while scrubbing: a data
 * buffer of slabJournalSize blocks, and an extent created over that buffer.
 *
 * On failure nothing is released here; whatever was already allocated stays
 * in the scrubber for the caller to clean up.
 *
 * @param scrubber         The slab scrubber which will hold the resources
 * @param layer            The physical layer on which the scrubber resides
 * @param slabJournalSize  The size of a slab journal, in blocks
 *
 * @return VDO_SUCCESS or an error
 **/
__attribute__((warn_unused_result))
static int allocateExtentAndBuffer(SlabScrubber  *scrubber,
                                   PhysicalLayer *layer,
                                   BlockCount     slabJournalSize)
{
  size_t journalBytes = VDO_BLOCK_SIZE * slabJournalSize;
  int result = ALLOCATE(journalBytes, char, __func__, &scrubber->journalData);
  if (result == VDO_SUCCESS) {
    // The extent is built over the buffer which was just allocated.
    result = createExtent(layer, VIO_TYPE_SLAB_JOURNAL, VIO_PRIORITY_METADATA,
                          slabJournalSize, scrubber->journalData,
                          &scrubber->extent);
  }

  return result;
}

/**********************************************************************/
int makeSlabScrubber(PhysicalLayer     *layer,
                     BlockCount         slabJournalSize,
                     ReadOnlyNotifier  *readOnlyNotifier,
                     SlabScrubber     **scrubberPtr)
{
  // Construct a scrubber; *scrubberPtr is only written on success.
  SlabScrubber *newScrubber;
  int result = ALLOCATE(1, SlabScrubber, __func__, &newScrubber);
  if (result != VDO_SUCCESS) {
    return result;
  }

  result = allocateExtentAndBuffer(newScrubber, layer, slabJournalSize);
  if (result != VDO_SUCCESS) {
    // Releases the partially constructed scrubber and anything it holds.
    freeSlabScrubber(&newScrubber);
    return result;
  }

  // The scrubber starts out suspended, with both of its slab rings empty.
  newScrubber->readOnlyNotifier = readOnlyNotifier;
  newScrubber->adminState.state = ADMIN_STATE_SUSPENDED;
  initializeRing(&newScrubber->slabs);
  initializeRing(&newScrubber->highPrioritySlabs);
  initializeCompletion(&newScrubber->completion, SLAB_SCRUBBER_COMPLETION,
                       layer);

  *scrubberPtr = newScrubber;
  return VDO_SUCCESS;
}

/**
 * Release the extent and the data buffer used for reading slab journals.
 * The buffer pointer is reset to NULL, so calling this again is harmless as
 * far as the buffer is concerned.
 *
 * @param scrubber  The scrubber whose extent and buffer are to be freed
 **/
static void freeExtentAndBuffer(SlabScrubber *scrubber)
{
  freeExtent(&scrubber->extent);

  if (scrubber->journalData == NULL) {
    // The buffer was never allocated or has already been released.
    return;
  }

  FREE(scrubber->journalData);
  scrubber->journalData = NULL;
}

/**********************************************************************/
void freeSlabScrubber(SlabScrubber **scrubberPtr)
{
  // Destroy the scrubber and null out the caller's reference; a reference
  // which is already NULL is left alone.
  SlabScrubber *doomed = *scrubberPtr;
  if (doomed != NULL) {
    freeExtentAndBuffer(doomed);
    FREE(doomed);
    *scrubberPtr = NULL;
  }
}

/**
 * Pick the slab which should be scrubbed next, without removing it from its
 * ring. The high-priority ring is always consulted before the ordinary one.
 *
 * @param scrubber  The slab scrubber
 *
 * @return The next slab to scrub or <code>NULL</code> if both rings are empty
 **/
static Slab *getNextSlab(SlabScrubber *scrubber)
{
  Slab *next = NULL;

  if (!isRingEmpty(&scrubber->highPrioritySlabs)) {
    next = slabFromRingNode(scrubber->highPrioritySlabs.next);
  } else if (!isRingEmpty(&scrubber->slabs)) {
    next = slabFromRingNode(scrubber->slabs.next);
  }

  return next;
}

/**********************************************************************/
bool hasSlabsToScrub(SlabScrubber *scrubber)
{
  // There is work to do exactly when either slab ring yields a candidate.
  Slab *candidate = getNextSlab(scrubber);
  return (candidate != NULL);
}

/**********************************************************************/
SlabCount getScrubberSlabCount(const SlabScrubber *scrubber)
{
  // slabCount is maintained with relaxedAdd64(), so it is read with the
  // matching relaxed load.
  SlabCount remaining = relaxedLoad64(&scrubber->slabCount);
  return remaining;
}

/**********************************************************************/
void registerSlabForScrubbing(SlabScrubber *scrubber,
                              Slab         *slab,
                              bool          highPriority)
{
  // Queue a slab on one of the scrubber's rings.
  ASSERT_LOG_ONLY((slab->status != SLAB_REBUILT),
                  "slab to be scrubbed is unrecovered");

  // Anything not in the plain "requires scrubbing" state is ignored, which
  // includes slabs already promoted to high priority.
  if (slab->status != SLAB_REQUIRES_SCRUBBING) {
    return;
  }

  // Take the slab off whichever ring it is currently on.
  unspliceRingNode(&slab->ringNode);

  // A slab is counted only the first time it is queued.
  if (!slab->wasQueuedForScrubbing) {
    relaxedAdd64(&scrubber->slabCount, 1);
    slab->wasQueuedForScrubbing = true;
  }

  if (!highPriority) {
    pushRingNode(&scrubber->slabs, &slab->ringNode);
    return;
  }

  slab->status = SLAB_REQUIRES_HIGH_PRIORITY_SCRUBBING;
  pushRingNode(&scrubber->highPrioritySlabs, &slab->ringNode);
}

/**
 * Stop scrubbing, either because there are no more slabs to scrub or because
 * there's been an error. This completes the scrubber's own completion,
 * finishes any drain of the scrubber's admin state (or marks the scrubber
 * suspended if no drain was finished), and then wakes any waiters.
 *
 * The order of the steps below is significant; see the inline comments.
 *
 * @param scrubber  The scrubber
 **/
static void finishScrubbing(SlabScrubber *scrubber)
{
  // The journal buffer and extent are only released once both slab rings are
  // empty; if slabs remain they are kept (presumably so that scrubbing can be
  // resumed later without reallocating -- confirm against callers).
  if (!hasSlabsToScrub(scrubber)) {
    freeExtentAndBuffer(scrubber);
  }

  // Inform whoever is waiting that scrubbing has completed.
  completeCompletion(&scrubber->completion);

  // Sample the waiter queue before changing the admin state, so that the
  // decision to notify reflects the waiters present at this point.
  bool notify = hasWaiters(&scrubber->waiters);

  // Note that the scrubber has stopped, and inform anyone who might be waiting
  // for that to happen. If finishDraining() reports that it did not finish a
  // drain, the state is set to suspended directly.
  if (!finishDraining(&scrubber->adminState)) {
    scrubber->adminState.state = ADMIN_STATE_SUSPENDED;
  }

  /*
   * We can't notify waiters until after we've finished draining or they'll
   * just requeue. Fortunately if there were waiters, we can't have been freed
   * yet.
   *
   * (Once the admin state is quiescent, enqueueCleanSlabWaiter() returns
   * VDO_NO_SPACE instead of queueing, so notified waiters cannot re-enter
   * the queue.)
   */
  if (notify) {
    notifyAllWaiters(&scrubber->waiters, NULL, NULL);
  }
}

/**********************************************************************/
// Forward declaration: slabScrubbed() and abortScrubbing() both call
// scrubNextSlab(), which is defined further down in this file.
static void scrubNextSlab(SlabScrubber *scrubber);

/**
 * Handle the successful scrubbing of the current slab: mark the slab as
 * finished, drop the count of slabs awaiting scrubbing by one, and move on
 * to the next slab. This callback is registered in applyJournalEntries(),
 * and is also called directly from startScrubbing().
 *
 * @param completion  The slab rebuild completion, whose parent is the scrubber
 **/
static void slabScrubbed(VDOCompletion *completion)
{
  SlabScrubber *owner = completion->parent;

  finishScrubbingSlab(owner->slab);

  // One fewer slab is outstanding.
  relaxedAdd64(&owner->slabCount, -1);

  scrubNextSlab(owner);
}

/**
 * Give up on scrubbing because of an error. The VDO is asked to enter
 * read-only mode, the error is recorded on the scrubber's completion, and
 * control is handed to scrubNextSlab(), which finishes scrubbing when it
 * finds the VDO read-only.
 *
 * @param scrubber  The slab scrubber
 * @param result    The error which caused the abort
 **/
static void abortScrubbing(SlabScrubber *scrubber, int result)
{
  // Make the failure visible both globally and on our own completion.
  enterReadOnlyMode(scrubber->readOnlyNotifier, result);
  setCompletionResult(&scrubber->completion, result);

  // scrubNextSlab() also notifies any waiters before checking for read-only.
  scrubNextSlab(scrubber);
}

/**
 * Error handler for the completions used while rebuilding a slab; it aborts
 * scrubbing with the result carried by the failed completion.
 *
 * @param completion  The slab rebuild completion, whose parent is the scrubber
 **/
static void handleScrubberError(VDOCompletion *completion)
{
  SlabScrubber *scrubber = completion->parent;
  abortScrubbing(scrubber, completion->result);
}

/**
 * Replay every entry of one slab journal block against the slab's reference
 * counts, stopping at the first entry which is out of bounds or cannot be
 * applied.
 *
 * @param block        A block with entries to apply
 * @param entryCount   The number of entries to apply
 * @param blockNumber  The sequence number of the block
 * @param slab         The slab to apply the entries to
 *
 * @return VDO_SUCCESS, the value returned by logErrorWithStringError() for
 *         an out-of-bounds entry, or the error from
 *         replayReferenceCountChange()
 **/
static int applyBlockEntries(PackedSlabJournalBlock *block,
                             JournalEntryCount       entryCount,
                             SequenceNumber          blockNumber,
                             Slab                   *slab)
{
  SlabBlockNumber maxSBN = slab->end - slab->start;

  // The journal point of the entry currently being replayed.
  JournalPoint point = {
    .sequenceNumber = blockNumber,
    .entryCount     = 0,
  };

  for (; point.entryCount < entryCount; point.entryCount++) {
    SlabJournalEntry entry = decodeSlabJournalEntry(block, point.entryCount);
    if (entry.sbn > maxSBN) {
      // This entry is out of bounds.
      return logErrorWithStringError(VDO_CORRUPT_JOURNAL, "Slab journal entry"
                                     " (%llu, %u) had invalid offset"
                                     " %u in slab (size %u blocks)",
                                     blockNumber, point.entryCount,
                                     entry.sbn, maxSBN);
    }

    int result = replayReferenceCountChange(slab->referenceCounts, &point,
                                            entry);
    if (result == VDO_SUCCESS) {
      continue;
    }

    logErrorWithStringError(result, "Slab journal entry (%llu, %u)"
                            " (%s of offset %" PRIu32 ") could not be"
                            " applied in slab %u",
                            blockNumber, point.entryCount,
                            getJournalOperationName(entry.operation),
                            entry.sbn, slab->slabNumber);
    return result;
  }

  return VDO_SUCCESS;
}

/**
 * Find the relevant extent of the slab journal and apply all valid entries.
 * This is a callback registered in startScrubbing().
 *
 * The journal has already been read into scrubber->journalData. The head of
 * the journal is taken from the header of the block just before the tail;
 * every block from head up to (but not including) tail is validated and
 * replayed via applyBlockEntries(). Any invalid block or failed replay aborts
 * scrubbing. On success, the slab is asked to save its rebuilt reference
 * counts, with slabScrubbed() as the callback.
 *
 * @param completion  The metadata read extent completion
 **/
static void applyJournalEntries(VDOCompletion *completion)
{
  SlabScrubber *scrubber        = completion->parent;
  Slab         *slab            = scrubber->slab;
  SlabJournal  *journal         = slab->journal;
  RefCounts    *referenceCounts = slab->referenceCounts;

  // Find the boundaries of the useful part of the journal.
  // The block at (tail - 1) is located in the buffer, and its header supplies
  // the sequence number of the journal head.
  SequenceNumber  tail     = journal->tail;
  TailBlockOffset endIndex = getSlabJournalBlockOffset(journal, tail - 1);
  char *endData = scrubber->journalData + (endIndex * VDO_BLOCK_SIZE);
  PackedSlabJournalBlock *endBlock = (PackedSlabJournalBlock *) endData;

  SequenceNumber  head      = getUInt64LE(endBlock->header.fields.head);
  TailBlockOffset headIndex = getSlabJournalBlockOffset(journal, head);
  BlockCount      index     = headIndex;

  // Remember where the reference counts claimed to be before replay, so it
  // can be compared against the last entry applied.
  JournalPoint refCountsPoint   = referenceCounts->slabJournalPoint;
  JournalPoint lastEntryApplied = refCountsPoint;
  for (SequenceNumber sequence = head; sequence < tail; sequence++) {
    // index is the position of this sequence number's block in the buffer.
    char *blockData = scrubber->journalData + (index * VDO_BLOCK_SIZE);
    PackedSlabJournalBlock *block  = (PackedSlabJournalBlock *) blockData;
    SlabJournalBlockHeader header;
    unpackSlabJournalBlockHeader(&block->header, &header);

    // The block must carry this allocator's nonce, be a slab journal block,
    // have the expected sequence number, and not claim more entries than a
    // block can hold (with a lower limit when it has block map increments).
    if ((header.nonce != slab->allocator->nonce)
        || (header.metadataType != VDO_METADATA_SLAB_JOURNAL)
        || (header.sequenceNumber != sequence)
        || (header.entryCount > journal->entriesPerBlock)
        || (header.hasBlockMapIncrements
            && (header.entryCount > journal->fullEntriesPerBlock))) {
      // The block is not what we expect it to be.
      logError("Slab journal block for slab %u was invalid",
               slab->slabNumber);
      abortScrubbing(scrubber, VDO_CORRUPT_JOURNAL);
      return;
    }

    int result = applyBlockEntries(block, header.entryCount, sequence, slab);
    if (result != VDO_SUCCESS) {
      abortScrubbing(scrubber, result);
      return;
    }

    // Record the journal point of the final entry in the block just applied.
    // NOTE(review): this assumes header.entryCount is at least 1; a block
    // with zero entries would make the subtraction wrap -- confirm that such
    // blocks cannot be on disk.
    lastEntryApplied.sequenceNumber = sequence;
    lastEntryApplied.entryCount     = header.entryCount - 1;

    // The journal is stored as a circular buffer of journal->size blocks.
    index++;
    if (index == journal->size) {
      index = 0;
    }
  }

  // At the end of rebuild, the refCounts should be accurate to the end
  // of the journal we just applied.
  int result = ASSERT(!beforeJournalPoint(&lastEntryApplied, &refCountsPoint),
                      "Refcounts are not more accurate than the slab journal");
  if (result != VDO_SUCCESS) {
    abortScrubbing(scrubber, result);
    return;
  }

  // Save out the rebuilt reference blocks.
  // The same completion is reused; slabScrubbed() runs once the save is done.
  prepareCompletion(completion, slabScrubbed, handleScrubberError,
                    completion->callbackThreadID, scrubber);
  startSlabAction(slab, ADMIN_STATE_SAVE_FOR_SCRUBBING, completion);
}

/**
 * Begin scrubbing the current slab. If the slab summary already reports the
 * slab as clean, it is treated as scrubbed immediately; otherwise the slab's
 * journal is read from disk and handed to applyJournalEntries().
 * This callback is registered in scrubNextSlab().
 *
 * @param completion  The scrubber's extent completion
 **/
static void startScrubbing(VDOCompletion *completion)
{
  SlabScrubber *scrubber = completion->parent;
  Slab         *current  = scrubber->slab;

  if (!getSummarizedCleanliness(current->allocator->summary,
                                current->slabNumber)) {
    // Read the whole journal, starting at the slab's journal origin.
    prepareCompletion(&scrubber->extent->completion, applyJournalEntries,
                      handleScrubberError, completion->callbackThreadID,
                      completion->parent);
    readMetadataExtent(scrubber->extent, current->journalOrigin);
    return;
  }

  // Nothing needs to be replayed for this slab.
  slabScrubbed(completion);
}

/**
 * Scrub the next slab if there is one. Scrubbing finishes instead if the VDO
 * is read-only, if no slab is queued, or if only high-priority slabs were
 * requested and none remain. If finishDraining() reports that it finished a
 * drain, no new slab is started.
 *
 * @param scrubber  The scrubber
 **/
static void scrubNextSlab(SlabScrubber *scrubber)
{
  // Note: this notify call is always safe only because scrubbing can only
  // be started when the VDO is quiescent.
  notifyAllWaiters(&scrubber->waiters, NULL, NULL);

  if (isReadOnly(scrubber->readOnlyNotifier)) {
    setCompletionResult(&scrubber->completion, VDO_READ_ONLY);
    finishScrubbing(scrubber);
    return;
  }

  Slab *next      = getNextSlab(scrubber);
  bool  outOfWork = (next == NULL);
  if (!outOfWork && scrubber->highPriorityOnly) {
    // Only high-priority slabs count when in high-priority-only mode.
    outOfWork = isRingEmpty(&scrubber->highPrioritySlabs);
  }

  if (outOfWork) {
    scrubber->highPriorityOnly = false;
    finishScrubbing(scrubber);
    return;
  }

  // Don't start another slab if a drain was just finished.
  if (finishDraining(&scrubber->adminState)) {
    return;
  }

  // Claim the slab and launch its scrubbing via the extent's completion.
  unspliceRingNode(&next->ringNode);
  scrubber->slab = next;

  VDOCompletion *extentCompletion = extentAsCompletion(scrubber->extent);
  prepareCompletion(extentCompletion, startScrubbing, handleScrubberError,
                    scrubber->completion.callbackThreadID, scrubber);
  startSlabAction(next, ADMIN_STATE_SCRUBBING, extentCompletion);
}

/**********************************************************************/
void scrubSlabs(SlabScrubber *scrubber,
                void         *parent,
                VDOAction    *callback,
                VDOAction    *errorHandler)
{
  // NOTE(review): the result of resumeIfQuiescent() is not checked here,
  // unlike in resumeScrubbing() -- confirm that this is intentional.
  resumeIfQuiescent(&scrubber->adminState);

  // The scrubber's completion will be finished on the calling thread.
  prepareCompletion(&scrubber->completion, callback, errorHandler,
                    getCallbackThreadID(), parent);

  if (hasSlabsToScrub(scrubber)) {
    scrubNextSlab(scrubber);
  } else {
    // Nothing is queued, so scrubbing is already complete.
    finishScrubbing(scrubber);
  }
}

/**********************************************************************/
void scrubHighPrioritySlabs(SlabScrubber  *scrubber,
                            bool           scrubAtLeastOne,
                            VDOCompletion *parent,
                            VDOAction     *callback,
                            VDOAction     *errorHandler)
{
  // When at least one slab must be scrubbed but none is high priority,
  // promote the next ordinary slab (if there is one).
  bool mustPromote
    = (scrubAtLeastOne && isRingEmpty(&scrubber->highPrioritySlabs));
  if (mustPromote) {
    Slab *candidate = getNextSlab(scrubber);
    if (candidate != NULL) {
      registerSlabForScrubbing(scrubber, candidate, true);
    }
  }

  // Restrict this scrubbing run to the high-priority ring.
  scrubber->highPriorityOnly = true;
  scrubSlabs(scrubber, parent, callback, errorHandler);
}

/**********************************************************************/
void stopScrubbing(SlabScrubber *scrubber, VDOCompletion *parent)
{
  if (!isQuiescent(&scrubber->adminState)) {
    // Scrubbing is active; begin a suspending drain with parent as the
    // completion to notify.
    startDraining(&scrubber->adminState, ADMIN_STATE_SUSPENDING, parent, NULL);
    return;
  }

  // Already quiescent, so there is nothing to wait for.
  completeCompletion(parent);
}

/**********************************************************************/
void resumeScrubbing(SlabScrubber *scrubber, VDOCompletion *parent)
{
  // With nothing queued there is nothing to resume.
  if (!hasSlabsToScrub(scrubber)) {
    completeCompletion(parent);
    return;
  }

  int result = resumeIfQuiescent(&scrubber->adminState);
  if (result == VDO_SUCCESS) {
    // Restart scrubbing, then report success to the parent.
    scrubNextSlab(scrubber);
    completeCompletion(parent);
    return;
  }

  // The admin state could not be resumed; pass the error to the parent.
  finishCompletion(parent, result);
}

/**********************************************************************/
int enqueueCleanSlabWaiter(SlabScrubber *scrubber, Waiter *waiter)
{
  int result;

  if (isReadOnly(scrubber->readOnlyNotifier)) {
    // The waiter is not queued when the VDO is read-only.
    result = VDO_READ_ONLY;
  } else if (isQuiescent(&scrubber->adminState)) {
    // The scrubber is not running, so the waiter would never be notified.
    result = VDO_NO_SPACE;
  } else {
    result = enqueueWaiter(&scrubber->waiters, waiter);
  }

  return result;
}

/**********************************************************************/
void dumpSlabScrubber(const SlabScrubber *scrubber)
{
  // Log a one-line summary: slab count, waiter count, admin state name, and
  // a marker when only high-priority slabs are being scrubbed.
  const char *prioritySuffix
    = (scrubber->highPriorityOnly ? ", highPriorityOnly " : "");

  logInfo("slabScrubber slabCount %u waiters %zu %s%s",
          getScrubberSlabCount(scrubber),
          countWaiters(&scrubber->waiters),
          getAdminStateName(&scrubber->adminState),
          prioritySuffix);
}