Blob Blame History Raw
/*
 * Copyright (c) 2020 Red Hat, Inc.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 * 
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 * 
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
 * 02110-1301, USA. 
 *
 * $Id: //eng/vdo-releases/aluminum/src/c++/vdo/base/forest.c#8 $
 */

#include "forest.h"

#include "logger.h"
#include "memoryAlloc.h"

#include "blockMap.h"
#include "blockMapInternals.h"
#include "blockMapPage.h"
#include "blockMapTree.h"
#include "blockMapTreeInternals.h"
#include "constants.h"
#include "dirtyLists.h"
#include "forest.h"
#include "numUtils.h"
#include "recoveryJournal.h"
#include "slabDepot.h"
#include "slabJournal.h"
#include "types.h"
#include "vdoInternal.h"
#include "vio.h"
#include "vioPool.h"

enum {
  BLOCK_MAP_VIO_POOL_SIZE = 64,
};

typedef struct {
  TreePage *levels[BLOCK_MAP_TREE_HEIGHT];
} BlockMapTreeSegment;

typedef struct blockMapTree {
  BlockMapTreeSegment *segments;
} BlockMapTree;

struct forest {
  BlockMap      *map;
  size_t         segments;
  Boundary      *boundaries;
  TreePage     **pages;
  BlockMapTree   trees[];
};

typedef struct {
  PageNumber pageIndex;
  SlotNumber slot;
} CursorLevel;

typedef struct cursors Cursors;

typedef struct {
  Waiter        waiter;
  BlockMapTree *tree;
  Height        height;
  Cursors      *parent;
  Boundary      boundary;
  CursorLevel   levels[BLOCK_MAP_TREE_HEIGHT];
  VIOPoolEntry *vioPoolEntry;
} Cursor;

struct cursors {
  BlockMap         *map;
  BlockMapTreeZone *zone;
  VIOPool          *pool;
  EntryCallback    *entryCallback;
  VDOCompletion    *parent;
  RootCount         activeRoots;
  Cursor            cursors[];
};

/**********************************************************************/
TreePage *getTreePageByIndex(Forest       *forest,
                             RootCount     rootIndex,
                             Height        height,
                             PageNumber    pageIndex)
{
  PageNumber offset = 0;
  for (size_t segment = 0; segment < forest->segments; segment++) {
    PageNumber border = forest->boundaries[segment].levels[height - 1];
    if (pageIndex < border) {
      BlockMapTree *tree = &forest->trees[rootIndex];
      return &(tree->segments[segment].levels[height - 1][pageIndex - offset]);
    }
    offset = border;
  }

  return NULL;
}

/**
 * Compute the number of pages which must be allocated at each level in order
 * to grow the forest to a new number of entries.
 *
 * @param [in]  rootCount      The number of roots
 * @param [in]  flatPageCount  The number of flat block map pages
 * @param [in]  oldSizes       The current size of the forest at each level
 * @param [in]  entries        The new number of entries the block map must
 *                             address
 * @param [out] newSizes       The new size of the forest at each level
 *
 * @return The total number of non-leaf pages required
 **/
static BlockCount computeNewPages(RootCount   rootCount,
                                  BlockCount  flatPageCount,
                                  Boundary   *oldSizes,
                                  BlockCount  entries,
                                  Boundary   *newSizes)
{
  PageCount leafPages
    = maxPageCount(computeBlockMapPageCount(entries) - flatPageCount, 1);
  PageCount  levelSize  = computeBucketCount(leafPages, rootCount);
  BlockCount totalPages = 0;
  for (Height height = 0; height < BLOCK_MAP_TREE_HEIGHT; height++) {
    levelSize = computeBucketCount(levelSize, BLOCK_MAP_ENTRIES_PER_PAGE);
    newSizes->levels[height] = levelSize;
    BlockCount newPages = levelSize;
    if (oldSizes != NULL) {
      newPages -= oldSizes->levels[height];
    }
    totalPages += (newPages * rootCount);
  }

  return totalPages;
}

/**********************************************************************/
static int makeSegment(Forest      *oldForest,
                       BlockCount   newPages,
                       Boundary    *newBoundary,
                       Forest      *forest)
{
  size_t index     = (oldForest == NULL) ? 0 : oldForest->segments;
  forest->segments = index + 1;

  int result = ALLOCATE(forest->segments, Boundary, "forest boundary array",
                        &forest->boundaries);
  if (result != VDO_SUCCESS) {
    return result;
  }

  result = ALLOCATE(forest->segments, TreePage *, "forest page pointers",
                    &forest->pages);
  if (result != VDO_SUCCESS) {
    return result;
  }

  result = ALLOCATE(newPages, TreePage, "new forest pages",
                    &forest->pages[index]);
  if (result != VDO_SUCCESS) {
    return result;
  }

  if (index > 0) {
    memcpy(forest->boundaries, oldForest->boundaries,
           index * sizeof(Boundary));
    memcpy(forest->pages, oldForest->pages, index * sizeof(TreePage *));
  }

  memcpy(&(forest->boundaries[index]), newBoundary, sizeof(Boundary));

  PageCount segmentSizes[BLOCK_MAP_TREE_HEIGHT];
  for (Height height = 0; height < BLOCK_MAP_TREE_HEIGHT; height++) {
    segmentSizes[height] = newBoundary->levels[height];
    if (index > 0) {
      segmentSizes[height] -= oldForest->boundaries[index - 1].levels[height];
    }
  }

  TreePage *pagePtr = forest->pages[index];
  for (RootCount root = 0; root < forest->map->rootCount; root++) {
    BlockMapTree *tree = &(forest->trees[root]);
    int result = ALLOCATE(forest->segments, BlockMapTreeSegment,
                          "tree root segments", &tree->segments);
    if (result != VDO_SUCCESS) {
      return result;
    }

    if (index > 0) {
      memcpy(tree->segments, oldForest->trees[root].segments,
             index * sizeof(BlockMapTreeSegment));
    }

    BlockMapTreeSegment *segment = &(tree->segments[index]);
    for (Height height = 0; height < BLOCK_MAP_TREE_HEIGHT; height++) {
      if (segmentSizes[height] == 0) {
        continue;
      }

      segment->levels[height] = pagePtr;
      if (height == (BLOCK_MAP_TREE_HEIGHT - 1)) {
        // Record the root.
        BlockMapPage *page = formatBlockMapPage(pagePtr->pageBuffer,
                                                forest->map->nonce,
                                                INVALID_PBN, true);
        page->entries[0] = packPBN(forest->map->rootOrigin + root,
                                   MAPPING_STATE_UNCOMPRESSED);
      }
      pagePtr += segmentSizes[height];
    }
  }

  return VDO_SUCCESS;
}

/**********************************************************************/
static void deforest(Forest *forest, size_t firstPageSegment)
{
  if (forest->pages != NULL) {
    for (size_t segment = firstPageSegment; segment < forest->segments;
         segment++) {
      FREE(forest->pages[segment]);
    }
    FREE(forest->pages);
  }

  for (RootCount root = 0; root < forest->map->rootCount; root++) {
    BlockMapTree *tree = &(forest->trees[root]);
    FREE(tree->segments);
  }

  FREE(forest->boundaries);
  FREE(forest);
}

/**********************************************************************/
int makeForest(BlockMap *map, BlockCount entries)
{
  STATIC_ASSERT(offsetof(TreePage, waiter) == 0);

  Forest   *oldForest   = map->forest;
  Boundary *oldBoundary = NULL;
  if (oldForest != NULL) {
    oldBoundary = &(oldForest->boundaries[oldForest->segments - 1]);
  }

  Boundary newBoundary;
  BlockCount newPages = computeNewPages(map->rootCount, map->flatPageCount,
                                        oldBoundary, entries, &newBoundary);
  if (newPages == 0) {
    map->nextEntryCount = entries;
    return VDO_SUCCESS;
  }

  Forest *forest;
  int result = ALLOCATE_EXTENDED(Forest, map->rootCount, BlockMapTree,
                                 __func__, &forest);
  if (result != VDO_SUCCESS) {
    return result;
  }

  forest->map = map;
  result = makeSegment(oldForest, newPages, &newBoundary, forest);
  if (result != VDO_SUCCESS) {
    deforest(forest, forest->segments - 1);
    return result;
  }

  map->nextForest     = forest;
  map->nextEntryCount = entries;
  return VDO_SUCCESS;
}

/**********************************************************************/
void freeForest(Forest **forestPtr)
{
  Forest *forest = *forestPtr;
  if (forest == NULL) {
    return;
  }

  deforest(forest, 0);
  *forestPtr = NULL;
}

/**********************************************************************/
void abandonForest(BlockMap *map)
{
  Forest *forest = map->nextForest;
  map->nextForest = NULL;
  if (forest != NULL) {
    deforest(forest, forest->segments - 1);
  }

  map->nextEntryCount = 0;
}

/**********************************************************************/
void replaceForest(BlockMap *map)
{
  if (map->nextForest != NULL) {
    if (map->forest != NULL) {
      deforest(map->forest, map->forest->segments);
    }
    map->forest     = map->nextForest;
    map->nextForest = NULL;
  }

  map->entryCount     = map->nextEntryCount;
  map->nextEntryCount = 0;
}

/**
 * Finish the traversal of a single tree. If it was the last cursor, finish
 * the traversal.
 *
 * @param cursor  The cursor doing the traversal
 **/
static void finishCursor(Cursor *cursor)
{
  Cursors *cursors = cursor->parent;
  returnVIOToPool(cursors->pool, cursor->vioPoolEntry);
  if (--cursors->activeRoots > 0) {
    return;
  }

  VDOCompletion *parent = cursors->parent;
  FREE(cursors);

  finishCompletion(parent, VDO_SUCCESS);
}

/**********************************************************************/
static void traverse(Cursor *cursor);

/**
 * Continue traversing a block map tree.
 *
 * @param completion  The VIO doing a read or write
 **/
static void continueTraversal(VDOCompletion *completion)
{
  VIOPoolEntry *poolEntry = completion->parent;
  Cursor       *cursor    = poolEntry->parent;
  traverse(cursor);
}

/**
 * Continue traversing a block map tree now that a page has been loaded.
 *
 * @param completion  The VIO doing the read
 **/
static void finishTraversalLoad(VDOCompletion *completion)
{
  VIOPoolEntry *entry  = completion->parent;
  Cursor       *cursor = entry->parent;
  Height        height = cursor->height;
  CursorLevel  *level  = &cursor->levels[height];

  TreePage     *treePage
    = &(cursor->tree->segments[0].levels[height][level->pageIndex]);
  BlockMapPage *page = (BlockMapPage *) treePage->pageBuffer;
  copyValidPage(entry->buffer, cursor->parent->map->nonce,
                entry->vio->physical, page);
  traverse(cursor);
}

/**
 * Traverse a single block map tree. This is the recursive heart of the
 * traversal process.
 *
 * @param cursor  The cursor doing the traversal
 **/
static void traverse(Cursor *cursor)
{
  for (; cursor->height < BLOCK_MAP_TREE_HEIGHT; cursor->height++) {
    Height       height = cursor->height;
    CursorLevel *level  = &cursor->levels[height];
    TreePage *treePage
      = &(cursor->tree->segments[0].levels[height][level->pageIndex]);
    BlockMapPage *page = (BlockMapPage *) treePage->pageBuffer;
    if (!isBlockMapPageInitialized(page)) {
      continue;
    }

    for (; level->slot < BLOCK_MAP_ENTRIES_PER_PAGE; level->slot++) {
      DataLocation location = unpackBlockMapEntry(&page->entries[level->slot]);
      if (!isValidLocation(&location)) {
        // This entry is invalid, so remove it from the page.
        page->entries[level->slot]
          = packPBN(ZERO_BLOCK, MAPPING_STATE_UNMAPPED);
        writeTreePage(treePage, cursor->parent->zone);
        continue;
      }

      if (!isMappedLocation(&location)) {
        continue;
      }

      PageNumber entryIndex
        = (BLOCK_MAP_ENTRIES_PER_PAGE * level->pageIndex) + level->slot;

      // Erase mapped entries past the end of the logical space.
      if (entryIndex >= cursor->boundary.levels[height]) {
        page->entries[level->slot]
          = packPBN(ZERO_BLOCK, MAPPING_STATE_UNMAPPED);
        writeTreePage(treePage, cursor->parent->zone);
        continue;
      }

      if (cursor->height < BLOCK_MAP_TREE_HEIGHT - 1) {
        int result = cursor->parent->entryCallback(location.pbn,
                                                   cursor->parent->parent);
        if (result != VDO_SUCCESS) {
          page->entries[level->slot]
            = packPBN(ZERO_BLOCK, MAPPING_STATE_UNMAPPED);
          writeTreePage(treePage, cursor->parent->zone);
          continue;
        }
      }

      if (cursor->height == 0) {
        continue;
      }

      cursor->height--;
      CursorLevel *nextLevel = &cursor->levels[cursor->height];
      nextLevel->pageIndex   = entryIndex;
      nextLevel->slot        = 0;
      level->slot++;
      launchReadMetadataVIO(cursor->vioPoolEntry->vio, location.pbn,
                            finishTraversalLoad, continueTraversal);
      return;
    }
  }

  finishCursor(cursor);
}

/**
 * Start traversing a single block map tree now that the Cursor has a VIO with
 * which to load pages.
 *
 * <p>Implements WaiterCallback.
 *
 * @param waiter   The Cursor
 * @param context  The VIOPoolEntry just acquired
 **/
static void launchCursor(Waiter *waiter, void *context)
{
  STATIC_ASSERT(offsetof(Cursor, waiter) == 0);
  Cursor *cursor               = (Cursor *) waiter;
  cursor->vioPoolEntry         = (VIOPoolEntry *) context;
  cursor->vioPoolEntry->parent = cursor;
  vioAsCompletion(cursor->vioPoolEntry->vio)->callbackThreadID
    = cursor->parent->zone->mapZone->threadID;
  traverse(cursor);
}

/**
 * Compute the number of pages used at each level of the given root's tree.
 *
 * @param map        The block map
 * @param rootIndex  The index of the root to measure
 *
 * @return The list of page counts as a Boundary
 **/
static Boundary computeBoundary(BlockMap *map, RootCount rootIndex)
{
  PageCount leafPages     = computeBlockMapPageCount(map->entryCount);
  PageCount treeLeafPages = leafPages - map->flatPageCount;

  /*
   * Compute the leaf pages for this root. If the number of leaf pages does
   * not distribute evenly, we must determine if this root gets an extra page.
   * Extra pages are assigned to roots starting at firstTreeRoot and going up.
   */
  PageCount firstTreeRoot = map->flatPageCount % map->rootCount;
  PageCount lastTreeRoot  = (leafPages - 1) % map->rootCount;

  PageCount levelPages = treeLeafPages / map->rootCount;
  if (inCyclicRange(firstTreeRoot, rootIndex, lastTreeRoot, map->rootCount)) {
    levelPages++;
  }

  Boundary boundary;
  for (Height height = 0; height < BLOCK_MAP_TREE_HEIGHT - 1; height++) {
    boundary.levels[height] = levelPages;
    levelPages = computeBucketCount(levelPages, BLOCK_MAP_ENTRIES_PER_PAGE);
  }

  // The root node always exists, even if the root is otherwise unused.
  boundary.levels[BLOCK_MAP_TREE_HEIGHT - 1] = 1;

  return boundary;
}

/**********************************************************************/
void traverseForest(BlockMap      *map,
                    EntryCallback *entryCallback,
                    VDOCompletion *parent)
{
  if (computeBlockMapPageCount(map->entryCount) <= map->flatPageCount) {
    // There are no tree pages, so there's nothing to do.
    finishCompletion(parent, VDO_SUCCESS);
    return;
  }

  Cursors *cursors;
  int result = ALLOCATE_EXTENDED(Cursors, map->rootCount, Cursor, __func__,
                                 &cursors);
  if (result != VDO_SUCCESS) {
    finishCompletion(parent, result);
    return;
  }

  cursors->map           = map;
  cursors->zone          = &(getBlockMapZone(map, 0)->treeZone);
  cursors->pool          = cursors->zone->vioPool;
  cursors->entryCallback = entryCallback;
  cursors->parent        = parent;
  cursors->activeRoots   = map->rootCount;
  for (RootCount root = 0; root < map->rootCount; root++) {
    Cursor *cursor = &cursors->cursors[root];
    *cursor = (Cursor) {
      .tree     = &map->forest->trees[root],
      .height   = BLOCK_MAP_TREE_HEIGHT - 1,
      .parent   = cursors,
      .boundary = computeBoundary(map, root),
    };

    cursor->waiter.callback = launchCursor;
    acquireVIOFromPool(cursors->pool, &cursor->waiter);
  };
}

/**********************************************************************/
BlockCount computeForestSize(BlockCount logicalBlocks, RootCount rootCount)
{
  Boundary newSizes;
  BlockCount approximateNonLeaves
    = computeNewPages(rootCount, 0, NULL, logicalBlocks, &newSizes);

  // Exclude the tree roots since those aren't allocated from slabs,
  // and also exclude the super-roots, which only exist in memory.
  approximateNonLeaves
    -= rootCount * (newSizes.levels[BLOCK_MAP_TREE_HEIGHT - 2]
                    + newSizes.levels[BLOCK_MAP_TREE_HEIGHT - 1]);

  BlockCount approximateLeaves
    = computeBlockMapPageCount(logicalBlocks - approximateNonLeaves);

  // This can be a slight over-estimate since the tree will never have to
  // address these blocks, so it might be a tiny bit smaller.
  return (approximateNonLeaves + approximateLeaves);
}