Blame vdo/base/slabJournalInternals.h

Packit Service 310c69
/*
Packit Service 310c69
 * Copyright (c) 2020 Red Hat, Inc.
Packit Service 310c69
 *
Packit Service 310c69
 * This program is free software; you can redistribute it and/or
Packit Service 310c69
 * modify it under the terms of the GNU General Public License
Packit Service 310c69
 * as published by the Free Software Foundation; either version 2
Packit Service 310c69
 * of the License, or (at your option) any later version.
Packit Service 310c69
 * 
Packit Service 310c69
 * This program is distributed in the hope that it will be useful,
Packit Service 310c69
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
Packit Service 310c69
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
Packit Service 310c69
 * GNU General Public License for more details.
Packit Service 310c69
 * 
Packit Service 310c69
 * You should have received a copy of the GNU General Public License
Packit Service 310c69
 * along with this program; if not, write to the Free Software
Packit Service 310c69
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
Packit Service 310c69
 * 02110-1301, USA. 
Packit Service 310c69
 *
Packit Service 310c69
 * $Id: //eng/vdo-releases/aluminum/src/c++/vdo/base/slabJournalInternals.h#8 $
Packit Service 310c69
 */
Packit Service 310c69
Packit Service 310c69
#ifndef SLAB_JOURNAL_INTERNALS_H
Packit Service 310c69
#define SLAB_JOURNAL_INTERNALS_H
Packit Service 310c69
Packit Service 310c69
#include "slabJournal.h"
Packit Service 310c69
Packit Service 310c69
#include "numeric.h"
Packit Service 310c69
Packit Service 310c69
#include "blockAllocatorInternals.h"
Packit Service 310c69
#include "blockMapEntry.h"
Packit Service 310c69
#include "journalPoint.h"
Packit Service 310c69
#include "slab.h"
Packit Service 310c69
#include "slabSummary.h"
Packit Service 310c69
#include "statistics.h"
Packit Service 310c69
#include "waitQueue.h"
Packit Service 310c69
Packit Service 310c69
/**
 * Slab journal blocks may have one of two formats, depending upon whether or
 * not any of the entries in the block are block map increments. Since the
 * steady state for a VDO is that all of the necessary block map pages will
 * be allocated, most slab journal blocks will have only data entries. Such
 * blocks can hold more entries, hence the two formats.
 **/
Packit Service 310c69
Packit Service 310c69
/**
 * A single slab journal entry in its unpacked form: a slab block number
 * paired with the journal operation recorded for it. This is the type
 * returned by unpackSlabJournalEntry() and decodeSlabJournalEntry().
 **/
Packit Service 310c69
struct slabJournalEntry {
Packit Service 310c69
  /** The slab block number the entry refers to */
  SlabBlockNumber  sbn;
Packit Service 310c69
  /** The operation recorded by the entry */
  JournalOperation operation;
Packit Service 310c69
};
Packit Service 310c69
Packit Service 310c69
/**
 * A single slab journal entry in its on-disk form.
 *
 * The entry occupies three bytes and is exposed through up to three
 * overlapping views: the individual fields, the raw bytes, and (on
 * little-endian machines only) a debugging view. packSlabJournalEntry() and
 * unpackSlabJournalEntry() read and write the 'fields' view.
 **/
Packit Service 310c69
typedef union {
Packit Service 310c69
  struct __attribute__((packed)) {
Packit Service 310c69
    /** Bits 0-7 of the slab block number */
    uint8_t offsetLow8;
Packit Service 310c69
    /** Bits 8-15 of the slab block number */
    uint8_t offsetMid8;
Packit Service 310c69
Packit Service 310c69
    /*
     * Bits 16-22 of the slab block number and the increment flag share the
     * third byte. The two bit-fields are declared in opposite order depending
     * on byte order; presumably this keeps the increment flag in the same bit
     * of the byte on either kind of machine (bit-field layout is
     * compiler-defined) -- TODO confirm for the compilers in use.
     */
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
Packit Service 310c69
    unsigned offsetHigh7 : 7;
Packit Service 310c69
    unsigned increment   : 1;
Packit Service 310c69
#else
Packit Service 310c69
    unsigned increment   : 1;
Packit Service 310c69
    unsigned offsetHigh7 : 7;
Packit Service 310c69
#endif
Packit Service 310c69
  } fields;
Packit Service 310c69
Packit Service 310c69
  // A raw view of the packed encoding (three bytes).
Packit Service 310c69
  uint8_t raw[3];
Packit Service 310c69
Packit Service 310c69
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
Packit Service 310c69
  // This view is only valid on little-endian machines and is only present for
Packit Service 310c69
  // ease of directly examining packed entries in GDB.
Packit Service 310c69
  struct __attribute__((packed)) {
Packit Service 310c69
    // The 23-bit slab block number as a single value
    unsigned offset    : 23;
Packit Service 310c69
    // The increment flag
    unsigned increment : 1;
Packit Service 310c69
  } littleEndian;
Packit Service 310c69
#endif
Packit Service 310c69
} __attribute__((packed)) PackedSlabJournalEntry;
Packit Service 310c69
Packit Service 310c69
/**
 * The unpacked representation of the header of a slab journal block.
 *
 * packSlabJournalBlockHeader() and unpackSlabJournalBlockHeader() convert
 * between this structure and PackedSlabJournalBlockHeader. Note that the
 * member order here is not the same as the on-disk field order.
 **/
Packit Service 310c69
typedef struct {
Packit Service 310c69
  /** Sequence number for head of journal */
Packit Service 310c69
  SequenceNumber     head;
Packit Service 310c69
  /** Sequence number for this block */
Packit Service 310c69
  SequenceNumber     sequenceNumber;
Packit Service 310c69
  /** The nonce for a given VDO instance */
Packit Service 310c69
  Nonce              nonce;
Packit Service 310c69
  /** Recovery journal point for last entry */
Packit Service 310c69
  JournalPoint       recoveryPoint;
Packit Service 310c69
  /** Metadata type */
Packit Service 310c69
  VDOMetadataType    metadataType;
Packit Service 310c69
  /** Whether this block contains block map increments */
Packit Service 310c69
  bool               hasBlockMapIncrements;
Packit Service 310c69
  /** The number of entries in the block */
Packit Service 310c69
  JournalEntryCount  entryCount;
Packit Service 310c69
} SlabJournalBlockHeader;
Packit Service 310c69
Packit Service 310c69
/**
 * The packed, on-disk representation of a slab journal block header.
 * All fields are kept in little-endian byte order.
 *
 * The header is exposed through up to three overlapping views: the
 * individual fields, the raw bytes, and (on little-endian machines only) a
 * debugging view. packSlabJournalBlockHeader() and
 * unpackSlabJournalBlockHeader() read and write the 'fields' view.
 **/
Packit Service 310c69
typedef union __attribute__((packed)) {
Packit Service 310c69
  struct __attribute__((packed)) {
Packit Service 310c69
    /** 64-bit sequence number for head of journal */
Packit Service 310c69
    byte               head[8];
Packit Service 310c69
    /** 64-bit sequence number for this block */
Packit Service 310c69
    byte               sequenceNumber[8];
Packit Service 310c69
    /** Recovery journal point for last entry, packed into 64 bits */
Packit Service 310c69
    PackedJournalPoint recoveryPoint;
Packit Service 310c69
    /** The 64-bit nonce for a given VDO instance */
Packit Service 310c69
    byte               nonce[8];
Packit Service 310c69
    /** 8-bit metadata type (should always be two, for the slab journal) */
Packit Service 310c69
    uint8_t            metadataType;
Packit Service 310c69
    /** Whether this block contains block map increments */
Packit Service 310c69
    bool               hasBlockMapIncrements;
Packit Service 310c69
    /** 16-bit count of the entries encoded in the block */
Packit Service 310c69
    byte               entryCount[2];
Packit Service 310c69
  } fields;
Packit Service 310c69
Packit Service 310c69
  // A raw view of the packed encoding. The size is the sum of the documented
  // field widths above, in field order (36 bytes in total).
Packit Service 310c69
  uint8_t raw[8 + 8 + 8 + 8 + 1 + 1 + 2];
Packit Service 310c69
Packit Service 310c69
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
Packit Service 310c69
  // This view is only valid on little-endian machines and is only present for
Packit Service 310c69
  // ease of directly examining packed entries in GDB.
  // NOTE(review): this view only overlays 'fields' exactly if VDOMetadataType
  // is one byte wide and the other typedefs match the documented widths;
  // their definitions are not visible in this header -- confirm.
Packit Service 310c69
  struct __attribute__((packed)) {
Packit Service 310c69
    SequenceNumber     head;
Packit Service 310c69
    SequenceNumber     sequenceNumber;
Packit Service 310c69
    PackedJournalPoint recoveryPoint;
Packit Service 310c69
    Nonce              nonce;
Packit Service 310c69
    VDOMetadataType    metadataType;
Packit Service 310c69
    bool               hasBlockMapIncrements;
Packit Service 310c69
    JournalEntryCount  entryCount;
Packit Service 310c69
  } littleEndian;
Packit Service 310c69
#endif
Packit Service 310c69
} PackedSlabJournalBlockHeader;
Packit Service 310c69
Packit Service 310c69
/**
 * Capacity constants for a slab journal block, derived from the block size
 * and from the sizes of the packed header and packed entry types.
 **/
enum {
Packit Service 310c69
  /** The number of bytes left in a block after the packed header */
  SLAB_JOURNAL_PAYLOAD_SIZE
Packit Service 310c69
    = VDO_BLOCK_SIZE - sizeof(PackedSlabJournalBlockHeader),
Packit Service 310c69
  /**
   * The number of entries in a block which has block map increments: the
   * payload size in bits divided by 25. In FullSlabJournalEntries each entry
   * takes a three-byte packed entry (24 bits) plus one bit of the entryTypes
   * bit map.
   **/
  SLAB_JOURNAL_FULL_ENTRIES_PER_BLOCK = (SLAB_JOURNAL_PAYLOAD_SIZE * 8) / 25,
Packit Service 310c69
  /**
   * The size in bytes of the entryTypes bit map: one bit per full entry,
   * rounded up to a whole number of bytes.
   **/
  SLAB_JOURNAL_ENTRY_TYPES_SIZE = ((SLAB_JOURNAL_FULL_ENTRIES_PER_BLOCK - 1)
Packit Service 310c69
                                   / 8) + 1,
Packit Service 310c69
  /**
   * The number of entries in a block holding only packed entries (no bit
   * map): the payload size divided by the packed entry size.
   **/
  SLAB_JOURNAL_ENTRIES_PER_BLOCK = (SLAB_JOURNAL_PAYLOAD_SIZE
Packit Service 310c69
                                    / sizeof(PackedSlabJournalEntry)),
Packit Service 310c69
};
Packit Service 310c69
Packit Service 310c69
/**
 * The payload of a slab journal block which has block map increments: an
 * array of packed entries followed by a bit map with one bit per entry.
 **/
Packit Service 310c69
typedef struct {
Packit Service 310c69
  /* The entries themselves */
Packit Service 310c69
  PackedSlabJournalEntry entries[SLAB_JOURNAL_FULL_ENTRIES_PER_BLOCK];
Packit Service 310c69
  /* The bit map indicating which entries are block map increments */
Packit Service 310c69
  byte                   entryTypes[SLAB_JOURNAL_ENTRY_TYPES_SIZE];
Packit Service 310c69
} __attribute__((packed)) FullSlabJournalEntries;
Packit Service 310c69
Packit Service 310c69
/**
 * The payload of a slab journal block, viewed in either of the two block
 * formats. The 'space' member pads the union out to
 * SLAB_JOURNAL_PAYLOAD_SIZE bytes regardless of which format is in use.
 **/
typedef union {
Packit Service 310c69
  /* Entries which include block map increments */
Packit Service 310c69
  FullSlabJournalEntries fullEntries;
Packit Service 310c69
  /* Entries which are only data updates */
Packit Service 310c69
  PackedSlabJournalEntry entries[SLAB_JOURNAL_ENTRIES_PER_BLOCK];
Packit Service 310c69
  /* Ensure the payload fills to the end of the block */
Packit Service 310c69
  byte                   space[SLAB_JOURNAL_PAYLOAD_SIZE];
Packit Service 310c69
} __attribute__((packed)) SlabJournalPayload;
Packit Service 310c69
Packit Service 310c69
/**
 * A complete slab journal block in its packed form: the packed header
 * immediately followed by the payload. Since the payload is sized as
 * VDO_BLOCK_SIZE minus the packed header size, the whole structure is
 * intended to occupy exactly one block.
 **/
typedef struct {
Packit Service 310c69
  /* The packed block header */
  PackedSlabJournalBlockHeader header;
Packit Service 310c69
  /* The entries, in one of the two payload formats */
  SlabJournalPayload           payload;
Packit Service 310c69
} __attribute__((packed)) PackedSlabJournalBlock;
Packit Service 310c69
Packit Service 310c69
/**
 * A lock on a slab journal block. struct slabJournal keeps an array of these
 * ("the locks for each on disk block") and a pointer to the one for the
 * oldest unreaped block.
 **/
typedef struct {
Packit Service 310c69
  /* NOTE(review): presumably the number of holders of this lock -- confirm */
  uint16_t       count;
Packit Service 310c69
  /*
   * NOTE(review): presumably a recovery journal sequence number associated
   * with this block -- confirm against the users of this field
   */
  SequenceNumber recoveryStart;
Packit Service 310c69
} JournalLock;
Packit Service 310c69
Packit Service 310c69
/**
 * The state of the journal belonging to a single slab.
 *
 * The structure ends with a flexible array member (locks), so it must be
 * allocated with room for the lock array after the fixed members.
 **/
struct slabJournal {
Packit Service 310c69
  /** A waiter object for getting a VIO pool entry */
Packit Service 310c69
  Waiter                       resourceWaiter;
Packit Service 310c69
  /** A waiter object for updating the slab summary */
Packit Service 310c69
  Waiter                       slabSummaryWaiter;
Packit Service 310c69
  /** A waiter object for getting an extent with which to flush */
Packit Service 310c69
  Waiter                       flushWaiter;
Packit Service 310c69
  /** The queue of VIOs waiting to make an entry */
Packit Service 310c69
  WaitQueue                    entryWaiters;
Packit Service 310c69
  /** The parent slab reference of this journal */
Packit Service 310c69
  Slab                        *slab;
Packit Service 310c69
Packit Service 310c69
  /** Whether a tail block commit is pending */
Packit Service 310c69
  bool                         waitingToCommit;
Packit Service 310c69
  /** Whether the journal is updating the slab summary */
Packit Service 310c69
  bool                         updatingSlabSummary;
Packit Service 310c69
  /** Whether the journal is adding entries from the entryWaiters queue */
Packit Service 310c69
  bool                         addingEntries;
Packit Service 310c69
  /** Whether a partial write is in progress */
Packit Service 310c69
  bool                         partialWriteInProgress;
Packit Service 310c69
Packit Service 310c69
  /** The oldest block in the journal on disk */
Packit Service 310c69
  SequenceNumber               head;
Packit Service 310c69
  /** The oldest block in the journal which may not be reaped */
Packit Service 310c69
  SequenceNumber               unreapable;
Packit Service 310c69
  /** The end of the half-open interval of the active journal */
Packit Service 310c69
  SequenceNumber               tail;
Packit Service 310c69
  /** The next journal block to be committed */
Packit Service 310c69
  SequenceNumber               nextCommit;
Packit Service 310c69
  /**
   * The tail sequence number that is written in the slab summary.
   * NOTE(review): the distinction between this field and lastSummarized is
   * not evident from this header -- confirm against the implementation.
   **/
Packit Service 310c69
  SequenceNumber               summarized;
Packit Service 310c69
  /** The tail sequence number that was last summarized in slab summary */
Packit Service 310c69
  SequenceNumber               lastSummarized;
Packit Service 310c69
Packit Service 310c69
  /** The sequence number of the recovery journal lock */
Packit Service 310c69
  SequenceNumber               recoveryLock;
Packit Service 310c69
Packit Service 310c69
  /**
   * The number of entries which fit in a single block. Can't use the constant
   * (SLAB_JOURNAL_ENTRIES_PER_BLOCK) because unit tests change this number.
   **/
Packit Service 310c69
  JournalEntryCount            entriesPerBlock;
Packit Service 310c69
  /**
   * The number of full entries which fit in a single block. Can't use the
   * constant (SLAB_JOURNAL_FULL_ENTRIES_PER_BLOCK) because unit tests change
   * this number.
   **/
Packit Service 310c69
  JournalEntryCount            fullEntriesPerBlock;
Packit Service 310c69
Packit Service 310c69
  /** The recovery journal of the VDO (slab journal holds locks on it) */
Packit Service 310c69
  RecoveryJournal             *recoveryJournal;
Packit Service 310c69
Packit Service 310c69
  /** The slab summary to update tail block location */
Packit Service 310c69
  SlabSummaryZone             *summary;
Packit Service 310c69
  /** The statistics shared by all slab journals in our physical zone */
Packit Service 310c69
  AtomicSlabJournalStatistics *events;
Packit Service 310c69
  /** A ring of the VIO pool entries for outstanding journal block writes */
Packit Service 310c69
  RingNode                     uncommittedBlocks;
Packit Service 310c69
Packit Service 310c69
  /**
   * The current tail block header state. This will be packed into
   * the block just before it is written.
   **/
Packit Service 310c69
  SlabJournalBlockHeader       tailHeader;
Packit Service 310c69
  /** A pointer to a block-sized buffer holding the packed block data */
Packit Service 310c69
  PackedSlabJournalBlock      *block;
Packit Service 310c69
Packit Service 310c69
  /**
   * The number of blocks in the on-disk journal. This is the modulus used by
   * getSlabJournalBlockOffset().
   **/
Packit Service 310c69
  BlockCount                   size;
Packit Service 310c69
  /** The number of blocks at which to start pushing reference blocks */
Packit Service 310c69
  BlockCount                   flushingThreshold;
Packit Service 310c69
  /** The number of blocks at which all reference blocks should be writing */
Packit Service 310c69
  BlockCount                   flushingDeadline;
Packit Service 310c69
  /** The number of blocks at which to wait for reference blocks to write */
Packit Service 310c69
  BlockCount                   blockingThreshold;
Packit Service 310c69
  /** The number of blocks at which to scrub the slab before coming online */
Packit Service 310c69
  BlockCount                   scrubbingThreshold;
Packit Service 310c69
Packit Service 310c69
  /** This node is for BlockAllocator to keep a queue of dirty journals */
Packit Service 310c69
  RingNode                     dirtyNode;
Packit Service 310c69
Packit Service 310c69
  /** The lock for the oldest unreaped block of the journal */
Packit Service 310c69
  JournalLock                 *reapLock;
Packit Service 310c69
  /** The locks for each on disk block (flexible array member; must be last) */
Packit Service 310c69
  JournalLock                  locks[];
Packit Service 310c69
};
Packit Service 310c69
Packit Service 310c69
/**
Packit Service 310c69
 * Get the slab journal block offset of the given sequence number.
Packit Service 310c69
 *
Packit Service 310c69
 * @param journal   The slab journal
Packit Service 310c69
 * @param sequence  The sequence number
Packit Service 310c69
 *
Packit Service 310c69
 * @return the offset corresponding to the sequence number
Packit Service 310c69
 **/
Packit Service 310c69
__attribute__((warn_unused_result))
Packit Service 310c69
static inline TailBlockOffset
Packit Service 310c69
getSlabJournalBlockOffset(SlabJournal *journal, SequenceNumber sequence)
Packit Service 310c69
{
Packit Service 310c69
  return (sequence % journal->size);
Packit Service 310c69
}
Packit Service 310c69
Packit Service 310c69
/**
 * Encode a slab journal entry (exposed for unit tests).
 *
 * This header only declares the function; its definition lives elsewhere.
 * NOTE(review): both tailHeader and payload are passed as non-const
 * pointers, so the implementation presumably updates the header (e.g. its
 * entry count) as well as the payload -- confirm against the definition.
 *
 * @param tailHeader  The unpacked header for the block
 * @param payload     The journal block payload to hold the entry
 * @param sbn         The slab block number of the entry to encode
 * @param operation   The type of the entry
 **/
Packit Service 310c69
void encodeSlabJournalEntry(SlabJournalBlockHeader *tailHeader,
Packit Service 310c69
                            SlabJournalPayload     *payload,
Packit Service 310c69
                            SlabBlockNumber         sbn,
Packit Service 310c69
                            JournalOperation        operation);
Packit Service 310c69
Packit Service 310c69
/**
 * Decode a slab journal entry.
 *
 * This header only declares the function; its definition lives elsewhere.
 * The result must not be ignored (warn_unused_result).
 *
 * @param block       The journal block holding the entry
 * @param entryCount  The number of the entry (presumably its index within
 *                    the block -- confirm the base against the definition)
 *
 * @return The decoded entry
 **/
Packit Service 310c69
SlabJournalEntry decodeSlabJournalEntry(PackedSlabJournalBlock *block,
Packit Service 310c69
                                        JournalEntryCount       entryCount)
Packit Service 310c69
  __attribute__((warn_unused_result));
Packit Service 310c69
Packit Service 310c69
/**
Packit Service 310c69
 * Generate the packed encoding of a slab journal entry.
Packit Service 310c69
 *
Packit Service 310c69
 * @param packed       The entry into which to pack the values
Packit Service 310c69
 * @param sbn          The slab block number of the entry to encode
Packit Service 310c69
 * @param isIncrement  The increment flag
Packit Service 310c69
 **/
Packit Service 310c69
static inline void packSlabJournalEntry(PackedSlabJournalEntry *packed,
Packit Service 310c69
                                        SlabBlockNumber         sbn,
Packit Service 310c69
                                        bool                    isIncrement)
Packit Service 310c69
{
Packit Service 310c69
  packed->fields.offsetLow8  = (sbn & 0x0000FF);
Packit Service 310c69
  packed->fields.offsetMid8  = (sbn & 0x00FF00) >> 8;
Packit Service 310c69
  packed->fields.offsetHigh7 = (sbn & 0x7F0000) >> 16;
Packit Service 310c69
  packed->fields.increment   = isIncrement ? 1 : 0;
Packit Service 310c69
}
Packit Service 310c69
Packit Service 310c69
/**
Packit Service 310c69
 * Decode the packed representation of a slab journal entry.
Packit Service 310c69
 *
Packit Service 310c69
 * @param packed  The packed entry to decode
Packit Service 310c69
 *
Packit Service 310c69
 * @return The decoded slab journal entry
Packit Service 310c69
 **/
Packit Service 310c69
__attribute__((warn_unused_result))
Packit Service 310c69
static inline
Packit Service 310c69
SlabJournalEntry unpackSlabJournalEntry(const PackedSlabJournalEntry *packed)
Packit Service 310c69
{
Packit Service 310c69
  SlabJournalEntry entry;
Packit Service 310c69
  entry.sbn = packed->fields.offsetHigh7;
Packit Service 310c69
  entry.sbn <<= 8;
Packit Service 310c69
  entry.sbn |= packed->fields.offsetMid8;
Packit Service 310c69
  entry.sbn <<= 8;
Packit Service 310c69
  entry.sbn |= packed->fields.offsetLow8;
Packit Service 310c69
  entry.operation
Packit Service 310c69
    = (packed->fields.increment ? DATA_INCREMENT : DATA_DECREMENT);
Packit Service 310c69
  return entry;
Packit Service 310c69
}
Packit Service 310c69
Packit Service 310c69
/**
Packit Service 310c69
 * Generate the packed representation of a slab block header.
Packit Service 310c69
 *
Packit Service 310c69
 * @param header  The header containing the values to encode
Packit Service 310c69
 * @param packed  The header into which to pack the values
Packit Service 310c69
 **/
Packit Service 310c69
static inline
Packit Service 310c69
void packSlabJournalBlockHeader(const SlabJournalBlockHeader *header,
Packit Service 310c69
                                PackedSlabJournalBlockHeader *packed)
Packit Service 310c69
{
Packit Service 310c69
  storeUInt64LE(packed->fields.head,           header->head);
Packit Service 310c69
  storeUInt64LE(packed->fields.sequenceNumber, header->sequenceNumber);
Packit Service 310c69
  storeUInt64LE(packed->fields.nonce,          header->nonce);
Packit Service 310c69
  storeUInt16LE(packed->fields.entryCount,     header->entryCount);
Packit Service 310c69
Packit Service 310c69
  packed->fields.metadataType          = header->metadataType;
Packit Service 310c69
  packed->fields.hasBlockMapIncrements = header->hasBlockMapIncrements;
Packit Service 310c69
Packit Service 310c69
  packJournalPoint(&header->recoveryPoint, &packed->fields.recoveryPoint);
Packit Service 310c69
}
Packit Service 310c69
Packit Service 310c69
/**
Packit Service 310c69
 * Decode the packed representation of a slab block header.
Packit Service 310c69
 *
Packit Service 310c69
 * @param packed  The packed header to decode
Packit Service 310c69
 * @param header  The header into which to unpack the values
Packit Service 310c69
 **/
Packit Service 310c69
static inline
Packit Service 310c69
void unpackSlabJournalBlockHeader(const PackedSlabJournalBlockHeader *packed,
Packit Service 310c69
                                  SlabJournalBlockHeader             *header)
Packit Service 310c69
{
Packit Service 310c69
  *header = (SlabJournalBlockHeader) {
Packit Service 310c69
    .head                  = getUInt64LE(packed->fields.head),
Packit Service 310c69
    .sequenceNumber        = getUInt64LE(packed->fields.sequenceNumber),
Packit Service 310c69
    .nonce                 = getUInt64LE(packed->fields.nonce),
Packit Service 310c69
    .entryCount            = getUInt16LE(packed->fields.entryCount),
Packit Service 310c69
    .metadataType          = packed->fields.metadataType,
Packit Service 310c69
    .hasBlockMapIncrements = packed->fields.hasBlockMapIncrements,
Packit Service 310c69
  };
Packit Service 310c69
  unpackJournalPoint(&packed->fields.recoveryPoint, &header->recoveryPoint);
Packit Service 310c69
}
Packit Service 310c69
Packit Service 310c69
#endif // SLAB_JOURNAL_INTERNALS_H