/* * Copyright (c) 2020 Red Hat, Inc. * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version 2 * of the License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA * 02110-1301, USA. * * $Id: //eng/vdo-releases/aluminum/src/c++/vdo/base/slabJournalInternals.h#8 $ */ #ifndef SLAB_JOURNAL_INTERNALS_H #define SLAB_JOURNAL_INTERNALS_H #include "slabJournal.h" #include "numeric.h" #include "blockAllocatorInternals.h" #include "blockMapEntry.h" #include "journalPoint.h" #include "slab.h" #include "slabSummary.h" #include "statistics.h" #include "waitQueue.h" /** * Slab journal blocks may have one of two formats, depending upon whether or * not any of the entries in the block are block map increments. Since the * steady state for a VDO is that all of the necessary block map pages will * be allocated, most slab journal blocks will have only data entries. Such * blocks can hold more entries, hence the two formats. **/ /** A single slab journal entry */ struct slabJournalEntry { SlabBlockNumber sbn; JournalOperation operation; }; /** A single slab journal entry in its on-disk form */ typedef union { struct __attribute__((packed)) { uint8_t offsetLow8; uint8_t offsetMid8; #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ unsigned offsetHigh7 : 7; unsigned increment : 1; #else unsigned increment : 1; unsigned offsetHigh7 : 7; #endif } fields; // A raw view of the packed encoding. uint8_t raw[3]; #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ // This view is only valid on little-endian machines and is only present for // ease of directly examining packed entries in GDB. struct __attribute__((packed)) { unsigned offset : 23; unsigned increment : 1; } littleEndian; #endif } __attribute__((packed)) PackedSlabJournalEntry; /** The unpacked representation of the header of a slab journal block */ typedef struct { /** Sequence number for head of journal */ SequenceNumber head; /** Sequence number for this block */ SequenceNumber sequenceNumber; /** The nonce for a given VDO instance */ Nonce nonce; /** Recovery journal point for last entry */ JournalPoint recoveryPoint; /** Metadata type */ VDOMetadataType metadataType; /** Whether this block contains block map increments */ bool hasBlockMapIncrements; /** The number of entries in the block */ JournalEntryCount entryCount; } SlabJournalBlockHeader; /** * The packed, on-disk representation of a slab journal block header. * All fields are kept in little-endian byte order. **/ typedef union __attribute__((packed)) { struct __attribute__((packed)) { /** 64-bit sequence number for head of journal */ byte head[8]; /** 64-bit sequence number for this block */ byte sequenceNumber[8]; /** Recovery journal point for last entry, packed into 64 bits */ PackedJournalPoint recoveryPoint; /** The 64-bit nonce for a given VDO instance */ byte nonce[8]; /** 8-bit metadata type (should always be two, for the slab journal) */ uint8_t metadataType; /** Whether this block contains block map increments */ bool hasBlockMapIncrements; /** 16-bit count of the entries encoded in the block */ byte entryCount[2]; } fields; // A raw view of the packed encoding. uint8_t raw[8 + 8 + 8 + 8 + 1 + 1 + 2]; #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ // This view is only valid on little-endian machines and is only present for // ease of directly examining packed entries in GDB. struct __attribute__((packed)) { SequenceNumber head; SequenceNumber sequenceNumber; PackedJournalPoint recoveryPoint; Nonce nonce; VDOMetadataType metadataType; bool hasBlockMapIncrements; JournalEntryCount entryCount; } littleEndian; #endif } PackedSlabJournalBlockHeader; enum { SLAB_JOURNAL_PAYLOAD_SIZE = VDO_BLOCK_SIZE - sizeof(PackedSlabJournalBlockHeader), SLAB_JOURNAL_FULL_ENTRIES_PER_BLOCK = (SLAB_JOURNAL_PAYLOAD_SIZE * 8) / 25, SLAB_JOURNAL_ENTRY_TYPES_SIZE = ((SLAB_JOURNAL_FULL_ENTRIES_PER_BLOCK - 1) / 8) + 1, SLAB_JOURNAL_ENTRIES_PER_BLOCK = (SLAB_JOURNAL_PAYLOAD_SIZE / sizeof(PackedSlabJournalEntry)), }; /** The payload of a slab journal block which has block map increments */ typedef struct { /* The entries themselves */ PackedSlabJournalEntry entries[SLAB_JOURNAL_FULL_ENTRIES_PER_BLOCK]; /* The bit map indicating which entries are block map increments */ byte entryTypes[SLAB_JOURNAL_ENTRY_TYPES_SIZE]; } __attribute__((packed)) FullSlabJournalEntries; typedef union { /* Entries which include block map increments */ FullSlabJournalEntries fullEntries; /* Entries which are only data updates */ PackedSlabJournalEntry entries[SLAB_JOURNAL_ENTRIES_PER_BLOCK]; /* Ensure the payload fills to the end of the block */ byte space[SLAB_JOURNAL_PAYLOAD_SIZE]; } __attribute__((packed)) SlabJournalPayload; typedef struct { PackedSlabJournalBlockHeader header; SlabJournalPayload payload; } __attribute__((packed)) PackedSlabJournalBlock; typedef struct { uint16_t count; SequenceNumber recoveryStart; } JournalLock; struct slabJournal { /** A waiter object for getting a VIO pool entry */ Waiter resourceWaiter; /** A waiter object for updating the slab summary */ Waiter slabSummaryWaiter; /** A waiter object for getting an extent with which to flush */ Waiter flushWaiter; /** The queue of VIOs waiting to make an entry */ WaitQueue entryWaiters; /** The parent slab reference of this journal */ Slab *slab; /** Whether a tail block commit is pending */ bool waitingToCommit; /** Whether the journal is updating the slab summary */ bool updatingSlabSummary; /** Whether the journal is adding entries from the entryWaiters queue */ bool addingEntries; /** Whether a partial write is in progress */ bool partialWriteInProgress; /** The oldest block in the journal on disk */ SequenceNumber head; /** The oldest block in the journal which may not be reaped */ SequenceNumber unreapable; /** The end of the half-open interval of the active journal */ SequenceNumber tail; /** The next journal block to be committed */ SequenceNumber nextCommit; /** The tail sequence number that is written in the slab summary */ SequenceNumber summarized; /** The tail sequence number that was last summarized in slab summary */ SequenceNumber lastSummarized; /** The sequence number of the recovery journal lock */ SequenceNumber recoveryLock; /** * The number of entries which fit in a single block. Can't use the constant * because unit tests change this number. **/ JournalEntryCount entriesPerBlock; /** * The number of full entries which fit in a single block. Can't use the * constant because unit tests change this number. **/ JournalEntryCount fullEntriesPerBlock; /** The recovery journal of the VDO (slab journal holds locks on it) */ RecoveryJournal *recoveryJournal; /** The slab summary to update tail block location */ SlabSummaryZone *summary; /** The statistics shared by all slab journals in our physical zone */ AtomicSlabJournalStatistics *events; /** A ring of the VIO pool entries for outstanding journal block writes */ RingNode uncommittedBlocks; /** * The current tail block header state. This will be packed into * the block just before it is written. **/ SlabJournalBlockHeader tailHeader; /** A pointer to a block-sized buffer holding the packed block data */ PackedSlabJournalBlock *block; /** The number of blocks in the on-disk journal */ BlockCount size; /** The number of blocks at which to start pushing reference blocks */ BlockCount flushingThreshold; /** The number of blocks at which all reference blocks should be writing */ BlockCount flushingDeadline; /** The number of blocks at which to wait for reference blocks to write */ BlockCount blockingThreshold; /** The number of blocks at which to scrub the slab before coming online */ BlockCount scrubbingThreshold; /** This node is for BlockAllocator to keep a queue of dirty journals */ RingNode dirtyNode; /** The lock for the oldest unreaped block of the journal */ JournalLock *reapLock; /** The locks for each on disk block */ JournalLock locks[]; }; /** * Get the slab journal block offset of the given sequence number. * * @param journal The slab journal * @param sequence The sequence number * * @return the offset corresponding to the sequence number **/ __attribute__((warn_unused_result)) static inline TailBlockOffset getSlabJournalBlockOffset(SlabJournal *journal, SequenceNumber sequence) { return (sequence % journal->size); } /** * Encode a slab journal entry (exposed for unit tests). * * @param tailHeader The unpacked header for the block * @param payload The journal block payload to hold the entry * @param sbn The slab block number of the entry to encode * @param operation The type of the entry **/ void encodeSlabJournalEntry(SlabJournalBlockHeader *tailHeader, SlabJournalPayload *payload, SlabBlockNumber sbn, JournalOperation operation); /** * Decode a slab journal entry. * * @param block The journal block holding the entry * @param entryCount The number of the entry * * @return The decoded entry **/ SlabJournalEntry decodeSlabJournalEntry(PackedSlabJournalBlock *block, JournalEntryCount entryCount) __attribute__((warn_unused_result)); /** * Generate the packed encoding of a slab journal entry. * * @param packed The entry into which to pack the values * @param sbn The slab block number of the entry to encode * @param isIncrement The increment flag **/ static inline void packSlabJournalEntry(PackedSlabJournalEntry *packed, SlabBlockNumber sbn, bool isIncrement) { packed->fields.offsetLow8 = (sbn & 0x0000FF); packed->fields.offsetMid8 = (sbn & 0x00FF00) >> 8; packed->fields.offsetHigh7 = (sbn & 0x7F0000) >> 16; packed->fields.increment = isIncrement ? 1 : 0; } /** * Decode the packed representation of a slab journal entry. * * @param packed The packed entry to decode * * @return The decoded slab journal entry **/ __attribute__((warn_unused_result)) static inline SlabJournalEntry unpackSlabJournalEntry(const PackedSlabJournalEntry *packed) { SlabJournalEntry entry; entry.sbn = packed->fields.offsetHigh7; entry.sbn <<= 8; entry.sbn |= packed->fields.offsetMid8; entry.sbn <<= 8; entry.sbn |= packed->fields.offsetLow8; entry.operation = (packed->fields.increment ? DATA_INCREMENT : DATA_DECREMENT); return entry; } /** * Generate the packed representation of a slab block header. * * @param header The header containing the values to encode * @param packed The header into which to pack the values **/ static inline void packSlabJournalBlockHeader(const SlabJournalBlockHeader *header, PackedSlabJournalBlockHeader *packed) { storeUInt64LE(packed->fields.head, header->head); storeUInt64LE(packed->fields.sequenceNumber, header->sequenceNumber); storeUInt64LE(packed->fields.nonce, header->nonce); storeUInt16LE(packed->fields.entryCount, header->entryCount); packed->fields.metadataType = header->metadataType; packed->fields.hasBlockMapIncrements = header->hasBlockMapIncrements; packJournalPoint(&header->recoveryPoint, &packed->fields.recoveryPoint); } /** * Decode the packed representation of a slab block header. * * @param packed The packed header to decode * @param header The header into which to unpack the values **/ static inline void unpackSlabJournalBlockHeader(const PackedSlabJournalBlockHeader *packed, SlabJournalBlockHeader *header) { *header = (SlabJournalBlockHeader) { .head = getUInt64LE(packed->fields.head), .sequenceNumber = getUInt64LE(packed->fields.sequenceNumber), .nonce = getUInt64LE(packed->fields.nonce), .entryCount = getUInt16LE(packed->fields.entryCount), .metadataType = packed->fields.metadataType, .hasBlockMapIncrements = packed->fields.hasBlockMapIncrements, }; unpackJournalPoint(&packed->fields.recoveryPoint, &header->recoveryPoint); } #endif // SLAB_JOURNAL_INTERNALS_H