/*
 * Copyright (c) 2020 Red Hat, Inc.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
 * 02110-1301, USA.
 *
 * $Id: //eng/vdo-releases/aluminum/src/c++/vdo/kernel/kernelLayer.h#18 $
 */

#ifndef KERNELLAYER_H
#define KERNELLAYER_H

#include <linux/device-mapper.h>

#include "atomic.h"
#include "constants.h"
#include "flush.h"
#include "intMap.h"
#include "physicalLayer.h"
#include "ringNode.h"
#include "volumeGeometry.h"
#include "waitQueue.h"

#include "batchProcessor.h"
#include "bufferPool.h"
#include "deadlockQueue.h"
#include "deviceConfig.h"
#include "histogram.h"
#include "kernelStatistics.h"
#include "kernelTypes.h"
#include "kernelVDO.h"
#include "ktrace.h"
#include "limiter.h"
#include "statistics.h"
#include "workQueue.h"

enum {
  VDO_SECTORS_PER_BLOCK = (VDO_BLOCK_SIZE >> SECTOR_SHIFT)
};

typedef enum {
  LAYER_SIMPLE_THINGS_INITIALIZED,
  LAYER_BUFFER_POOLS_INITIALIZED,
  LAYER_REQUEST_QUEUE_INITIALIZED,
  LAYER_CPU_QUEUE_INITIALIZED,
  LAYER_BIO_ACK_QUEUE_INITIALIZED,
  LAYER_BIO_DATA_INITIALIZED,
  LAYER_STARTING,
  LAYER_RUNNING,
  LAYER_SUSPENDED,
  LAYER_STOPPING,
  LAYER_STOPPED,
  LAYER_RESUMING,
} KernelLayerState;

/* Keep BIO statistics atomically */
typedef struct atomicBioStats {
  atomic64_t read;    // Number of not REQ_WRITE bios
  atomic64_t write;   // Number of REQ_WRITE bios
  atomic64_t discard; // Number of REQ_DISCARD bios
  atomic64_t flush;   // Number of REQ_FLUSH bios
  atomic64_t fua;     // Number of REQ_FUA bios
} AtomicBioStats;

// Data managing the reporting of Albireo timeouts
typedef struct periodicEventReporter {
  uint64_t     lastReportedValue;
  const char  *format;
  atomic64_t   value;
  Jiffies      reportingInterval; // jiffies
  /*
   * Just an approximation. If nonzero, then either the work item has
   * been queued to run, or some other thread currently has
   * responsibility for enqueueing it, or the reporter function is
   * running but hasn't looked at the current value yet.
   *
   * If this is set, don't set the timer again, because we don't want
   * the work item queued twice. Use an atomic xchg or cmpxchg to
   * test-and-set it, and an atomic store to clear it.
   */
  atomic_t     workItemQueued;
  KvdoWorkItem workItem;
  KernelLayer *layer;
} PeriodicEventReporter;

static inline uint64_t getEventCount(PeriodicEventReporter *reporter)
{
  return atomic64_read(&reporter->value);
}
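
/*
 * Illustrative sketch, not part of the original interface: the
 * test-and-set protocol described for workItemQueued above. The
 * function name addToEventCount() and the queue argument are
 * assumptions for the example; only the thread that wins the
 * cmpxchg race may enqueue the work item.
 */
static inline void addToEventCount(PeriodicEventReporter *reporter,
                                   KvdoWorkQueue         *queue,
                                   unsigned int           count)
{
  atomic64_add(count, &reporter->value);
  if (atomic_cmpxchg(&reporter->workItemQueued, 0, 1) == 0) {
    // We won the race, so we alone are responsible for the enqueue;
    // the work item can never be queued twice.
    enqueueWorkQueue(queue, &reporter->workItem);
  }
  // The reporter function clears the flag with an atomic store once it
  // has read the current value: atomic_set(&reporter->workItemQueued, 0);
}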

/**
 * The VDO representation of the target device
 **/
struct kernelLayer {
  PhysicalLayer          common;
  // Layer specific info
  DeviceConfig          *deviceConfig;
  /** A ring of all DeviceConfigs referencing this layer */
  RingNode               deviceConfigRing;
  char                   threadNamePrefix[MAX_QUEUE_NAME_LEN];
  struct kobject         kobj;
  struct kobject         wqDirectory;
  struct kobject         statsDirectory;
  /**
   * A counter value to attach to thread names and log messages to
   * identify the individual device.
   **/
  unsigned int           instance;
  /** Contains the current KernelLayerState, which rarely changes */
  Atomic32               state;
  bool                   noFlushSuspend;
  bool                   allocationsAllowed;
  AtomicBool             processingMessage;
  /** Limit the number of requests that are being processed. */
  Limiter                requestLimiter;
  Limiter                discardLimiter;
  KVDO                   kvdo;
  /** Incoming bios we've had to buffer to avoid deadlock. */
  DeadlockQueue          deadlockQueue;
  // for REQ_FLUSH processing
  struct bio_list        waitingFlushes;
  KVDOFlush             *spareKVDOFlush;
  spinlock_t             flushLock;
  Jiffies                flushArrivalTime;
  /**
   * Bio submission manager used for sending bios to the storage
   * device.
   **/
  IOSubmitter           *ioSubmitter;
  /**
   * Work queue (possibly with multiple threads) for miscellaneous
   * CPU-intensive, non-blocking work.
   **/
  KvdoWorkQueue         *cpuQueue;
  /** N blobs of context data for LZ4 code, one per CPU thread. */
  char                 **compressionContext;
  Atomic32               compressionContextIndex;
  /** Optional work queue for calling bio_endio. */
  KvdoWorkQueue         *bioAckQueue;
  /** Underlying block device info. */
  uint64_t               startingSectorOffset;
  VolumeGeometry         geometry;
  // Memory allocation
  BufferPool            *dataKVIOPool;
  struct bio_set        *bioset;
  // Albireo specific info
  DedupeIndex           *dedupeIndex;
  // Statistics
  atomic64_t             biosSubmitted;
  atomic64_t             biosCompleted;
  atomic64_t             dedupeContextBusy;
  atomic64_t             flushOut;
  AtomicBioStats         biosIn;
  AtomicBioStats         biosInPartial;
  AtomicBioStats         biosOut;
  AtomicBioStats         biosOutCompleted;
  AtomicBioStats         biosAcknowledged;
  AtomicBioStats         biosAcknowledgedPartial;
  AtomicBioStats         biosMeta;
  AtomicBioStats         biosMetaCompleted;
  AtomicBioStats         biosJournal;
  AtomicBioStats         biosPageCache;
  AtomicBioStats         biosJournalCompleted;
  AtomicBioStats         biosPageCacheCompleted;
  // for reporting Albireo timeouts
  PeriodicEventReporter  albireoTimeoutReporter;
  // Debugging
  /* Whether to dump VDO state on shutdown */
  bool                   dumpOnShutdown;
  /**
   * Whether we should collect tracing info. (Actually, this controls
   * allocations; non-null record pointers cause recording.)
   **/
  bool                   vioTraceRecording;
  SampleCounter          traceSampleCounter;
  /* Should we log tracing info? */
  bool                   traceLogging;
  /* Storage for trace data. */
  BufferPool            *traceBufferPool;
  /* Private storage for procfs. */
  void                  *procfsPrivate;
  /* For returning batches of DataKVIOs to their pool */
  BatchProcessor        *dataKVIOReleaser;
  // Administrative operations
  /* The object used to wait for administrative operations to complete */
  struct completion      callbackSync;
  // Statistics reporting
  /* Protects the *statsStorage structs */
  struct mutex           statsMutex;
  /* Used when shutting down the sysfs statistics */
  struct completion      statsShutdown;
  /* true if sysfs statistics directory is set up */
  bool                   statsAdded;
  /* Used to gather statistics without allocating memory */
  VDOStatistics          vdoStatsStorage;
  KernelStatistics       kernelStatsStorage;
};

typedef enum bioQAction {
  BIO_Q_ACTION_COMPRESSED_DATA,
  BIO_Q_ACTION_DATA,
  BIO_Q_ACTION_FLUSH,
  BIO_Q_ACTION_HIGH,
  BIO_Q_ACTION_METADATA,
  BIO_Q_ACTION_READCACHE,
  BIO_Q_ACTION_VERIFY
} BioQAction;

typedef enum cpuQAction {
  CPU_Q_ACTION_COMPLETE_KVIO,
  CPU_Q_ACTION_COMPRESS_BLOCK,
  CPU_Q_ACTION_EVENT_REPORTER,
  CPU_Q_ACTION_HASH_BLOCK,
} CPUQAction;

typedef enum bioAckQAction {
  BIO_ACK_Q_ACTION_ACK,
} BioAckQAction;

typedef void (*DedupeShutdownCallbackFunction)(KernelLayer *layer);

/*
 * Wrapper for the Enqueueable object, to associate it with a kernel
 * layer work item.
 */
typedef struct kvdoEnqueueable {
  KvdoWorkItem workItem;
  Enqueueable  enqueueable;
} KvdoEnqueueable;
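
/*
 * Illustrative sketch, not part of the original interface: a work-queue
 * callback can recover the KvdoEnqueueable wrapper from its embedded
 * work item with container_of(), the same pattern asKernelLayer() uses
 * below. The helper name is hypothetical.
 */
static inline KvdoEnqueueable *workItemAsKvdoEnqueueable(KvdoWorkItem *item)
{
  return container_of(item, KvdoEnqueueable, workItem);
}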

/**
 * Implements LayerFilter.
 **/
bool layerIsNamed(KernelLayer *layer, void *context)
  __attribute__((warn_unused_result));

/**
 * Creates a kernel specific physical layer to be used by VDO.
 *
 * @param startingSector       The sector offset of our table entry in the
 *                             DM device
 * @param instance             Device instantiation counter
 * @param config               The device configuration
 * @param parentKobject        The parent sysfs node
 * @param threadConfigPointer  Where to store the new threadConfig handle
 * @param reason               The reason for any failure during this call
 * @param layerPtr             A pointer to hold the created layer
 *
 * @return VDO_SUCCESS or an error
 **/
int makeKernelLayer(uint64_t        startingSector,
                    unsigned int    instance,
                    DeviceConfig   *config,
                    struct kobject *parentKobject,
                    ThreadConfig  **threadConfigPointer,
                    char          **reason,
                    KernelLayer   **layerPtr)
  __attribute__((warn_unused_result));

/**
 * Prepare to modify a kernel layer.
 *
 * @param layer     The layer to modify
 * @param config    The new device configuration
 * @param errorPtr  A pointer to store the reason for any failure
 *
 * @return VDO_SUCCESS or an error
 **/
int prepareToModifyKernelLayer(KernelLayer  *layer,
                               DeviceConfig *config,
                               char        **errorPtr)
  __attribute__((warn_unused_result));

/**
 * Modify a kernel physical layer.
 *
 * @param layer   The layer to modify
 * @param config  The new device configuration
 *
 * @return VDO_SUCCESS or an error
 **/
int modifyKernelLayer(KernelLayer *layer, DeviceConfig *config)
  __attribute__((warn_unused_result));

/**
 * Free a kernel physical layer.
 *
 * @param layer  The layer, which must have been created by
 *               makeKernelLayer
 **/
void freeKernelLayer(KernelLayer *layer);

/**
 * Make and configure a kernel layer. This method does not alter the VDO
 * state on disk. It should be run from the VDO constructor for devices
 * which have not been started.
 *
 * @param layer       The kernel layer
 * @param loadConfig  Load-time parameters for the VDO
 * @param reason      The reason for any failure during this call
 *
 * @return VDO_SUCCESS or an error
 *
 * @note redundant starts are silently ignored
 **/
int preloadKernelLayer(KernelLayer         *layer,
                       const VDOLoadConfig *loadConfig,
                       char               **reason);

/**
 * Start the kernel layer. This method finishes bringing a VDO online
 * now that a table is being resumed for the first time.
 *
 * @param layer   The kernel layer
 * @param reason  The reason for any failure during this call
 *
 * @return VDO_SUCCESS or an error
 **/
int startKernelLayer(KernelLayer *layer, char **reason);

/**
 * Stop the kernel layer.
 *
 * @param layer  The kernel layer
 **/
void stopKernelLayer(KernelLayer *layer);

/**
 * Suspend the kernel layer.
 *
 * @param layer  The kernel layer
 *
 * @return VDO_SUCCESS or an error
 **/
int suspendKernelLayer(KernelLayer *layer);

/**
 * Resume the kernel layer.
 *
 * @param layer  The kernel layer
 *
 * @return VDO_SUCCESS or an error
 **/
int resumeKernelLayer(KernelLayer *layer);

/**
 * Get the kernel layer state.
 *
 * @param layer  The kernel layer
 *
 * @return the instantaneously correct kernel layer state
 **/
static inline KernelLayerState getKernelLayerState(const KernelLayer *layer)
{
  return atomicLoad32(&layer->state);
}

/**
 * Function call to begin processing a bio passed in from the block layer.
 *
 * @param layer  The physical layer
 * @param bio    The bio from the block layer
 *
 * @return value to return from the VDO map function. Either an error code
 *         or DM_MAPIO_REMAPPED or DM_MAPIO_SUBMITTED (see vdoMapBio for
 *         details).
 **/
int kvdoMapBio(KernelLayer *layer, BIO *bio);
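
/*
 * Illustrative sketch, not part of the original interface: how a
 * hypothetical device-mapper constructor might chain the creation and
 * preload steps above. The helper name and all parameter values are
 * assumptions for the example; error handling is abbreviated.
 */
static inline int exampleCreateAndPreload(uint64_t             startingSector,
                                          unsigned int         instance,
                                          DeviceConfig        *config,
                                          struct kobject      *parentKobject,
                                          const VDOLoadConfig *loadConfig,
                                          ThreadConfig       **threadConfig,
                                          char               **reason,
                                          KernelLayer        **layerPtr)
{
  int result = makeKernelLayer(startingSector, instance, config,
                               parentKobject, threadConfig, reason, layerPtr);
  if (result != VDO_SUCCESS) {
    return result;
  }
  // Preloading does not alter the VDO state on disk; startKernelLayer()
  // finishes bringing the device online when its table is first resumed.
  return preloadKernelLayer(*layerPtr, loadConfig, reason);
}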

/**
 * Convert a generic PhysicalLayer to a KernelLayer.
 *
 * @param layer  The PhysicalLayer to convert
 *
 * @return The PhysicalLayer as a KernelLayer
 **/
static inline KernelLayer *asKernelLayer(PhysicalLayer *layer)
{
  return container_of(layer, KernelLayer, common);
}

/**
 * Convert a block number (or count) to a (512-byte-)sector number.
 *
 * The argument type is sector_t to force conversion to the type we
 * want, although the actual values passed are of various integral
 * types. It's just too easy to forget and do the multiplication
 * without casting, resulting in 32-bit arithmetic that accidentally
 * produces wrong results in devices over 2TB (2**32 sectors).
 *
 * @param [in] layer        the physical layer
 * @param [in] blockNumber  the block number/count
 *
 * @return the sector number/count
 **/
static inline sector_t blockToSector(KernelLayer *layer, sector_t blockNumber)
{
  return (blockNumber * VDO_SECTORS_PER_BLOCK);
}

/**
 * Convert a sector number (or count) to a block number. Does not
 * check to make sure the sector number is an integral number of
 * blocks.
 *
 * @param [in] layer         the physical layer
 * @param [in] sectorNumber  the sector number/count
 *
 * @return the block number/count
 **/
static inline sector_t sectorToBlock(KernelLayer *layer, sector_t sectorNumber)
{
  return (sectorNumber / VDO_SECTORS_PER_BLOCK);
}

/**
 * Convert a sector number to an offset within a block.
 *
 * @param [in] layer         the physical layer
 * @param [in] sectorNumber  the sector number
 *
 * @return the offset within the block
 **/
static inline BlockSize sectorToBlockOffset(KernelLayer *layer,
                                            sector_t     sectorNumber)
{
  unsigned int sectorsPerBlockMask = VDO_SECTORS_PER_BLOCK - 1;
  return to_bytes(sectorNumber & sectorsPerBlockMask);
}

/**
 * Get the block device object currently underlying a kernel layer.
 *
 * @param layer  The kernel layer in question
 *
 * @return The block device object under the layer
 **/
struct block_device *getKernelLayerBdev(const KernelLayer *layer)
  __attribute__((warn_unused_result));

/**
 * Set the layer's active config.
 *
 * @param layer   The kernel layer in question
 * @param config  The config in question
 **/
static inline void setKernelLayerActiveConfig(KernelLayer  *layer,
                                              DeviceConfig *config)
{
  layer->deviceConfig = config;
}

/**
 * Given an error code, return a value we can return to the OS. The
 * input error code may be a system-generated value (such as -EIO), an
 * errno macro used in our code (such as EIO), or a UDS or VDO status
 * code; the result must be something the rest of the OS can consume
 * (negative errno values such as -EIO, in the case of the kernel).
 *
 * @param error  the error code to convert
 *
 * @return a system error code value
 **/
int mapToSystemError(int error);

/**
 * Record and eventually report that some number of dedupe requests
 * reached their expiration time without getting an answer, so we
 * timed out on them.
 *
 * This is called in a timer context, so it shouldn't do the reporting
 * directly.
 *
 * @param layer         The kernel layer for the device
 * @param expiredCount  The number of expired requests we timed out on
 **/
void kvdoReportDedupeTimeout(KernelLayer *layer, unsigned int expiredCount);

/**
 * Wait until there are no requests in progress.
 *
 * @param layer  The kernel layer for the device
 **/
void waitForNoRequestsActive(KernelLayer *layer);
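
/*
 * Illustrative sketch, not part of the original interface: draining
 * in-flight requests before suspending, then converting the internal
 * status code into the negative errno value the kernel expects. The
 * helper name is hypothetical.
 */
static inline int exampleDrainAndSuspend(KernelLayer *layer)
{
  int result;

  // Block until the request limiter shows nothing in progress.
  waitForNoRequestsActive(layer);
  result = suspendKernelLayer(layer);
  // mapToSystemError() accepts -EIO, EIO, or UDS/VDO status codes and
  // returns something the rest of the OS can consume.
  return mapToSystemError(result);
}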

/**
 * Enqueues an item on our internal "cpu queues". Since there is more than
 * one, we rotate through them in hopes of creating some general balance.
 *
 * @param layer  The kernel layer
 * @param item   The work item to enqueue
 */
static inline void enqueueCPUWorkQueue(KernelLayer *layer, KvdoWorkItem *item)
{
  enqueueWorkQueue(layer->cpuQueue, item);
}

/**
 * Adjust parameters to prepare to use a larger physical space.
 * The size must be larger than the current size.
 *
 * @param layer          the kernel layer
 * @param physicalCount  the new physical size in blocks
 *
 * @return VDO_SUCCESS or an error
 */
int prepareToResizePhysical(KernelLayer *layer, BlockCount physicalCount);

/**
 * Adjusts parameters to reflect resizing the underlying device.
 * The size must be larger than the current size.
 *
 * @param layer          the kernel layer
 * @param physicalCount  the new physical count in blocks
 *
 * @return VDO_SUCCESS or an error
 */
int resizePhysical(KernelLayer *layer, BlockCount physicalCount);

/**
 * Adjust parameters to prepare to present a larger logical space.
 * The size must be larger than the current size.
 *
 * @param layer         the kernel layer
 * @param logicalCount  the new logical size in blocks
 *
 * @return VDO_SUCCESS or an error
 */
int prepareToResizeLogical(KernelLayer *layer, BlockCount logicalCount);

/**
 * Adjust parameters to present a larger logical space.
 * The size must be larger than the current size.
 *
 * @param layer         the kernel layer
 * @param logicalCount  the new logical size in blocks
 *
 * @return VDO_SUCCESS or an error
 */
int resizeLogical(KernelLayer *layer, BlockCount logicalCount);

/**
 * Indicate whether the kernel layer is configured to use a separate
 * work queue for acknowledging received and processed bios.
 *
 * Note that this directly controls the handling of write operations,
 * but the compile-time flag USE_BIO_ACK_QUEUE_FOR_READ is also checked
 * for read operations.
 *
 * @param layer  The kernel layer
 *
 * @return Whether a bio-acknowledgement work queue is in use
 **/
static inline bool useBioAckQueue(KernelLayer *layer)
{
  return layer->deviceConfig->threadCounts.bioAckThreads > 0;
}

/**
 * Update bookkeeping for the completion of some number of requests, so
 * that more incoming requests can be accepted.
 *
 * @param layer  The kernel layer
 * @param count  The number of completed requests
 **/
void completeManyRequests(KernelLayer *layer, uint32_t count);

#endif /* KERNELLAYER_H */