/*
 * Copyright (c) 2020 Red Hat, Inc.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
 * 02110-1301, USA.
 *
 * $Id: //eng/uds-releases/jasper/kernelLinux/uds/memoryLinuxKernel.c#6 $
 */

#include <linux/delay.h>
#include <linux/io.h>
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/sched/mm.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>

#include "compilerDefs.h"
#include "logger.h"
#include "memoryAlloc.h"
#include "permassert.h"

/*
 ******************************************************************************
 * Production: UDS and VDO keep track of which threads are allowed to allocate
 * memory freely, and which threads must be careful to not do a memory
 * allocation that does an I/O request.  The allocatingThreads ThreadRegistry
 * and its associated methods implement this tracking.
 */

static ThreadRegistry allocatingThreads;

/*****************************************************************************/
static bool allocationsAllowed(void)
{
  const bool *pointer = lookupThread(&allocatingThreads);
  return pointer != NULL ? *pointer : false;
}

/*****************************************************************************/
void registerAllocatingThread(RegisteredThread *newThread, const bool *flagPtr)
{
  if (flagPtr == NULL) {
    static const bool allocationAlwaysAllowed = true;
    flagPtr = &allocationAlwaysAllowed;
  }
  registerThread(&allocatingThreads, newThread, flagPtr);
}

/*****************************************************************************/
void unregisterAllocatingThread(void)
{
  unregisterThread(&allocatingThreads);
}

/*
 ******************************************************************************
 * Production: We track how much memory has been allocated and freed.  When we
 * unload the UDS module, we log an error if we have not freed all the memory
 * that we allocated.  Nearly all memory allocation and freeing is done using
 * this module.
 *
 * We do not use kernel functions like the kvasprintf() method, which allocate
 * memory indirectly using kmalloc.
 *
 * These data structures and methods are used to track the amount of memory
 * used.
 */

// We allocate very few large objects, and allocation/deallocation isn't done
// in a performance-critical stage for us, so a linked list should be fine.
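// Each tracked vmalloc allocation gets one of these nodes; the nodes form a
// singly linked list headed by memoryStats.vmallocList below.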
typedef struct vmallocBlockInfo {
  void                    *ptr;
  size_t                   size;
  struct vmallocBlockInfo *next;
} VmallocBlockInfo;

static struct {
  spinlock_t        lock;
  size_t            kmallocBlocks;
  size_t            kmallocBytes;
  size_t            vmallocBlocks;
  size_t            vmallocBytes;
  size_t            peakBytes;
  VmallocBlockInfo *vmallocList;
} memoryStats __cacheline_aligned;

/*****************************************************************************/
static void updatePeakUsage(void)
{
  size_t totalBytes = memoryStats.kmallocBytes + memoryStats.vmallocBytes;
  if (totalBytes > memoryStats.peakBytes) {
    memoryStats.peakBytes = totalBytes;
  }
}

/*****************************************************************************/
static void addKmallocBlock(size_t size)
{
  unsigned long flags;
  spin_lock_irqsave(&memoryStats.lock, flags);
  memoryStats.kmallocBlocks++;
  memoryStats.kmallocBytes += size;
  updatePeakUsage();
  spin_unlock_irqrestore(&memoryStats.lock, flags);
}

/*****************************************************************************/
static void removeKmallocBlock(size_t size)
{
  unsigned long flags;
  spin_lock_irqsave(&memoryStats.lock, flags);
  memoryStats.kmallocBlocks--;
  memoryStats.kmallocBytes -= size;
  spin_unlock_irqrestore(&memoryStats.lock, flags);
}

/*****************************************************************************/
static void addVmallocBlock(VmallocBlockInfo *block)
{
  unsigned long flags;
  spin_lock_irqsave(&memoryStats.lock, flags);
  block->next = memoryStats.vmallocList;
  memoryStats.vmallocList = block;
  memoryStats.vmallocBlocks++;
  memoryStats.vmallocBytes += block->size;
  updatePeakUsage();
  spin_unlock_irqrestore(&memoryStats.lock, flags);
}

/*****************************************************************************/
static void removeVmallocBlock(void *ptr)
{
  VmallocBlockInfo *block, **blockPtr;
  unsigned long flags;
  spin_lock_irqsave(&memoryStats.lock, flags);
  for (blockPtr = &memoryStats.vmallocList;
       (block = *blockPtr) != NULL;
       blockPtr = &block->next) {
    if (block->ptr == ptr) {
      *blockPtr = block->next;
      memoryStats.vmallocBlocks--;
      memoryStats.vmallocBytes -= block->size;
      break;
    }
  }
  spin_unlock_irqrestore(&memoryStats.lock, flags);
  if (block != NULL) {
    FREE(block);
  } else {
    logInfo("attempting to remove ptr %" PRIptr " not found in vmalloc list",
            ptr);
  }
}

/**
 * Determine whether allocating a memory block should use kmalloc or vmalloc.
 *
 * vmalloc can allocate any integral number of pages.
 *
 * kmalloc can allocate any number of bytes up to a configured limit, which
 * defaults to 8 megabytes on some of our systems.  kmalloc is especially good
 * when memory is being both allocated and freed, and it does this efficiently
 * in a multi CPU environment.
 *
 * kmalloc usually rounds the size of the block up to the next power of two.
 * So when the requested block is bigger than PAGE_SIZE / 2 bytes, kmalloc will
 * never give you less space than the corresponding vmalloc allocation.
 * Sometimes vmalloc will use less overhead than kmalloc.
 *
 * The advantages of kmalloc do not help out UDS or VDO, because we allocate
 * all our memory up front and do not free and reallocate it.  Sometimes we
 * have problems using kmalloc, because the Linux memory page map can become so
 * fragmented that kmalloc will not give us a 32KB chunk.  We have used vmalloc
 * as a backup to kmalloc in the past, and a followup vmalloc of 32KB will
 * work.  But there is no strong case to be made for using kmalloc over vmalloc
 * for these size chunks.
 *
 * The kmalloc/vmalloc boundary is set at 4KB, and kmalloc gets the 4KB
 * requests.
 * There is no strong reason for favoring either kmalloc or vmalloc for 4KB
 * requests, except that the keeping of vmalloc statistics uses a linked list
 * implementation.  Using a simple test, this choice of boundary results in 132
 * vmalloc calls.  Using vmalloc for requests of exactly 4KB results in an
 * additional 6374 vmalloc calls, which will require a change to the code that
 * tracks vmalloc statistics.
 *
 * @param size  How many bytes to allocate
 **/
static INLINE bool useKmalloc(size_t size)
{
  return size <= PAGE_SIZE;
}

/*****************************************************************************/
int allocateMemory(size_t size, size_t align, const char *what, void *ptr)
{
  if (ptr == NULL) {
    return UDS_INVALID_ARGUMENT;
  }
  if (size == 0) {
    *((void **) ptr) = NULL;
    return UDS_SUCCESS;
  }

  /*
   * The __GFP_RETRY_MAYFAIL means: The VM implementation will retry memory
   * reclaim procedures that have previously failed if there is some indication
   * that progress has been made elsewhere.  It can wait for other tasks to
   * attempt high level approaches to freeing memory such as compaction (which
   * removes fragmentation) and page-out.  There is still a definite limit to
   * the number of retries, but it is a larger limit than with __GFP_NORETRY.
   * Allocations with this flag may fail, but only when there is genuinely
   * little unused memory.  While these allocations do not directly trigger the
   * OOM killer, their failure indicates that the system is likely to need to
   * use the OOM killer soon.  The caller must handle failure, but can
   * reasonably do so by failing a higher-level request, or completing it only
   * in a much less efficient manner.
   */
  const gfp_t gfpFlags = GFP_KERNEL | __GFP_ZERO | __GFP_RETRY_MAYFAIL;

  bool allocationsRestricted = !allocationsAllowed();
  unsigned int noioFlags;
  if (allocationsRestricted) {
    noioFlags = memalloc_noio_save();
  }

  unsigned long startTime = jiffies;
  void *p = NULL;
  if (useKmalloc(size) && (align < PAGE_SIZE)) {
    p = kmalloc(size, gfpFlags | __GFP_NOWARN);
    if (p == NULL) {
      /*
       * If we had just done kmalloc(size, gfpFlags) it is possible that the
       * allocation would fail (see VDO-3688).  The kernel log would then
       * contain a long report about the failure.  Although the failure occurs
       * because there is no page available to allocate, by the time it logs
       * the available space, there is a page available.  So hopefully a short
       * sleep will allow the page reclaimer to free a single page, which is
       * all that we need.
       */
      msleep(1);
      p = kmalloc(size, gfpFlags);
    }
    if (p != NULL) {
      addKmallocBlock(ksize(p));
    }
  } else {
    VmallocBlockInfo *block;
    if (ALLOCATE(1, VmallocBlockInfo, __func__, &block) == UDS_SUCCESS) {
      /*
       * If we just do __vmalloc(size, gfpFlags, PAGE_KERNEL) it is possible
       * that the allocation will fail (see VDO-3661).  The kernel log will
       * then contain a long report about the failure.  Although the failure
       * occurs because there are not enough pages available to allocate, by
       * the time it logs the available space, there may be enough pages
       * available for smaller allocations.  So hopefully a short sleep will
       * allow the page reclaimer to free enough pages for us.
       *
       * For larger allocations, the kernel page_alloc code is racing against
       * the page reclaimer.  If the page reclaimer can stay ahead of
       * page_alloc, the __vmalloc will succeed.  But if page_alloc overtakes
       * the page reclaimer, the allocation fails.  It is possible that more
       * retries will succeed.
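       *
       * The loop below therefore retries the __vmalloc, sleeping briefly
       * between attempts and giving up after roughly one second.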
       */
      for (;;) {
        p = __vmalloc(size, gfpFlags | __GFP_NOWARN, PAGE_KERNEL);
        // Try again unless we succeeded or more than 1 second has elapsed.
        if ((p != NULL) || (jiffies_to_msecs(jiffies - startTime) > 1000)) {
          break;
        }
        msleep(1);
      }
      if (p == NULL) {
        // Try one more time, logging a failure for this call.
        p = __vmalloc(size, gfpFlags, PAGE_KERNEL);
      }
      if (p == NULL) {
        FREE(block);
      } else {
        block->ptr = p;
        block->size = PAGE_ALIGN(size);
        addVmallocBlock(block);
      }
    }
  }

  if (allocationsRestricted) {
    memalloc_noio_restore(noioFlags);
  }

  if (p == NULL) {
    unsigned int duration = jiffies_to_msecs(jiffies - startTime);
    logError("Could not allocate %zu bytes for %s in %u msecs",
             size, what, duration);
    return -ENOMEM;
  }
  *((void **) ptr) = p;
  return UDS_SUCCESS;
}

/*****************************************************************************/
void *allocateMemoryNowait(size_t      size,
                           const char *what __attribute__((unused)))
{
  void *p = kmalloc(size, GFP_NOWAIT | __GFP_ZERO);
  if (p != NULL) {
    addKmallocBlock(ksize(p));
  }
  return p;
}

/*****************************************************************************/
void freeMemory(void *ptr)
{
  if (ptr != NULL) {
    if (is_vmalloc_addr(ptr)) {
      removeVmallocBlock(ptr);
      vfree(ptr);
    } else {
      removeKmallocBlock(ksize(ptr));
      kfree(ptr);
    }
  }
}

/*****************************************************************************/
int reallocateMemory(void       *ptr,
                     size_t      oldSize,
                     size_t      size,
                     const char *what,
                     void       *newPtr)
{
  // Handle special case of zero sized result
  if (size == 0) {
    FREE(ptr);
    *(void **) newPtr = NULL;
    return UDS_SUCCESS;
  }

  int result = ALLOCATE(size, char, what, newPtr);
  if (result != UDS_SUCCESS) {
    return result;
  }

  if (ptr != NULL) {
    if (oldSize < size) {
      size = oldSize;
    }
    memcpy(*((void **) newPtr), ptr, size);
    FREE(ptr);
  }
  return UDS_SUCCESS;
}

/*****************************************************************************/
void memoryInit(void)
{
  spin_lock_init(&memoryStats.lock);
  initializeThreadRegistry(&allocatingThreads);
}

/*****************************************************************************/
void memoryExit(void)
{
  ASSERT_LOG_ONLY(memoryStats.kmallocBytes == 0,
                  "kmalloc memory used (%zd bytes in %zd blocks)"
                  " is returned to the kernel",
                  memoryStats.kmallocBytes, memoryStats.kmallocBlocks);
  ASSERT_LOG_ONLY(memoryStats.vmallocBytes == 0,
                  "vmalloc memory used (%zd bytes in %zd blocks)"
                  " is returned to the kernel",
                  memoryStats.vmallocBytes, memoryStats.vmallocBlocks);
  logDebug("%s peak usage %zd bytes", THIS_MODULE->name,
           memoryStats.peakBytes);
}

/**********************************************************************/
void getMemoryStats(uint64_t *bytesUsed, uint64_t *peakBytesUsed)
{
  unsigned long flags;
  spin_lock_irqsave(&memoryStats.lock, flags);
  *bytesUsed     = memoryStats.kmallocBytes + memoryStats.vmallocBytes;
  *peakBytesUsed = memoryStats.peakBytes;
  spin_unlock_irqrestore(&memoryStats.lock, flags);
}

/**********************************************************************/
void reportMemoryUsage()
{
  unsigned long flags;
  spin_lock_irqsave(&memoryStats.lock, flags);
  uint64_t kmallocBlocks = memoryStats.kmallocBlocks;
  uint64_t kmallocBytes  = memoryStats.kmallocBytes;
  uint64_t vmallocBlocks = memoryStats.vmallocBlocks;
  uint64_t vmallocBytes  = memoryStats.vmallocBytes;
  uint64_t peakUsage     = memoryStats.peakBytes;
  spin_unlock_irqrestore(&memoryStats.lock, flags);
  uint64_t totalBytes = kmallocBytes + vmallocBytes;
  logInfo("current module memory tracking"
          " (actual allocation sizes, not requested):");
  logInfo("  %llu bytes in %llu kmalloc blocks",
          kmallocBytes, kmallocBlocks);
  logInfo("  %llu bytes in %llu vmalloc blocks",
          vmallocBytes, vmallocBlocks);
  logInfo("  total %llu bytes, peak usage %llu bytes",
          totalBytes, peakUsage);
}
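
/*
 * Usage sketch (hypothetical caller; MyStruct and mayAllocate are illustrative
 * names only).  A thread that must not issue I/O while allocating registers
 * itself with a false flag, then allocates through the ALLOCATE/FREE macros
 * from memoryAlloc.h, which reach allocateMemory() and freeMemory() above:
 *
 *   static const bool mayAllocate = false;
 *   RegisteredThread thread;
 *   registerAllocatingThread(&thread, &mayAllocate);
 *
 *   MyStruct *data;
 *   int result = ALLOCATE(1, MyStruct, __func__, &data);
 *   if (result == UDS_SUCCESS) {
 *     // ... use data ...
 *     FREE(data);
 *   }
 *
 *   unregisterAllocatingThread();
 */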