/*
 * libhugetlbfs - Easy use of Linux hugepages
 * alloc.c - Simple allocator of regions backed by hugepages
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public License
 * as published by the Free Software Foundation; either version 2.1 of
 * the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
 */
|
|
Packit Service |
b439df |
#define _GNU_SOURCE
|
|
Packit Service |
b439df |
#include <fcntl.h>
|
|
Packit Service |
b439df |
#include <errno.h>
|
|
Packit Service |
b439df |
#include <stdio.h>
|
|
Packit Service |
b439df |
#include <stdlib.h>
|
|
Packit Service |
b439df |
#include <string.h>
|
|
Packit Service |
b439df |
#include <unistd.h>
|
|
Packit Service |
b439df |
#include <time.h>
|
|
Packit Service |
b439df |
#include <sys/mman.h>
|
|
Packit Service |
b439df |
#include <sys/types.h>
|
|
Packit Service |
b439df |
|
|
Packit Service |
b439df |
#include "hugetlbfs.h"
|
|
Packit Service |
b439df |
#include "libhugetlbfs_internal.h"
|
|
Packit Service |
b439df |
|
|
Packit Service |
b439df |
/* Allocate base pages if huge page allocation fails */
|
|
Packit Service |
b439df |
static void *fallback_base_pages(size_t len, ghp_t flags)
|
|
Packit Service |
b439df |
{
|
|
Packit Service |
b439df |
int fd;
|
|
Packit Service |
b439df |
void *buf;
|
|
Packit Service |
b439df |
INFO("get_huge_pages: Falling back to base pages\n");
|
|
Packit Service |
b439df |
|
|
Packit Service |
b439df |
/*
|
|
Packit Service |
b439df |
* Map /dev/zero instead of MAP_ANONYMOUS avoid VMA mergings. Freeing
|
|
Packit Service |
b439df |
* pages depends on /proc/pid/maps to find lengths of allocations.
|
|
Packit Service |
b439df |
* This is a bit lazy and if found to be costly due to either the
|
|
Packit Service |
b439df |
* extra open() or virtual address space usage, we could track active
|
|
Packit Service |
b439df |
* mappings in a lock-protected list instead.
|
|
Packit Service |
b439df |
*/
|
|
Packit Service |
b439df |
fd = open("/dev/zero", O_RDWR);
|
|
Packit Service |
b439df |
if (fd == -1) {
|
|
Packit Service |
b439df |
ERROR("get_huge_pages: Failed to open /dev/zero for fallback");
|
|
Packit Service |
b439df |
return NULL;
|
|
Packit Service |
b439df |
}
|
|
Packit Service |
b439df |
|
|
Packit Service |
b439df |
buf = mmap(NULL, len,
|
|
Packit Service |
b439df |
PROT_READ|PROT_WRITE,
|
|
Packit Service |
b439df |
MAP_PRIVATE,
|
|
Packit Service |
b439df |
fd, 0);
|
|
Packit Service |
b439df |
if (buf == MAP_FAILED) {
|
|
Packit Service |
b439df |
WARNING("Base page fallback failed: %s\n", strerror(errno));
|
|
Packit Service |
b439df |
buf = NULL;
|
|
Packit Service |
b439df |
}
|
|
Packit Service |
b439df |
close(fd);
|
|
Packit Service |
b439df |
|
|
Packit Service |
b439df |
return buf;
|
|
Packit Service |
b439df |
}
|
|
Packit Service |
b439df |
|
|
Packit Service |
b439df |
/**
|
|
Packit Service |
b439df |
* get_huge_pages - Allocate an amount of memory backed by huge pages
|
|
Packit Service |
b439df |
* len: Size of the region to allocate, must be hugepage-aligned
|
|
Packit Service |
b439df |
* flags: Flags specifying the behaviour of the function
|
|
Packit Service |
b439df |
*
|
|
Packit Service |
b439df |
* This function allocates a region of memory that is backed by huge pages
|
|
Packit Service |
b439df |
* and hugepage-aligned. This is not a suitable drop-in for malloc() but a
|
|
Packit Service |
b439df |
* a malloc library could use this function to create a new fixed-size heap
|
|
Packit Service |
b439df |
* similar in principal to what morecore does for glibc malloc.
|
|
Packit Service |
b439df |
*/
|
|
Packit Service |
b439df |
void *get_huge_pages(size_t len, ghp_t flags)
|
|
Packit Service |
b439df |
{
|
|
Packit Service |
b439df |
void *buf;
|
|
Packit Service |
b439df |
int buf_fd = -1;
|
|
Packit Service |
b439df |
int mmap_reserve = __hugetlb_opts.no_reserve ? MAP_NORESERVE : 0;
|
|
Packit Service |
b439df |
int mmap_hugetlb = 0;
|
|
Packit Service |
b439df |
int ret;
|
|
Packit Service |
b439df |
|
|
Packit Service |
b439df |
/* Catch an altogether-too easy typo */
|
|
Packit Service |
b439df |
if (flags & GHR_MASK)
|
|
Packit Service |
b439df |
ERROR("Improper use of GHR_* in get_huge_pages()\n");
|
|
Packit Service |
b439df |
|
|
Packit Service |
b439df |
#ifdef MAP_HUGETLB
|
|
Packit Service |
b439df |
mmap_hugetlb = MAP_HUGETLB;
|
|
Packit Service |
b439df |
#endif
|
|
Packit Service |
b439df |
|
|
Packit Service |
b439df |
if (__hugetlb_opts.map_hugetlb &&
|
|
Packit Service |
b439df |
gethugepagesize() == kernel_default_hugepage_size()) {
|
|
Packit Service |
b439df |
/* Because we can use MAP_HUGETLB, we simply mmap the region */
|
|
Packit Service |
b439df |
buf = mmap(NULL, len, PROT_READ|PROT_WRITE,
|
|
Packit Service |
b439df |
MAP_PRIVATE|MAP_ANONYMOUS|mmap_hugetlb|mmap_reserve,
|
|
Packit Service |
b439df |
0, 0);
|
|
Packit Service |
b439df |
} else {
|
|
Packit Service |
b439df |
/* Create a file descriptor for the new region */
|
|
Packit Service |
b439df |
buf_fd = hugetlbfs_unlinked_fd();
|
|
Packit Service |
b439df |
if (buf_fd < 0) {
|
|
Packit Service |
b439df |
WARNING("Couldn't open hugetlbfs file for %zd-sized buffer\n",
|
|
Packit Service |
b439df |
len);
|
|
Packit Service |
b439df |
return NULL;
|
|
Packit Service |
b439df |
}
|
|
Packit Service |
b439df |
|
|
Packit Service |
b439df |
/* Map the requested region */
|
|
Packit Service |
b439df |
buf = mmap(NULL, len, PROT_READ|PROT_WRITE,
|
|
Packit Service |
b439df |
MAP_PRIVATE|mmap_reserve, buf_fd, 0);
|
|
Packit Service |
b439df |
}
|
|
Packit Service |
b439df |
|
|
Packit Service |
b439df |
if (buf == MAP_FAILED) {
|
|
Packit Service |
b439df |
if (buf_fd >= 0)
|
|
Packit Service |
b439df |
close(buf_fd);
|
|
Packit Service |
b439df |
|
|
Packit Service |
b439df |
WARNING("get_huge_pages: New region mapping failed (flags: 0x%lX): %s\n",
|
|
Packit Service |
b439df |
flags, strerror(errno));
|
|
Packit Service |
b439df |
return NULL;
|
|
Packit Service |
b439df |
}
|
|
Packit Service |
b439df |
|
|
Packit Service |
b439df |
/* Fault the region to ensure accesses succeed */
|
|
Packit Service |
b439df |
ret = hugetlbfs_prefault(buf, len);
|
|
Packit Service |
b439df |
if (ret != 0) {
|
|
Packit Service |
b439df |
munmap(buf, len);
|
|
Packit Service |
b439df |
if (buf_fd >= 0)
|
|
Packit Service |
b439df |
close(buf_fd);
|
|
Packit Service |
b439df |
|
|
Packit Service |
b439df |
WARNING("get_huge_pages: Prefaulting failed (flags: 0x%lX): %s\n",
|
|
Packit Service |
b439df |
flags, strerror(ret));
|
|
Packit Service |
b439df |
return NULL;
|
|
Packit Service |
b439df |
}
|
|
Packit Service |
b439df |
|
|
Packit Service |
b439df |
/* Close the file so we do not have to track the descriptor */
|
|
Packit Service |
b439df |
if (buf_fd >= 0 && close(buf_fd) != 0) {
|
|
Packit Service |
b439df |
WARNING("Failed to close new buffer fd: %s\n", strerror(errno));
|
|
Packit Service |
b439df |
munmap(buf, len);
|
|
Packit Service |
b439df |
return NULL;
|
|
Packit Service |
b439df |
}
|
|
Packit Service |
b439df |
|
|
Packit Service |
b439df |
/* woo, new buffer of shiny */
|
|
Packit Service |
b439df |
return buf;
|
|
Packit Service |
b439df |
}
|
|
Packit Service |
b439df |
|
|
Packit Service |
b439df |
#define MAPS_BUF_SZ 4096
/*
 * Unmap the region starting at or near ptr by scanning /proc/self/maps
 * for its extent.
 *
 * ptr:     start of (or a pointer into the first page of) the region
 * aligned: non-zero if ptr is known to be the exact mapping start
 *          (get_huge_pages); zero if ptr may have been offset by cache
 *          coloring (get_hugepage_region) and the real start must be
 *          found by aligning down.
 */
static void __free_huge_pages(void *ptr, int aligned)
{
	FILE *fd;
	char line[MAPS_BUF_SZ];
	unsigned long start = 0, end = 0;
	unsigned long palign = 0, hpalign = 0;
	unsigned long hpalign_end = 0;

	/*
	 * /proc/self/maps is used to determine the length of the original
	 * allocation. As mappings are based on different files, we can
	 * assume that maps will not merge. If the hugepages were truly
	 * anonymous, this assumption would be broken.
	 */
	fd = fopen("/proc/self/maps", "r");
	if (!fd) {
		ERROR("Failed to open /proc/self/maps\n");
		return;
	}

	/*
	 * An unaligned address allocated by get_hugepage_region()
	 * could be either page or hugepage aligned
	 */
	if (!aligned) {
		palign = ALIGN_DOWN((unsigned long)ptr, getpagesize());
		hpalign = ALIGN_DOWN((unsigned long)ptr, gethugepagesize());
	}

	/* Parse /proc/maps for address ranges line by line */
	while (!feof(fd)) {
		char *bufptr;
		char *saveptr = NULL;

		/* Read a line of input */
		if (fgets(line, MAPS_BUF_SZ, fd) == NULL)
			break;

		/* Parse the line to get the start and end of each mapping */
		bufptr = strtok_r(line, " ", &saveptr);
		bufptr = strtok_r(bufptr, "-", &saveptr);
		start = strtoull(bufptr, NULL, 16);
		/* bufptr now points at the end address of the "start-end" pair */
		bufptr = strtok_r(NULL, "-", &saveptr);

		/* If the correct mapping is found, remove it */
		if (start == (unsigned long)ptr) {
			end = strtoull(bufptr, NULL, 16);
			munmap(ptr, end - start);
			break;
		}

		/* If the passed address is aligned, just move along */
		if (aligned)
			continue;

		/*
		 * If an address is hpage-aligned, record it but keep looking.
		 * We might find a page-aligned or exact address later
		 */
		if (start == hpalign) {
			hpalign_end = strtoull(bufptr, NULL, 16);
			continue;
		}

		/* If an address is page-aligned, free it */
		if (start == palign) {
			end = strtoull(bufptr, NULL, 16);
			munmap((void *)start, end - start);
			break;
		}

	}

	/*
	 * If no exact or page-aligned address was found, check for a
	 * hpage-aligned address. If found, free it, otherwise warn that
	 * the ptr pointed nowhere
	 */
	if (end == 0) {
		if (hpalign_end == 0)
			ERROR("hugepages_free using invalid or double free\n");
		else
			munmap((void *)hpalign, hpalign_end - hpalign);
	}

	fclose(fd);
}
|
|
Packit Service |
b439df |
|
|
Packit Service |
b439df |
/**
 * free_huge_pages - Free a region allocated that was backed by large pages
 * ptr - The pointer to the buffer returned by get_huge_pages()
 *
 * This function finds a region to free based on the contents of
 * /proc/pid/maps. The assumption is made that the ptr is the start of
 * a hugepage region allocated with get_huge_pages. No checking is made
 * that the pointer is to a hugepage backed region.
 */
void free_huge_pages(void *ptr)
{
	/* aligned == 1: ptr is the exact mapping start, no alignment search */
	__free_huge_pages(ptr, 1);
}
|
|
Packit Service |
b439df |
|
|
Packit Service |
b439df |
/*
 * Offset the buffer using bytes wasted due to alignment to avoid using the
 * same cache lines for the start of every buffer returned by
 * get_huge_pages(). A small effort is made to select a random cacheline
 * rather than sequential lines to give decent behaviour on average.
 *
 * buf:         start of the buffer to color
 * len:         requested length of the buffer
 * color_bytes: slack bytes available for offsetting (alignment wastage)
 *
 * Returns buf advanced by a whole number of cache lines within color_bytes.
 */
void *cachecolor(void *buf, size_t len, size_t color_bytes)
{
	static long cacheline_size = 0;
	static int linemod = 0;
	char *bytebuf = (char *)buf;
	int numlines;
	int line = 0;

	/* Lookup our cacheline size once */
	if (cacheline_size == 0) {
		cacheline_size = sysconf(_SC_LEVEL2_CACHE_LINESIZE);
		/*
		 * sysconf() returns 0 or -1 when the cacheline size is
		 * unknown; fall back to a common line size so the division
		 * below cannot divide by zero (or go negative).
		 */
		if (cacheline_size <= 0)
			cacheline_size = 64;
		linemod = time(NULL);
	}

	numlines = color_bytes / cacheline_size;
	DEBUG("%d lines of cacheline size %ld due to %zd wastage\n",
		numlines, cacheline_size, color_bytes);
	if (numlines) {
		line = linemod % numlines;
		bytebuf += cacheline_size * line;

		/* Pseudo-ish random line selection */
		linemod += len % numlines;
	}
	DEBUG("Using line offset %d from start\n", line);

	return bytebuf;
}
|
|
Packit Service |
b439df |
|
|
Packit Service |
b439df |
/**
|
|
Packit Service |
b439df |
* get_hugepage_region - Allocate an amount of memory backed by huge pages
|
|
Packit Service |
b439df |
*
|
|
Packit Service |
b439df |
* len: Size of the region to allocate
|
|
Packit Service |
b439df |
* flags: Flags specifying the behaviour of the function
|
|
Packit Service |
b439df |
*
|
|
Packit Service |
b439df |
* This function allocates a region of memory backed by huge pages. Care should
|
|
Packit Service |
b439df |
* be taken when using this function as a drop-in replacement for malloc() as
|
|
Packit Service |
b439df |
* memory can be wasted if the length is not hugepage-aligned. This function
|
|
Packit Service |
b439df |
* is more relaxed than get_huge_pages() in that it allows fallback to small
|
|
Packit Service |
b439df |
* pages when requested.
|
|
Packit Service |
b439df |
*/
|
|
Packit Service |
b439df |
void *get_hugepage_region(size_t len, ghr_t flags)
|
|
Packit Service |
b439df |
{
|
|
Packit Service |
b439df |
size_t aligned_len, wastage;
|
|
Packit Service |
b439df |
void *buf;
|
|
Packit Service |
b439df |
|
|
Packit Service |
b439df |
/* Catch an altogether-too easy typo */
|
|
Packit Service |
b439df |
if (flags & GHP_MASK)
|
|
Packit Service |
b439df |
ERROR("Improper use of GHP_* in get_hugepage_region()\n");
|
|
Packit Service |
b439df |
|
|
Packit Service |
b439df |
/* Align the len parameter to a hugepage boundary and allocate */
|
|
Packit Service |
b439df |
aligned_len = ALIGN(len, gethugepagesize());
|
|
Packit Service |
b439df |
buf = get_huge_pages(aligned_len, GHP_DEFAULT);
|
|
Packit Service |
b439df |
if (buf == NULL && (flags & GHR_FALLBACK)) {
|
|
Packit Service |
b439df |
aligned_len = ALIGN(len, getpagesize());
|
|
Packit Service |
b439df |
buf = fallback_base_pages(len, flags);
|
|
Packit Service |
b439df |
}
|
|
Packit Service |
b439df |
|
|
Packit Service |
b439df |
/* Calculate wastage for coloring */
|
|
Packit Service |
b439df |
wastage = aligned_len - len;
|
|
Packit Service |
b439df |
if (wastage != 0 && !(flags & GHR_COLOR))
|
|
Packit Service |
b439df |
DEBUG("get_hugepage_region: Wasted %zd bytes due to alignment\n",
|
|
Packit Service |
b439df |
wastage);
|
|
Packit Service |
b439df |
|
|
Packit Service |
b439df |
/* Only colour if requested */
|
|
Packit Service |
b439df |
if (flags & GHR_COLOR)
|
|
Packit Service |
b439df |
buf = cachecolor(buf, len, wastage);
|
|
Packit Service |
b439df |
|
|
Packit Service |
b439df |
return buf;
|
|
Packit Service |
b439df |
}
|
|
Packit Service |
b439df |
|
|
Packit Service |
b439df |
/**
 * free_hugepage_region - Free a region allocated by get_hugepage_region
 * ptr - The pointer to the buffer returned by get_hugepage_region
 *
 * This function finds a region to free based on the contents of
 * /proc/pid/maps. The assumption is made that the ptr is the start of
 * a hugepage region allocated with get_hugepage_region. No checking is made
 * that the pointer is to a hugepage backed region.
 */
void free_hugepage_region(void *ptr)
{
	/*
	 * aligned == 0: ptr may have been offset by cache coloring, so the
	 * real mapping start is found by aligning down to page/hugepage
	 * boundaries.
	 */
	__free_huge_pages(ptr, 0);
}
|