/*
* Copyright (C) 2014 - 2019 Intel Corporation.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
* 1. Redistributions of source code must retain the above copyright notice(s),
* this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright notice(s),
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ``AS IS'' AND ANY EXPRESS
* OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
* EVENT SHALL THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
* OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
* ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <memkind/internal/memkind_hugetlb.h>
#include <memkind/internal/memkind_default.h>
#include <memkind/internal/memkind_arena.h>
#include <memkind/internal/memkind_private.h>
#include <memkind/internal/memkind_log.h>
#include <sys/mman.h>
#ifndef MAP_HUGETLB
#define MAP_HUGETLB 0x40000
#endif
#ifndef MAP_HUGE_2MB
#define MAP_HUGE_2MB (21 << 26)
#endif
#include <stdio.h>
#include <errno.h>
#include <numa.h>
#include <pthread.h>
#include <dirent.h>
/*
 * Operations table exported as MEMKIND_HUGETLB_OPS.
 *
 * Allocation and statistics callbacks are the memkind_arena_* functions,
 * destroy and malloc_usable_size come from memkind_default_*, and the
 * availability check, mmap flags and one-time init are the hugetlb-specific
 * functions defined in this file.
 */
MEMKIND_EXPORT struct memkind_ops MEMKIND_HUGETLB_OPS = {
    /* kind life cycle */
    .init_once = memkind_hugetlb_init_once,
    .create = memkind_arena_create,
    .destroy = memkind_default_destroy,
    .finalize = memkind_arena_finalize,

    /* allocation entry points */
    .malloc = memkind_arena_malloc,
    .calloc = memkind_arena_calloc,
    .posix_memalign = memkind_arena_posix_memalign,
    .realloc = memkind_arena_realloc,
    .free = memkind_arena_free,
    .malloc_usable_size = memkind_default_malloc_usable_size,
    .defrag_reallocate = memkind_arena_defrag_reallocate,

    /* backing memory selection */
    .check_available = memkind_hugetlb_check_available_2mb,
    .get_mmap_flags = memkind_hugetlb_get_mmap_flags,
    .get_arena = memkind_thread_get_arena,

    /* statistics */
    .get_stat = memkind_arena_get_kind_stat
};
/*
 * Forward declarations of file-local helpers that are used before their
 * definitions appear later in this file.
 */
static int get_nr_overcommit_hugepages_cached(size_t pagesize, size_t *out);
static int get_nr_hugepages_cached(size_t pagesize, struct bitmask *nodemask,
size_t *out);
static int memkind_hugetlb_check_available(struct memkind *kind,
size_t huge_size);
/*
 * Report the mmap() flags used for this kind: a private anonymous mapping
 * backed by 2 MB huge pages.
 *
 * kind:  not used by this implementation
 * flags: output, receives the flag combination
 *
 * Always returns 0.
 */
MEMKIND_EXPORT int memkind_hugetlb_get_mmap_flags(struct memkind *kind,
                                                  int *flags)
{
    const int hugepage_2mb_flags = MAP_HUGETLB | MAP_HUGE_2MB;

    *flags = MAP_PRIVATE | MAP_ANONYMOUS | hugepage_2mb_flags;
    return 0;
}
/*
 * One-time initialization hook, installed as .init_once in
 * MEMKIND_HUGETLB_OPS. It delegates to memkind_init() for MEMKIND_HUGETLB.
 */
MEMKIND_EXPORT void memkind_hugetlb_init_once(void)
{
memkind_init(MEMKIND_HUGETLB, true);
}
/*
 * Availability check for 2 MB huge pages.
 *
 * Forwards to memkind_hugetlb_check_available() with a huge page size of
 * 2097152 bytes and returns its result unchanged.
 */
MEMKIND_EXPORT int memkind_hugetlb_check_available_2mb(struct memkind *kind)
{
    const size_t huge_size_2mb = (size_t)2 << 20; /* 2097152 bytes */

    return memkind_hugetlb_check_available(kind, huge_size_2mb);
}
/*
 * Check whether huge pages of the requested size can be obtained for a kind.
 *
 * kind:      kind to check; when its ops provide get_mbind_nodemask, only the
 *            nodes reported by that callback are inspected, otherwise all
 *            nodes are considered
 * huge_size: the huge page size in bytes; 0 selects 2 MB
 *
 * Returns 0 when the cached sysfs data shows at least one persistent huge
 * page on the selected nodes or a non-zero overcommit limit. Otherwise
 * returns the error reported by get_mbind_nodemask or by the cached lookups,
 * or MEMKIND_ERROR_HUGETLB when neither source of huge pages is available.
 */
static int memkind_hugetlb_check_available(struct memkind *kind,
                                           size_t huge_size)
{
    int err = 0;
    nodemask_t nodemask;
    struct bitmask nodemask_bm = {NUMA_NUM_NODES, nodemask.n};
    size_t nr_persistent_hugepages = 0;
    size_t nr_overcommit_hugepages = 0;

    /* on x86_64 default huge page size is 2MB */
    if (huge_size == 0) {
        huge_size = 2097152;
    }

    if (kind->ops->get_mbind_nodemask) {
        err = kind->ops->get_mbind_nodemask(kind, nodemask.n, NUMA_NUM_NODES);
        if (err) {
            /* the nodemask may not have been filled in; do not read it */
            return err;
        }
    } else {
        numa_bitmask_setall(&nodemask_bm);
    }

    err = get_nr_hugepages_cached(huge_size, &nodemask_bm,
                                  &nr_persistent_hugepages);
    if (err) {
        return err;
    }

    err = get_nr_overcommit_hugepages_cached(huge_size,
                                             &nr_overcommit_hugepages);
    if (err) {
        return err;
    }

    if (!nr_overcommit_hugepages && !nr_persistent_hugepages) {
        log_err("Persistent hugepages and overcommit hugepages are not available.");
        return MEMKIND_ERROR_HUGETLB;
    }

    return 0;
}
/* Cached sysfs data describing a single huge page size. */
struct hugepage_size_info {
/* huge page size in bytes */
size_t size;
/* NUMA_NUM_NODES counters, indexed by node number, read from the per-node
 * nr_hugepages sysfs entries */
size_t *nr_hugepages_per_node_array;
/* value read from the nr_overcommit_hugepages sysfs entry for this size */
size_t nr_overcommit;
};
/*
 * Cache of huge page information gathered from sysfs. It is populated by
 * hugepages_config_init_once() and read by the *_cached() helpers.
 */
struct memkind_hugepages_config_t {
/* one heap-allocated entry per huge page size found in sysfs */
struct hugepage_size_info **hugepages_info_array;
/* number of valid entries in hugepages_info_array */
int hugepages_info_array_len;
int err; // 0 if sysfs parsing successful, appropriate memkind_error otherwise
} memkind_hugepages_config;
/* once-control passed to pthread_once() together with
 * hugepages_config_init_once() */
static pthread_once_t memkind_hugepages_config_once_g = PTHREAD_ONCE_INIT;
/*
 * Allocate a hugepage_size_info object together with its zero-filled
 * per-node counter array (NUMA_NUM_NODES entries).
 *
 * Returns the new object, or NULL (after logging) when either allocation
 * fails. The scalar fields of the returned object are not initialized here.
 */
static struct hugepage_size_info *allocate_hugepage_size_info()
{
    struct hugepage_size_info *info = malloc(sizeof(*info));

    if (!info) {
        log_err("malloc() failed.");
        return NULL;
    }

    info->nr_hugepages_per_node_array =
        calloc(NUMA_NUM_NODES, sizeof(*info->nr_hugepages_per_node_array));
    if (info->nr_hugepages_per_node_array) {
        return info;
    }

    free(info);
    log_err("calloc() failed.");
    return NULL;
}
/*
 * Read a single unsigned decimal number from the file at entry_path.
 *
 * Returns the parsed value, or 0 when the file cannot be opened or does not
 * start with a number (including an empty file). When the file cannot be
 * opened, errno is restored to the value it had on entry, so probing
 * entries that do not exist leaves errno untouched.
 */
static size_t get_sysfs_entry_value(const char *entry_path)
{
    size_t value = 0;
    int errno_before = errno;
    FILE *fid = fopen(entry_path, "r");

    if (fid == NULL) {
        errno = errno_before;
        return 0;
    }

    /* fscanf() returns EOF (non-zero) on an empty file, so require exactly
     * one successful conversion before trusting the value */
    if (fscanf(fid, "%zu", &value) != 1) {
        value = 0;
    }

    fclose(fid);
    return value;
}
// Fill an already allocated hugepage_size_info object with data read from
// sysfs for the provided pagesize.
//
// pagesize: huge page size in bytes
// newInfo:  object to fill; its nr_hugepages_per_node_array must have
//           NUMA_NUM_NODES entries
//
// Entries whose sysfs file is missing or unreadable are stored as 0.
static void init_hugepage_size_info(size_t pagesize,
                                    struct hugepage_size_info *newInfo)
{
    char formatted_path[128];
    const char *nr_path_fmt =
        "/sys/devices/system/node/node%zu/hugepages/hugepages-%zukB/nr_hugepages";
    const char *nr_overcommit_path_fmt =
        "/sys/kernel/mm/hugepages/hugepages-%zukB/nr_overcommit_hugepages";
    int snprintf_ret = 0;
    size_t node;
    size_t pagesize_kb = pagesize >> 10; // sysfs directory names use kB
    // only used to limit logging; hoisted because it does not change per node
    size_t configured_nodes = (size_t)numa_num_configured_nodes();

    newInfo->size = pagesize;
    // keep the field defined even if the sysfs path cannot be built
    newInfo->nr_overcommit = 0;

    //read overcommit hugepages limit for this pagesize
    snprintf_ret = snprintf(formatted_path, sizeof(formatted_path),
                            nr_overcommit_path_fmt, pagesize_kb);
    if (snprintf_ret > 0 && (size_t)snprintf_ret < sizeof(formatted_path)) {
        newInfo->nr_overcommit = get_sysfs_entry_value(formatted_path);
        log_info("Overcommit limit for %zu kB hugepages is %zu.", pagesize_kb,
                 newInfo->nr_overcommit);
    }

    //read every node nr_hugepages for this pagesize
    for (node = 0; node < NUMA_NUM_NODES; ++node) {
        snprintf_ret = snprintf(formatted_path, sizeof(formatted_path),
                                nr_path_fmt, node, pagesize_kb);
        if (snprintf_ret <= 0 ||
            (size_t)snprintf_ret >= sizeof(formatted_path)) {
            continue;
        }
        newInfo->nr_hugepages_per_node_array[node] =
            get_sysfs_entry_value(formatted_path);
        if (node < configured_nodes) {
            log_info("Number of %zu kB hugepages on node %zu equals %zu.",
                     pagesize_kb, node,
                     newInfo->nr_hugepages_per_node_array[node]);
        }
    }
}
// Extract the huge page size from a sysfs directory name of the form
// "hugepages-<N>kB".
//
// entry: directory name to parse
// out:   receives the size in bytes (N * 1024); untouched on failure
//
// Returns 0 when a size was parsed, -1 otherwise.
static int parse_pagesize_from_sysfs_entry(const char *entry, size_t *out)
{
    size_t size_kb;

    if (sscanf(entry, "hugepages-%zukB", &size_kb) != 1) {
        return -1;
    }

    // the kernel names the directories in kB, callers work in bytes
    *out = size_kb << 10;
    return 0;
}
static void hugepages_config_init_once()
{
unsigned j, i = 0;
size_t pagesize;
struct hugepage_size_info **hugepages_info_array = NULL;
struct dirent *dir;
DIR *hugepages_sysfs = opendir("/sys/kernel/mm/hugepages");
if(hugepages_sysfs == NULL) {
memkind_hugepages_config.err = MEMKIND_ERROR_HUGETLB;
log_err("/sys/kernel/mm/hugepages directory is not available.");
return;
}
unsigned hugepages_info_array_len = 2; //initial size of array
hugepages_info_array = malloc(hugepages_info_array_len * sizeof(
struct hugepage_size_info *));
if (hugepages_info_array == NULL) {
memkind_hugepages_config.err = MEMKIND_ERROR_MALLOC;
closedir(hugepages_sysfs);
log_err("malloc() failed.");
return;
}
while ((dir = readdir(hugepages_sysfs)) != NULL) {
if(dir->d_type == DT_DIR &&
parse_pagesize_from_sysfs_entry(dir->d_name, &pagesize) == 0) {
struct hugepage_size_info *new_hugepage_info = allocate_hugepage_size_info();
if(new_hugepage_info == NULL) {
memkind_hugepages_config.err = MEMKIND_ERROR_MALLOC;
break;
}
init_hugepage_size_info(pagesize, new_hugepage_info);
//there is more hugepage sizes than expected, reallocation of array needed
if(i == hugepages_info_array_len) {
hugepages_info_array_len *= 2;
struct hugepage_size_info **swap_tmp = realloc(hugepages_info_array,
hugepages_info_array_len * sizeof(struct hugepage_size_info *));
if(swap_tmp == NULL) {
free(new_hugepage_info);
memkind_hugepages_config.err = MEMKIND_ERROR_MALLOC;
log_err("realloc() failed.");
break;
}
hugepages_info_array = swap_tmp;
}
hugepages_info_array[i] = new_hugepage_info;
i++;
}
}
closedir(hugepages_sysfs);
if(memkind_hugepages_config.err == 0) {
memkind_hugepages_config.hugepages_info_array = hugepages_info_array;
memkind_hugepages_config.hugepages_info_array_len = i;
} else {
for(j=0; j<i; j++) {
free(hugepages_info_array[i]);
}
free(hugepages_info_array);
}
return;
}
#ifdef __GNUC__
__attribute__((destructor))
#endif
static void destroy_hugepages_per_node()
{
int i;
for(i=0; i<memkind_hugepages_config.hugepages_info_array_len; i++) {
free(memkind_hugepages_config.hugepages_info_array[i]);
}
free(memkind_hugepages_config.hugepages_info_array);
}
// helper function that find and return hugepage_size_info object for specified pagesize
static struct hugepage_size_info *get_hugepage_info_for_pagesize(
size_t pagesize)
{
int i;
for(i=0; i<memkind_hugepages_config.hugepages_info_array_len; i++) {
if(memkind_hugepages_config.hugepages_info_array[i]->size == pagesize) {
return memkind_hugepages_config.hugepages_info_array[i];
}
}
return NULL;
}
// returns sum of pre-allocated hugepage for specified pagesize and set of nodes
static int get_nr_hugepages_cached(size_t pagesize, struct bitmask *nodemask,
size_t *out)
{
int i;
size_t nr_hugepages = 0;
int num_node = numa_num_configured_nodes();
pthread_once(&memkind_hugepages_config_once_g,
hugepages_config_init_once);
if(memkind_hugepages_config.err != 0) {
return memkind_hugepages_config.err;
}
struct hugepage_size_info *info = get_hugepage_info_for_pagesize(pagesize);
if(info == NULL) {
log_err("Unable to allocate hugepages, because info about pre-allocated hugepages is not available.");
return MEMKIND_ERROR_HUGETLB;
}
for(i=0; i<num_node; i++) {
if(numa_bitmask_isbitset(nodemask, i)) {
nr_hugepages += info->nr_hugepages_per_node_array[i];
}
}
*out = nr_hugepages;
return 0;
}
// Fetch the cached overcommit limit for huge pages of the given size.
//
// pagesize: huge page size in bytes
// out:      receives the limit; untouched on failure
//
// Returns 0 on success, the error recorded while parsing sysfs, or
// MEMKIND_ERROR_HUGETLB when no cached entry exists for pagesize.
static int get_nr_overcommit_hugepages_cached(size_t pagesize, size_t *out)
{
    struct hugepage_size_info *size_info;
    int status;

    pthread_once(&memkind_hugepages_config_once_g,
                 hugepages_config_init_once);

    status = memkind_hugepages_config.err;
    if (status == 0) {
        size_info = get_hugepage_info_for_pagesize(pagesize);
        if (size_info != NULL) {
            *out = size_info->nr_overcommit;
        } else {
            log_err("Unable to allocate hugepages, because info about overcommit hugepages is not available.");
            status = MEMKIND_ERROR_HUGETLB;
        }
    }

    return status;
}