/*
 * Copyright (C) 2014 - 2019 Intel Corporation.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 * 1. Redistributions of source code must retain the above copyright notice(s),
 *    this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright notice(s),
 *    this list of conditions and the following disclaimer in the documentation
 *    and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ``AS IS'' AND ANY EXPRESS
 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO
 * EVENT SHALL THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
 * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
Packit 345191
Packit 345191
#include <memkind/internal/memkind_hugetlb.h>
Packit 345191
#include <memkind/internal/memkind_default.h>
Packit 345191
#include <memkind/internal/memkind_arena.h>
Packit 345191
#include <memkind/internal/memkind_private.h>
Packit 345191
#include <memkind/internal/memkind_log.h>
Packit 345191
Packit 345191
#include <sys/mman.h>
Packit 345191
#ifndef MAP_HUGETLB
Packit 345191
#define MAP_HUGETLB 0x40000
Packit 345191
#endif
Packit 345191
#ifndef MAP_HUGE_2MB
Packit 345191
#define MAP_HUGE_2MB (21 << 26)
Packit 345191
#endif
Packit 345191
Packit 345191
#include <stdio.h>
Packit 345191
#include <errno.h>
Packit 345191
#include <numa.h>
Packit 345191
#include <pthread.h>
Packit 345191
#include <dirent.h>
Packit 345191
Packit 345191
MEMKIND_EXPORT struct memkind_ops MEMKIND_HUGETLB_OPS = {
Packit 345191
    .create = memkind_arena_create,
Packit 345191
    .destroy = memkind_default_destroy,
Packit 345191
    .malloc = memkind_arena_malloc,
Packit 345191
    .calloc = memkind_arena_calloc,
Packit 345191
    .posix_memalign = memkind_arena_posix_memalign,
Packit 345191
    .realloc = memkind_arena_realloc,
Packit 345191
    .free = memkind_arena_free,
Packit 345191
    .check_available = memkind_hugetlb_check_available_2mb,
Packit 345191
    .get_mmap_flags = memkind_hugetlb_get_mmap_flags,
Packit 345191
    .get_arena = memkind_thread_get_arena,
Packit 345191
    .init_once = memkind_hugetlb_init_once,
Packit 345191
    .malloc_usable_size = memkind_default_malloc_usable_size,
Packit 345191
    .finalize = memkind_arena_finalize,
Packit 345191
    .get_stat = memkind_arena_get_kind_stat,
Packit 345191
    .defrag_reallocate = memkind_arena_defrag_reallocate
Packit 345191
};
Packit 345191
Packit 345191
/* Forward declarations of file-local helpers that are defined further down. */
static int memkind_hugetlb_check_available(struct memkind *kind,
                                           size_t huge_size);
static int get_nr_hugepages_cached(size_t pagesize, struct bitmask *nodemask,
                                   size_t *out);
static int get_nr_overcommit_hugepages_cached(size_t pagesize, size_t *out);
Packit 345191
Packit 345191
MEMKIND_EXPORT int memkind_hugetlb_get_mmap_flags(struct memkind *kind,
Packit 345191
                                                  int *flags)
Packit 345191
{
Packit 345191
    *flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB | MAP_HUGE_2MB;
Packit 345191
    return 0;
Packit 345191
}
Packit 345191
Packit 345191
MEMKIND_EXPORT void memkind_hugetlb_init_once(void)
Packit 345191
{
Packit 345191
    memkind_init(MEMKIND_HUGETLB, true);
Packit 345191
}
Packit 345191
Packit 345191
/*
 * Availability check for 2 MB huge pages: delegates to
 * memkind_hugetlb_check_available() with the page size given in bytes and
 * returns its result unchanged.
 */
MEMKIND_EXPORT int memkind_hugetlb_check_available_2mb(struct memkind *kind)
{
    const size_t huge_page_2mb = 2097152; /* 2 MB expressed in bytes */

    return memkind_hugetlb_check_available(kind, huge_page_2mb);
}
Packit 345191
Packit 345191
/* huge_size: the huge page size in bytes */
Packit 345191
static int memkind_hugetlb_check_available(struct memkind *kind,
Packit 345191
                                           size_t huge_size)
Packit 345191
{
Packit 345191
    int err = 0;
Packit 345191
    nodemask_t nodemask;
Packit 345191
    struct bitmask nodemask_bm = {NUMA_NUM_NODES, nodemask.n};
Packit 345191
Packit 345191
    /* on x86_64 default huge page size is 2MB */
Packit 345191
    if (huge_size == 0) {
Packit 345191
        huge_size = 2097152;
Packit 345191
    }
Packit 345191
Packit 345191
    if (kind->ops->get_mbind_nodemask) {
Packit 345191
        err = kind->ops->get_mbind_nodemask(kind, nodemask.n, NUMA_NUM_NODES);
Packit 345191
    } else {
Packit 345191
        numa_bitmask_setall(&nodemask_bm);
Packit 345191
    }
Packit 345191
Packit 345191
    size_t nr_persistent_hugepages, nr_overcommit_hugepages;
Packit 345191
Packit 345191
    err = get_nr_hugepages_cached(huge_size, &nodemask_bm,
Packit 345191
                                  &nr_persistent_hugepages);
Packit 345191
    if(err) {
Packit 345191
        return err;
Packit 345191
    }
Packit 345191
Packit 345191
    err = get_nr_overcommit_hugepages_cached(huge_size, &nr_overcommit_hugepages);
Packit 345191
    if(err) {
Packit 345191
        return err;
Packit 345191
    }
Packit 345191
Packit 345191
    if (!nr_overcommit_hugepages && !nr_persistent_hugepages) {
Packit 345191
        log_err("Persistent hugepages and overcommit hugepages are not available.");
Packit 345191
        return MEMKIND_ERROR_HUGETLB;
Packit 345191
    }
Packit 345191
Packit 345191
    return err;
Packit 345191
}
Packit 345191
Packit 345191
/* Cached sysfs data describing a single huge page size. */
struct hugepage_size_info {
    /* huge page size in bytes */
    size_t size;
    /* nr_hugepages value per node, indexed by node number
     * (allocated with NUMA_NUM_NODES entries) */
    size_t *nr_hugepages_per_node_array;
    /* nr_overcommit_hugepages value for this page size */
    size_t nr_overcommit;
};
Packit 345191
Packit 345191
/* Layout of the cache that holds the huge page configuration read from sysfs. */
struct memkind_hugepages_config_t {
    /* one entry per huge page size found in sysfs */
    struct hugepage_size_info **hugepages_info_array;
    /* number of valid entries in hugepages_info_array */
    int hugepages_info_array_len;
    /* 0 if sysfs parsing successful, appropriate memkind_error otherwise */
    int err;
};

/* The single instance of the cache; filled by hugepages_config_init_once(). */
struct memkind_hugepages_config_t memkind_hugepages_config;

/* Makes pthread_once() run hugepages_config_init_once() only one time. */
static pthread_once_t memkind_hugepages_config_once_g = PTHREAD_ONCE_INIT;
Packit 345191
Packit 345191
static struct hugepage_size_info *allocate_hugepage_size_info()
Packit 345191
{
Packit 345191
    struct hugepage_size_info *newInfo = malloc(sizeof(struct hugepage_size_info));
Packit 345191
    if(newInfo == NULL) {
Packit 345191
        log_err("malloc() failed.");
Packit 345191
        return NULL;
Packit 345191
    }
Packit 345191
Packit 345191
    newInfo->nr_hugepages_per_node_array = calloc(NUMA_NUM_NODES, sizeof(size_t));
Packit 345191
    if(newInfo->nr_hugepages_per_node_array == NULL) {
Packit 345191
        free(newInfo);
Packit 345191
        log_err("calloc() failed.");
Packit 345191
        return NULL;
Packit 345191
    }
Packit 345191
Packit 345191
    return newInfo;
Packit 345191
}
Packit 345191
Packit 345191
/*
 * Read one unsigned decimal number from the file at entry_path.
 *
 * Returns the parsed value, or 0 when the file cannot be opened or does not
 * start with a number (empty file, read error, non-numeric content).
 * When the file cannot be opened, errno is restored to the value it had on
 * entry, so a missing entry does not disturb the caller's errno.
 */
static size_t get_sysfs_entry_value(const char *entry_path)
{
    size_t value_read = 0;
    size_t ret = 0;
    int errno_before = errno;
    FILE *fid = fopen(entry_path, "r");

    if (fid == NULL) {
        errno = errno_before;
        return 0;
    }

    /* fscanf() returns EOF (non-zero) on an empty file or read error, so only
     * an exact count of one converted item means value_read is valid */
    if (fscanf(fid, "%zu", &value_read) == 1) {
        ret = value_read;
    }
    fclose(fid);

    return ret;
}
Packit 345191
Packit 345191
// construct hugepage_size_info object and fill it with data for provided pagesize
Packit 345191
static void init_hugepage_size_info(size_t pagesize,
Packit 345191
                                    struct hugepage_size_info *newInfo)
Packit 345191
{
Packit 345191
    char formatted_path[128];
Packit 345191
    const char *nr_path_fmt =
Packit 345191
        "/sys/devices/system/node/node%u/hugepages/hugepages-%zukB/nr_hugepages";
Packit 345191
    const char *nr_overcommit_path_fmt =
Packit 345191
        "/sys/kernel/mm/hugepages/hugepages-%zukB/nr_overcommit_hugepages";
Packit 345191
    int snprintf_ret = 0;
Packit 345191
    size_t node;
Packit 345191
Packit 345191
    size_t pagesize_kb = pagesize >> 10;
Packit 345191
Packit 345191
    newInfo->size = pagesize;
Packit 345191
Packit 345191
    //read overcommit hugepages limit for this pagesize
Packit 345191
    snprintf_ret = snprintf(formatted_path, sizeof(formatted_path),
Packit 345191
                            nr_overcommit_path_fmt, pagesize_kb);
Packit 345191
    if (snprintf_ret > 0 && snprintf_ret < sizeof(formatted_path)) {
Packit 345191
        newInfo->nr_overcommit = get_sysfs_entry_value(formatted_path);
Packit 345191
        log_info("Overcommit limit for %zu kB hugepages is %zu.", pagesize,
Packit 345191
                 newInfo->nr_overcommit);
Packit 345191
    }
Packit 345191
Packit 345191
    //read every node nr_hugepages for this pagesize
Packit 345191
    for (node = 0; node < NUMA_NUM_NODES; ++node) {
Packit 345191
        snprintf_ret = snprintf(formatted_path, sizeof(formatted_path), nr_path_fmt,
Packit 345191
                                node, pagesize_kb);
Packit 345191
        if(snprintf_ret > 0 && snprintf_ret < sizeof(formatted_path)) {
Packit 345191
            newInfo->nr_hugepages_per_node_array[node] = get_sysfs_entry_value(
Packit 345191
                                                             formatted_path);
Packit 345191
            if(node < numa_num_configured_nodes()) {
Packit 345191
                log_info("Number of %zu kB hugepages on node %zu equals %zu.", pagesize, node,
Packit 345191
                         newInfo->nr_hugepages_per_node_array[node]);
Packit 345191
            }
Packit 345191
        }
Packit 345191
    }
Packit 345191
}
Packit 345191
Packit 345191
/*
 * Get the huge page size in bytes out of a sysfs directory name of the form
 * "hugepages-<N>kB".
 *
 * entry: directory name to parse.
 * out:   receives N * 1024 on success; left untouched on failure.
 *
 * Returns 0 on success and -1 when the name does not have exactly that form
 * (missing "kB" suffix, trailing characters) or when the size in bytes does
 * not fit in size_t.
 */
static int parse_pagesize_from_sysfs_entry(const char *entry, size_t *out)
{
    size_t pagesize_kb = 0;
    int consumed = -1;

    /* %n is reached only when the literal "kB" matched, so consumed stays -1
     * for names such as "hugepages-2048" or "hugepages-2048MB" */
    if (sscanf(entry, "hugepages-%zukB%n", &pagesize_kb, &consumed) != 1 ||
        consumed < 0 || entry[consumed] != '\0') {
        return -1;
    }

    /* we are using bytes but kernel is using kB; refuse values that would
     * overflow when converted */
    if (pagesize_kb > (((size_t)-1) >> 10)) {
        return -1;
    }

    *out = pagesize_kb << 10;
    return 0;
}
Packit 345191
Packit 345191
Packit 345191
static void hugepages_config_init_once()
Packit 345191
{
Packit 345191
    unsigned j, i = 0;
Packit 345191
    size_t pagesize;
Packit 345191
    struct hugepage_size_info **hugepages_info_array = NULL;
Packit 345191
    struct dirent *dir;
Packit 345191
    DIR *hugepages_sysfs = opendir("/sys/kernel/mm/hugepages");
Packit 345191
    if(hugepages_sysfs == NULL) {
Packit 345191
        memkind_hugepages_config.err = MEMKIND_ERROR_HUGETLB;
Packit 345191
        log_err("/sys/kernel/mm/hugepages directory is not available.");
Packit 345191
        return;
Packit 345191
    }
Packit 345191
Packit 345191
    unsigned hugepages_info_array_len = 2; //initial size of array
Packit 345191
    hugepages_info_array = malloc(hugepages_info_array_len * sizeof(
Packit 345191
                                      struct hugepage_size_info *));
Packit 345191
    if (hugepages_info_array == NULL) {
Packit 345191
        memkind_hugepages_config.err = MEMKIND_ERROR_MALLOC;
Packit 345191
        closedir(hugepages_sysfs);
Packit 345191
        log_err("malloc() failed.");
Packit 345191
        return;
Packit 345191
    }
Packit 345191
Packit 345191
    while ((dir = readdir(hugepages_sysfs)) != NULL) {
Packit 345191
        if(dir->d_type == DT_DIR &&
Packit 345191
           parse_pagesize_from_sysfs_entry(dir->d_name, &pagesize) == 0) {
Packit 345191
            struct hugepage_size_info *new_hugepage_info = allocate_hugepage_size_info();
Packit 345191
            if(new_hugepage_info == NULL) {
Packit 345191
                memkind_hugepages_config.err = MEMKIND_ERROR_MALLOC;
Packit 345191
                break;
Packit 345191
            }
Packit 345191
Packit 345191
            init_hugepage_size_info(pagesize, new_hugepage_info);
Packit 345191
Packit 345191
            //there is more hugepage sizes than expected, reallocation of array needed
Packit 345191
            if(i == hugepages_info_array_len) {
Packit 345191
                hugepages_info_array_len *= 2;
Packit 345191
                struct hugepage_size_info **swap_tmp = realloc(hugepages_info_array,
Packit 345191
                                                               hugepages_info_array_len * sizeof(struct hugepage_size_info *));
Packit 345191
                if(swap_tmp == NULL) {
Packit 345191
                    free(new_hugepage_info);
Packit 345191
                    memkind_hugepages_config.err = MEMKIND_ERROR_MALLOC;
Packit 345191
                    log_err("realloc() failed.");
Packit 345191
                    break;
Packit 345191
                }
Packit 345191
                hugepages_info_array = swap_tmp;
Packit 345191
Packit 345191
            }
Packit 345191
            hugepages_info_array[i] = new_hugepage_info;
Packit 345191
            i++;
Packit 345191
        }
Packit 345191
    }
Packit 345191
Packit 345191
    closedir(hugepages_sysfs);
Packit 345191
Packit 345191
    if(memkind_hugepages_config.err == 0) {
Packit 345191
        memkind_hugepages_config.hugepages_info_array = hugepages_info_array;
Packit 345191
        memkind_hugepages_config.hugepages_info_array_len = i;
Packit 345191
    } else {
Packit 345191
        for(j=0; j
Packit 345191
            free(hugepages_info_array[i]);
Packit 345191
        }
Packit 345191
        free(hugepages_info_array);
Packit 345191
    }
Packit 345191
Packit 345191
    return;
Packit 345191
}
Packit 345191
Packit 345191
#ifdef __GNUC__
Packit 345191
__attribute__((destructor))
Packit 345191
#endif
Packit 345191
static void destroy_hugepages_per_node()
Packit 345191
{
Packit 345191
    int i;
Packit 345191
    for(i=0; i
Packit 345191
        free(memkind_hugepages_config.hugepages_info_array[i]);
Packit 345191
    }
Packit 345191
    free(memkind_hugepages_config.hugepages_info_array);
Packit 345191
}
Packit 345191
Packit 345191
// helper function that find and return hugepage_size_info object for specified pagesize
Packit 345191
static struct hugepage_size_info *get_hugepage_info_for_pagesize(
Packit 345191
    size_t pagesize)
Packit 345191
{
Packit 345191
    int i;
Packit 345191
Packit 345191
    for(i=0; i
Packit 345191
        if(memkind_hugepages_config.hugepages_info_array[i]->size == pagesize) {
Packit 345191
            return memkind_hugepages_config.hugepages_info_array[i];
Packit 345191
        }
Packit 345191
    }
Packit 345191
    return NULL;
Packit 345191
}
Packit 345191
Packit 345191
// returns sum of pre-allocated hugepage for specified pagesize and set of nodes
Packit 345191
static int get_nr_hugepages_cached(size_t pagesize, struct bitmask *nodemask,
Packit 345191
                                   size_t *out)
Packit 345191
{
Packit 345191
    int i;
Packit 345191
    size_t nr_hugepages = 0;
Packit 345191
    int num_node = numa_num_configured_nodes();
Packit 345191
    pthread_once(&memkind_hugepages_config_once_g,
Packit 345191
                 hugepages_config_init_once);
Packit 345191
Packit 345191
Packit 345191
    if(memkind_hugepages_config.err != 0) {
Packit 345191
        return memkind_hugepages_config.err;
Packit 345191
    }
Packit 345191
Packit 345191
    struct hugepage_size_info *info = get_hugepage_info_for_pagesize(pagesize);
Packit 345191
    if(info == NULL) {
Packit 345191
        log_err("Unable to allocate hugepages, because info about pre-allocated hugepages is not available.");
Packit 345191
        return MEMKIND_ERROR_HUGETLB;
Packit 345191
    }
Packit 345191
Packit 345191
    for(i=0; i
Packit 345191
        if(numa_bitmask_isbitset(nodemask, i)) {
Packit 345191
            nr_hugepages += info->nr_hugepages_per_node_array[i];
Packit 345191
        }
Packit 345191
    }
Packit 345191
Packit 345191
    *out = nr_hugepages;
Packit 345191
    return 0;
Packit 345191
}
Packit 345191
Packit 345191
// returns hugepages overcommit limit for specified pagesize
Packit 345191
static int get_nr_overcommit_hugepages_cached(size_t pagesize, size_t *out)
Packit 345191
{
Packit 345191
    pthread_once(&memkind_hugepages_config_once_g,
Packit 345191
                 hugepages_config_init_once);
Packit 345191
Packit 345191
    if(memkind_hugepages_config.err != 0) {
Packit 345191
        return memkind_hugepages_config.err;
Packit 345191
    }
Packit 345191
Packit 345191
    struct hugepage_size_info *info = get_hugepage_info_for_pagesize(pagesize);
Packit 345191
    if(info == NULL) {
Packit 345191
        log_err("Unable to allocate hugepages, because info about overcommit hugepages is not available.");
Packit 345191
        return MEMKIND_ERROR_HUGETLB;
Packit 345191
    }
Packit 345191
Packit 345191
    *out = info->nr_overcommit;
Packit 345191
    return 0;
Packit 345191
}
Packit 345191