/* * Copyright (C) 2014 - 2019 Intel Corporation. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * 1. Redistributions of source code must retain the above copyright notice(s), * this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright notice(s), * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ``AS IS'' AND ANY EXPRESS * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO * EVENT SHALL THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include #include #include #include #include #include #ifndef MAP_HUGETLB #define MAP_HUGETLB 0x40000 #endif #ifndef MAP_HUGE_2MB #define MAP_HUGE_2MB (21 << 26) #endif #include #include #include #include #include MEMKIND_EXPORT struct memkind_ops MEMKIND_HUGETLB_OPS = { .create = memkind_arena_create, .destroy = memkind_default_destroy, .malloc = memkind_arena_malloc, .calloc = memkind_arena_calloc, .posix_memalign = memkind_arena_posix_memalign, .realloc = memkind_arena_realloc, .free = memkind_arena_free, .check_available = memkind_hugetlb_check_available_2mb, .get_mmap_flags = memkind_hugetlb_get_mmap_flags, .get_arena = memkind_thread_get_arena, .init_once = memkind_hugetlb_init_once, .malloc_usable_size = memkind_default_malloc_usable_size, .finalize = memkind_arena_finalize, .get_stat = memkind_arena_get_kind_stat, .defrag_reallocate = memkind_arena_defrag_reallocate }; static int get_nr_overcommit_hugepages_cached(size_t pagesize, size_t *out); static int get_nr_hugepages_cached(size_t pagesize, struct bitmask *nodemask, size_t *out); static int memkind_hugetlb_check_available(struct memkind *kind, size_t huge_size); MEMKIND_EXPORT int memkind_hugetlb_get_mmap_flags(struct memkind *kind, int *flags) { *flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB | MAP_HUGE_2MB; return 0; } MEMKIND_EXPORT void memkind_hugetlb_init_once(void) { memkind_init(MEMKIND_HUGETLB, true); } MEMKIND_EXPORT int memkind_hugetlb_check_available_2mb(struct memkind *kind) { return memkind_hugetlb_check_available(kind, 2097152); } /* huge_size: the huge page size in bytes */ static int memkind_hugetlb_check_available(struct memkind *kind, size_t huge_size) { int err = 0; nodemask_t nodemask; struct bitmask nodemask_bm = {NUMA_NUM_NODES, nodemask.n}; /* on x86_64 default huge page size is 2MB */ if (huge_size == 0) { huge_size = 2097152; } if (kind->ops->get_mbind_nodemask) { err = kind->ops->get_mbind_nodemask(kind, nodemask.n, NUMA_NUM_NODES); } else { numa_bitmask_setall(&nodemask_bm); } size_t nr_persistent_hugepages, nr_overcommit_hugepages; err = get_nr_hugepages_cached(huge_size, &nodemask_bm, &nr_persistent_hugepages); if(err) { return err; } err = get_nr_overcommit_hugepages_cached(huge_size, &nr_overcommit_hugepages); if(err) { return err; } if (!nr_overcommit_hugepages && !nr_persistent_hugepages) { log_err("Persistent hugepages and overcommit hugepages are not available."); return MEMKIND_ERROR_HUGETLB; } return err; } struct hugepage_size_info { size_t size; size_t *nr_hugepages_per_node_array; size_t nr_overcommit; }; struct memkind_hugepages_config_t { struct hugepage_size_info **hugepages_info_array; int hugepages_info_array_len; int err; // 0 if sysfs parsing successful, appropriate memkind_error otherwise } memkind_hugepages_config; static pthread_once_t memkind_hugepages_config_once_g = PTHREAD_ONCE_INIT; static struct hugepage_size_info *allocate_hugepage_size_info() { struct hugepage_size_info *newInfo = malloc(sizeof(struct hugepage_size_info)); if(newInfo == NULL) { log_err("malloc() failed."); return NULL; } newInfo->nr_hugepages_per_node_array = calloc(NUMA_NUM_NODES, sizeof(size_t)); if(newInfo->nr_hugepages_per_node_array == NULL) { free(newInfo); log_err("calloc() failed."); return NULL; } return newInfo; } static size_t get_sysfs_entry_value(const char *entry_path) { int errno_before; FILE *fid; int num_read; size_t value_read, ret = 0; errno_before = errno; fid = fopen(entry_path, "r"); if (fid) { num_read = fscanf(fid, "%zud", &value_read); if(num_read) { ret = value_read; } fclose(fid); } else { errno = errno_before; } return ret; } // construct hugepage_size_info object and fill it with data for provided pagesize static void init_hugepage_size_info(size_t pagesize, struct hugepage_size_info *newInfo) { char formatted_path[128]; const char *nr_path_fmt = "/sys/devices/system/node/node%u/hugepages/hugepages-%zukB/nr_hugepages"; const char *nr_overcommit_path_fmt = "/sys/kernel/mm/hugepages/hugepages-%zukB/nr_overcommit_hugepages"; int snprintf_ret = 0; size_t node; size_t pagesize_kb = pagesize >> 10; newInfo->size = pagesize; //read overcommit hugepages limit for this pagesize snprintf_ret = snprintf(formatted_path, sizeof(formatted_path), nr_overcommit_path_fmt, pagesize_kb); if (snprintf_ret > 0 && snprintf_ret < sizeof(formatted_path)) { newInfo->nr_overcommit = get_sysfs_entry_value(formatted_path); log_info("Overcommit limit for %zu kB hugepages is %zu.", pagesize, newInfo->nr_overcommit); } //read every node nr_hugepages for this pagesize for (node = 0; node < NUMA_NUM_NODES; ++node) { snprintf_ret = snprintf(formatted_path, sizeof(formatted_path), nr_path_fmt, node, pagesize_kb); if(snprintf_ret > 0 && snprintf_ret < sizeof(formatted_path)) { newInfo->nr_hugepages_per_node_array[node] = get_sysfs_entry_value( formatted_path); if(node < numa_num_configured_nodes()) { log_info("Number of %zu kB hugepages on node %zu equals %zu.", pagesize, node, newInfo->nr_hugepages_per_node_array[node]); } } } } // get hugepage size in bytes out of sysfs dir name static int parse_pagesize_from_sysfs_entry(const char *entry, size_t *out) { size_t pagesize; int ret = sscanf(entry, "hugepages-%zukB", &pagesize); if(ret == 1) { *out = pagesize << 10; //we are using bytes but kernel is using kB return 0; } return -1; } static void hugepages_config_init_once() { unsigned j, i = 0; size_t pagesize; struct hugepage_size_info **hugepages_info_array = NULL; struct dirent *dir; DIR *hugepages_sysfs = opendir("/sys/kernel/mm/hugepages"); if(hugepages_sysfs == NULL) { memkind_hugepages_config.err = MEMKIND_ERROR_HUGETLB; log_err("/sys/kernel/mm/hugepages directory is not available."); return; } unsigned hugepages_info_array_len = 2; //initial size of array hugepages_info_array = malloc(hugepages_info_array_len * sizeof( struct hugepage_size_info *)); if (hugepages_info_array == NULL) { memkind_hugepages_config.err = MEMKIND_ERROR_MALLOC; closedir(hugepages_sysfs); log_err("malloc() failed."); return; } while ((dir = readdir(hugepages_sysfs)) != NULL) { if(dir->d_type == DT_DIR && parse_pagesize_from_sysfs_entry(dir->d_name, &pagesize) == 0) { struct hugepage_size_info *new_hugepage_info = allocate_hugepage_size_info(); if(new_hugepage_info == NULL) { memkind_hugepages_config.err = MEMKIND_ERROR_MALLOC; break; } init_hugepage_size_info(pagesize, new_hugepage_info); //there is more hugepage sizes than expected, reallocation of array needed if(i == hugepages_info_array_len) { hugepages_info_array_len *= 2; struct hugepage_size_info **swap_tmp = realloc(hugepages_info_array, hugepages_info_array_len * sizeof(struct hugepage_size_info *)); if(swap_tmp == NULL) { free(new_hugepage_info); memkind_hugepages_config.err = MEMKIND_ERROR_MALLOC; log_err("realloc() failed."); break; } hugepages_info_array = swap_tmp; } hugepages_info_array[i] = new_hugepage_info; i++; } } closedir(hugepages_sysfs); if(memkind_hugepages_config.err == 0) { memkind_hugepages_config.hugepages_info_array = hugepages_info_array; memkind_hugepages_config.hugepages_info_array_len = i; } else { for(j=0; jsize == pagesize) { return memkind_hugepages_config.hugepages_info_array[i]; } } return NULL; } // returns sum of pre-allocated hugepage for specified pagesize and set of nodes static int get_nr_hugepages_cached(size_t pagesize, struct bitmask *nodemask, size_t *out) { int i; size_t nr_hugepages = 0; int num_node = numa_num_configured_nodes(); pthread_once(&memkind_hugepages_config_once_g, hugepages_config_init_once); if(memkind_hugepages_config.err != 0) { return memkind_hugepages_config.err; } struct hugepage_size_info *info = get_hugepage_info_for_pagesize(pagesize); if(info == NULL) { log_err("Unable to allocate hugepages, because info about pre-allocated hugepages is not available."); return MEMKIND_ERROR_HUGETLB; } for(i=0; inr_hugepages_per_node_array[i]; } } *out = nr_hugepages; return 0; } // returns hugepages overcommit limit for specified pagesize static int get_nr_overcommit_hugepages_cached(size_t pagesize, size_t *out) { pthread_once(&memkind_hugepages_config_once_g, hugepages_config_init_once); if(memkind_hugepages_config.err != 0) { return memkind_hugepages_config.err; } struct hugepage_size_info *info = get_hugepage_info_for_pagesize(pagesize); if(info == NULL) { log_err("Unable to allocate hugepages, because info about overcommit hugepages is not available."); return MEMKIND_ERROR_HUGETLB; } *out = info->nr_overcommit; return 0; }