// SPDX-License-Identifier: BSD-2-Clause
/* Copyright (C) 2019 - 2020 Intel Corporation. */
#include <errno.h>
#include <limits.h>
#include <stdint.h>
#include <memkind/internal/memkind_log.h>
#include <memkind/internal/memkind_private.h>
#include <memkind/internal/memkind_bandwidth.h>
/* A single (numa node, bandwidth) record; used as the sortable unit when
 * grouping numa nodes by bandwidth in bandwidth_create_nodes(). */
struct numanode_bandwidth_t {
int numanode; /* numa node id */
int bandwidth; /* synthetic bandwidth rank assigned to this node */
};
/* One group of numa nodes that all share the same bandwidth value.
 * Produced by bandwidth_create_nodes(); the numanodes pointer aims into
 * a flat array stored in the same allocation as the group headers, so
 * the whole structure is released with a single free(). */
struct bandwidth_nodes_t {
int bandwidth; /* bandwidth value shared by every node in this group */
int num_numanodes; /* number of entries in numanodes */
int *numanodes; /* node ids; not separately allocated */
};
/* Growable vector of int node ids; one per cpu in the closest-node map. */
VEC(vec_cpu_node, int);
/* Arbitrary bandwidth ranks — only the relative order matters:
 * HIGH > LOW, and NOT_PRESENT (zero) marks "no known bandwidth". */
#define BANDWIDTH_NODE_HIGH_VAL 2
#define BANDWIDTH_NODE_LOW_VAL 1
#define BANDWIDTH_NODE_NOT_PRESENT 0
/*
 * Assign synthetic bandwidth ranks for every configured NUMA node:
 * nodes set in numa_nodes_bm get BANDWIDTH_NODE_HIGH_VAL, the remaining
 * configured nodes get BANDWIDTH_NODE_LOW_VAL; all other entries keep
 * their caller-provided value (zero / BANDWIDTH_NODE_NOT_PRESENT).
 *
 * bandwidth (OUT): array of NUMA_NUM_NODES ints, pre-zeroed by the caller.
 * numa_nodes_bm (IN): bitmask of the high-bandwidth nodes.
 */
static void bandwidth_assign_arbitrary_values(int *bandwidth,
                                              struct bitmask *numa_nodes_bm)
{
    unsigned i;
    /* budget: how many configured nodes still need a value assigned */
    int nodes_num = numa_num_configured_nodes();
    for (i = 0; i < NUMA_NUM_NODES; i++) {
        if (numa_bitmask_isbitset(numa_nodes_bm, i)) {
            bandwidth[i] = BANDWIDTH_NODE_HIGH_VAL;
            nodes_num--;
        }
    }
    for (i = 0; i < NUMA_NUM_NODES; i++) {
        /* Check the budget BEFORE assigning (the original checked after,
         * so when the bitmask already covered every configured node it
         * could still mark extra, non-configured nodes as LOW — and with
         * a negative budget it would mark all of them, which overflows
         * the nodes_num-sized scratch array in bandwidth_create_nodes). */
        if (nodes_num <= 0)
            break;
        if (!numa_bitmask_isbitset(numa_nodes_bm, i)) {
            bandwidth[i] = BANDWIDTH_NODE_LOW_VAL;
            nodes_num--;
        }
    }
}
/*
 * Fill the bandwidth table from a node bitmask obtained via get_bitmask.
 * Returns get_bitmask's status: 0 on success (table filled with the
 * arbitrary high/low ranks), non-zero otherwise (table left untouched).
 */
int bandwidth_fill(int *bandwidth, get_node_bitmask get_bitmask)
{
    struct bitmask *high_nodes = numa_allocate_nodemask();
    int status = get_bitmask(high_nodes);
    if (status == 0)
        bandwidth_assign_arbitrary_values(bandwidth, high_nodes);
    numa_bitmask_free(high_nodes);
    return status;
}
/*
 * Fill the bandwidth table from a user-supplied node list (the value of an
 * environment variable, e.g. "0,2").  Returns 0 on success or
 * MEMKIND_ERROR_ENVIRON when the string is not a valid libnuma nodestring.
 */
static int bandwidth_fill_values_from_enviroment(int *bandwidth,
                                                 char *nodes_env)
{
    struct bitmask *env_nodes = numa_parse_nodestring(nodes_env);
    if (!env_nodes) {
        log_err("Invalid value of environment variable.");
        return MEMKIND_ERROR_ENVIRON;
    }
    bandwidth_assign_arbitrary_values(bandwidth, env_nodes);
    numa_bitmask_free(env_nodes);
    return 0;
}
/*
 * Populate the bandwidth table, letting the environment variable named by
 * env override the automatic discovery performed by fill.  Returns 0 on
 * success, an error code otherwise.
 */
static int bandwidth_fill_nodes(int *bandwidth, fill_bandwidth_values fill,
                                const char *env)
{
    char *env_value = secure_getenv(env);
    if (!env_value) {
        /* no override requested — fall back to automatic discovery */
        return fill(bandwidth);
    }
    log_info("Environment variable %s detected: %s.", env, env_value);
    return bandwidth_fill_values_from_enviroment(bandwidth, env_value);
}
static int bandwidth_compare_numanode(const void *a, const void *b)
{
/***************************************************************************
* qsort comparison function for numa_node_bandwidth structures. Sorts in *
* order of bandwidth and then numanode. *
***************************************************************************/
int result;
struct numanode_bandwidth_t *aa = (struct numanode_bandwidth_t *)(a);
struct numanode_bandwidth_t *bb = (struct numanode_bandwidth_t *)(b);
result = (aa->bandwidth > bb->bandwidth) - (aa->bandwidth < bb->bandwidth);
if (result == 0) {
result = (aa->numanode > bb->numanode) - (aa->numanode < bb->numanode);
}
return result;
}
/*
 * Group numa nodes by bandwidth value.
 *
 * bandwidth (IN): vector of NUMA_NUM_NODES entries giving the bandwidth of
 *     each numa node, zero when the node has no known bandwidth.
 * num_unique (OUT): number of unique non-zero bandwidth values found.
 * bandwidth_nodes (OUT): array of num_unique groups sorted by ascending
 *     bandwidth, each listing the numa nodes with that bandwidth.  The
 *     group headers and their node-id lists live in ONE allocation; the
 *     caller releases everything with a single free(*bandwidth_nodes).
 *     Set to NULL on any failure.
 * RETURNS MEMKIND_SUCCESS on success, error code on failure.
 */
static int bandwidth_create_nodes(const int *bandwidth, int *num_unique,
                                  struct bandwidth_nodes_t **bandwidth_nodes)
{
    int err = MEMKIND_SUCCESS;
    int i, last_bandwidth;
    int node_count = 0; /* nodes that actually have a non-zero bandwidth */
    *bandwidth_nodes = NULL;
    /* scratch array of (node, bandwidth) pairs, sized for the configured
     * node count */
    int nodes_num = numa_num_configured_nodes();
    struct numanode_bandwidth_t *numanode_bandwidth =
        malloc(sizeof(*numanode_bandwidth) * nodes_num);
    if (!numanode_bandwidth) {
        err = MEMKIND_ERROR_MALLOC;
        log_err("malloc() failed.");
    }
    if (!err) {
        /* Collect nodes with a known bandwidth.  The node_count < nodes_num
         * bound prevents writing past the scratch array should more than
         * nodes_num entries be non-zero (the original had no bound). */
        for (i = 0; i < NUMA_NUM_NODES && node_count < nodes_num; ++i) {
            if (bandwidth[i] != BANDWIDTH_NODE_NOT_PRESENT) {
                numanode_bandwidth[node_count].numanode = i;
                numanode_bandwidth[node_count].bandwidth = bandwidth[i];
                ++node_count;
            }
        }
        if (node_count == 0) {
            err = MEMKIND_ERROR_UNAVAILABLE;
        }
    }
    if (!err) {
        /* Sort only the node_count entries that were filled in.  The
         * original sorted nodes_num entries, reading uninitialized memory
         * whenever fewer nodes reported a bandwidth value. */
        qsort(numanode_bandwidth, node_count,
              sizeof(struct numanode_bandwidth_t), bandwidth_compare_numanode);
        /* count the unique bandwidth values in the sorted run */
        *num_unique = 1;
        last_bandwidth = numanode_bandwidth[0].bandwidth;
        for (i = 1; i < node_count; ++i) {
            if (numanode_bandwidth[i].bandwidth != last_bandwidth) {
                last_bandwidth = numanode_bandwidth[i].bandwidth;
                ++*num_unique;
            }
        }
        /* single allocation: num_unique group headers followed by the flat
         * node-id array the headers point into */
        *bandwidth_nodes = malloc(sizeof(struct bandwidth_nodes_t) *
                                  (*num_unique) + sizeof(int) * node_count);
        if (!*bandwidth_nodes) {
            err = MEMKIND_ERROR_MALLOC;
            log_err("malloc() failed.");
        }
    }
    if (!err) {
        /* populate output: walk the sorted pairs and close out a group
         * every time the bandwidth value changes */
        (*bandwidth_nodes)[0].numanodes = (int *)(*bandwidth_nodes + *num_unique);
        last_bandwidth = numanode_bandwidth[0].bandwidth;
        int group = 0;    /* index of the group currently being filled */
        int in_group = 0; /* nodes accumulated into the current group */
        for (i = 0; i < node_count; ++i, ++in_group) {
            (*bandwidth_nodes)[0].numanodes[i] = numanode_bandwidth[i].numanode;
            if (numanode_bandwidth[i].bandwidth != last_bandwidth) {
                (*bandwidth_nodes)[group].num_numanodes = in_group;
                (*bandwidth_nodes)[group].bandwidth = last_bandwidth;
                in_group = 0;
                ++group;
                (*bandwidth_nodes)[group].numanodes =
                    (*bandwidth_nodes)[0].numanodes + i;
                last_bandwidth = numanode_bandwidth[i].bandwidth;
            }
        }
        (*bandwidth_nodes)[group].num_numanodes = in_group;
        (*bandwidth_nodes)[group].bandwidth = last_bandwidth;
    }
    free(numanode_bandwidth); /* free(NULL) is a no-op */
    if (err) {
        /* Reset the output pointer after freeing it: the original left it
         * dangling, and set_closest_numanode() frees it again on its exit
         * path — a double free. */
        free(*bandwidth_nodes);
        *bandwidth_nodes = NULL;
    }
    return err;
}
/*
 * Build a per-cpu map of the closest numa node(s) that have the highest
 * bandwidth value.
 *
 * num_unique (IN): length of the bandwidth_nodes vector.
 * bandwidth_nodes (IN): output of bandwidth_create_nodes(), sorted by
 *     ascending bandwidth.
 * is_single_node (IN): when true, a cpu resolving to more than one
 *     equally-close node is reported as an invalid configuration.
 * num_cpunode (IN): number of cpus / length of *closest_numanode.
 * closest_numanode (OUT): vector mapping cpu index to the closest numa
 *     node(s) of the highest bandwidth; NULL on failure.  The caller owns
 *     the vectors and the array.
 * RETURNS MEMKIND_SUCCESS on success, error code on failure.
 */
static int bandwidth_set_closest_numanode(int num_unique,
        const struct bandwidth_nodes_t *bandwidth_nodes, bool is_single_node,
        int num_cpunode, struct vec_cpu_node **closest_numanode)
{
    int err = MEMKIND_SUCCESS;
    int min_distance, distance, i, j, old_errno;
    struct bandwidth_nodes_t match;
    match.bandwidth = -1;
    /* bandwidth_nodes is sorted ascending, so the last group holds the
     * highest bandwidth */
    int target_bandwidth = bandwidth_nodes[num_unique - 1].bandwidth;
    struct vec_cpu_node *node_arr = calloc(num_cpunode,
                                           sizeof(struct vec_cpu_node));
    if (!node_arr) {
        log_err("calloc() failed.");
        return MEMKIND_ERROR_MALLOC;
    }
    for (i = 0; i < num_unique; ++i) {
        if (bandwidth_nodes[i].bandwidth == target_bandwidth) {
            match = bandwidth_nodes[i];
            break;
        }
    }
    if (match.bandwidth == -1) {
        err = MEMKIND_ERROR_UNAVAILABLE;
    } else {
        for (i = 0; i < num_cpunode; ++i) {
            min_distance = INT_MAX;
            /* The node this cpu resides on is loop-invariant: look it up
             * once per cpu instead of once per candidate node (the
             * original repeated the lookup in the inner loop).  numa
             * calls may clobber errno, so preserve it across them. */
            old_errno = errno;
            int cpu_node = numa_node_of_cpu(i);
            for (j = 0; j < match.num_numanodes; ++j) {
                distance = numa_distance(cpu_node, match.numanodes[j]);
                if (distance < min_distance) {
                    /* strictly closer: restart the candidate list */
                    min_distance = distance;
                    VEC_CLEAR(&node_arr[i]);
                    VEC_PUSH_BACK(&node_arr[i], match.numanodes[j]);
                } else if (distance == min_distance) {
                    /* equally close: keep all ties */
                    VEC_PUSH_BACK(&node_arr[i], match.numanodes[j]);
                }
            }
            errno = old_errno;
            /* a cpu with multiple equally-close nodes violates the
             * single-node requirement (only enforced for online cpus) */
            if (VEC_SIZE(&node_arr[i]) > 1 && is_single_node &&
                numa_bitmask_isbitset(numa_all_cpus_ptr, i)) {
                log_err("Invalid Numa Configuration for cpu %d", i);
                int node = -1;
                VEC_FOREACH(node, &node_arr[i]) {
                    log_err("Node %d", node);
                }
                err = MEMKIND_ERROR_RUNTIME;
            }
        }
    }
    if (err) {
        for (i = 0; i < num_cpunode; ++i) {
            VEC_DELETE(&node_arr[i]);
        }
        free(node_arr);
        node_arr = NULL;
    }
    *closest_numanode = node_arr;
    return err;
}
/*
 * End-to-end discovery: fill per-node bandwidth values (env override or
 * fill_values callback), group nodes by bandwidth, and build the per-cpu
 * closest-node map for the highest-bandwidth group.
 *
 * fill_values (IN): automatic bandwidth-discovery callback.
 * env (IN): name of the environment variable that can override discovery.
 * closest_numanode (OUT): per-cpu closest node(s); owned by the caller.
 * num_cpu (IN): number of cpus / length of *closest_numanode.
 * is_single_node (IN): require exactly one closest node per cpu.
 * RETURNS MEMKIND_SUCCESS on success, error code on failure.
 */
int set_closest_numanode(fill_bandwidth_values fill_values, const char *env,
        struct vec_cpu_node **closest_numanode, int num_cpu,
        bool is_single_node)
{
    int status;
    int num_unique = 0;
    int i;
    struct bandwidth_nodes_t *bandwidth_nodes = NULL;
    int *bandwidth = calloc(NUMA_NUM_NODES, sizeof(int));
    if (!bandwidth) {
        log_err("calloc() failed.");
        return MEMKIND_ERROR_MALLOC;
    }
    status = bandwidth_fill_nodes(bandwidth, fill_values, env);
    if (status)
        goto exit;
    status = bandwidth_create_nodes(bandwidth, &num_unique, &bandwidth_nodes);
    if (status)
        goto exit;
    status = bandwidth_set_closest_numanode(num_unique, bandwidth_nodes,
                                            is_single_node, num_cpu,
                                            closest_numanode);
    /* only advertise the high-bandwidth nodes when the mapping succeeded
     * (the original logged them even after a failure) */
    if (status)
        goto exit;
    for (i = 0; i < bandwidth_nodes[num_unique - 1].num_numanodes; ++i) {
        log_info("NUMA node %d is high-bandwidth/dax-kmem memory.",
                 bandwidth_nodes[num_unique - 1].numanodes[i]);
    }
exit:
    /* free(NULL) is a no-op, so the early exits are safe */
    free(bandwidth_nodes);
    free(bandwidth);
    return status;
}
void set_bitmask_for_all_closest_numanodes(unsigned long *nodemask,
unsigned long maxnode, const struct vec_cpu_node *closest_numanode, int num_cpu)
{
if (MEMKIND_LIKELY(nodemask)) {
struct bitmask nodemask_bm = {maxnode, nodemask};
int cpu;
int node = -1;
numa_bitmask_clearall(&nodemask_bm);
for (cpu = 0; cpu < num_cpu; ++cpu) {
VEC_FOREACH(node, &closest_numanode[cpu]) {
numa_bitmask_setbit(&nodemask_bm, node);
}
}
}
}
/*
 * Set, in the caller-supplied raw nodemask, the bit of every numa node
 * closest to the cpu the calling thread is currently running on.  The
 * mask is cleared first; a NULL nodemask is a successful no-op.
 * RETURNS MEMKIND_SUCCESS on success, MEMKIND_ERROR_RUNTIME when the
 * current cpu cannot be determined or is out of range.
 */
int set_bitmask_for_current_closest_numanode(unsigned long *nodemask,
        unsigned long maxnode, const struct vec_cpu_node *closest_numanode,
        int num_cpu)
{
    if (MEMKIND_LIKELY(nodemask)) {
        struct bitmask nodemask_bm = {maxnode, nodemask};
        numa_bitmask_clearall(&nodemask_bm);
        int cpu = sched_getcpu();
        /* sched_getcpu() returns -1 on failure; the original checked only
         * the upper bound, so a failure would index closest_numanode[-1] */
        if (MEMKIND_LIKELY(cpu >= 0 && cpu < num_cpu)) {
            int node = -1;
            VEC_FOREACH(node, &closest_numanode[cpu]) {
                numa_bitmask_setbit(&nodemask_bm, node);
            }
        } else {
            return MEMKIND_ERROR_RUNTIME;
        }
    }
    return MEMKIND_SUCCESS;
}