/*
* BSD LICENSE
*
* Copyright(c) 2016-2020 Intel Corporation. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* * Neither the name of Intel Corporation nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#define _GNU_SOURCE
#include <stdint.h> /* uint64_t etc. */
#include <stdio.h> /* printf(), snprintf(), perror() */
#include <stdlib.h> /* malloc(), rand() */
#include <string.h> /* memcpy() */
#include <types.h> /* ASSERT() */
#include <pqos.h>
#ifdef __linux__
#include <sched.h> /* sched_setaffinity() */
#endif
#ifdef __FreeBSD__
#include <sys/param.h> /* sched affinity */
#include <sys/cpuset.h> /* sched affinity */
#endif
#include "dlock.h"
#define MAX_L3CAT_NUM 16
#define DIM(x) (sizeof(x) / sizeof(x[0]))
static int m_is_chunk_allocated = 0;
static char *m_chunk_start = NULL;
static size_t m_chunk_size = 0;
static unsigned m_num_clos = 0;
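/**
 * Table holding the initial CAT configuration of each l3cat id so that
 * dlock_exit() can restore it later
 */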
static struct {
unsigned id;
struct pqos_l3ca *cos_tab;
} m_l3cat_cos[MAX_L3CAT_NUM];
/**
 * @brief Removes the memory block from the cache hierarchy
 *
 * @param p pointer to memory block
 * @param s size of memory block in bytes
 */
static void mem_flush(const void *p, const size_t s)
{
	const size_t cache_line = 64;
	uintptr_t addr = (uintptr_t)p;
	const uintptr_t end = addr + s;

	if (p == NULL || s == 0)
		return;

	/* Align down to a cache line boundary so that the last,
	 * possibly partial, line of an unaligned block is flushed too */
	addr &= ~(uintptr_t)(cache_line - 1);
	for (; addr < end; addr += cache_line) {
		asm volatile("clflush (%0)\n\t"
			     :
			     : "r"(addr)
			     : "memory");
	}

	asm volatile("sfence\n\t"
		     :
		     :
		     : "memory");
}
/**
* @brief Reads the memory block, which places it in the cache hierarchy
*
* @param p pointer to memory block
* @param s size of memory block in bytes
*/
static void mem_read(const void *p, const size_t s)
{
	register size_t i;

	if (p == NULL || s == 0)
		return;

	/* Read the block in 32-bit words; XOR-ing into EAX forces an
	 * actual load without requiring a destination buffer */
	for (i = 0; i < (s / sizeof(uint32_t)); i++) {
		asm volatile("xor (%0,%1,4), %%eax\n\t"
			     :
			     : "r" (p), "r" (i)
			     : "%eax", "memory");
	}

	/* Read the remaining tail bytes one at a time */
	for (i = s & (~(sizeof(uint32_t) - 1)); i < s; i++) {
		asm volatile("xorb (%0,%1,1), %%al\n\t"
			     :
			     : "r" (p), "r" (i)
			     : "%al", "memory");
	}
}
/**
* @brief Initializes the memory block with random data
*
* This is to avoid any page faults or copy-on-write exceptions later on.
*
* @param p pointer to memory block
* @param s size of memory block in bytes
*/
static void mem_init(void *p, const size_t s)
{
	char *cp = (char *)p;
	size_t i;

	if (p == NULL || s == 0)
		return;

	for (i = 0; i < s; i++)
		cp[i] = (char) rand();
}
/**
* @brief Calculates number of cache ways required to fit a number of \a bytes
*
* @param cat_cap pointer to L3CA PQoS capability structure
* @param bytes number of bytes
* @param ways pointer to store number of required cache ways
*
* @return Operation status
* @retval 0 OK
* @retval <0 error
*/
static int bytes_to_cache_ways(const struct pqos_capability *cat_cap,
const size_t bytes, size_t *ways)
{
	size_t llc_size = 0, num_ways = 0;
	const struct pqos_cap_l3ca *cat = NULL;

	if (cat_cap == NULL || ways == NULL)
		return -1;

	if (cat_cap->type != PQOS_CAP_TYPE_L3CA)
		return -2;

	cat = cat_cap->u.l3ca;
	llc_size = cat->way_size * cat->num_ways;
	if (bytes > llc_size)
		return -3;

	/* round up to a whole number of cache ways */
	num_ways = (bytes + cat->way_size - 1) / cat->way_size;
	/* leave at least one way for the other classes of service */
	if (num_ways >= cat->num_ways)
		return -4;

	if (num_ways < 2)
		num_ways = 2;

	*ways = num_ways;
	return 0;
}
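/*
 * Worked example (hypothetical cache geometry): with way_size = 1 MB and
 * num_ways = 12, locking bytes = 2.5 MB gives
 * num_ways = (2621440 + 1048576 - 1) / 1048576 = 3 cache ways,
 * which passes both the num_ways < 12 check and the minimum of 2.
 */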
int dlock_init(void *ptr, const size_t size, const int clos, const int cpuid)
{
const struct pqos_cpuinfo *p_cpu = NULL;
const struct pqos_cap *p_cap = NULL;
const struct pqos_capability *p_l3ca_cap = NULL;
unsigned *l3cat_ids = NULL;
unsigned l3cat_id_count = 0, i = 0;
int ret = 0, res = 0;
size_t num_cache_ways = 0;
unsigned clos_save = 0;
char err_buf[64];
#ifdef __linux__
cpu_set_t cpuset_save, cpuset;
#endif
#ifdef __FreeBSD__
cpuset_t cpuset_save, cpuset;
#endif
if (m_chunk_start != NULL)
return -1;
	if (size == 0)
		return -2;
if (ptr != NULL) {
m_chunk_start = ptr;
m_is_chunk_allocated = 0;
} else {
		/**
		 * For best results the allocated memory should be
		 * physically contiguous, but that would require
		 * allocating it in kernel space or using huge pages.
		 * malloc() and 4K pages are used here for simplicity.
		 */
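		/**
		 * A huge-page variant (a sketch, Linux-specific and not
		 * used here) could improve physical contiguity:
		 *
		 *   void *p = mmap(NULL, size, PROT_READ | PROT_WRITE,
		 *                  MAP_PRIVATE | MAP_ANONYMOUS |
		 *                  MAP_HUGETLB, -1, 0);
		 *
		 * with a fallback to malloc() when it returns MAP_FAILED.
		 */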
m_chunk_start = malloc(size);
if (m_chunk_start == NULL)
return -3;
m_is_chunk_allocated = 1;
mem_init(m_chunk_start, size);
}
m_chunk_size = size;
/**
* Get task affinity to restore it later
*/
#ifdef __linux__
res = sched_getaffinity(0, sizeof(cpuset_save), &cpuset_save);
#endif
#ifdef __FreeBSD__
res = cpuset_getaffinity(CPU_LEVEL_WHICH, CPU_WHICH_TID, -1,
sizeof(cpuset_save), &cpuset_save);
#endif
if (res != 0) {
snprintf(err_buf, sizeof(err_buf), "%s() error", __func__);
perror(err_buf);
ret = -4;
goto dlock_init_error1;
}
/**
* Set task affinity to cpuid for data locking phase
*/
CPU_ZERO(&cpuset);
CPU_SET(cpuid, &cpuset);
#ifdef __linux__
res = sched_setaffinity(0, sizeof(cpuset), &cpuset);
#endif
#ifdef __FreeBSD__
res = cpuset_setaffinity(CPU_LEVEL_WHICH, CPU_WHICH_TID, -1,
sizeof(cpuset), &cpuset);
#endif
if (res != 0) {
snprintf(err_buf, sizeof(err_buf), "%s() error", __func__);
perror(err_buf);
ret = -4;
goto dlock_init_error1;
}
/**
* Clear table for restoring CAT configuration
*/
for (i = 0; i < DIM(m_l3cat_cos); i++) {
m_l3cat_cos[i].id = 0;
m_l3cat_cos[i].cos_tab = NULL;
}
/**
* Retrieve CPU topology and PQoS capabilities
*/
res = pqos_cap_get(&p_cap, &p_cpu);
if (res != PQOS_RETVAL_OK) {
ret = -5;
goto dlock_init_error2;
}
/**
* Retrieve list of CPU l3cat_ids
*/
l3cat_ids = pqos_cpu_get_l3cat_ids(p_cpu, &l3cat_id_count);
if (l3cat_ids == NULL) {
ret = -6;
goto dlock_init_error2;
}
/**
* Get CAT capability structure
*/
res = pqos_cap_get_type(p_cap, PQOS_CAP_TYPE_L3CA, &p_l3ca_cap);
if (res != PQOS_RETVAL_OK) {
ret = -7;
goto dlock_init_error2;
}
/**
* Compute number of cache ways required for the data
*/
res = bytes_to_cache_ways(p_l3ca_cap, size, &num_cache_ways);
if (res != 0) {
ret = -8;
goto dlock_init_error2;
}
/**
* Compute class bit mask for data lock and
* retrieve number of classes of service
*/
m_num_clos = p_l3ca_cap->u.l3ca->num_classes;
for (i = 0; i < l3cat_id_count; i++) {
		/**
		 * It would be enough to run the code below only for the
		 * l3cat_id corresponding to \a cpuid. Yet it is safer to
		 * keep CLOS definitions coherent across all l3cat_ids.
		 */
const uint64_t dlock_mask = (1ULL << num_cache_ways) - 1ULL;
ASSERT(m_num_clos > 0);
struct pqos_l3ca cos[m_num_clos];
unsigned num = 0, j;
		/* get current CAT classes on this l3cat_id */
res = pqos_l3ca_get(l3cat_ids[i], m_num_clos, &num, &cos[0]);
if (res != PQOS_RETVAL_OK) {
printf("pqos_l3ca_get() error!\n");
ret = -9;
goto dlock_init_error2;
}
/* paranoia check */
if (m_num_clos != num) {
printf("CLOS number mismatch!\n");
ret = -9;
goto dlock_init_error2;
}
/* save CAT classes to restore it later */
m_l3cat_cos[i].id = l3cat_ids[i];
m_l3cat_cos[i].cos_tab = malloc(m_num_clos * sizeof(cos[0]));
if (m_l3cat_cos[i].cos_tab == NULL) {
printf("malloc() error!\n");
ret = -9;
goto dlock_init_error2;
}
memcpy(m_l3cat_cos[i].cos_tab, cos,
m_num_clos * sizeof(cos[0]));
/**
* Modify the classes in the following way:
* if class_id == clos then
* set class mask so that it has exclusive access to
* \a num_cache_ways
* else
* exclude class from accessing \a num_cache_ways
*/
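		/*
		 * Worked example (hypothetical): on a 12-way cache with
		 * num_cache_ways = 2, dlock_mask = 0x003; the selected
		 * class gets mask 0x003 while every other class mask,
		 * e.g. 0xfff, is trimmed to 0xffc.
		 */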
for (j = 0; j < m_num_clos; j++) {
if (cos[j].cdp) {
if (cos[j].class_id == (unsigned)clos) {
cos[j].u.s.code_mask = dlock_mask;
cos[j].u.s.data_mask = dlock_mask;
} else {
cos[j].u.s.code_mask &= ~dlock_mask;
cos[j].u.s.data_mask &= ~dlock_mask;
}
} else {
if (cos[j].class_id == (unsigned)clos)
cos[j].u.ways_mask = dlock_mask;
else
cos[j].u.ways_mask &= ~dlock_mask;
}
}
res = pqos_l3ca_set(l3cat_ids[i], m_num_clos, &cos[0]);
if (res != PQOS_RETVAL_OK) {
printf("pqos_l3ca_set() error!\n");
ret = -10;
goto dlock_init_error2;
}
}
/**
* Read current cpuid CLOS association and set the new one
*/
res = pqos_alloc_assoc_get(cpuid, &clos_save);
if (res != PQOS_RETVAL_OK) {
printf("pqos_alloc_assoc_get() error!\n");
ret = -11;
goto dlock_init_error2;
}
res = pqos_alloc_assoc_set(cpuid, clos);
if (res != PQOS_RETVAL_OK) {
printf("pqos_alloc_assoc_set() error!\n");
ret = -12;
goto dlock_init_error2;
}
	/**
	 * Remove the buffer data from the cache hierarchy and then read
	 * it back into the selected cache ways.
	 * WBINVD is another option for removing data from the cache but
	 * it is a privileged instruction and as such has to be executed
	 * in kernel space.
	 */
mem_flush(m_chunk_start, m_chunk_size);
	/**
	 * Read the data a number of times. This may help because the
	 * code runs in user space, where it can be preempted and the
	 * data evicted from the cache hierarchy in the meantime.
	 * Ideally, all locking should be done at a privileged level
	 * with full system control.
	 */
for (i = 0; i < 10; i++)
mem_read(m_chunk_start, m_chunk_size);
/**
* Restore cpuid clos association
*/
res = pqos_alloc_assoc_set(cpuid, clos_save);
if (res != PQOS_RETVAL_OK) {
printf("pqos_alloc_assoc_set() error (revert)!\n");
ret = -13;
goto dlock_init_error2;
}
dlock_init_error2:
	for (i = 0; (i < DIM(m_l3cat_cos)) && (ret != 0); i++) {
		if (m_l3cat_cos[i].cos_tab != NULL)
			free(m_l3cat_cos[i].cos_tab);
		m_l3cat_cos[i].cos_tab = NULL;
		m_l3cat_cos[i].id = 0;
	}
#ifdef __linux__
res = sched_setaffinity(0, sizeof(cpuset_save), &cpuset_save);
#endif
#ifdef __FreeBSD__
res = cpuset_setaffinity(CPU_LEVEL_WHICH, CPU_WHICH_TID, -1,
sizeof(cpuset_save), &cpuset_save);
#endif
if (res != 0) {
snprintf(err_buf, sizeof(err_buf),
"%s() error restoring affinity", __func__);
perror(err_buf);
}
dlock_init_error1:
if (m_is_chunk_allocated && ret != 0)
free(m_chunk_start);
if (ret != 0) {
m_chunk_start = NULL;
m_chunk_size = 0;
m_is_chunk_allocated = 0;
}
if (l3cat_ids != NULL)
free(l3cat_ids);
return ret;
}
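/*
 * Example usage (a minimal sketch; the 128 KB size, CLOS 1 and core 0 are
 * arbitrary choices and error handling is abbreviated):
 *
 *   struct pqos_config cfg;
 *   char *buf = malloc(128 * 1024);
 *
 *   memset(&cfg, 0, sizeof(cfg));
 *   if (pqos_init(&cfg) != PQOS_RETVAL_OK)
 *           return -1;
 *   if (buf != NULL && dlock_init(buf, 128 * 1024, 1, 0) == 0) {
 *           ... latency sensitive processing on buf ...
 *           dlock_exit();
 *   }
 *   free(buf);
 *   pqos_fini();
 */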
int dlock_exit(void)
{
int ret = 0;
unsigned i;
if (m_chunk_start == NULL)
return -1;
for (i = 0; i < DIM(m_l3cat_cos); i++) {
if (m_l3cat_cos[i].cos_tab != NULL) {
int res = pqos_l3ca_set(m_l3cat_cos[i].id, m_num_clos,
m_l3cat_cos[i].cos_tab);
if (res != PQOS_RETVAL_OK)
ret = -2;
}
free(m_l3cat_cos[i].cos_tab);
m_l3cat_cos[i].cos_tab = NULL;
m_l3cat_cos[i].id = 0;
}
if (m_is_chunk_allocated)
free(m_chunk_start);
m_chunk_start = NULL;
m_chunk_size = 0;
m_is_chunk_allocated = 0;
return ret;
}