/*
* BSD LICENSE
*
* Copyright(c) 2014-2017 Intel Corporation. All rights reserved.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* * Neither the name of Intel Corporation nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/**
* @brief Implementation of PQoS monitoring API.
*
* CPUID and MSR operations are done on 'local' system.
*
*/
#include <stdlib.h>
#include <string.h>
#include <pthread.h>
#include <dirent.h>
#include "pqos.h"
#include "cap.h"
#include "monitoring.h"
#include "os_monitoring.h"
#include "machine.h"
#include "types.h"
#include "log.h"
/**
* ---------------------------------------
* Local macros
* ---------------------------------------
*/
/**
* Allocation & Monitoring association MSR register
* - bits [63..32] QE COS
* - bits [31..10] Reserved
* - bits [9..0] RMID
*
* mon_assoc_set() keeps the QE COS bits and rewrites only the RMID field.
*/
#define PQOS_MSR_ASSOC 0xC8F
#define PQOS_MSR_ASSOC_QECOS_SHIFT 32
#define PQOS_MSR_ASSOC_QECOS_MASK 0xffffffff00000000ULL
#define PQOS_MSR_ASSOC_RMID_MASK ((1ULL << 10) - 1ULL)
/**
* Monitoring data read MSR register
* - bit 63 error flag (mon_read() reports it as a read failure)
* - bit 62 data unavailable flag (mon_read() retries the read)
* - bits [61..0] counter data
*/
#define PQOS_MSR_MON_QMC 0xC8E
#define PQOS_MSR_MON_QMC_DATA_MASK ((1ULL << 62) - 1ULL)
#define PQOS_MSR_MON_QMC_ERROR (1ULL << 63)
#define PQOS_MSR_MON_QMC_UNAVAILABLE (1ULL << 62)
/**
* Monitoring event selection MSR register
* - bits [63..42] Reserved
* - bits [41..32] RMID
* - bits [31..8] Reserved
* - bits [7..0] Event ID
*/
#define PQOS_MSR_MON_EVTSEL 0xC8D
#define PQOS_MSR_MON_EVTSEL_RMID_SHIFT 32
#define PQOS_MSR_MON_EVTSEL_RMID_MASK ((1ULL << 10) - 1ULL)
#define PQOS_MSR_MON_EVTSEL_EVTID_MASK ((1ULL << 8) - 1ULL)
/**
* Allocation class of service (COS) MSR registers
*
* PQOS_MSR_L3CA_MASK_NUMOF is the number of registers in the inclusive
* range [PQOS_MSR_L3CA_MASK_START, PQOS_MSR_L3CA_MASK_END].
*/
#define PQOS_MSR_L3CA_MASK_START 0xC90
#define PQOS_MSR_L3CA_MASK_END 0xD8F
#define PQOS_MSR_L3CA_MASK_NUMOF \
(PQOS_MSR_L3CA_MASK_END - PQOS_MSR_L3CA_MASK_START + 1)
/**
* MSRs used to read instructions retired, unhalted cycles,
* LLC references and LLC misses.
* These MSRs are needed to calculate IPC (instructions per clock) and
* LLC miss ratio.
*
* When programming IA32_MSR_PERFEVTSEL0, IA32_EVENT_LLC_MISS_MASK is placed
* in bits [7..0] and IA32_EVENT_LLC_MISS_UMASK is shifted into bits [15..8].
*/
#define IA32_MSR_INST_RETIRED_ANY 0x309
#define IA32_MSR_CPU_UNHALTED_THREAD 0x30A
#define IA32_MSR_FIXED_CTR_CTRL 0x38D
#define IA32_MSR_PERF_GLOBAL_CTRL 0x38F
#define IA32_MSR_PMC0 0x0C1
#define IA32_MSR_PERFEVTSEL0 0x186
#define IA32_EVENT_LLC_MISS_MASK 0x2EULL
#define IA32_EVENT_LLC_MISS_UMASK 0x41ULL
/**
* Special RMID - after reset all cores are associated with it.
*
* The assumption is that if a core is not assigned to it
* then the core is subject to monitoring activity by a different process.
*/
#define RMID0 (0)
/**
* Max value of the memory bandwidth data = 2^24
* assuming there is 24 bit space available.
*
* get_delta() uses it to compensate for a counter wrap-around.
*/
#define MBM_MAX_VALUE (1 << 24)
/**
* ---------------------------------------
* Local data types
* ---------------------------------------
*/
/**
* ---------------------------------------
* Local data structures
* ---------------------------------------
*/
/* Set by pqos_mon_init() and cleared back to NULL by pqos_mon_fini() */
static const struct pqos_cap *m_cap = NULL; /**< capabilities structure
passed from cap */
/* Set by pqos_mon_init() and cleared back to NULL by pqos_mon_fini() */
static const struct pqos_cpuinfo *m_cpu = NULL; /**< cpu topology passed
from cap */
/* Copied from the monitoring capability in pqos_mon_init(); 0 after fini */
static unsigned m_rmid_max = 0; /**< max RMID */
#ifdef __linux__
/* Interface selected in the configuration passed to pqos_mon_init();
* decides whether the OS monitoring back-end is shut down in pqos_mon_fini() */
static int m_interface = PQOS_INTER_MSR;
#endif
/**
* ---------------------------------------
* Local Functions
* ---------------------------------------
*/
/*
 * Forward declarations of the file-local helpers defined further below.
 */
static int
mon_assoc_set(const unsigned lcore,
              const pqos_rmid_t rmid);
static int
mon_assoc_get(const unsigned lcore,
              pqos_rmid_t *rmid);
/*
 * Note: \a event is the MSR event id (as returned by get_event_id()),
 * not a pqos_mon_event bit mask. It is declared 'unsigned' so that the
 * prototype is identical to the definition; whether an enumerated type is
 * compatible with 'unsigned' is implementation-defined.
 */
static int
mon_read(const unsigned lcore,
         const pqos_rmid_t rmid,
         const unsigned event,
         uint64_t *value);
static int
pqos_core_poll(struct pqos_mon_data *group);
static int
rmid_alloc(const unsigned cluster,
           const enum pqos_mon_event event,
           pqos_rmid_t *rmid);
static unsigned
get_event_id(const enum pqos_mon_event event);
static uint64_t
get_delta(const uint64_t old_value, const uint64_t new_value);
static uint64_t
scale_event(const enum pqos_mon_event event, const uint64_t val);
/*
* =======================================
* =======================================
*
* initialize and shutdown
*
* =======================================
* =======================================
*/
int
pqos_mon_init(const struct pqos_cpuinfo *cpu,
const struct pqos_cap *cap,
const struct pqos_config *cfg)
{
const struct pqos_capability *item = NULL;
int ret;
ASSERT(cfg != NULL);
/**
* If monitoring capability has been discovered
* then get max RMID supported by a CPU socket
* and allocate memory for RMID table
*/
ret = pqos_cap_get_type(cap, PQOS_CAP_TYPE_MON, &item);
if (ret != PQOS_RETVAL_OK) {
ret = PQOS_RETVAL_RESOURCE;
goto pqos_mon_init_exit;
}
ASSERT(item != NULL);
m_rmid_max = item->u.mon->max_rmid;
if (m_rmid_max == 0) {
pqos_mon_fini();
return PQOS_RETVAL_PARAM;
}
LOG_DEBUG("Max RMID per monitoring cluster is %u\n", m_rmid_max);
#ifdef __linux__
if (cfg->interface == PQOS_INTER_OS ||
cfg->interface == PQOS_INTER_OS_RESCTRL_MON)
ret = os_mon_init(cpu, cap);
if (ret != PQOS_RETVAL_OK)
return ret;
#endif
pqos_mon_init_exit:
m_cpu = cpu;
m_cap = cap;
#ifdef __linux__
m_interface = cfg->interface;
#else
UNUSED_PARAM(cfg);
#endif
return ret;
}
/**
 * @brief Shuts down the monitoring module
 *
 * Resets the module state and, on Linux with an OS interface selected,
 * shuts down the OS monitoring back-end.
 *
 * @return Operation status
 * @retval PQOS_RETVAL_OK on success (or status of os_mon_fini())
 */
int
pqos_mon_fini(void)
{
        int status = PQOS_RETVAL_OK;

        m_rmid_max = 0;
#ifdef __linux__
        {
                const int os_backend =
                        (m_interface == PQOS_INTER_OS ||
                         m_interface == PQOS_INTER_OS_RESCTRL_MON);

                if (os_backend)
                        status = os_mon_fini();
        }
#endif
        m_cpu = NULL;
        m_cap = NULL;

        return status;
}
/*
* =======================================
* =======================================
*
* RMID allocation
*
* =======================================
* =======================================
*/
/**
 * @brief Picks an RMID usable for \a event that no core of \a cluster uses
 *
 * The highest usable RMID is limited by the module-wide max RMID and by the
 * max RMID of every requested event listed in the monitoring capability.
 * Candidates are tried from the highest one downwards.
 *
 * @param [in] cluster CPU cluster id
 * @param [in] event mask of monitoring events
 * @param [out] rmid place to store the selected resource monitoring id
 *
 * @return Operation status
 * @retval PQOS_RETVAL_OK on success
 * @retval PQOS_RETVAL_PARAM if \a rmid is NULL
 * @retval PQOS_RETVAL_RESOURCE on memory allocation failure
 * @retval PQOS_RETVAL_ERROR if an event is not supported, the cluster has no
 *         cores or no free RMID was found
 */
static int
rmid_alloc(const unsigned cluster,
           const enum pqos_mon_event event,
           pqos_rmid_t *rmid)
{
        const struct pqos_capability *cap_item = NULL;
        const struct pqos_cap_mon *mon_cap = NULL;
        unsigned *cluster_cores = NULL;
        pqos_rmid_t *in_use = NULL;
        unsigned num_cluster_cores;
        unsigned supported = 0;
        unsigned limit, candidate, idx;
        int status;

        if (rmid == NULL)
                return PQOS_RETVAL_PARAM;

        /* Locate the monitoring capability to learn per-event RMID limits */
        ASSERT(m_cap != NULL);
        status = pqos_cap_get_type(m_cap, PQOS_CAP_TYPE_MON, &cap_item);
        if (status != PQOS_RETVAL_OK)
                return status;
        ASSERT(cap_item != NULL);
        mon_cap = cap_item->u.mon;

        /* Collect supported events and the tightest RMID limit among them */
        limit = m_rmid_max;
        for (idx = 0; idx < mon_cap->num_events; idx++) {
                if (!(event & mon_cap->events[idx].type))
                        continue;
                supported |= mon_cap->events[idx].type;
                if (limit > mon_cap->events[idx].max_rmid)
                        limit = mon_cap->events[idx].max_rmid;
        }

        /* Every requested event has to be supported */
        if (event != supported || limit == 0)
                return PQOS_RETVAL_ERROR;
        ASSERT(m_rmid_max >= limit);

        /* Snapshot the RMIDs currently associated with the cluster cores */
        cluster_cores = pqos_cpu_get_cores_l3id(m_cpu, cluster,
                                                &num_cluster_cores);
        if (cluster_cores == NULL)
                return PQOS_RETVAL_ERROR;
        ASSERT(num_cluster_cores > 0);

        in_use = (pqos_rmid_t *)malloc(sizeof(in_use[0]) * num_cluster_cores);
        if (in_use == NULL) {
                status = PQOS_RETVAL_RESOURCE;
                goto rmid_alloc_cleanup;
        }

        for (idx = 0; idx < num_cluster_cores; idx++) {
                status = mon_assoc_get(cluster_cores[idx], &in_use[idx]);
                if (status != PQOS_RETVAL_OK)
                        goto rmid_alloc_cleanup;
        }

        /*
         * Walk candidates from (limit - 1) down to 0 so that low RMID
         * values stay free for events with smaller RMID ranges.
         */
        status = PQOS_RETVAL_ERROR;
        candidate = limit;
        while (candidate > 0) {
                unsigned k = 0;

                candidate--;
                while (k < num_cluster_cores && candidate != in_use[k])
                        k++;
                if (k >= num_cluster_cores) {
                        /* nobody in the cluster uses this RMID */
                        *rmid = candidate;
                        status = PQOS_RETVAL_OK;
                        break;
                }
        }

rmid_alloc_cleanup:
        free(in_use);
        free(cluster_cores);
        return status;
}
/*
* =======================================
* =======================================
*
* Monitoring
*
* =======================================
* =======================================
*/
/**
 * @brief Applies the event scale factor to a raw counter value
 *
 * The scale factor is taken from the event entry returned by
 * pqos_cap_get_event() for \a event.
 *
 * @param event event whose scale factor is to be used
 * @param val raw value to be scaled
 *
 * @return \a val multiplied by the event scale factor,
 *         or \a val unchanged when the event look-up fails
 */
static uint64_t
scale_event(const enum pqos_mon_event event, const uint64_t val)
{
        const struct pqos_monitor *mon_evt;
        const int status = pqos_cap_get_event(m_cap, event, &mon_evt);

        ASSERT(status == PQOS_RETVAL_OK);
        if (status == PQOS_RETVAL_OK)
                return val * mon_evt->scale_factor;

        return val;
}
/**
 * @brief Writes \a rmid into the association register of \a lcore
 *
 * Read-modify-write of PQOS_MSR_ASSOC: the class of service bits are kept,
 * the RMID field is replaced.
 *
 * This function doesn't acquire API lock
 * and can be used internally when lock is already taken.
 *
 * @param lcore logical core id
 * @param rmid resource monitoring ID
 *
 * @return Operation status
 * @retval PQOS_RETVAL_OK on success
 * @retval PQOS_RETVAL_ERROR if the MSR read or write fails
 */
static int
mon_assoc_set(const unsigned lcore,
              const pqos_rmid_t rmid)
{
        const uint32_t assoc_reg = PQOS_MSR_ASSOC;
        uint64_t assoc = 0;

        if (msr_read(lcore, assoc_reg, &assoc) != MACHINE_RETVAL_OK)
                return PQOS_RETVAL_ERROR;

        /* keep COS bits, drop everything else and insert new RMID */
        assoc = (assoc & PQOS_MSR_ASSOC_QECOS_MASK) |
                (uint64_t)(rmid & PQOS_MSR_ASSOC_RMID_MASK);

        if (msr_write(lcore, assoc_reg, assoc) != MACHINE_RETVAL_OK)
                return PQOS_RETVAL_ERROR;

        return PQOS_RETVAL_OK;
}
/**
 * @brief Reads the RMID currently associated with \a lcore
 *
 * @param lcore logical core id
 * @param rmid place to store RMID \a lcore is assigned to
 *
 * @return Operation status
 * @retval PQOS_RETVAL_OK success
 * @retval PQOS_RETVAL_ERROR if the MSR read fails
 */
static int
mon_assoc_get(const unsigned lcore,
              pqos_rmid_t *rmid)
{
        const uint32_t assoc_reg = PQOS_MSR_ASSOC;
        uint64_t assoc = 0;

        ASSERT(rmid != NULL);

        if (msr_read(lcore, assoc_reg, &assoc) != MACHINE_RETVAL_OK)
                return PQOS_RETVAL_ERROR;

        /* only the RMID field of the register is of interest */
        *rmid = (pqos_rmid_t) (assoc & PQOS_MSR_ASSOC_RMID_MASK);

        return PQOS_RETVAL_OK;
}
/**
 * @brief Validated read of the RMID associated with \a lcore
 *
 * @param lcore logical core id
 * @param rmid place to store RMID \a lcore is assigned to
 *
 * @return Operation status
 * @retval PQOS_RETVAL_OK on success
 * @retval PQOS_RETVAL_PARAM if \a rmid is NULL or \a lcore is not valid
 */
int
hw_mon_assoc_get(const unsigned lcore,
                 pqos_rmid_t *rmid)
{
        const int init_status = _pqos_check_init(1);

        if (init_status != PQOS_RETVAL_OK)
                return init_status;

        if (rmid == NULL)
                return PQOS_RETVAL_PARAM;

        ASSERT(m_cpu != NULL);
        if (pqos_cpu_check_core(m_cpu, lcore) != PQOS_RETVAL_OK)
                return PQOS_RETVAL_PARAM;

        return mon_assoc_get(lcore, rmid);
}
/**
 * @brief Associates every core in the topology with RMID0
 *
 * All cores are processed even if some of them fail.
 *
 * @return Operation status
 * @retval PQOS_RETVAL_OK on success, otherwise status of the last
 *         failed association
 */
int hw_mon_reset(void)
{
        int status = _pqos_check_init(1);
        unsigned idx = 0;

        if (status != PQOS_RETVAL_OK)
                return status;

        ASSERT(m_cpu != NULL);
        while (idx < m_cpu->num_cores) {
                const int core_status =
                        mon_assoc_set(m_cpu->cores[idx].lcore, RMID0);

                if (core_status != PQOS_RETVAL_OK)
                        status = core_status;
                idx++;
        }

        return status;
}
/**
* @brief Reads monitoring event data from given core
*
* This function doesn't acquire API lock.
*
* @param lcore logical core id
* @param rmid RMID to be read
* @param event monitoring event
* @param value place to store read value
*
* @return Operation status
* @retval PQOS_RETVAL_OK on success
*/
static int
mon_read(const unsigned lcore,
const pqos_rmid_t rmid,
const unsigned event,
uint64_t *value)
{
int retries = 3, retval = PQOS_RETVAL_OK;
uint32_t reg = 0;
uint64_t val = 0;
/**
* Set event selection register (RMID + event id)
*/
reg = PQOS_MSR_MON_EVTSEL;
val = ((uint64_t)rmid) & PQOS_MSR_MON_EVTSEL_RMID_MASK;
val <<= PQOS_MSR_MON_EVTSEL_RMID_SHIFT;
val |= ((uint64_t)event) & PQOS_MSR_MON_EVTSEL_EVTID_MASK;
if (msr_write(lcore, reg, val) != MACHINE_RETVAL_OK)
return PQOS_RETVAL_ERROR;
/**
* read selected data associated with previously selected RMID+event
*/
reg = PQOS_MSR_MON_QMC;
do {
if (msr_read(lcore, reg, &val) != MACHINE_RETVAL_OK) {
retval = PQOS_RETVAL_ERROR;
break;
}
if ((val&(PQOS_MSR_MON_QMC_ERROR)) != 0ULL) {
/**
* Unsupported event id or RMID selected
*/
retval = PQOS_RETVAL_ERROR;
break;
}
retries--;
} while ((val&PQOS_MSR_MON_QMC_UNAVAILABLE) != 0ULL && retries > 0);
/**
* Store event value
*/
if (retval == PQOS_RETVAL_OK)
*value = (val & PQOS_MSR_MON_QMC_DATA_MASK);
else
LOG_WARN("Error reading event %u on core %u (RMID%u)!\n",
event, lcore, (unsigned) rmid);
return retval;
}
/**
* @brief Reads monitoring event data from given core
*
* @param p pointer to monitoring structure
*
* @return Operation status
* @retval PQOS_RETVAL_OK on success
*/
static int
pqos_core_poll(struct pqos_mon_data *p)
{
struct pqos_event_values *pv = &p->values;
int retval = PQOS_RETVAL_OK;
unsigned i;
if (p->event & PQOS_MON_EVENT_L3_OCCUP) {
uint64_t total = 0;
for (i = 0; i < p->num_poll_ctx; i++) {
uint64_t tmp = 0;
int ret;
ret = mon_read(p->poll_ctx[i].lcore,
p->poll_ctx[i].rmid,
get_event_id(PQOS_MON_EVENT_L3_OCCUP),
&tmp);
if (ret != PQOS_RETVAL_OK) {
retval = PQOS_RETVAL_ERROR;
goto pqos_core_poll__exit;
}
total += tmp;
}
pv->llc = scale_event(PQOS_MON_EVENT_L3_OCCUP, total);
}
if (p->event & (PQOS_MON_EVENT_LMEM_BW | PQOS_MON_EVENT_RMEM_BW)) {
uint64_t total = 0, old_value = pv->mbm_local;
for (i = 0; i < p->num_poll_ctx; i++) {
uint64_t tmp = 0;
int ret;
ret = mon_read(p->poll_ctx[i].lcore,
p->poll_ctx[i].rmid,
get_event_id(PQOS_MON_EVENT_LMEM_BW),
&tmp);
if (ret != PQOS_RETVAL_OK) {
retval = PQOS_RETVAL_ERROR;
goto pqos_core_poll__exit;
}
total += tmp;
}
pv->mbm_local = total;
pv->mbm_local_delta = get_delta(old_value, pv->mbm_local);
pv->mbm_local_delta = scale_event(PQOS_MON_EVENT_LMEM_BW,
pv->mbm_local_delta);
}
if (p->event & (PQOS_MON_EVENT_TMEM_BW | PQOS_MON_EVENT_RMEM_BW)) {
uint64_t total = 0, old_value = pv->mbm_total;
for (i = 0; i < p->num_poll_ctx; i++) {
uint64_t tmp = 0;
int ret;
ret = mon_read(p->poll_ctx[i].lcore,
p->poll_ctx[i].rmid,
get_event_id(PQOS_MON_EVENT_TMEM_BW),
&tmp);
if (ret != PQOS_RETVAL_OK) {
retval = PQOS_RETVAL_ERROR;
goto pqos_core_poll__exit;
}
total += tmp;
}
pv->mbm_total = total;
pv->mbm_total_delta = get_delta(old_value, pv->mbm_total);
pv->mbm_total_delta = scale_event(PQOS_MON_EVENT_TMEM_BW,
pv->mbm_total_delta);
}
if (p->event & PQOS_MON_EVENT_RMEM_BW) {
pv->mbm_remote = 0;
if (pv->mbm_total > pv->mbm_local)
pv->mbm_remote = pv->mbm_total - pv->mbm_local;
pv->mbm_remote_delta = 0;
if (pv->mbm_total_delta > pv->mbm_local_delta)
pv->mbm_remote_delta =
pv->mbm_total_delta - pv->mbm_local_delta;
}
if (p->event & PQOS_PERF_EVENT_IPC) {
/**
* If multiple cores monitored in one group
* then we have to accumulate the values in the group.
*/
uint64_t unhalted = 0, retired = 0;
unsigned n;
for (n = 0; n < p->num_cores; n++) {
uint64_t tmp = 0;
int ret = msr_read(p->cores[n],
IA32_MSR_INST_RETIRED_ANY, &tmp);
if (ret != MACHINE_RETVAL_OK) {
retval = PQOS_RETVAL_ERROR;
goto pqos_core_poll__exit;
}
retired += tmp;
ret = msr_read(p->cores[n],
IA32_MSR_CPU_UNHALTED_THREAD, &tmp);
if (ret != MACHINE_RETVAL_OK) {
retval = PQOS_RETVAL_ERROR;
goto pqos_core_poll__exit;
}
unhalted += tmp;
}
pv->ipc_unhalted_delta = unhalted - pv->ipc_unhalted;
pv->ipc_retired_delta = retired - pv->ipc_retired;
pv->ipc_unhalted = unhalted;
pv->ipc_retired = retired;
if (pv->ipc_unhalted_delta == 0)
pv->ipc = 0.0;
else
pv->ipc = (double) pv->ipc_retired_delta /
(double) pv->ipc_unhalted_delta;
}
if (p->event & PQOS_PERF_EVENT_LLC_MISS) {
/**
* If multiple cores monitored in one group
* then we have to accumulate the values in the group.
*/
uint64_t missed = 0;
unsigned n;
for (n = 0; n < p->num_cores; n++) {
uint64_t tmp = 0;
int ret = msr_read(p->cores[n],
IA32_MSR_PMC0, &tmp);
if (ret != MACHINE_RETVAL_OK) {
retval = PQOS_RETVAL_ERROR;
goto pqos_core_poll__exit;
}
missed += tmp;
}
pv->llc_misses_delta = missed - pv->llc_misses;
pv->llc_misses = missed;
}
if (!p->valid_mbm_read) {
/* Report zero memory bandwidth with first read */
pv->mbm_remote_delta = 0;
pv->mbm_local_delta = 0;
pv->mbm_total_delta = 0;
p->valid_mbm_read = 1;
}
pqos_core_poll__exit:
return retval;
}
/**
 * @brief Programs and enables IA32 performance counters for IPC and
 *        LLC miss events
 *
 * Fixed counters 0 and 1 serve IPC, programmable counter 0 serves
 * LLC misses. A warning is logged if the requested counters are already
 * enabled on a core; they are reprogrammed anyway.
 *
 * @param num_cores number of cores in \a cores table
 * @param cores table with core id's
 * @param event mask of selected monitoring events
 *
 * @return Operation status
 * @retval PQOS_RETVAL_OK on success or when no perf event is selected
 * @retval PQOS_RETVAL_ERROR if any MSR access fails
 */
static int
ia32_perf_counter_start(const unsigned num_cores,
                        const unsigned *cores,
                        const enum pqos_mon_event event)
{
        const int want_ipc = (event & PQOS_PERF_EVENT_IPC) != 0;
        const int want_miss = (event & PQOS_PERF_EVENT_LLC_MISS) != 0;
        const uint64_t fixed_ctrl = 0x33ULL; /**< track usr + os */
        const uint64_t evtsel0_miss = IA32_EVENT_LLC_MISS_MASK |
                (IA32_EVENT_LLC_MISS_UMASK << 8) |
                (1ULL << 16) | (1ULL << 17) | (1ULL << 22);
        uint64_t enable_mask = 0;
        unsigned c;

        ASSERT(cores != NULL && num_cores > 0);

        if (!want_ipc && !want_miss)
                return PQOS_RETVAL_OK;

        if (want_ipc)
                enable_mask |= (0x3ULL << 32); /**< fixed counters 0&1 */
        if (want_miss)
                enable_mask |= 0x1ULL; /**< programmable counter 0 */

        /**
         * Pass 1: warn about counters that are already enabled
         */
        for (c = 0; c < num_cores; c++) {
                uint64_t enabled = 0;

                if (msr_read(cores[c], IA32_MSR_PERF_GLOBAL_CTRL,
                             &enabled) != MACHINE_RETVAL_OK)
                        return PQOS_RETVAL_ERROR;
                if (enabled & enable_mask)
                        LOG_WARN("Hijacking performance counters on core %u\n",
                                 cores[c]);
        }

        /**
         * Pass 2, per core:
         * - disable counters in global control
         * - zero the counters and program the events
         * - enable the selected counters in global control
         */
        for (c = 0; c < num_cores; c++) {
                const unsigned core = cores[c];

                if (msr_write(core, IA32_MSR_PERF_GLOBAL_CTRL, 0) !=
                    MACHINE_RETVAL_OK)
                        return PQOS_RETVAL_ERROR;

                if (want_ipc) {
                        if (msr_write(core, IA32_MSR_INST_RETIRED_ANY, 0) !=
                            MACHINE_RETVAL_OK)
                                return PQOS_RETVAL_ERROR;
                        if (msr_write(core, IA32_MSR_CPU_UNHALTED_THREAD, 0) !=
                            MACHINE_RETVAL_OK)
                                return PQOS_RETVAL_ERROR;
                        if (msr_write(core, IA32_MSR_FIXED_CTR_CTRL,
                                      fixed_ctrl) != MACHINE_RETVAL_OK)
                                return PQOS_RETVAL_ERROR;
                }

                if (want_miss) {
                        if (msr_write(core, IA32_MSR_PMC0, 0) !=
                            MACHINE_RETVAL_OK)
                                return PQOS_RETVAL_ERROR;
                        if (msr_write(core, IA32_MSR_PERFEVTSEL0,
                                      evtsel0_miss) != MACHINE_RETVAL_OK)
                                return PQOS_RETVAL_ERROR;
                }

                if (msr_write(core, IA32_MSR_PERF_GLOBAL_CTRL,
                              enable_mask) != MACHINE_RETVAL_OK)
                        return PQOS_RETVAL_ERROR;
        }

        return PQOS_RETVAL_OK;
}
/**
 * @brief Disables IA32 performance counters
 *
 * Clears the global control register on every listed core. All cores are
 * processed even if some writes fail.
 *
 * @param num_cores number of cores in \a cores table
 * @param cores table with core id's
 * @param event mask of selected monitoring events
 *
 * @return Operation status
 * @retval PQOS_RETVAL_OK on success or when no perf event is selected
 * @retval PQOS_RETVAL_ERROR if any MSR write fails
 */
static int
ia32_perf_counter_stop(const unsigned num_cores,
                       const unsigned *cores,
                       const enum pqos_mon_event event)
{
        int status = PQOS_RETVAL_OK;
        unsigned c = 0;

        ASSERT(cores != NULL && num_cores > 0);

        /* nothing to do unless a perf counter based event was selected */
        if ((event & (PQOS_PERF_EVENT_LLC_MISS | PQOS_PERF_EVENT_IPC)) == 0)
                return status;

        while (c < num_cores) {
                if (msr_write(cores[c], IA32_MSR_PERF_GLOBAL_CTRL, 0) !=
                    MACHINE_RETVAL_OK)
                        status = PQOS_RETVAL_ERROR;
                c++;
        }

        return status;
}
/**
 * @brief Starts monitoring of \a event on the requested \a cores
 *
 * Steps:
 * - every event bit is validated against the capabilities
 * - every core is validated and has to be associated with RMID0
 * - one poll context (core, cluster, RMID) is created per cluster
 * - IA32 performance counters are started (IPC / LLC miss events)
 * - cores are associated with the RMID allocated for their cluster
 *
 * On failure after the group structure has been touched, cores are moved
 * back to RMID0, performance counters started by this call are stopped and
 * \a group is left zeroed (no dangling pointers).
 *
 * @param num_cores number of cores in \a cores table
 * @param cores table with core id's
 * @param event mask of selected monitoring events
 * @param context opaque pointer stored in the group
 * @param group monitoring group structure to be filled in
 *
 * @return Operation status
 * @retval PQOS_RETVAL_OK on success
 * @retval PQOS_RETVAL_PARAM on invalid event or core
 * @retval PQOS_RETVAL_RESOURCE if a core is already monitored or
 *         memory allocation fails
 */
int
hw_mon_start(const unsigned num_cores,
             const unsigned *cores,
             const enum pqos_mon_event event,
             void *context,
             struct pqos_mon_data *group)
{
        unsigned core2cluster[num_cores];
        struct pqos_mon_poll_ctx ctxs[num_cores];
        unsigned num_ctxs = 0;
        unsigned i = 0;
        int perf_started = 0;
        int ret = PQOS_RETVAL_OK;
        int retval = PQOS_RETVAL_OK;

        ASSERT(group != NULL);
        ASSERT(cores != NULL);
        ASSERT(num_cores > 0);
        ASSERT(event > 0);
        ASSERT(m_cpu != NULL);

        memset(ctxs, 0, sizeof(ctxs));

        /**
         * Validate if event is listed in capabilities
         */
        for (i = 0; i < (sizeof(event) * 8); i++) {
                /* unsigned shift: (1 << 31) would overflow a signed int */
                const enum pqos_mon_event evt_mask =
                        (enum pqos_mon_event)(1U << i);
                const struct pqos_monitor *ptr = NULL;

                if (!(evt_mask & event))
                        continue;

                ret = pqos_cap_get_event(m_cap, evt_mask, &ptr);
                if (ret != PQOS_RETVAL_OK || ptr == NULL)
                        return PQOS_RETVAL_PARAM;
        }

        /**
         * Check if all requested cores are valid
         * and not used by other monitoring processes.
         *
         * Initialize poll context table:
         * - get core cluster
         * - allocate RMID
         */
        for (i = 0; i < num_cores; i++) {
                const unsigned lcore = cores[i];
                unsigned j, cluster = 0;
                pqos_rmid_t rmid = RMID0;

                ret = pqos_cpu_check_core(m_cpu, lcore);
                if (ret != PQOS_RETVAL_OK) {
                        retval = PQOS_RETVAL_PARAM;
                        goto pqos_mon_start_error1;
                }

                ret = mon_assoc_get(lcore, &rmid);
                if (ret != PQOS_RETVAL_OK) {
                        retval = PQOS_RETVAL_PARAM;
                        goto pqos_mon_start_error1;
                }

                if (rmid != RMID0) {
                        /* If not RMID0 then it is already monitored */
                        LOG_INFO("Core %u is already monitored with "
                                 "RMID%u.\n", lcore, rmid);
                        retval = PQOS_RETVAL_RESOURCE;
                        goto pqos_mon_start_error1;
                }

                ret = pqos_cpu_get_clusterid(m_cpu, lcore, &cluster);
                if (ret != PQOS_RETVAL_OK) {
                        retval = PQOS_RETVAL_PARAM;
                        goto pqos_mon_start_error1;
                }
                core2cluster[i] = cluster;

                for (j = 0; j < num_ctxs; j++)
                        if (ctxs[j].lcore == lcore ||
                            ctxs[j].cluster == cluster)
                                break;

                if (j >= num_ctxs) {
                        /**
                         * New cluster is found
                         * - save cluster id in the table
                         * - allocate RMID for the cluster
                         *   (perf counter events do not need an RMID
                         *   and are masked out of the request)
                         */
                        ctxs[num_ctxs].lcore = lcore;
                        ctxs[num_ctxs].cluster = cluster;

                        ret = rmid_alloc(cluster,
                                         (enum pqos_mon_event)(event &
                                         (~(PQOS_PERF_EVENT_IPC |
                                            PQOS_PERF_EVENT_LLC_MISS))),
                                         &ctxs[num_ctxs].rmid);
                        if (ret != PQOS_RETVAL_OK) {
                                retval = ret;
                                goto pqos_mon_start_error1;
                        }

                        num_ctxs++;
                }
        }

        /**
         * Fill in the monitoring group structure
         */
        memset(group, 0, sizeof(*group));
        group->cores = (unsigned *) malloc(sizeof(group->cores[0]) * num_cores);
        if (group->cores == NULL) {
                retval = PQOS_RETVAL_RESOURCE;
                goto pqos_mon_start_error1;
        }

        group->poll_ctx = (struct pqos_mon_poll_ctx *)
                malloc(sizeof(group->poll_ctx[0]) * num_ctxs);
        if (group->poll_ctx == NULL) {
                retval = PQOS_RETVAL_RESOURCE;
                goto pqos_mon_start_error2;
        }

        ret = ia32_perf_counter_start(num_cores, cores, event);
        if (ret != PQOS_RETVAL_OK) {
                retval = ret;
                goto pqos_mon_start_error2;
        }
        /* from now on a failure has to switch the counters off again */
        perf_started = 1;

        /**
         * Associate requested cores with
         * the allocated RMID
         */
        group->num_cores = num_cores;
        for (i = 0; i < num_cores; i++) {
                unsigned cluster, j;
                pqos_rmid_t rmid;

                cluster = core2cluster[i];
                for (j = 0; j < num_ctxs; j++)
                        if (ctxs[j].cluster == cluster)
                                break;
                if (j >= num_ctxs) {
                        retval = PQOS_RETVAL_ERROR;
                        goto pqos_mon_start_error2;
                }
                rmid = ctxs[j].rmid;

                group->cores[i] = cores[i];
                ret = mon_assoc_set(cores[i], rmid);
                if (ret != PQOS_RETVAL_OK) {
                        retval = ret;
                        goto pqos_mon_start_error2;
                }
        }

        group->num_poll_ctx = num_ctxs;
        for (i = 0; i < num_ctxs; i++)
                group->poll_ctx[i] = ctxs[i];

        group->event = event;
        group->context = context;

pqos_mon_start_error2:
        if (retval != PQOS_RETVAL_OK) {
                /* all requested cores were at RMID0 when we started */
                for (i = 0; i < num_cores; i++)
                        (void) mon_assoc_set(cores[i], RMID0);

                if (perf_started)
                        (void) ia32_perf_counter_stop(num_cores, cores, event);

                free(group->poll_ctx);
                free(group->cores);
                /*
                 * Leave no dangling pointers or stale counts behind,
                 * a later hw_mon_stop() on this group must not free again.
                 */
                memset(group, 0, sizeof(*group));
        }
pqos_mon_start_error1:
        return retval;
}
int
hw_mon_stop(struct pqos_mon_data *group)
{
int ret = PQOS_RETVAL_OK;
int retval = PQOS_RETVAL_OK;
unsigned i = 0;
ASSERT(group != NULL);
if (group->num_cores == 0 || group->cores == NULL ||
group->num_poll_ctx == 0 || group->poll_ctx == NULL) {
return PQOS_RETVAL_PARAM;
}
ASSERT(m_cpu != NULL);
for (i = 0; i < group->num_poll_ctx; i++) {
/**
* Validate core list in the group structure is correct
*/
const unsigned lcore = group->poll_ctx[i].lcore;
pqos_rmid_t rmid = RMID0;
ret = pqos_cpu_check_core(m_cpu, lcore);
if (ret != PQOS_RETVAL_OK)
return PQOS_RETVAL_PARAM;
ret = mon_assoc_get(lcore, &rmid);
if (ret != PQOS_RETVAL_OK)
return PQOS_RETVAL_PARAM;
if (rmid != group->poll_ctx[i].rmid)
LOG_WARN("Core %u RMID association changed from %u "
"to %u! The core has been hijacked!\n",
lcore, group->poll_ctx[i].rmid, rmid);
}
for (i = 0; i < group->num_cores; i++) {
/**
* Associate cores from the group back with RMID0
*/
ret = mon_assoc_set(group->cores[i], RMID0);
if (ret != PQOS_RETVAL_OK)
retval = PQOS_RETVAL_RESOURCE;
}
/**
* Stop IA32 performance counters
*/
ret = ia32_perf_counter_stop(group->num_cores, group->cores,
group->event);
if (ret != PQOS_RETVAL_OK)
retval = PQOS_RETVAL_RESOURCE;
/**
* Free poll contexts, core list and clear the group structure
*/
free(group->cores);
free(group->poll_ctx);
memset(group, 0, sizeof(*group));
return retval;
}
/**
 * @brief Polls event values of all listed monitoring groups
 *
 * A group that fails to poll is reported with a warning and does not
 * stop processing of the remaining groups.
 *
 * @param groups table of pointers to monitoring groups
 * @param num_groups number of entries in \a groups
 *
 * @return Operation status
 * @retval PQOS_RETVAL_OK always
 */
int
hw_mon_poll(struct pqos_mon_data **groups,
            const unsigned num_groups)
{
        unsigned g;

        ASSERT(groups != NULL);
        ASSERT(num_groups > 0);

        for (g = 0; g < num_groups; g++) {
                struct pqos_mon_data *grp = groups[g];

                if (pqos_core_poll(grp) == PQOS_RETVAL_OK)
                        continue;

                LOG_WARN("Failed to read event on "
                         "core %u\n", grp->cores[0]);
        }

        return PQOS_RETVAL_OK;
}
/*
* =======================================
* =======================================
*
* Small utils
*
* =======================================
* =======================================
*/
/**
 * @brief Translates a PQoS API event into the MSR event id
 *
 * Only LLC occupancy, local and total memory bandwidth have an MSR event
 * id. Any other event (including remote memory bandwidth) is treated as
 * a programming error.
 *
 * @param [in] event PQoS API event id
 *
 * @return MSR event id
 * @retval 0 if not successful
 */
static unsigned
get_event_id(const enum pqos_mon_event event)
{
        unsigned msr_event_id = 0;

        if (event == PQOS_MON_EVENT_L3_OCCUP) {
                msr_event_id = 1;
        } else if (event == PQOS_MON_EVENT_TMEM_BW) {
                msr_event_id = 2;
        } else if (event == PQOS_MON_EVENT_LMEM_BW) {
                msr_event_id = 3;
        } else {
                ASSERT(0); /**< this means bug */
        }

        return msr_event_id;
}
/**
 * @brief Computes how much a counter advanced between two reads
 *
 * When the new value is smaller than the old one the counter is assumed
 * to have wrapped around once at MBM_MAX_VALUE.
 *
 * @param old_value previous value
 * @param new_value current value
 * @return difference between the two values
 */
static uint64_t
get_delta(const uint64_t old_value, const uint64_t new_value)
{
        return (new_value >= old_value) ?
                new_value - old_value :
                (MBM_MAX_VALUE - old_value) + new_value;
}