/* * BSD LICENSE * * Copyright(c) 2014-2017 Intel Corporation. All rights reserved. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in * the documentation and/or other materials provided with the * distribution. * * Neither the name of Intel Corporation nor the names of its * contributors may be used to endorse or promote products derived * from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ /** * @brief Implementation of PQoS monitoring API. * * CPUID and MSR operations are done on 'local' system. * */ #include #include #include #include #include "pqos.h" #include "cap.h" #include "monitoring.h" #include "os_monitoring.h" #include "machine.h" #include "types.h" #include "log.h" /** * --------------------------------------- * Local macros * --------------------------------------- */ /** * Allocation & Monitoring association MSR register * - bits [63..32] QE COS * - bits [31..10] Reserved * - bits [9..0] RMID */ #define PQOS_MSR_ASSOC 0xC8F #define PQOS_MSR_ASSOC_QECOS_SHIFT 32 #define PQOS_MSR_ASSOC_QECOS_MASK 0xffffffff00000000ULL #define PQOS_MSR_ASSOC_RMID_MASK ((1ULL << 10) - 1ULL) /** * Monitoring data read MSR register */ #define PQOS_MSR_MON_QMC 0xC8E #define PQOS_MSR_MON_QMC_DATA_MASK ((1ULL << 62) - 1ULL) #define PQOS_MSR_MON_QMC_ERROR (1ULL << 63) #define PQOS_MSR_MON_QMC_UNAVAILABLE (1ULL << 62) /** * Monitoring event selection MSR register * - bits [63..42] Reserved * - bits [41..32] RMID * - bits [31..8] Reserved * - bits [7..0] Event ID */ #define PQOS_MSR_MON_EVTSEL 0xC8D #define PQOS_MSR_MON_EVTSEL_RMID_SHIFT 32 #define PQOS_MSR_MON_EVTSEL_RMID_MASK ((1ULL << 10) - 1ULL) #define PQOS_MSR_MON_EVTSEL_EVTID_MASK ((1ULL << 8) - 1ULL) /** * Allocation class of service (COS) MSR registers */ #define PQOS_MSR_L3CA_MASK_START 0xC90 #define PQOS_MSR_L3CA_MASK_END 0xD8F #define PQOS_MSR_L3CA_MASK_NUMOF \ (PQOS_MSR_L3CA_MASK_END - PQOS_MSR_L3CA_MASK_START + 1) /** * MSR's to read instructions retired, unhalted cycles, * LLC references and LLC misses. * These MSR's are needed to calculate IPC (instructions per clock) and * LLC miss ratio. */ #define IA32_MSR_INST_RETIRED_ANY 0x309 #define IA32_MSR_CPU_UNHALTED_THREAD 0x30A #define IA32_MSR_FIXED_CTR_CTRL 0x38D #define IA32_MSR_PERF_GLOBAL_CTRL 0x38F #define IA32_MSR_PMC0 0x0C1 #define IA32_MSR_PERFEVTSEL0 0x186 #define IA32_EVENT_LLC_MISS_MASK 0x2EULL #define IA32_EVENT_LLC_MISS_UMASK 0x41ULL /** * Special RMID - after reset all cores are associated with it. * * The assumption is that if core is not assigned to it * then it is subject of monitoring activity by a different process. */ #define RMID0 (0) /** * Max value of the memory bandwidth data = 2^24 * assuming there is 24 bit space available */ #define MBM_MAX_VALUE (1 << 24) /** * --------------------------------------- * Local data types * --------------------------------------- */ /** * --------------------------------------- * Local data structures * --------------------------------------- */ static const struct pqos_cap *m_cap = NULL; /**< capabilities structure passed from cap */ static const struct pqos_cpuinfo *m_cpu = NULL; /**< cpu topology passed from cap */ static unsigned m_rmid_max = 0; /**< max RMID */ #ifdef __linux__ static int m_interface = PQOS_INTER_MSR; #endif /** * --------------------------------------- * Local Functions * --------------------------------------- */ static int mon_assoc_set(const unsigned lcore, const pqos_rmid_t rmid); static int mon_assoc_get(const unsigned lcore, pqos_rmid_t *rmid); static int mon_read(const unsigned lcore, const pqos_rmid_t rmid, const enum pqos_mon_event event, uint64_t *value); static int pqos_core_poll(struct pqos_mon_data *group); static int rmid_alloc(const unsigned cluster, const enum pqos_mon_event event, pqos_rmid_t *rmid); static unsigned get_event_id(const enum pqos_mon_event event); static uint64_t get_delta(const uint64_t old_value, const uint64_t new_value); static uint64_t scale_event(const enum pqos_mon_event event, const uint64_t val); /* * ======================================= * ======================================= * * initialize and shutdown * * ======================================= * ======================================= */ int pqos_mon_init(const struct pqos_cpuinfo *cpu, const struct pqos_cap *cap, const struct pqos_config *cfg) { const struct pqos_capability *item = NULL; int ret; ASSERT(cfg != NULL); /** * If monitoring capability has been discovered * then get max RMID supported by a CPU socket * and allocate memory for RMID table */ ret = pqos_cap_get_type(cap, PQOS_CAP_TYPE_MON, &item); if (ret != PQOS_RETVAL_OK) { ret = PQOS_RETVAL_RESOURCE; goto pqos_mon_init_exit; } ASSERT(item != NULL); m_rmid_max = item->u.mon->max_rmid; if (m_rmid_max == 0) { pqos_mon_fini(); return PQOS_RETVAL_PARAM; } LOG_DEBUG("Max RMID per monitoring cluster is %u\n", m_rmid_max); #ifdef __linux__ if (cfg->interface == PQOS_INTER_OS || cfg->interface == PQOS_INTER_OS_RESCTRL_MON) ret = os_mon_init(cpu, cap); if (ret != PQOS_RETVAL_OK) return ret; #endif pqos_mon_init_exit: m_cpu = cpu; m_cap = cap; #ifdef __linux__ m_interface = cfg->interface; #else UNUSED_PARAM(cfg); #endif return ret; } int pqos_mon_fini(void) { int ret = PQOS_RETVAL_OK; m_rmid_max = 0; #ifdef __linux__ if (m_interface == PQOS_INTER_OS || m_interface == PQOS_INTER_OS_RESCTRL_MON) ret = os_mon_fini(); #endif m_cpu = NULL; m_cap = NULL; return ret; } /* * ======================================= * ======================================= * * RMID allocation * * ======================================= * ======================================= */ /** * @brief Allocates RMID for given \a event * * @param [in] cluster CPU cluster id * @param [in] event Monitoring event type * @param [out] rmid resource monitoring id * * @return Operations status */ static int rmid_alloc(const unsigned cluster, const enum pqos_mon_event event, pqos_rmid_t *rmid) { const struct pqos_capability *item = NULL; const struct pqos_cap_mon *mon = NULL; int ret = PQOS_RETVAL_OK; unsigned max_rmid = 0; unsigned mask_found = 0; unsigned i, core_count; unsigned *core_list = NULL; pqos_rmid_t *rmid_list = NULL; if (rmid == NULL) return PQOS_RETVAL_PARAM; /** * This is not so straight forward as it appears to be. * We first have to figure out max RMID * for given event type. In order to do so we need: * - go through capabilities structure * - find monitoring capability * - look for the \a event in the event list * - find max RMID matching the \a event */ ASSERT(m_cap != NULL); ret = pqos_cap_get_type(m_cap, PQOS_CAP_TYPE_MON, &item); if (ret != PQOS_RETVAL_OK) return ret; ASSERT(item != NULL); mon = item->u.mon; /* Find which events are supported vs requested */ max_rmid = m_rmid_max; for (i = 0; i < mon->num_events; i++) if (event & mon->events[i].type) { mask_found |= mon->events[i].type; max_rmid = (max_rmid > mon->events[i].max_rmid) ? mon->events[i].max_rmid : max_rmid; } /** * Check if all of the events are supported */ if (event != mask_found || max_rmid == 0) return PQOS_RETVAL_ERROR; ASSERT(m_rmid_max >= max_rmid); /** * Check for free RMID in the cluster by reading current associations. * Do it backwards (from max to 0) in order to preserve low RMID values * for overlapping RMID ranges for future events. */ core_list = pqos_cpu_get_cores_l3id(m_cpu, cluster, &core_count); if (core_list == NULL) return PQOS_RETVAL_ERROR; ASSERT(core_count > 0); rmid_list = (pqos_rmid_t *)malloc(sizeof(rmid_list[0]) * core_count); if (rmid_list == NULL) { ret = PQOS_RETVAL_RESOURCE; goto rmid_alloc_error; } for (i = 0; i < core_count; i++) { ret = mon_assoc_get(core_list[i], &rmid_list[i]); if (ret != PQOS_RETVAL_OK) goto rmid_alloc_error; } ret = PQOS_RETVAL_ERROR; for (i = max_rmid; i > 0; i--) { const unsigned tmp_rmid = i - 1; unsigned j = 0; for (j = 0; j < core_count; j++) if (tmp_rmid == rmid_list[j]) break; if (j >= core_count) { ret = PQOS_RETVAL_OK; *rmid = tmp_rmid; break; } } rmid_alloc_error: if (rmid_list != NULL) free(rmid_list); if (core_list != NULL) free(core_list); return ret; } /* * ======================================= * ======================================= * * Monitoring * * ======================================= * ======================================= */ /** * @brief Scale event values to bytes * * Retrieve event scale factor and scale value to bytes * * @param event event scale factor to retrieve * @param val value to be scaled * * @return scaled value * @retval value in bytes */ static uint64_t scale_event(const enum pqos_mon_event event, const uint64_t val) { const struct pqos_monitor *pmon; int ret; ret = pqos_cap_get_event(m_cap, event, &pmon); ASSERT(ret == PQOS_RETVAL_OK); if (ret != PQOS_RETVAL_OK) return val; else return val * pmon->scale_factor; } /** * @brief Associates core with RMID at register level * * This function doesn't acquire API lock * and can be used internally when lock is already taken. * * @param lcore logical core id * @param rmid resource monitoring ID * * @return Operation status * @retval PQOS_RETVAL_OK on success */ static int mon_assoc_set(const unsigned lcore, const pqos_rmid_t rmid) { int ret = 0; uint32_t reg = 0; uint64_t val = 0; reg = PQOS_MSR_ASSOC; ret = msr_read(lcore, reg, &val); if (ret != MACHINE_RETVAL_OK) return PQOS_RETVAL_ERROR; val &= PQOS_MSR_ASSOC_QECOS_MASK; val |= (uint64_t)(rmid & PQOS_MSR_ASSOC_RMID_MASK); ret = msr_write(lcore, reg, val); if (ret != MACHINE_RETVAL_OK) return PQOS_RETVAL_ERROR; return PQOS_RETVAL_OK; } /** * @brief Reads \a lcore to RMID association * * @param lcore logical core id * @param rmid place to store RMID \a lcore is assigned to * * @return Operation status * @retval PQOS_RETVAL_OK success * @retval PQOS_RETVAL_ERROR on error */ static int mon_assoc_get(const unsigned lcore, pqos_rmid_t *rmid) { int ret = 0; uint32_t reg = PQOS_MSR_ASSOC; uint64_t val = 0; ASSERT(rmid != NULL); ret = msr_read(lcore, reg, &val); if (ret != MACHINE_RETVAL_OK) return PQOS_RETVAL_ERROR; val &= PQOS_MSR_ASSOC_RMID_MASK; *rmid = (pqos_rmid_t) val; return PQOS_RETVAL_OK; } int hw_mon_assoc_get(const unsigned lcore, pqos_rmid_t *rmid) { int ret = PQOS_RETVAL_OK; ret = _pqos_check_init(1); if (ret != PQOS_RETVAL_OK) goto pqos_mon_assoc_get__error; if (rmid == NULL) { ret = PQOS_RETVAL_PARAM; goto pqos_mon_assoc_get__error; } ASSERT(m_cpu != NULL); ret = pqos_cpu_check_core(m_cpu, lcore); if (ret != PQOS_RETVAL_OK) { ret = PQOS_RETVAL_PARAM; goto pqos_mon_assoc_get__error; } ret = mon_assoc_get(lcore, rmid); pqos_mon_assoc_get__error: return ret; } int hw_mon_reset(void) { int ret = PQOS_RETVAL_OK; unsigned i; ret = _pqos_check_init(1); if (ret != PQOS_RETVAL_OK) goto pqos_mon_reset_error; ASSERT(m_cpu != NULL); for (i = 0; i < m_cpu->num_cores; i++) { int retval = mon_assoc_set(m_cpu->cores[i].lcore, RMID0); if (retval != PQOS_RETVAL_OK) ret = retval; } pqos_mon_reset_error: return ret; } /** * @brief Reads monitoring event data from given core * * This function doesn't acquire API lock. * * @param lcore logical core id * @param rmid RMID to be read * @param event monitoring event * @param value place to store read value * * @return Operation status * @retval PQOS_RETVAL_OK on success */ static int mon_read(const unsigned lcore, const pqos_rmid_t rmid, const unsigned event, uint64_t *value) { int retries = 3, retval = PQOS_RETVAL_OK; uint32_t reg = 0; uint64_t val = 0; /** * Set event selection register (RMID + event id) */ reg = PQOS_MSR_MON_EVTSEL; val = ((uint64_t)rmid) & PQOS_MSR_MON_EVTSEL_RMID_MASK; val <<= PQOS_MSR_MON_EVTSEL_RMID_SHIFT; val |= ((uint64_t)event) & PQOS_MSR_MON_EVTSEL_EVTID_MASK; if (msr_write(lcore, reg, val) != MACHINE_RETVAL_OK) return PQOS_RETVAL_ERROR; /** * read selected data associated with previously selected RMID+event */ reg = PQOS_MSR_MON_QMC; do { if (msr_read(lcore, reg, &val) != MACHINE_RETVAL_OK) { retval = PQOS_RETVAL_ERROR; break; } if ((val&(PQOS_MSR_MON_QMC_ERROR)) != 0ULL) { /** * Unsupported event id or RMID selected */ retval = PQOS_RETVAL_ERROR; break; } retries--; } while ((val&PQOS_MSR_MON_QMC_UNAVAILABLE) != 0ULL && retries > 0); /** * Store event value */ if (retval == PQOS_RETVAL_OK) *value = (val & PQOS_MSR_MON_QMC_DATA_MASK); else LOG_WARN("Error reading event %u on core %u (RMID%u)!\n", event, lcore, (unsigned) rmid); return retval; } /** * @brief Reads monitoring event data from given core * * @param p pointer to monitoring structure * * @return Operation status * @retval PQOS_RETVAL_OK on success */ static int pqos_core_poll(struct pqos_mon_data *p) { struct pqos_event_values *pv = &p->values; int retval = PQOS_RETVAL_OK; unsigned i; if (p->event & PQOS_MON_EVENT_L3_OCCUP) { uint64_t total = 0; for (i = 0; i < p->num_poll_ctx; i++) { uint64_t tmp = 0; int ret; ret = mon_read(p->poll_ctx[i].lcore, p->poll_ctx[i].rmid, get_event_id(PQOS_MON_EVENT_L3_OCCUP), &tmp); if (ret != PQOS_RETVAL_OK) { retval = PQOS_RETVAL_ERROR; goto pqos_core_poll__exit; } total += tmp; } pv->llc = scale_event(PQOS_MON_EVENT_L3_OCCUP, total); } if (p->event & (PQOS_MON_EVENT_LMEM_BW | PQOS_MON_EVENT_RMEM_BW)) { uint64_t total = 0, old_value = pv->mbm_local; for (i = 0; i < p->num_poll_ctx; i++) { uint64_t tmp = 0; int ret; ret = mon_read(p->poll_ctx[i].lcore, p->poll_ctx[i].rmid, get_event_id(PQOS_MON_EVENT_LMEM_BW), &tmp); if (ret != PQOS_RETVAL_OK) { retval = PQOS_RETVAL_ERROR; goto pqos_core_poll__exit; } total += tmp; } pv->mbm_local = total; pv->mbm_local_delta = get_delta(old_value, pv->mbm_local); pv->mbm_local_delta = scale_event(PQOS_MON_EVENT_LMEM_BW, pv->mbm_local_delta); } if (p->event & (PQOS_MON_EVENT_TMEM_BW | PQOS_MON_EVENT_RMEM_BW)) { uint64_t total = 0, old_value = pv->mbm_total; for (i = 0; i < p->num_poll_ctx; i++) { uint64_t tmp = 0; int ret; ret = mon_read(p->poll_ctx[i].lcore, p->poll_ctx[i].rmid, get_event_id(PQOS_MON_EVENT_TMEM_BW), &tmp); if (ret != PQOS_RETVAL_OK) { retval = PQOS_RETVAL_ERROR; goto pqos_core_poll__exit; } total += tmp; } pv->mbm_total = total; pv->mbm_total_delta = get_delta(old_value, pv->mbm_total); pv->mbm_total_delta = scale_event(PQOS_MON_EVENT_TMEM_BW, pv->mbm_total_delta); } if (p->event & PQOS_MON_EVENT_RMEM_BW) { pv->mbm_remote = 0; if (pv->mbm_total > pv->mbm_local) pv->mbm_remote = pv->mbm_total - pv->mbm_local; pv->mbm_remote_delta = 0; if (pv->mbm_total_delta > pv->mbm_local_delta) pv->mbm_remote_delta = pv->mbm_total_delta - pv->mbm_local_delta; } if (p->event & PQOS_PERF_EVENT_IPC) { /** * If multiple cores monitored in one group * then we have to accumulate the values in the group. */ uint64_t unhalted = 0, retired = 0; unsigned n; for (n = 0; n < p->num_cores; n++) { uint64_t tmp = 0; int ret = msr_read(p->cores[n], IA32_MSR_INST_RETIRED_ANY, &tmp); if (ret != MACHINE_RETVAL_OK) { retval = PQOS_RETVAL_ERROR; goto pqos_core_poll__exit; } retired += tmp; ret = msr_read(p->cores[n], IA32_MSR_CPU_UNHALTED_THREAD, &tmp); if (ret != MACHINE_RETVAL_OK) { retval = PQOS_RETVAL_ERROR; goto pqos_core_poll__exit; } unhalted += tmp; } pv->ipc_unhalted_delta = unhalted - pv->ipc_unhalted; pv->ipc_retired_delta = retired - pv->ipc_retired; pv->ipc_unhalted = unhalted; pv->ipc_retired = retired; if (pv->ipc_unhalted_delta == 0) pv->ipc = 0.0; else pv->ipc = (double) pv->ipc_retired_delta / (double) pv->ipc_unhalted_delta; } if (p->event & PQOS_PERF_EVENT_LLC_MISS) { /** * If multiple cores monitored in one group * then we have to accumulate the values in the group. */ uint64_t missed = 0; unsigned n; for (n = 0; n < p->num_cores; n++) { uint64_t tmp = 0; int ret = msr_read(p->cores[n], IA32_MSR_PMC0, &tmp); if (ret != MACHINE_RETVAL_OK) { retval = PQOS_RETVAL_ERROR; goto pqos_core_poll__exit; } missed += tmp; } pv->llc_misses_delta = missed - pv->llc_misses; pv->llc_misses = missed; } if (!p->valid_mbm_read) { /* Report zero memory bandwidth with first read */ pv->mbm_remote_delta = 0; pv->mbm_local_delta = 0; pv->mbm_total_delta = 0; p->valid_mbm_read = 1; } pqos_core_poll__exit: return retval; } /** * @brief Sets up IA32 performance counters for IPC and LLC miss ratio events * * @param num_cores number of cores in \a cores table * @param cores table with core id's * @param event mask of selected monitoring events * * @return Operation status * @retval PQOS_RETVAL_OK on success */ static int ia32_perf_counter_start(const unsigned num_cores, const unsigned *cores, const enum pqos_mon_event event) { uint64_t global_ctrl_mask = 0; unsigned i; ASSERT(cores != NULL && num_cores > 0); if (!(event & (PQOS_PERF_EVENT_LLC_MISS | PQOS_PERF_EVENT_IPC))) return PQOS_RETVAL_OK; if (event & PQOS_PERF_EVENT_IPC) global_ctrl_mask |= (0x3ULL << 32); /**< fixed counters 0&1 */ if (event & PQOS_PERF_EVENT_LLC_MISS) global_ctrl_mask |= 0x1ULL; /**< programmable counter 0 */ /** * Fixed counters are used for IPC calculations. * Programmable counters are used for LLC miss calculations. * Let's check if they are in use. */ for (i = 0; i < num_cores; i++) { uint64_t global_inuse = 0; int ret; ret = msr_read(cores[i], IA32_MSR_PERF_GLOBAL_CTRL, &global_inuse); if (ret != MACHINE_RETVAL_OK) return PQOS_RETVAL_ERROR; if (global_inuse & global_ctrl_mask) LOG_WARN("Hijacking performance counters on core %u\n", cores[i]); } /** * - Disable counters in global control and * reset counter values to 0. * - Program counters for desired events * - Enable counters in global control */ for (i = 0; i < num_cores; i++) { const uint64_t fixed_ctrl = 0x33ULL; /**< track usr + os */ int ret; ret = msr_write(cores[i], IA32_MSR_PERF_GLOBAL_CTRL, 0); if (ret != MACHINE_RETVAL_OK) break; if (event & PQOS_PERF_EVENT_IPC) { ret = msr_write(cores[i], IA32_MSR_INST_RETIRED_ANY, 0); if (ret != MACHINE_RETVAL_OK) break; ret = msr_write(cores[i], IA32_MSR_CPU_UNHALTED_THREAD, 0); if (ret != MACHINE_RETVAL_OK) break; ret = msr_write(cores[i], IA32_MSR_FIXED_CTR_CTRL, fixed_ctrl); if (ret != MACHINE_RETVAL_OK) break; } if (event & PQOS_PERF_EVENT_LLC_MISS) { const uint64_t evtsel0_miss = IA32_EVENT_LLC_MISS_MASK | (IA32_EVENT_LLC_MISS_UMASK << 8) | (1ULL << 16) | (1ULL << 17) | (1ULL << 22); ret = msr_write(cores[i], IA32_MSR_PMC0, 0); if (ret != MACHINE_RETVAL_OK) break; ret = msr_write(cores[i], IA32_MSR_PERFEVTSEL0, evtsel0_miss); if (ret != MACHINE_RETVAL_OK) break; } ret = msr_write(cores[i], IA32_MSR_PERF_GLOBAL_CTRL, global_ctrl_mask); if (ret != MACHINE_RETVAL_OK) break; } if (i < num_cores) return PQOS_RETVAL_ERROR; return PQOS_RETVAL_OK; } /** * @brief Disables IA32 performance counters * * @param num_cores number of cores in \a cores table * @param cores table with core id's * @param event mask of selected monitoring events * * @return Operation status * @retval PQOS_RETVAL_OK on success */ static int ia32_perf_counter_stop(const unsigned num_cores, const unsigned *cores, const enum pqos_mon_event event) { int retval = PQOS_RETVAL_OK; unsigned i; ASSERT(cores != NULL && num_cores > 0); if (!(event & (PQOS_PERF_EVENT_LLC_MISS | PQOS_PERF_EVENT_IPC))) return retval; for (i = 0; i < num_cores; i++) { int ret = msr_write(cores[i], IA32_MSR_PERF_GLOBAL_CTRL, 0); if (ret != MACHINE_RETVAL_OK) retval = PQOS_RETVAL_ERROR; } return retval; } int hw_mon_start(const unsigned num_cores, const unsigned *cores, const enum pqos_mon_event event, void *context, struct pqos_mon_data *group) { unsigned core2cluster[num_cores]; struct pqos_mon_poll_ctx ctxs[num_cores]; unsigned num_ctxs = 0; unsigned i = 0; int ret = PQOS_RETVAL_OK; int retval = PQOS_RETVAL_OK; ASSERT(group != NULL); ASSERT(cores != NULL); ASSERT(num_cores > 0); ASSERT(event > 0); ASSERT(m_cpu != NULL); memset(ctxs, 0, sizeof(ctxs)); /** * Validate if event is listed in capabilities */ for (i = 0; i < (sizeof(event) * 8); i++) { const enum pqos_mon_event evt_mask = (enum pqos_mon_event)(1 << i); const struct pqos_monitor *ptr = NULL; if (!(evt_mask & event)) continue; ret = pqos_cap_get_event(m_cap, evt_mask, &ptr); if (ret != PQOS_RETVAL_OK || ptr == NULL) return PQOS_RETVAL_PARAM; } /** * Check if all requested cores are valid * and not used by other monitoring processes. * * Check if any of requested cores is already subject to monitoring * within this process. * * Initialize poll context table: * - get core cluster * - allocate RMID */ for (i = 0; i < num_cores; i++) { const unsigned lcore = cores[i]; unsigned j, cluster = 0; pqos_rmid_t rmid = RMID0; ret = pqos_cpu_check_core(m_cpu, lcore); if (ret != PQOS_RETVAL_OK) { retval = PQOS_RETVAL_PARAM; goto pqos_mon_start_error1; } ret = mon_assoc_get(lcore, &rmid); if (ret != PQOS_RETVAL_OK) { retval = PQOS_RETVAL_PARAM; goto pqos_mon_start_error1; } if (rmid != RMID0) { /* If not RMID0 then it is already monitored */ LOG_INFO("Core %u is already monitored with " "RMID%u.\n", lcore, rmid); retval = PQOS_RETVAL_RESOURCE; goto pqos_mon_start_error1; } ret = pqos_cpu_get_clusterid(m_cpu, lcore, &cluster); if (ret != PQOS_RETVAL_OK) { retval = PQOS_RETVAL_PARAM; goto pqos_mon_start_error1; } core2cluster[i] = cluster; for (j = 0; j < num_ctxs; j++) if (ctxs[j].lcore == lcore || ctxs[j].cluster == cluster) break; if (j >= num_ctxs) { /** * New cluster is found * - save cluster id in the table * - allocate RMID for the cluster */ ctxs[num_ctxs].lcore = lcore; ctxs[num_ctxs].cluster = cluster; ret = rmid_alloc(cluster, (enum pqos_mon_event)(event & (~(PQOS_PERF_EVENT_IPC | PQOS_PERF_EVENT_LLC_MISS))), &ctxs[num_ctxs].rmid); if (ret != PQOS_RETVAL_OK) { retval = ret; goto pqos_mon_start_error1; } num_ctxs++; } } /** * Fill in the monitoring group structure */ memset(group, 0, sizeof(*group)); group->cores = (unsigned *) malloc(sizeof(group->cores[0]) * num_cores); if (group->cores == NULL) { retval = PQOS_RETVAL_RESOURCE; goto pqos_mon_start_error1; } group->poll_ctx = (struct pqos_mon_poll_ctx *) malloc(sizeof(group->poll_ctx[0]) * num_ctxs); if (group->poll_ctx == NULL) { retval = PQOS_RETVAL_RESOURCE; goto pqos_mon_start_error2; } ret = ia32_perf_counter_start(num_cores, cores, event); if (ret != PQOS_RETVAL_OK) { retval = ret; goto pqos_mon_start_error2; } /** * Associate requested cores with * the allocated RMID */ group->num_cores = num_cores; for (i = 0; i < num_cores; i++) { unsigned cluster, j; pqos_rmid_t rmid; cluster = core2cluster[i]; for (j = 0; j < num_ctxs; j++) if (ctxs[j].cluster == cluster) break; if (j >= num_ctxs) { retval = PQOS_RETVAL_ERROR; goto pqos_mon_start_error2; } rmid = ctxs[j].rmid; group->cores[i] = cores[i]; ret = mon_assoc_set(cores[i], rmid); if (ret != PQOS_RETVAL_OK) { retval = ret; goto pqos_mon_start_error2; } } group->num_poll_ctx = num_ctxs; for (i = 0; i < num_ctxs; i++) group->poll_ctx[i] = ctxs[i]; group->event = event; group->context = context; pqos_mon_start_error2: if (retval != PQOS_RETVAL_OK) { for (i = 0; i < num_cores; i++) (void) mon_assoc_set(cores[i], RMID0); if (group->poll_ctx != NULL) free(group->poll_ctx); if (group->cores != NULL) free(group->cores); } pqos_mon_start_error1: return retval; } int hw_mon_stop(struct pqos_mon_data *group) { int ret = PQOS_RETVAL_OK; int retval = PQOS_RETVAL_OK; unsigned i = 0; ASSERT(group != NULL); if (group->num_cores == 0 || group->cores == NULL || group->num_poll_ctx == 0 || group->poll_ctx == NULL) { return PQOS_RETVAL_PARAM; } ASSERT(m_cpu != NULL); for (i = 0; i < group->num_poll_ctx; i++) { /** * Validate core list in the group structure is correct */ const unsigned lcore = group->poll_ctx[i].lcore; pqos_rmid_t rmid = RMID0; ret = pqos_cpu_check_core(m_cpu, lcore); if (ret != PQOS_RETVAL_OK) return PQOS_RETVAL_PARAM; ret = mon_assoc_get(lcore, &rmid); if (ret != PQOS_RETVAL_OK) return PQOS_RETVAL_PARAM; if (rmid != group->poll_ctx[i].rmid) LOG_WARN("Core %u RMID association changed from %u " "to %u! The core has been hijacked!\n", lcore, group->poll_ctx[i].rmid, rmid); } for (i = 0; i < group->num_cores; i++) { /** * Associate cores from the group back with RMID0 */ ret = mon_assoc_set(group->cores[i], RMID0); if (ret != PQOS_RETVAL_OK) retval = PQOS_RETVAL_RESOURCE; } /** * Stop IA32 performance counters */ ret = ia32_perf_counter_stop(group->num_cores, group->cores, group->event); if (ret != PQOS_RETVAL_OK) retval = PQOS_RETVAL_RESOURCE; /** * Free poll contexts, core list and clear the group structure */ free(group->cores); free(group->poll_ctx); memset(group, 0, sizeof(*group)); return retval; } int hw_mon_poll(struct pqos_mon_data **groups, const unsigned num_groups) { int ret = PQOS_RETVAL_OK; unsigned i = 0; ASSERT(groups != NULL); ASSERT(num_groups > 0); for (i = 0; i < num_groups; i++) { ret = pqos_core_poll(groups[i]); if (ret != PQOS_RETVAL_OK) LOG_WARN("Failed to read event on " "core %u\n", groups[i]->cores[0]); } return PQOS_RETVAL_OK; } /* * ======================================= * ======================================= * * Small utils * * ======================================= * ======================================= */ /** * @brief Maps PQoS API event onto an MSR event id * * @param [in] event PQoS API event id * * @return MSR event id * @retval 0 if not successful */ static unsigned get_event_id(const enum pqos_mon_event event) { switch (event) { case PQOS_MON_EVENT_L3_OCCUP: return 1; break; case PQOS_MON_EVENT_LMEM_BW: return 3; break; case PQOS_MON_EVENT_TMEM_BW: return 2; break; case PQOS_MON_EVENT_RMEM_BW: default: ASSERT(0); /**< this means bug */ break; } return 0; } /** * @brief Gives the difference between two values with regard to the possible * overrun * * @param old_value previous value * @param new_value current value * @return difference between the two values */ static uint64_t get_delta(const uint64_t old_value, const uint64_t new_value) { if (old_value > new_value) return (MBM_MAX_VALUE - old_value) + new_value; else return new_value - old_value; }