Blob Blame History Raw
/*
 * BSD LICENSE
 *
 * Copyright(c) 2018-2020 Intel Corporation. All rights reserved.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *   * Redistributions of source code must retain the above copyright
 *     notice, this list of conditions and the following disclaimer.
 *   * Redistributions in binary form must reproduce the above copyright
 *     notice, this list of conditions and the following disclaimer in
 *     the documentation and/or other materials provided with the
 *     distribution.
 *   * Neither the name of Intel Corporation nor the names of its
 *     contributors may be used to endorse or promote products derived
 *     from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "mba_sc.h"
#include "common.h"

#include <stdio.h>
#include <stdlib.h>
#include <sched.h>
#include <errno.h>
#include <string.h>
#include <sys/wait.h>
#include <signal.h>

/* PQoS capabilities, filled in by pqos_cap_get() in mba_sc_init() */
static const struct pqos_cap *m_cap;
/* CPU information, filled in by pqos_cap_get() in mba_sc_init() */
static const struct pqos_cpuinfo *m_cpu;
/* MBA capability; its throttle_step drives the rate regulation */
static const struct pqos_capability *m_cap_mba;
/* Monitoring capability, looked up in mba_sc_init() */
static const struct pqos_capability *m_cap_mon;

/**
 * Per-instance state of the MBA software controller.
 *
 * One element is kept for every rdtset configuration entry that has
 * mba.ctrl set to 1 (see mba_sc_main()).
 */
struct mba_sc_state {
        /* monitoring group started, polled and stopped for this instance */
        struct pqos_mon_data group;
        /* cores of this instance, copied from the rdtset configuration */
        cpu_set_t cpumask;
        /* MBA rate (percent) most recently applied */
        unsigned prev_rate;
        /* get_time_usec() timestamp of the previous sample */
        uint64_t prev_time;
        /* requested bandwidth limit, compared against bytes per second */
        uint64_t max_bw;
        /* bandwidth computed for the previous sample */
        uint64_t prev_bw;
        /* set after a rate change: recompute delta_bw on the next sample */
        unsigned delta_comp;
        /* absolute bandwidth change seen after the last rate change */
        uint64_t delta_bw;
        /* time the current regulation started, 0 when not regulating */
        uint64_t reg_start_time;
};

/* Array of state_num elements; allocated in mba_sc_main(), freed in
 * mba_sc_stop() */
static struct mba_sc_state *state = NULL;
/* Number of elements in the state array */
static unsigned state_num;
/* Set to 1 by mba_sc_init() once the required capabilities were found */
static int supported = 0;

/**
 * @brief Start LMBM monitoring
 *
 * Collects the ids of all cores set in \a cpumask and starts a
 * PQOS_MON_EVENT_LMEM_BW monitoring group for them.
 *
 * @param[in] cpumask cores to monitor
 * @param[out] group monitoring group pointer
 *
 * @return status
 * @retval 0 on success
 * @retval negative on error (-errno)
 */
static int
mba_sc_mon_start(const cpu_set_t cpumask, struct pqos_mon_data *group)
{
        unsigned *cores;
        unsigned num_cores = 0;
        int i;
        int ret;

        cores = malloc(CPU_SETSIZE * sizeof(*cores));
        if (cores == NULL) {
                DBG("MBA SC: memory allocation failed\n");
                return -EFAULT;
        }

        /* CPU_ISSET() is only documented to return nonzero for a set bit,
         * so test for truth instead of comparing against 1 */
        for (i = 0; i < CPU_SETSIZE; i++)
                if (CPU_ISSET(i, &cpumask))
                        cores[num_cores++] = (unsigned)i;

        ret = pqos_mon_start(num_cores, cores, PQOS_MON_EVENT_LMEM_BW, NULL,
                             group);

        /* the core list is no longer needed once the call has returned */
        free(cores);

        return (ret == PQOS_RETVAL_OK) ? 0 : -EFAULT;
}

/**
 * @brief Stop LMBM monitoring
 *
 * Thin wrapper translating the pqos_mon_stop() result into 0 / -errno.
 *
 * @param group monitoring group pointer
 *
 * @return status
 * @retval 0 on success
 * @retval negative on error (-errno)
 */
static int
mba_sc_mon_stop(struct pqos_mon_data *group)
{
        const int status = pqos_mon_stop(group);

        return (status == PQOS_RETVAL_OK) ? 0 : -EFAULT;
}

/**
 * @brief Poll mon values
 *
 * Polls a single monitoring group and translates the pqos_mon_poll()
 * result into 0 / -errno.
 *
 * @param group monitoring group
 *
 * @return status
 * @retval 0 on success
 * @retval negative on error (-errno)
 */
static int
mba_sc_mon_poll(struct pqos_mon_data *group)
{
        const int status = pqos_mon_poll(&group, 1);

        return (status == PQOS_RETVAL_OK) ? 0 : -EFAULT;
}

/**
 * @brief Initialize the MBA software controller module
 *
 * Requires the MSR interface, a linear MBA capability, the monitoring
 * capability and support for the local memory bandwidth event. Only when
 * all of them are available is the module marked as supported.
 *
 * @return status
 * @retval 0 on success
 * @retval -EEXIST if the module is already initialized
 * @retval -EFAULT on any other error
 */
int
mba_sc_init(void)
{
        int ret;
        const struct pqos_monitor *cap_lmbm = NULL;

        if (m_cap != NULL || m_cpu != NULL) {
                DBG("MBA SC: module already initialized!\n");
                /* return directly: going through the cleanup path would
                 * tear down the initialization that is already in place */
                return -EEXIST;
        }

        if (g_cfg.interface != PQOS_INTER_MSR) {
                DBG("MBA SC: Supported only for MSR interface\n");
                ret = -EFAULT;
                goto err;
        }

        /* Get capability and CPU info pointer */
        ret = pqos_cap_get(&m_cap, &m_cpu);
        if (ret != PQOS_RETVAL_OK) {
                DBG("MBA SC: Error retrieving PQoS capabilities!\n");
                ret = -EFAULT;
                goto err;
        }

        /* Get MBA capabilities */
        ret = pqos_cap_get_type(m_cap, PQOS_CAP_TYPE_MBA, &m_cap_mba);
        if (ret != PQOS_RETVAL_OK || !m_cap_mba->u.mba->is_linear) {
                DBG("MBA SC: MBA not supported or not linear.\n");
                ret = -EFAULT;
                goto err;
        }

        /* Get mon capabilities */
        ret = pqos_cap_get_type(m_cap, PQOS_CAP_TYPE_MON, &m_cap_mon);
        if (ret != PQOS_RETVAL_OK) {
                DBG("MBA SC: monitoring not supported.\n");
                ret = -EFAULT;
                goto err;
        }

        /* Check if LMBM monitoring is supported */
        ret = pqos_cap_get_event(m_cap, PQOS_MON_EVENT_LMEM_BW, &cap_lmbm);
        if (ret != PQOS_RETVAL_OK || cap_lmbm == NULL) {
                DBG("MBA SC: local BW monitoring not supported.\n");
                ret = -EFAULT;
                goto err;
        }

        supported = 1;

        return 0;
err:
        /* deallocate all the resources */
        mba_sc_fini();
        return ret;
}

void
mba_sc_fini(void)
{
        if (m_cap == NULL && m_cpu == NULL)
                return;

        m_cap = NULL;
        m_cpu = NULL;
        m_cap_mba = NULL;
}

static int
mba_sc_stop(void)
{
        unsigned i;
        int ret = 0;

        if (state == NULL)
                return 0;

        for (i = 0; i < state_num; i++) {
                int retval = mba_sc_mon_stop(&state[i].group);

                if (retval < 0)
                        ret = retval;
        }

        free(state);
        state = NULL;

        return ret;
}

/**
 * @brief Stop the MBA software controller
 *
 * Delegates to mba_sc_stop(); its status is intentionally discarded.
 */
void
mba_sc_exit(void)
{
        (void)mba_sc_stop();
}

/**
 * @brief Check if child process is still running
 *
 * When \a pid is a real child pid it is polled with a non-blocking
 * waitpid(). If the child was reaped and did not terminate successfully,
 * the calling process exits with EXIT_FAILURE. When \a pid is -1 and no
 * command was configured, the configured pids are probed with signal 0.
 *
 * @param[in] pid Child pid
 *
 * @return status
 * @retval 1 the child (or at least one configured pid) is still running
 * @retval 0 otherwise
 */
static int
mba_sc_running(const pid_t pid)
{
        int status = 0;
        int ret;

        if (pid != -1) {
                ret = waitpid(pid, &status, WNOHANG);
                if (ret == 0)
                        return 1;
                /* WEXITSTATUS() is only meaningful when WIFEXITED() is
                 * true, so failure is "abnormal termination OR non-zero
                 * exit code" */
                if (ret == pid &&
                    (!WIFEXITED(status) || WEXITSTATUS(status) != EXIT_SUCCESS))
                        exit(EXIT_FAILURE);

        } else if (!g_cfg.command) {
                unsigned i;

                /* Send sig-null to check if pid is still running */
                for (i = 0; i < g_cfg.pid_count; i++) {
                        ret = kill(g_cfg.pids[i], 0);
                        if (ret == 0)
                                return 1;
                }
        }

        return 0;
}

/**
 * @brief Check if the MBA software controller is requested
 *
 * @param[in] cfg rdtset configuration
 *
 * @return status
 * @retval 1 the MSR interface is selected and at least one configuration
 *           entry has mba.ctrl set to 1
 * @retval 0 otherwise
 */
int
mba_sc_mode(const struct rdtset *cfg)
{
        unsigned idx = 0;

        if (cfg->interface != PQOS_INTER_MSR)
                return 0;

        while (idx < cfg->config_count) {
                if (cfg->config[idx].mba.ctrl == 1)
                        return 1;
                idx++;
        }

        return 0;
}

/**
 * @brief Sets MBA class of service defined by \a mba_cfg on cores in \a cpumask
 *
 * The configuration is applied once per cluster: the first core found in
 * a cluster supplies the class of service (written to mba_cfg->class_id)
 * and the remaining cores of that cluster are skipped.
 *
 * @param [in] cpumask set of lcores
 * @param [in,out] mba_cfg mba configuration to be set, class_id is
 *                 overwritten
 *
 * @return status
 * @retval 0 on success
 * @retval negative on error (-errno)
 */
static int
mba_sc_mba_set(const cpu_set_t cpumask, struct pqos_mba *mba_cfg)
{
        int ret;
        int lcore;
        unsigned cluster_id = 0;
        /* non-zero entry: cluster has already been configured */
        unsigned cluster_done[CPU_SETSIZE] = {0};
        const unsigned cluster_max =
            sizeof(cluster_done) / sizeof(cluster_done[0]);

        for (lcore = 0; lcore < CPU_SETSIZE; lcore++) {
                /* CPU_ISSET() is only documented to return nonzero */
                if (!CPU_ISSET(lcore, &cpumask))
                        continue;

                ret = pqos_cpu_get_clusterid(m_cpu, lcore, &cluster_id);
                if (ret != PQOS_RETVAL_OK) {
                        DBG("MBA SC: error while reading cluster id "
                            "for lcore %d\n",
                            lcore);
                        return -EFAULT;
                }

                /* the id comes from the library, never index past the
                 * bookkeeping array with it */
                if (cluster_id >= cluster_max) {
                        DBG("MBA SC: cluster id %u out of range\n",
                            cluster_id);
                        return -EFAULT;
                }

                if (cluster_done[cluster_id])
                        continue;

                ret = pqos_alloc_assoc_get(lcore, &mba_cfg->class_id);
                if (ret != PQOS_RETVAL_OK) {
                        DBG("MBA SC: error while reading assoc for lcore %d\n",
                            lcore);
                        return -EFAULT;
                }

                ret = pqos_mba_set(cluster_id, 1, mba_cfg, NULL);
                if (ret != PQOS_RETVAL_OK) {
                        DBG("MBA SC: error while setting mba for cluster %u\n",
                            cluster_id);
                        return -EFAULT;
                }

                cluster_done[cluster_id] = 1;
        }

        return 0;
}

/**
 * @brief Get number of MBA SC instances
 *
 * An instance is a configuration entry whose mba.ctrl equals 1.
 *
 * @param[in] cfg rdtset configuration
 *
 * @return Number of MBA SC instances
 */
static unsigned
mba_sc_count(const struct rdtset *cfg)
{
        unsigned total = 0;
        unsigned idx;

        for (idx = 0; idx < cfg->config_count; idx++) {
                if (cfg->config[idx].mba.ctrl != 1)
                        continue;
                total++;
        }

        return total;
}

/**
 * @brief Run one regulation step for a single MBA SC instance
 *
 * Polls the monitoring group, computes the bandwidth since the previous
 * sample, and lowers or raises the MBA rate by one throttle step when the
 * bandwidth is above or sufficiently below the configured limit.
 *
 * @param[in,out] state instance state
 *
 * @return status
 * @retval 0 on success (including "no change needed")
 * @retval negative on error (-errno)
 */
static int
mba_sc_update(struct mba_sc_state *state)
{
        uint64_t cur_time;
        uint64_t delta_time;
        struct pqos_mba mba_cfg;
        uint64_t prev_bw = state->prev_bw;
        uint64_t cur_bw;
        int ret;
        const struct pqos_event_values *pv = &state->group.values;
        const unsigned min_rate = m_cap_mba->u.mba->throttle_step;
        const unsigned step_rate = m_cap_mba->u.mba->throttle_step;
        const unsigned max_rate = 100;

        /* start from a fully defined configuration; class_id is filled in
         * by mba_sc_mba_set() */
        memset(&mba_cfg, 0, sizeof(mba_cfg));
        mba_cfg.ctrl = 0;

        ret = mba_sc_mon_poll(&state->group);
        if (ret != 0)
                return ret;

        cur_time = get_time_usec();
        delta_time = cur_time - state->prev_time;
        state->prev_time = cur_time;

        /* no time elapsed since the previous sample: the bandwidth cannot
         * be computed (division by zero), skip this sample */
        if (delta_time == 0)
                return 0;

        /* calculate bw in bytes per second */
        cur_bw = pv->mbm_local_delta * 1000000 / delta_time;
        state->prev_bw = cur_bw;

        /* first sample after a rate change: remember how much the
         * bandwidth moved in response to a single step */
        if (state->delta_comp) {
                state->delta_comp = 0;
                if (cur_bw >= prev_bw)
                        state->delta_bw = cur_bw - prev_bw;
                else
                        state->delta_bw = prev_bw - cur_bw;
        }

        DBG("MBA SC: Current BW %lluMBps",
            (unsigned long long)bytes_to_mb(cur_bw));
        if (state->prev_rate > min_rate && cur_bw > state->max_bw) {
                DBG(" > %lluMBps",
                    (unsigned long long)bytes_to_mb(state->max_bw));
                mba_cfg.mb_max = state->prev_rate - step_rate;
                /* keep the rate inside [min_rate, max_rate] */
                if (mba_cfg.mb_max < min_rate)
                        mba_cfg.mb_max = min_rate;
        } else if (state->prev_rate < max_rate &&
                   (cur_bw + state->delta_bw) < state->max_bw) {
                /* raise only if one more step is not expected to push
                 * the bandwidth over the limit */
                DBG(" < %lluMBps",
                    (unsigned long long)bytes_to_mb(state->max_bw));
                mba_cfg.mb_max = state->prev_rate + step_rate;
                /* keep the rate inside [min_rate, max_rate] */
                if (mba_cfg.mb_max > max_rate)
                        mba_cfg.mb_max = max_rate;
        } else {
                if (state->reg_start_time) {
                        DBG(" Max BW %lluMBps, regulation took %.1fs\n",
                            (unsigned long long)bytes_to_mb(state->max_bw),
                            (cur_time - state->reg_start_time) / 1000000.0);
                        state->reg_start_time = 0;
                } else
                        DBG("\n");
                return 0;
        }

        DBG(", setting MBA to %u%%\n", mba_cfg.mb_max);
        ret = mba_sc_mba_set(state->cpumask, &mba_cfg);
        if (ret != 0) {
                DBG(" Failed to update mba rate!\n");
                return ret;
        }

        state->prev_rate = mba_cfg.mb_max;
        state->delta_comp = 1;

        if (!state->reg_start_time)
                state->reg_start_time = get_time_usec();

        return 0;
}

/**
 * @brief Main loop of the MBA software controller
 *
 * Starts monitoring for every configuration entry with mba.ctrl set to 1,
 * then runs a regulation step for each instance once per sampling
 * interval for as long as mba_sc_running() reports activity. Monitoring
 * is stopped and the state released before returning.
 *
 * @param[in] pid child pid, passed on to mba_sc_running()
 *
 * @return status
 * @retval 0 on success
 * @retval PQOS_RETVAL_RESOURCE if the module is not supported
 * @retval negative on error (-errno)
 */
int
mba_sc_main(pid_t pid)
{
        int ret = 0;
        int stop_ret;
        unsigned i;
        unsigned index;

        if (!supported)
                return PQOS_RETVAL_RESOURCE;

        /* allocate memory for state struct */
        state_num = mba_sc_count(&g_cfg);
        state = calloc(state_num, sizeof(*state));
        if (state == NULL) {
                DBG("MBA SC: memory allocation failed\n");
                return -EFAULT;
        }

        for (i = 0, index = 0; i < g_cfg.config_count; i++) {
                const struct rdt_config *config = &g_cfg.config[i];

                if (config->mba.ctrl != 1)
                        continue;

                ret = mba_sc_mon_start(config->cpumask, &state[index].group);
                if (ret != 0) {
                        DBG("MBA SC: failed to start monitoring\n");
                        /* only the groups started so far may be stopped */
                        state_num = index;
                        goto err;
                }

                state[index].max_bw = mb_to_bytes(config->mba.mb_max);
                state[index].prev_rate = MBA_SC_DEF_INIT_MBA;
                state[index].cpumask = config->cpumask;

                index++;
        }

        for (i = 0; i < state_num; i++)
                state[i].prev_time = get_time_usec();

        while (mba_sc_running(pid)) {
                usleep(MBA_SC_SAMPLING_INTERVAL * 1000);

                /* best effort: a failed step is retried on the next
                 * sampling interval */
                for (i = 0; i < state_num; i++)
                        mba_sc_update(&state[i]);
        }

err:
        /* do not let a successful cleanup hide an earlier failure */
        stop_ret = mba_sc_stop();
        if (ret == 0)
                ret = stop_ret;

        return ret;
}