// Copyright(c) 2019-2020, Intel Corporation
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// * Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
// * Neither the name of Intel Corporation nor the names of its contributors
// may be used to endorse or promote products derived from this software
// without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif // HAVE_CONFIG_H
#include <glob.h>
#include <json-c/json.h>
#include "fpgad/api/opae_events_api.h"
#include "fpgad/api/device_monitoring.h"
#include "fpgad/api/sysfs.h"
/*
 * Plugin-local logging. Any LOG macro defined earlier is replaced by one
 * that forwards to log_printf() with a "fpgad-vc: " prefix. The format
 * argument must be a string literal, because it is pasted onto the prefix
 * by literal concatenation.
 */
#ifdef LOG
#undef LOG
#endif
#define LOG(format, ...) \
log_printf("fpgad-vc: " format, ##__VA_ARGS__)
/* Size of the stack buffers that hold sysfs attribute text and names. */
#define SYSFS_PATH_MAX 256
/* LOG_MOD() prints once for every TRIM_LOG_MODULUS invocations. */
#define TRIM_LOG_MODULUS 20
/*
 * Rate-limited variant of LOG. The caller-owned counter __r is incremented
 * on every use; the message is printed only when the new count is a
 * multiple of TRIM_LOG_MODULUS. __r is expanded more than once, so pass a
 * plain lvalue without side effects.
 */
#define LOG_MOD(__r, __fmt, ...) \
do { \
\
++(__r); \
if (!((__r) % TRIM_LOG_MODULUS)) { \
log_printf("fpgad-vc: " __fmt, ##__VA_ARGS__); \
} \
} while (0)
/*
 * Run-time state of one sensor enumerated below the device's sensor group
 * (see vc_enum_sensor() and vc_sensor_get()).
 */
typedef struct _vc_sensor {
fpga_object sensor_object; /* handle to the "sensorN" object itself */
char *name; /* copy of the "name" attribute; released with free() */
char *type; /* copy of the "type" attribute; released with free() */
uint64_t id; /* contents of the "id" attribute; also the bit index in the state bit sets */
fpga_object value_object; /* handle to the sensor's "value" attribute */
uint64_t value; /* most recent reading taken from value_object */
uint64_t high_fatal; /* meaningful only when ..._HIGH_FATAL_VALID is set */
uint64_t high_warn; /* meaningful only when ..._HIGH_WARN_VALID is set */
uint64_t low_fatal; /* meaningful only when ..._LOW_FATAL_VALID is set */
uint64_t low_warn; /* meaningful only when ..._LOW_WARN_VALID is set */
uint32_t flags; /* bitwise OR of the FPGAD_SENSOR_VC_* values below */
/* Skip this sensor (or config entry) entirely. */
#define FPGAD_SENSOR_VC_IGNORE 0x00000001
/* The corresponding threshold field holds a usable value. */
#define FPGAD_SENSOR_VC_HIGH_FATAL_VALID 0x00000002
#define FPGAD_SENSOR_VC_HIGH_WARN_VALID 0x00000004
#define FPGAD_SENSOR_VC_LOW_FATAL_VALID 0x00000008
#define FPGAD_SENSOR_VC_LOW_WARN_VALID 0x00000010
uint32_t read_errors; /* number of failed value reads seen by vc_monitor_sensors() */
/* Once read_errors reaches this count the sensor is flagged IGNORE. */
#define FPGAD_SENSOR_VC_MAX_READ_ERRORS 25
} vc_sensor;
/*
 * One user-supplied threshold override taken from the "sensors" array of
 * the plugin configuration (see vc_parse_config()). Entries are matched to
 * hardware sensors by id in vc_sensor_get().
 */
typedef struct _vc_config_sensor {
uint64_t id; /* "id" key of the entry; MAX_VC_SENSORS for rejected entries */
uint64_t high_fatal; /* "high-fatal" key multiplied by 1000 */
uint64_t high_warn; /* "high-warn" key multiplied by 1000 */
uint64_t low_fatal; /* "low-fatal" key multiplied by 1000 */
uint64_t low_warn; /* "low-warn" key multiplied by 1000 */
uint32_t flags; /* FPGAD_SENSOR_VC_*_VALID per key present, or FPGAD_SENSOR_VC_IGNORE */
} vc_config_sensor;
/* Capacity of vc_device.sensors and number of "sensorN" names probed. */
#define MAX_VC_SENSORS 128
/*
 * Per-device monitoring context. Allocated in fpgad_plugin_configure(),
 * handed to the monitor thread through thread_context, and released in
 * fpgad_plugin_destroy().
 */
typedef struct _vc_device {
fpgad_monitored_device *base_device; /* fpgad device this context belongs to */
fpga_object group_object; /* parent object of the "sensorN" objects */
vc_sensor sensors[MAX_VC_SENSORS]; /* first num_sensors entries are in use */
uint32_t num_sensors;
uint64_t max_sensor_id; /* set by vc_enum_sensors(); bit sets hold (max_sensor_id + 7) / 8 bytes */
uint8_t *state_tripped; // bit set
uint8_t *state_last; // bit set
uint64_t tripped_count; /* LOG_MOD counter for "still tripped" messages */
uint32_t num_config_sensors; /* element count of config_sensors */
vc_config_sensor *config_sensors; /* user overrides, NULL when none were configured */
bool aer_disabled; /* true after vc_disable_aer() succeeded, until vc_enable_aer() succeeds */
uint32_t previous_ecap_aer[2]; /* saved ECAP_AER+0x08 and ECAP_AER+0x14 register values */
} vc_device;
/*
 * Minimal bit-set helpers over a byte array __s, addressed by bit number
 * __n: byte __n / 8, bit __n % 8. No bounds checking is performed, so the
 * array must hold at least __n / 8 + 1 bytes.
 */
#define BIT_SET_MASK(__n) (1 << ((__n) % 8))
#define BIT_SET_INDEX(__n) ((__n) / 8)
/* Set bit __n. */
#define BIT_SET_SET(__s, __n) \
((__s)[BIT_SET_INDEX(__n)] |= BIT_SET_MASK(__n))
/* Clear bit __n. */
#define BIT_SET_CLR(__s, __n) \
((__s)[BIT_SET_INDEX(__n)] &= ~BIT_SET_MASK(__n))
/* Non-zero when bit __n is set. */
#define BIT_IS_SET(__s, __n) \
((__s)[BIT_SET_INDEX(__n)] & BIT_SET_MASK(__n))
/*
 * Threshold predicates on a vc_sensor pointer. Each one is false whenever
 * the matching *_VALID flag is clear, so a sensor without a given
 * threshold never reports either state for it. Comparisons use the
 * sensor's current value field.
 */
/* value is above the high fatal threshold. */
#define HIGH_FATAL(__sens) \
(((__sens)->flags & FPGAD_SENSOR_VC_HIGH_FATAL_VALID) && \
((__sens)->value > (__sens)->high_fatal))
/* value is above the high warning threshold. */
#define HIGH_WARN(__sens) \
(((__sens)->flags & FPGAD_SENSOR_VC_HIGH_WARN_VALID) && \
((__sens)->value > (__sens)->high_warn))
/* value is at or below the high warning threshold. */
#define HIGH_NORMAL(__sens) \
(((__sens)->flags & FPGAD_SENSOR_VC_HIGH_WARN_VALID) && \
((__sens)->value <= (__sens)->high_warn))
/* value is below the low fatal threshold. */
#define LOW_FATAL(__sens) \
(((__sens)->flags & FPGAD_SENSOR_VC_LOW_FATAL_VALID) && \
((__sens)->value < (__sens)->low_fatal))
/* value is below the low warning threshold. */
#define LOW_WARN(__sens) \
(((__sens)->flags & FPGAD_SENSOR_VC_LOW_WARN_VALID) && \
((__sens)->value < (__sens)->low_warn))
/* value is at or above the low warning threshold. */
#define LOW_NORMAL(__sens) \
(((__sens)->flags & FPGAD_SENSOR_VC_LOW_WARN_VALID) && \
((__sens)->value >= (__sens)->low_warn))
/*
 * Run flag polled by monitor_fme_vc_thread(). It is set to true in
 * fpgad_plugin_configure() and cleared by stop_vc_threads().
 * NOTE(review): this is a plain bool shared between threads without any
 * synchronization visible in this file.
 */
STATIC bool vc_threads_running = true;
/*
 * Thread-stop callback installed as thread_stop_fn: asks every monitor
 * thread of this plugin to exit at its next check of the run flag.
 */
STATIC void stop_vc_threads(void)
{
vc_threads_running = false;
}
/*
 * Release everything owned by a single sensor slot: both object handles
 * and the duplicated name/type strings. Flags and the read-error counter
 * are reset so the slot can be reused by a later enumeration.
 */
STATIC void vc_destroy_sensor(vc_sensor *sensor)
{
	fpgaDestroyObject(&sensor->sensor_object);

	/* free(NULL) is a no-op, so the strings need no guard. */
	free(sensor->name);
	sensor->name = NULL;
	free(sensor->type);
	sensor->type = NULL;

	if (sensor->value_object != NULL) {
		fpgaDestroyObject(&sensor->value_object);
		sensor->value_object = NULL;
	}

	sensor->read_errors = 0;
	sensor->flags = 0;
}
/*
 * Tear down the result of a sensor enumeration: every sensor in use, the
 * sensor group object and both state bit sets. Afterwards the device
 * context is ready for a fresh vc_enum_sensors() pass.
 */
STATIC void vc_destroy_sensors(vc_device *vc)
{
	vc_sensor *cur = vc->sensors;
	vc_sensor *const end = cur + vc->num_sensors;

	while (cur != end)
		vc_destroy_sensor(cur++);

	vc->max_sensor_id = 0;
	vc->num_sensors = 0;

	if (vc->group_object != NULL) {
		fpgaDestroyObject(&vc->group_object);
		vc->group_object = NULL;
	}

	/* free(NULL) is a no-op, so the bit sets need no guard. */
	free(vc->state_last);
	vc->state_last = NULL;
	free(vc->state_tripped);
	vc->state_tripped = NULL;
}
/*
 * Release everything a device context owns except the context structure
 * itself: the enumerated sensors and the user sensor overrides.
 */
STATIC void vc_destroy_device(vc_device *vc)
{
	vc_destroy_sensors(vc);

	if (vc->config_sensors == NULL)
		return;

	free(vc->config_sensors);
	vc->num_config_sensors = 0;
	vc->config_sensors = NULL;
}
/*
 * Read a text attribute of a sensor into a newly allocated C string.
 *
 * sensor   - sensor whose sensor_object is the parent of the attribute.
 * obj_name - name of the attribute object to read (for example "name").
 * name     - receives the duplicated string on success; the caller owns
 *            it and must release it with free().
 *
 * A single trailing newline is stripped. Attributes longer than the local
 * buffer are truncated to SYSFS_PATH_MAX - 1 bytes; an empty attribute
 * yields an empty string.
 *
 * Returns FPGA_OK on success, the error reported by the object API when
 * the attribute cannot be opened, sized or read, or FPGA_NO_MEMORY when
 * the string cannot be duplicated.
 */
STATIC fpga_result vc_sensor_get_string(vc_sensor *sensor,
					const char *obj_name,
					char **name)
{
	fpga_result res;
	fpga_object obj = NULL;
	char buf[SYSFS_PATH_MAX] = { 0, };
	uint32_t len = 0;

	res = fpgaObjectGetObject(sensor->sensor_object,
				  obj_name,
				  &obj,
				  0);
	if (res != FPGA_OK)
		return res;

	res = fpgaObjectGetSize(obj, &len, 0);
	if (res != FPGA_OK)
		goto out_free_obj;

	/*
	 * Never read past the buffer, and keep its last byte as the
	 * terminator (buf is zero-initialized).
	 */
	if (len > sizeof(buf) - 1)
		len = sizeof(buf) - 1;

	/* len == 0 must not reach buf[len - 1] below. */
	if (len > 0) {
		res = fpgaObjectRead(obj, (uint8_t *)buf, 0, len, 0);
		if (res != FPGA_OK)
			goto out_free_obj;

		if (buf[len - 1] == '\n')
			buf[len - 1] = '\0';
	}

	*name = cstr_dup((const char *)buf);
	if (!(*name))
		res = FPGA_NO_MEMORY;

out_free_obj:
	fpgaDestroyObject(&obj);
	return res;
}
/*
 * Read a 64-bit attribute of a sensor.
 *
 * sensor   - sensor whose sensor_object is the parent of the attribute.
 * obj_name - name of the attribute object to read.
 * value    - receives the value read.
 *
 * Returns the error from opening the attribute, otherwise the result of
 * the 64-bit read. The temporary attribute handle is always released.
 */
STATIC fpga_result vc_sensor_get_u64(vc_sensor *sensor,
				     const char *obj_name,
				     uint64_t *value)
{
	fpga_object attr = NULL;
	fpga_result rc = fpgaObjectGetObject(sensor->sensor_object,
					     obj_name,
					     &attr,
					     0);

	if (rc == FPGA_OK) {
		rc = fpgaObjectRead64(attr, value, 0);
		fpgaDestroyObject(&attr);
	}

	return rc;
}
// The percentage by which we adjust the power trip
// points so that we catch anomalies before the hardware does.
#define VC_PERCENT_ADJUST_PWR 5
// The number of degrees by which we adjust the
// temperature trip points so that we catch anomalies
// before the hardware does.
#define VC_DEGREES_ADJUST_TEMP 5
/*
 * Populate a sensor from its attributes and apply any user override.
 *
 * The name, type, id and current value are mandatory: failing to read any
 * of them aborts with that error. The four thresholds are optional; each
 * one that can be read sets its *_VALID flag, each one that cannot clears
 * it. Fatal thresholds are tightened before use: by
 * VC_DEGREES_ADJUST_TEMP for sensors of type "Temperature", otherwise by
 * VC_PERCENT_ADJUST_PWR percent.
 *
 * If the configuration holds a non-ignored entry with the same id, that
 * entry then decides which thresholds are valid. A configured value is
 * taken unless the hardware supplied a tighter one (lower for the high
 * thresholds, higher for the low thresholds).
 *
 * vc - device context providing the configured overrides.
 * s  - sensor to fill in; sensor_object and value_object must be open.
 *
 * Returns FPGA_OK, or the error of the first mandatory read that failed.
 */
STATIC fpga_result vc_sensor_get(vc_device *vc, vc_sensor *s)
{
	const vc_config_sensor *override = NULL;
	fpga_result rc;
	bool temp_sensor;
	uint32_t n;

	/* Drop strings left over from an earlier enumeration. */
	free(s->name);
	s->name = NULL;
	free(s->type);
	s->type = NULL;

	rc = vc_sensor_get_string(s, "name", &s->name);
	if (rc != FPGA_OK)
		return rc;

	rc = vc_sensor_get_string(s, "type", &s->type);
	if (rc != FPGA_OK)
		return rc;

	rc = vc_sensor_get_u64(s, "id", &s->id);
	if (rc != FPGA_OK)
		return rc;

	rc = fpgaObjectRead64(s->value_object, &s->value, 0);
	if (rc != FPGA_OK)
		return rc;

	temp_sensor = !strcmp(s->type, "Temperature");

	if (vc_sensor_get_u64(s, "high_fatal", &s->high_fatal) != FPGA_OK) {
		s->flags &= ~FPGAD_SENSOR_VC_HIGH_FATAL_VALID;
	} else {
		s->flags |= FPGAD_SENSOR_VC_HIGH_FATAL_VALID;
		s->high_fatal -= temp_sensor ?
			VC_DEGREES_ADJUST_TEMP :
			(s->high_fatal * VC_PERCENT_ADJUST_PWR) / 100;
	}

	if (vc_sensor_get_u64(s, "high_warn", &s->high_warn) != FPGA_OK)
		s->flags &= ~FPGAD_SENSOR_VC_HIGH_WARN_VALID;
	else
		s->flags |= FPGAD_SENSOR_VC_HIGH_WARN_VALID;

	if (vc_sensor_get_u64(s, "low_fatal", &s->low_fatal) != FPGA_OK) {
		s->flags &= ~FPGAD_SENSOR_VC_LOW_FATAL_VALID;
	} else {
		s->flags |= FPGAD_SENSOR_VC_LOW_FATAL_VALID;
		s->low_fatal += temp_sensor ?
			VC_DEGREES_ADJUST_TEMP :
			(s->low_fatal * VC_PERCENT_ADJUST_PWR) / 100;
	}

	if (vc_sensor_get_u64(s, "low_warn", &s->low_warn) != FPGA_OK)
		s->flags &= ~FPGAD_SENSOR_VC_LOW_WARN_VALID;
	else
		s->flags |= FPGAD_SENSOR_VC_LOW_WARN_VALID;

	/* The first usable config entry carrying this sensor's id wins. */
	for (n = 0 ; n < vc->num_config_sensors ; ++n) {
		const vc_config_sensor *c = &vc->config_sensors[n];

		if (!(c->flags & FPGAD_SENSOR_VC_IGNORE) && c->id == s->id) {
			override = c;
			break;
		}
	}

	if (!override)
		return FPGA_OK;

	/*
	 * From here on the config entry dictates which thresholds are
	 * valid. A configured value replaces the hardware one unless the
	 * hardware value is valid and tighter.
	 */
	if (!(override->flags & FPGAD_SENSOR_VC_HIGH_FATAL_VALID)) {
		s->flags &= ~FPGAD_SENSOR_VC_HIGH_FATAL_VALID;
	} else {
		if (!(s->flags & FPGAD_SENSOR_VC_HIGH_FATAL_VALID) ||
		    override->high_fatal <= s->high_fatal)
			s->high_fatal = override->high_fatal;
		s->flags |= FPGAD_SENSOR_VC_HIGH_FATAL_VALID;
	}

	if (!(override->flags & FPGAD_SENSOR_VC_HIGH_WARN_VALID)) {
		s->flags &= ~FPGAD_SENSOR_VC_HIGH_WARN_VALID;
	} else {
		if (!(s->flags & FPGAD_SENSOR_VC_HIGH_WARN_VALID) ||
		    override->high_warn <= s->high_warn)
			s->high_warn = override->high_warn;
		s->flags |= FPGAD_SENSOR_VC_HIGH_WARN_VALID;
	}

	if (!(override->flags & FPGAD_SENSOR_VC_LOW_FATAL_VALID)) {
		s->flags &= ~FPGAD_SENSOR_VC_LOW_FATAL_VALID;
	} else {
		if (!(s->flags & FPGAD_SENSOR_VC_LOW_FATAL_VALID) ||
		    override->low_fatal >= s->low_fatal)
			s->low_fatal = override->low_fatal;
		s->flags |= FPGAD_SENSOR_VC_LOW_FATAL_VALID;
	}

	if (!(override->flags & FPGAD_SENSOR_VC_LOW_WARN_VALID)) {
		s->flags &= ~FPGAD_SENSOR_VC_LOW_WARN_VALID;
	} else {
		if (!(s->flags & FPGAD_SENSOR_VC_LOW_WARN_VALID) ||
		    override->low_warn >= s->low_warn)
			s->low_warn = override->low_warn;
		s->flags |= FPGAD_SENSOR_VC_LOW_WARN_VALID;
	}

	return FPGA_OK;
}
/*
 * Try to add the sensor called `name` (a child of the sensor group) to the
 * next free slot of vc->sensors.
 *
 * The slot is counted in vc->num_sensors only when the sensor object, its
 * "value" object and all mandatory attributes could be obtained; otherwise
 * everything acquired for the slot is released again.
 *
 * Returns FPGA_OK when the sensor was added, FPGA_EXCEPTION when the
 * sensor table is full, or the error of the step that failed.
 */
STATIC fpga_result vc_enum_sensor(vc_device *vc,
				  const char *name)
{
	vc_sensor *slot;
	fpga_result rc;

	if (vc->num_sensors == MAX_VC_SENSORS) {
		LOG("exceeded max number of sensors.\n");
		return FPGA_EXCEPTION;
	}

	slot = vc->sensors + vc->num_sensors;

	rc = fpgaObjectGetObject(vc->group_object,
				 name,
				 &slot->sensor_object,
				 0);
	if (rc != FPGA_OK)
		return rc;

	rc = fpgaObjectGetObject(slot->sensor_object,
				 "value",
				 &slot->value_object,
				 0);
	if (rc != FPGA_OK) {
		LOG("failed to get value object for %s.\n", name);
		fpgaDestroyObject(&slot->sensor_object);
		return rc;
	}

	rc = vc_sensor_get(vc, slot);
	if (rc != FPGA_OK) {
		LOG("warning: sensor attribute enumeration failed.\n");
		vc_destroy_sensor(slot);
		return rc;
	}

	++vc->num_sensors;
	return rc;
}
/*
 * Enumerate all sensors of the device and allocate the state bit sets.
 *
 * The sensor group object is located with a glob below the device token,
 * then "sensor0" .. "sensor<MAX_VC_SENSORS - 1>" are probed; names that do
 * not exist or cannot be read are skipped.
 *
 * The bit sets are indexed by sensor id, so they must contain one bit for
 * every id from 0 up to and including the highest id found. For that
 * reason vc->max_sensor_id is set to the highest id PLUS ONE: the byte
 * count (vc->max_sensor_id + 7) / 8, which is used here and wherever the
 * bit sets are cleared, then always covers the highest id.
 *
 * On any failure after the group object was opened, everything acquired
 * by this call is released again, so the function can simply be retried.
 *
 * Returns FPGA_OK on success, the lookup error when the sensor group is
 * not present, FPGA_NOT_FOUND when no sensor could be enumerated,
 * FPGA_EXCEPTION when a sensor id is too large to index a bit set, or
 * FPGA_NO_MEMORY when a bit set cannot be allocated.
 */
STATIC fpga_result vc_enum_sensors(vc_device *vc)
{
	fpga_result res;
	char name[SYSFS_PATH_MAX];
	uint64_t max_id = 0;
	uint32_t j;
	int i;

	res = fpgaTokenGetObject(vc->base_device->token,
				 "spi-altera.*.auto/spi_master/spi*/spi*.*",
				 &vc->group_object,
				 FPGA_OBJECT_GLOB);
	if (res)
		return res;

	for (i = 0 ; i < MAX_VC_SENSORS ; ++i) {
		snprintf(name, sizeof(name), "sensor%d", i);
		vc_enum_sensor(vc, name);
	}

	if (vc->num_sensors == 0) {
		/* Do not leak the group object across retries. */
		vc_destroy_sensors(vc);
		return FPGA_NOT_FOUND;
	}

	/* Do not assume that the last sensor carries the highest id. */
	for (j = 0 ; j < vc->num_sensors ; ++j) {
		if (vc->sensors[j].id > max_id)
			max_id = vc->sensors[j].id;
	}

	/* Keep (max_id + 1) + 7 from wrapping around. */
	if (max_id > UINT64_MAX - 8) {
		LOG("sensor id out of range.\n");
		vc_destroy_sensors(vc);
		return FPGA_EXCEPTION;
	}

	vc->max_sensor_id = max_id + 1;

	vc->state_tripped = calloc((vc->max_sensor_id + 7) / 8, 1);
	vc->state_last = calloc((vc->max_sensor_id + 7) / 8, 1);

	if (!vc->state_tripped || !vc->state_last) {
		vc_destroy_sensors(vc);
		return FPGA_NO_MEMORY;
	}

	return FPGA_OK;
}
STATIC fpga_result vc_disable_aer(vc_device *vc)
{
fpga_token token;
fpga_result res;
fpga_properties prop = NULL;
char path[PATH_MAX];
char rlpath[PATH_MAX];
char *p;
char cmd[256];
char output[256];
FILE *fp;
size_t sz;
uint16_t seg = 0;
uint8_t bus = 0;
uint8_t dev = 0;
uint8_t fn = 0;
token = vc->base_device->token;
res = fpgaGetProperties(token, &prop);
if (res != FPGA_OK) {
LOG("failed to get fpga properties.\n");
return res;
}
if ((fpgaPropertiesGetSegment(prop, &seg) != FPGA_OK) ||
(fpgaPropertiesGetBus(prop, &bus) != FPGA_OK) ||
(fpgaPropertiesGetDevice(prop, &dev) != FPGA_OK) ||
(fpgaPropertiesGetFunction(prop, &fn) != FPGA_OK)) {
LOG("failed to get PCI attributes.\n");
fpgaDestroyProperties(&prop);
return FPGA_EXCEPTION;
}
fpgaDestroyProperties(&prop);
snprintf(path, sizeof(path),
"/sys/bus/pci/devices/%04x:%02x:%02x.%d",
(int)seg, (int)bus, (int)dev, (int)fn);
memset(rlpath, 0, sizeof(rlpath));
if (readlink(path, rlpath, sizeof(rlpath)) < 0) {
LOG("readlink \"%s\" failed.\n", path);
return FPGA_EXCEPTION;
}
// (rlpath)
// 1111111111
// 01234567890123456789
// ../../../devices/pci0000:ae/0000:ae:00.0/0000:af:00.0/
// 0000:b0:09.0/0000:b2:00.0
p = strstr(rlpath, "devices/pci");
p += 19;
*(p + 12) = '\0';
// Save the current ECAP_AER values.
snprintf(cmd, sizeof(cmd),
"setpci -s %s ECAP_AER+0x08.L", p);
fp = popen(cmd, "r");
if (!fp) {
LOG("failed to read ECAP_AER+0x08 for %s\n", p);
return FPGA_EXCEPTION;
}
sz = fread(output, 1, sizeof(output), fp);
if (sz >= sizeof(output))
sz = sizeof(output) - 1;
output[sz] = '\0';
pclose(fp);
vc->previous_ecap_aer[0] = strtoul(output, NULL, 16);
LOG("saving previous ECAP_AER+0x08 value 0x%08x for %s\n",
vc->previous_ecap_aer[0], p);
snprintf(cmd, sizeof(cmd),
"setpci -s %s ECAP_AER+0x14.L", p);
fp = popen(cmd, "r");
if (!fp) {
LOG("failed to read ECAP_AER+0x14 for %s\n", p);
return FPGA_EXCEPTION;
}
sz = fread(output, 1, sizeof(output), fp);
if (sz >= sizeof(output))
sz = sizeof(output) - 1;
output[sz] = '\0';
pclose(fp);
vc->previous_ecap_aer[1] = strtoul(output, NULL, 16);
LOG("saving previous ECAP_AER+0x14 value 0x%08x for %s\n",
vc->previous_ecap_aer[1], p);
// Disable AER.
snprintf(cmd, sizeof(cmd),
"setpci -s %s ECAP_AER+0x08.L=0xffffffff", p);
fp = popen(cmd, "r");
if (!fp) {
LOG("failed to write ECAP_AER+0x08 for %s\n", p);
return FPGA_EXCEPTION;
}
pclose(fp);
snprintf(cmd, sizeof(cmd),
"setpci -s %s ECAP_AER+0x14.L=0xffffffff", p);
fp = popen(cmd, "r");
if (!fp) {
LOG("failed to write ECAP_AER+0x14 for %s\n", p);
return FPGA_EXCEPTION;
}
pclose(fp);
return FPGA_OK;
}
STATIC fpga_result vc_enable_aer(vc_device *vc)
{
fpga_token token;
fpga_result res;
fpga_properties prop = NULL;
char path[PATH_MAX];
char rlpath[PATH_MAX];
char *p;
char cmd[256];
FILE *fp;
uint16_t seg = 0;
uint8_t bus = 0;
uint8_t dev = 0;
uint8_t fn = 0;
token = vc->base_device->token;
res = fpgaGetProperties(token, &prop);
if (res != FPGA_OK) {
LOG("failed to get fpga properties.\n");
return res;
}
if ((fpgaPropertiesGetSegment(prop, &seg) != FPGA_OK) ||
(fpgaPropertiesGetBus(prop, &bus) != FPGA_OK) ||
(fpgaPropertiesGetDevice(prop, &dev) != FPGA_OK) ||
(fpgaPropertiesGetFunction(prop, &fn) != FPGA_OK)) {
LOG("failed to get PCI attributes.\n");
fpgaDestroyProperties(&prop);
return FPGA_EXCEPTION;
}
fpgaDestroyProperties(&prop);
snprintf(path, sizeof(path),
"/sys/bus/pci/devices/%04x:%02x:%02x.%d",
(int)seg, (int)bus, (int)dev, (int)fn);
memset(rlpath, 0, sizeof(rlpath));
if (readlink(path, rlpath, sizeof(rlpath)) < 0) {
LOG("readlink \"%s\" failed.\n", path);
return FPGA_EXCEPTION;
}
// (rlpath)
// 1111111111
// 01234567890123456789
// ../../../devices/pci0000:ae/0000:ae:00.0/0000:af:00.0/
// 0000:b0:09.0/0000:b2:00.0
p = strstr(rlpath, "devices/pci");
p += 19;
*(p + 12) = '\0';
// Write the saved ECAP_AER values to enable AER.
snprintf(cmd, sizeof(cmd),
"setpci -s %s ECAP_AER+0x08.L=0x%08x",
p, vc->previous_ecap_aer[0]);
fp = popen(cmd, "r");
if (!fp) {
LOG("failed to write ECAP_AER+0x08 for %s\n", p);
return FPGA_EXCEPTION;
}
pclose(fp);
LOG("restored previous ECAP_AER+0x08 value 0x%08x for %s\n",
vc->previous_ecap_aer[0], p);
snprintf(cmd, sizeof(cmd),
"setpci -s %s ECAP_AER+0x14.L=0x%08x",
p, vc->previous_ecap_aer[1]);
fp = popen(cmd, "r");
if (!fp) {
LOG("failed to write ECAP_AER+0x14 for %s\n", p);
return FPGA_EXCEPTION;
}
pclose(fp);
LOG("restored previous ECAP_AER+0x14 value 0x%08x for %s\n",
vc->previous_ecap_aer[1], p);
return FPGA_OK;
}
/*
 * Run one polling pass over all enumerated sensors.
 *
 * For every sensor that is not ignored the current value is read. A
 * sensor beyond one of its warning thresholds is marked in
 * vc->state_tripped and a power/thermal event is sent on every pass in
 * which that holds. The first pass in which a sensor trips is logged and
 * AER is disabled if it is not already. A sensor that was tripped in the
 * previous pass and is now within its warning thresholds is logged as
 * back to normal; once no sensor remains tripped, AER is re-enabled.
 *
 * A sensor whose value cannot be read is skipped for this pass and is
 * flagged FPGAD_SENSOR_VC_IGNORE after FPGAD_SENSOR_VC_MAX_READ_ERRORS
 * failures.
 *
 * At the end of the pass state_tripped is copied into state_last bit by
 * bit and then cleared.
 *
 * Returns true while at least one non-ignored sensor with a valid warning
 * threshold exists, and false otherwise so that the caller can
 * re-enumerate the sensors.
 */
STATIC bool vc_monitor_sensors(vc_device *vc)
{
	uint32_t i;
	uint32_t monitoring = 0;
	bool negative_trans = false;
	bool res = true;

	for (i = 0 ; i < vc->num_sensors ; ++i) {
		vc_sensor *s = &vc->sensors[i];
		bool tripped;

		if (s->flags & FPGAD_SENSOR_VC_IGNORE)
			continue;

		if (s->flags & (FPGAD_SENSOR_VC_HIGH_WARN_VALID|
				FPGAD_SENSOR_VC_LOW_WARN_VALID))
			++monitoring;

		if (fpgaObjectRead64(s->value_object,
				     &s->value,
				     FPGA_OBJECT_SYNC) != FPGA_OK) {
			if (++s->read_errors >=
				FPGAD_SENSOR_VC_MAX_READ_ERRORS)
				s->flags |= FPGAD_SENSOR_VC_IGNORE;
			continue;
		}

		tripped = HIGH_WARN(s) || LOW_WARN(s);

		if (tripped) {
			opae_api_send_EVENT_POWER_THERMAL(vc->base_device);

			BIT_SET_SET(vc->state_tripped, s->id);
			if (!BIT_IS_SET(vc->state_last, s->id)) {
				LOG("sensor '%s' warning.\n", s->name);

				if (!vc->aer_disabled) {
					if (FPGA_OK == vc_disable_aer(vc))
						vc->aer_disabled = true;
				}
			}
		}

		/*
		 * With both warning thresholds valid, a sensor beyond one
		 * of them is always "normal" with respect to the other.
		 * Only report the transition when the sensor is not
		 * tripped in this pass.
		 */
		if (!tripped && (HIGH_NORMAL(s) || LOW_NORMAL(s))) {
			if (BIT_IS_SET(vc->state_last, s->id)) {
				negative_trans = true;
				LOG("sensor '%s' back to normal.\n", s->name);
			}
		}

		if (BIT_IS_SET(vc->state_last, s->id) &&
		    BIT_IS_SET(vc->state_tripped, s->id)) {
			LOG_MOD(vc->tripped_count,
				"sensor '%s' still tripped.\n", s->name);
		}
	}

	if (negative_trans) {
		for (i = 0 ; i < vc->num_sensors ; ++i) {
			if (BIT_IS_SET(vc->state_tripped, vc->sensors[i].id))
				break;
		}

		if (i == vc->num_sensors) {
			// no remaining tripped sensors
			vc->tripped_count = 0;
			if (vc->aer_disabled) {
				if (FPGA_OK == vc_enable_aer(vc))
					vc->aer_disabled = false;
			}
		}
	}

	/*
	** Are we still monitoring any sensor that has a valid
	** high/low warn threshold? If not, then this fn should
	** return false so that we wait for sensor enumeration
	** in the caller. It's possible that we're ignoring all
	** of the sensors because they became unavailable due to
	** driver removal, eg.
	*/
	if (!monitoring)
		res = false;

	/* Remember this pass's result for the next one, then start over. */
	for (i = 0 ; i < vc->num_sensors ; ++i) {
		vc_sensor *s = &vc->sensors[i];

		if (BIT_IS_SET(vc->state_tripped, s->id))
			BIT_SET_SET(vc->state_last, s->id);
		else
			BIT_SET_CLR(vc->state_last, s->id);
	}

	memset(vc->state_tripped, 0, (vc->max_sensor_id + 7) / 8);

	return res;
}
STATIC int cool_down = 30;
STATIC void *monitor_fme_vc_thread(void *arg)
{
fpgad_monitored_device *d =
(fpgad_monitored_device *)arg;
vc_device *vc = (vc_device *)d->thread_context;
uint32_t enum_retries = 0;
uint8_t *save_state_last = NULL;
while (vc_threads_running) {
while (vc_enum_sensors(vc) != FPGA_OK) {
LOG_MOD(enum_retries, "waiting to enumerate sensors.\n");
if (!vc_threads_running)
return NULL;
sleep(1);
}
if (save_state_last) {
free(vc->state_last);
vc->state_last = save_state_last;
save_state_last = NULL;
}
while (vc_monitor_sensors(vc)) {
usleep(d->config->poll_interval_usec);
if (!vc_threads_running) {
vc_destroy_sensors(vc);
return NULL;
}
}
save_state_last = vc->state_last;
vc->state_last = NULL;
vc_destroy_sensors(vc);
}
return NULL;
}
STATIC int vc_parse_config(vc_device *vc, const char *cfg)
{
json_object *root;
enum json_tokener_error j_err = json_tokener_success;
json_object *j_cool_down = NULL;
json_object *j_config_sensors_enabled = NULL;
json_object *j_sensors = NULL;
int res = 1;
int sensor_entries;
int i;
root = json_tokener_parse_verbose(cfg, &j_err);
if (!root) {
LOG("error parsing config: %s\n",
json_tokener_error_desc(j_err));
return 1;
}
if (!json_object_object_get_ex(root,
"cool-down",
&j_cool_down)) {
LOG("failed to find cool-down key in config.\n");
goto out_put;
}
if (!json_object_is_type(j_cool_down, json_type_int)) {
LOG("cool-down key not integer.\n");
goto out_put;
}
cool_down = json_object_get_int(j_cool_down);
if (cool_down < 0)
cool_down = 30;
LOG("set cool-down period to %d seconds.\n", cool_down);
res = 0;
if (!json_object_object_get_ex(root,
"config-sensors-enabled",
&j_config_sensors_enabled)) {
LOG("failed to find config-sensors-enabled key in config.\n"
"Skipping user sensor config.\n");
goto out_put;
}
if (!json_object_is_type(j_config_sensors_enabled, json_type_boolean)) {
LOG("config-sensors-enabled key not Boolean.\n"
"Skipping user sensor config.\n");
goto out_put;
}
if (!json_object_get_boolean(j_config_sensors_enabled)) {
LOG("config-sensors-enabled key set to false.\n"
"Skipping user sensor config.\n");
goto out_put;
}
if (!json_object_object_get_ex(root,
"sensors",
&j_sensors)) {
LOG("failed to find sensors key in config.\n"
"Skipping user sensor config.\n");
goto out_put;
}
if (!json_object_is_type(j_sensors, json_type_array)) {
LOG("sensors key not array.\n"
"Skipping user sensor config.\n");
goto out_put;
}
sensor_entries = json_object_array_length(j_sensors);
if (!sensor_entries) {
LOG("sensors key is empty array.\n"
"Skipping user sensor config.\n");
goto out_put;
}
vc->config_sensors = calloc(sensor_entries, sizeof(vc_config_sensor));
if (!vc->config_sensors) {
LOG("calloc failed. Skipping user sensor config.\n");
goto out_put;
}
vc->num_config_sensors = (uint32_t)sensor_entries;
for (i = 0 ; i < sensor_entries ; ++i) {
json_object *j_sensor_sub_i = json_object_array_get_idx(j_sensors, i);
json_object *j_id;
json_object *j_high_fatal;
json_object *j_high_warn;
json_object *j_low_fatal;
json_object *j_low_warn;
if (!json_object_object_get_ex(j_sensor_sub_i,
"id",
&j_id)) {
LOG("failed to find id key in config sensors[%d].\n"
"Skipping entry %d.\n", i, i);
vc->config_sensors[i].id = MAX_VC_SENSORS;
vc->config_sensors[i].flags = FPGAD_SENSOR_VC_IGNORE;
continue;
}
if (!json_object_is_type(j_id, json_type_int)) {
LOG("sensors[%d].id key not int.\n"
"Skipping entry %d.\n", i, i);
vc->config_sensors[i].id = MAX_VC_SENSORS;
vc->config_sensors[i].flags = FPGAD_SENSOR_VC_IGNORE;
continue;
}
vc->config_sensors[i].id = json_object_get_int(j_id);
if (json_object_object_get_ex(j_sensor_sub_i,
"high-fatal",
&j_high_fatal)) {
if (json_object_is_type(j_high_fatal,
json_type_double)) {
vc->config_sensors[i].high_fatal =
(uint64_t)(json_object_get_double(j_high_fatal)
* 1000.0);
vc->config_sensors[i].flags |=
FPGAD_SENSOR_VC_HIGH_FATAL_VALID;
LOG("user sensor%u high-fatal: %f\n",
vc->config_sensors[i].id,
json_object_get_double(j_high_fatal));
}
}
if (json_object_object_get_ex(j_sensor_sub_i,
"high-warn",
&j_high_warn)) {
if (json_object_is_type(j_high_warn,
json_type_double)) {
vc->config_sensors[i].high_warn =
(uint64_t)(json_object_get_double(j_high_warn)
* 1000.0);
vc->config_sensors[i].flags |=
FPGAD_SENSOR_VC_HIGH_WARN_VALID;
LOG("user sensor%u high-warn: %f\n",
vc->config_sensors[i].id,
json_object_get_double(j_high_warn));
}
}
if (json_object_object_get_ex(j_sensor_sub_i,
"low-fatal",
&j_low_fatal)) {
if (json_object_is_type(j_low_fatal,
json_type_double)) {
vc->config_sensors[i].low_fatal =
(uint64_t)(json_object_get_double(j_low_fatal)
* 1000.0);
vc->config_sensors[i].flags |=
FPGAD_SENSOR_VC_LOW_FATAL_VALID;
LOG("user sensor%u low-fatal: %f\n",
vc->config_sensors[i].id,
json_object_get_double(j_low_fatal));
}
}
if (json_object_object_get_ex(j_sensor_sub_i,
"low-warn",
&j_low_warn)) {
if (json_object_is_type(j_low_warn,
json_type_double)) {
vc->config_sensors[i].low_warn =
(uint64_t)(json_object_get_double(j_low_warn)
* 1000.0);
vc->config_sensors[i].flags |=
FPGAD_SENSOR_VC_LOW_WARN_VALID;
LOG("user sensor%u low-warn: %f\n",
vc->config_sensors[i].id,
json_object_get_double(j_low_warn));
}
}
}
out_put:
json_object_put(root);
return res;
}
/*
 * Plugin entry point: prepare monitoring for device d using the JSON
 * configuration text cfg.
 *
 * The device is always marked as a thread-type plugin. Only objects of
 * type FPGA_DEVICE are monitored: for those the thread and stop callbacks
 * are installed and a vc_device context is allocated, configured and
 * published through d->thread_context.
 *
 * Returns 0 on success. Returns non-zero when d is not an FPGA_DEVICE,
 * when the context cannot be allocated, or when the configuration is
 * invalid; in the last case d->thread_context is reset to NULL so that no
 * pointer to the freed context remains.
 */
int fpgad_plugin_configure(fpgad_monitored_device *d,
			   const char *cfg)
{
	int res = 1;
	vc_device *vc;

	vc_threads_running = true;

	d->type = FPGAD_PLUGIN_TYPE_THREAD;

	if (d->object_type == FPGA_DEVICE) {

		d->thread_fn = monitor_fme_vc_thread;
		d->thread_stop_fn = stop_vc_threads;

		vc = calloc(1, sizeof(vc_device));
		if (!vc)
			return res;

		vc->base_device = d;
		d->thread_context = vc;

		LOG("monitoring vid=0x%04x did=0x%04x (%s)\n",
			d->supported->vendor_id,
			d->supported->device_id,
			d->object_type == FPGA_ACCELERATOR ?
			"accelerator" : "device");

		res = vc_parse_config(vc, cfg);
		if (res) {
			free(vc);
			/* Do not leave a dangling context for later callbacks. */
			d->thread_context = NULL;
		}
	}

	// Not currently monitoring the Port device
	return res;
}
/*
 * Plugin exit point: log that monitoring of d stops and release the
 * vc_device context attached to it, if there is one. d->thread_context is
 * NULL afterwards.
 */
void fpgad_plugin_destroy(fpgad_monitored_device *d)
{
	vc_device *ctx;

	LOG("stop monitoring vid=0x%04x did=0x%04x (%s)\n",
		d->supported->vendor_id,
		d->supported->device_id,
		d->object_type == FPGA_ACCELERATOR ?
		"accelerator" : "device");

	ctx = (vc_device *)d->thread_context;
	if (ctx == NULL)
		return;

	vc_destroy_device(ctx);
	free(ctx);
	d->thread_context = NULL;
}