// fpgad-vc: sensor monitoring plugin for fpgad.
// Copyright(c) 2019-2020, Intel Corporation
//
// Redistribution  and  use  in source  and  binary  forms,  with  or  without
// modification, are permitted provided that the following conditions are met:
//
// * Redistributions of  source code  must retain the  above copyright notice,
//   this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright notice,
//   this list of conditions and the following disclaimer in the documentation
//   and/or other materials provided with the distribution.
// * Neither the name  of Intel Corporation  nor the names of its contributors
//   may be used to  endorse or promote  products derived  from this  software
//   without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING,  BUT NOT LIMITED TO,  THE
// IMPLIED WARRANTIES OF  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED.  IN NO EVENT  SHALL THE COPYRIGHT OWNER  OR CONTRIBUTORS BE
// LIABLE  FOR  ANY  DIRECT,  INDIRECT,  INCIDENTAL,  SPECIAL,  EXEMPLARY,  OR
// CONSEQUENTIAL  DAMAGES  (INCLUDING,  BUT  NOT LIMITED  TO,  PROCUREMENT  OF
// SUBSTITUTE GOODS OR SERVICES;  LOSS OF USE,  DATA, OR PROFITS;  OR BUSINESS
// INTERRUPTION)  HOWEVER CAUSED  AND ON ANY THEORY  OF LIABILITY,  WHETHER IN
// CONTRACT,  STRICT LIABILITY,  OR TORT  (INCLUDING NEGLIGENCE  OR OTHERWISE)
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,  EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.

#ifdef HAVE_CONFIG_H
#include <config.h>
#endif // HAVE_CONFIG_H

#include <glob.h>
#include <json-c/json.h>

#include "fpgad/api/opae_events_api.h"
#include "fpgad/api/device_monitoring.h"
#include "fpgad/api/sysfs.h"

// Replace any LOG macro that is already defined at this point with one
// that prefixes every message with this plugin's "fpgad-vc: " tag.
#ifdef LOG
#undef LOG
#endif
#define LOG(format, ...) \
log_printf("fpgad-vc: " format, ##__VA_ARGS__)

// Size, in bytes, of the local buffers this file uses for attribute
// text and for generated object names.
#define SYSFS_PATH_MAX 256

// Rate-limited logging: LOG_MOD increments the caller-supplied counter
// __r and only emits the message when the incremented counter is a
// multiple of TRIM_LOG_MODULUS. Note that __r is modified and evaluated
// more than once, so it must be a plain lvalue without side effects.
#define TRIM_LOG_MODULUS 20
#define LOG_MOD(__r, __fmt, ...) \
do { \
\
	++(__r); \
	if (!((__r) % TRIM_LOG_MODULUS)) { \
		log_printf("fpgad-vc: " __fmt, ##__VA_ARGS__); \
	} \
} while (0)


// Run-time state for one enumerated sensor.
typedef struct _vc_sensor {
	// Handle to the "sensorN" object found under the device's
	// group object.
	fpga_object sensor_object;
	// Heap-allocated copies of the sensor's "name" and "type"
	// attributes; freed by vc_destroy_sensor().
	char *name;
	char *type;
	// Value of the sensor's "id" attribute. Also used as the bit
	// index into the device's state bit sets.
	uint64_t id;
	// Handle to the sensor's "value" attribute, kept open so that the
	// value can be re-read on every poll.
	fpga_object value_object;
	// Most recently read sensor value.
	uint64_t value;
	// Thresholds. Each one is only meaningful when the matching
	// *_VALID bit is set in flags.
	uint64_t high_fatal;
	uint64_t high_warn;
	uint64_t low_fatal;
	uint64_t low_warn;
	// Bitwise OR of the FPGAD_SENSOR_VC_* values below.
	uint32_t flags;
// Sensor (or config entry) is skipped entirely.
#define FPGAD_SENSOR_VC_IGNORE           0x00000001
// The corresponding threshold field holds a usable value.
#define FPGAD_SENSOR_VC_HIGH_FATAL_VALID 0x00000002
#define FPGAD_SENSOR_VC_HIGH_WARN_VALID  0x00000004
#define FPGAD_SENSOR_VC_LOW_FATAL_VALID  0x00000008
#define FPGAD_SENSOR_VC_LOW_WARN_VALID   0x00000010
	// Number of failed value reads since the sensor was enumerated.
	uint32_t read_errors;
// Once read_errors reaches this count the sensor is flagged
// FPGAD_SENSOR_VC_IGNORE.
#define FPGAD_SENSOR_VC_MAX_READ_ERRORS  25
} vc_sensor;

// One user-supplied sensor override, as parsed from the "sensors" array
// of the plugin's JSON configuration.
typedef struct _vc_config_sensor {
	// Sensor id this entry applies to (matched against vc_sensor.id).
	uint64_t id;
	// User thresholds. The parser stores the configured floating point
	// value multiplied by 1000. Each field is only meaningful when the
	// matching FPGAD_SENSOR_VC_*_VALID bit is set in flags.
	uint64_t high_fatal;
	uint64_t high_warn;
	uint64_t low_fatal;
	uint64_t low_warn;
	// FPGAD_SENSOR_VC_* bits; FPGAD_SENSOR_VC_IGNORE marks an entry
	// that could not be parsed and must be skipped.
	uint32_t flags;
} vc_config_sensor;

// Upper bound on the number of sensors tracked per device, and on the
// "sensorN" names probed during enumeration.
#define MAX_VC_SENSORS 128
// Per-device plugin context; stored in the monitored device's
// thread_context.
typedef struct _vc_device {
	// The fpgad device this context belongs to.
	fpgad_monitored_device *base_device;
	// Object under which the individual "sensorN" objects are found.
	fpga_object group_object;
	// Enumerated sensors; only the first num_sensors entries are valid.
	vc_sensor sensors[MAX_VC_SENSORS];
	uint32_t num_sensors;
	// Sensor id used to size the two bit sets below.
	uint64_t max_sensor_id;
	// Bit sets indexed by sensor id: sensors found tripped during the
	// current poll, and sensors that were tripped in the previous poll.
	uint8_t *state_tripped; // bit set
	uint8_t *state_last;    // bit set
	// Counter used to rate-limit the "still tripped" log message.
	uint64_t tripped_count;
	// User overrides parsed from the configuration (may be NULL / 0).
	uint32_t num_config_sensors;
	vc_config_sensor *config_sensors;
	// True while this plugin has AER disabled; previous_ecap_aer holds
	// the ECAP_AER+0x08 and ECAP_AER+0x14 values saved at that time.
	bool aer_disabled;
	uint32_t previous_ecap_aer[2];
} vc_device;

// Helpers for a bit set stored as an array of bytes: bit __n lives in
// byte (__n / 8) at bit position (__n % 8). The caller is responsible
// for making sure the array is large enough for __n.
#define BIT_SET_MASK(__n)  (1 << ((__n) % 8))
#define BIT_SET_INDEX(__n) ((__n) / 8)

// Set bit __n in bit set __s.
#define BIT_SET_SET(__s, __n) \
((__s)[BIT_SET_INDEX(__n)] |= BIT_SET_MASK(__n))

// Clear bit __n in bit set __s.
#define BIT_SET_CLR(__s, __n) \
((__s)[BIT_SET_INDEX(__n)] &= ~BIT_SET_MASK(__n))

// Non-zero when bit __n is set in bit set __s (the value is the mask,
// not necessarily 1).
#define BIT_IS_SET(__s, __n) \
((__s)[BIT_SET_INDEX(__n)] & BIT_SET_MASK(__n))


// Threshold predicates on a vc_sensor pointer. Every predicate is false
// when the corresponding threshold is not flagged valid, so an
// unconfigured threshold never reports "exceeded" nor "normal".

// value is strictly above the high fatal threshold.
#define HIGH_FATAL(__sens) \
(((__sens)->flags & FPGAD_SENSOR_VC_HIGH_FATAL_VALID) && \
 ((__sens)->value > (__sens)->high_fatal))

// value is strictly above the high warning threshold.
#define HIGH_WARN(__sens) \
(((__sens)->flags & FPGAD_SENSOR_VC_HIGH_WARN_VALID) && \
 ((__sens)->value > (__sens)->high_warn))

// value is at or below the high warning threshold.
#define HIGH_NORMAL(__sens) \
(((__sens)->flags & FPGAD_SENSOR_VC_HIGH_WARN_VALID) && \
 ((__sens)->value <= (__sens)->high_warn))

// value is strictly below the low fatal threshold.
#define LOW_FATAL(__sens) \
(((__sens)->flags & FPGAD_SENSOR_VC_LOW_FATAL_VALID) && \
 ((__sens)->value < (__sens)->low_fatal))

// value is strictly below the low warning threshold.
#define LOW_WARN(__sens) \
(((__sens)->flags & FPGAD_SENSOR_VC_LOW_WARN_VALID) && \
 ((__sens)->value < (__sens)->low_warn))

// value is at or above the low warning threshold.
#define LOW_NORMAL(__sens) \
(((__sens)->flags & FPGAD_SENSOR_VC_LOW_WARN_VALID) && \
 ((__sens)->value >= (__sens)->low_warn))

// Run flag polled by the monitoring thread(s). It is set to true in
// fpgad_plugin_configure() and cleared by stop_vc_threads().
// NOTE(review): this is a plain bool written and read from different
// threads without synchronization - confirm that is acceptable here.
STATIC bool vc_threads_running = true;

// Thread stop callback (installed as thread_stop_fn): asks the
// monitoring loop to exit at its next check of the run flag.
STATIC void stop_vc_threads(void)
{
	vc_threads_running = false;
}

/*
 * Release everything owned by one sensor slot and return the slot to
 * its "empty" state.
 *
 * Both object handles are reset to NULL after being destroyed and the
 * strings are reset after being freed, so the function may safely be
 * called again on the same slot and no stale handle is left behind.
 *
 * sensor: the slot to clean up; must not be NULL.
 */
STATIC void vc_destroy_sensor(vc_sensor *sensor)
{
	if (sensor->sensor_object) {
		fpgaDestroyObject(&sensor->sensor_object);
		sensor->sensor_object = NULL;
	}

	// free(NULL) is a no-op, so no guards are needed here.
	free(sensor->name);
	sensor->name = NULL;

	free(sensor->type);
	sensor->type = NULL;

	if (sensor->value_object) {
		fpgaDestroyObject(&sensor->value_object);
		sensor->value_object = NULL;
	}

	sensor->flags = 0;
	sensor->read_errors = 0;
}

/*
 * Tear down all per-enumeration state of a device: every enumerated
 * sensor, the group object and both state bit sets. The user
 * configuration (config_sensors) is left untouched.
 */
STATIC void vc_destroy_sensors(vc_device *vc)
{
	vc_sensor *cur = vc->sensors;
	vc_sensor *const end = vc->sensors + vc->num_sensors;

	while (cur != end)
		vc_destroy_sensor(cur++);

	vc->num_sensors = 0;
	vc->max_sensor_id = 0;

	if (vc->group_object != NULL) {
		fpgaDestroyObject(&vc->group_object);
		vc->group_object = NULL;
	}

	// free() accepts NULL, so the bit sets need no guard.
	free(vc->state_tripped);
	vc->state_tripped = NULL;

	free(vc->state_last);
	vc->state_last = NULL;
}

/*
 * Release everything a vc_device owns: the enumeration state and, when
 * present, the user sensor configuration. The vc_device structure
 * itself is not freed.
 */
STATIC void vc_destroy_device(vc_device *vc)
{
	vc_destroy_sensors(vc);

	if (!vc->config_sensors)
		return;

	free(vc->config_sensors);
	vc->config_sensors = NULL;
	vc->num_config_sensors = 0;
}

/*
 * Read a text attribute of a sensor into a newly allocated string.
 *
 * sensor:   sensor whose sensor_object is queried.
 * obj_name: name of the attribute object to read (e.g. "name").
 * name:     on success receives a heap-allocated copy of the attribute
 *           text with one trailing newline removed; the caller owns it
 *           and must free() it. Not modified on failure.
 *
 * Attribute data longer than the local buffer is truncated to
 * SYSFS_PATH_MAX - 1 bytes, and an empty attribute yields an empty
 * string.
 *
 * Returns FPGA_OK on success, FPGA_NO_MEMORY if the copy could not be
 * allocated, or the error reported by the object API.
 */
STATIC fpga_result vc_sensor_get_string(vc_sensor *sensor,
					const char *obj_name,
					char **name)
{
	fpga_result res;
	fpga_object obj = NULL;
	char buf[SYSFS_PATH_MAX] = { 0, };
	uint32_t len = 0;

	res = fpgaObjectGetObject(sensor->sensor_object,
				  obj_name,
				  &obj,
				  0);
	if (res != FPGA_OK)
		return res;

	res = fpgaObjectGetSize(obj, &len, 0);
	if (res != FPGA_OK)
		goto out_free_obj;

	// Never read more than buf can hold while still leaving room
	// for the terminating NUL.
	if (len > sizeof(buf) - 1)
		len = (uint32_t)(sizeof(buf) - 1);

	// Only touch buf[len-1] when there is at least one byte;
	// len is unsigned, so len-1 would wrap for an empty attribute.
	if (len > 0) {
		res = fpgaObjectRead(obj, (uint8_t *)buf, 0, len, 0);
		if (res != FPGA_OK)
			goto out_free_obj;

		if (buf[len-1] == '\n')
			buf[len-1] = '\0';
	}
	buf[len] = '\0';

	*name = cstr_dup((const char *)buf);
	if (!(*name))
		res = FPGA_NO_MEMORY;

out_free_obj:
	fpgaDestroyObject(&obj);
	return res;
}

/*
 * Read a 64-bit numeric attribute of a sensor.
 *
 * sensor:   sensor whose sensor_object is queried.
 * obj_name: name of the attribute object to read (e.g. "id").
 * value:    receives the attribute value via fpgaObjectRead64().
 *
 * Returns the result of the object lookup if it fails, otherwise the
 * result of the 64-bit read. The temporary attribute object is always
 * destroyed before returning.
 */
STATIC fpga_result vc_sensor_get_u64(vc_sensor *sensor,
				     const char *obj_name,
				     uint64_t *value)
{
	fpga_object attr = NULL;
	fpga_result status = fpgaObjectGetObject(sensor->sensor_object,
						 obj_name,
						 &attr,
						 0);

	if (status == FPGA_OK) {
		status = fpgaObjectRead64(attr, value, 0);
		fpgaDestroyObject(&attr);
	}

	return status;
}

// The percentage by which we adjust the power trip
// points so that we catch anomalies before the hw does.
#define VC_PERCENT_ADJUST_PWR 5
// The number of degrees by which we adjust the
// temperature trip points so that we catch anomalies
// before the hw does.
#define VC_DEGREES_ADJUST_TEMP 5
/*
 * (Re)load every attribute of sensor s.
 *
 * The name, type, id and current value are mandatory: if any of them
 * cannot be read the function returns that error immediately (strings
 * read so far stay attached to s for the caller to release).
 *
 * The four thresholds are optional. Each one that can be read is
 * flagged valid; the fatal thresholds are additionally tightened by
 * VC_DEGREES_ADJUST_TEMP for sensors whose type is "Temperature" and
 * by VC_PERCENT_ADJUST_PWR percent for all other sensors.
 *
 * Finally, if the user configuration contains a usable entry with the
 * same id, that entry decides which thresholds are valid. A user value
 * replaces the hardware value unless the hardware value is valid and
 * the user value is less strict (higher for the high thresholds, lower
 * for the low thresholds).
 *
 * Returns FPGA_OK once the mandatory attributes have been read.
 */
STATIC fpga_result vc_sensor_get(vc_device *vc, vc_sensor *s)
{
	fpga_result status;
	bool temperature;
	vc_config_sensor *override = NULL;
	uint32_t idx;

	// Drop strings left over from a previous enumeration.
	free(s->name);
	s->name = NULL;
	free(s->type);
	s->type = NULL;

	status = vc_sensor_get_string(s, "name", &s->name);
	if (status != FPGA_OK)
		return status;

	status = vc_sensor_get_string(s, "type", &s->type);
	if (status != FPGA_OK)
		return status;

	status = vc_sensor_get_u64(s, "id", &s->id);
	if (status != FPGA_OK)
		return status;

	status = fpgaObjectRead64(s->value_object, &s->value, 0);
	if (status != FPGA_OK)
		return status;

	temperature = (strcmp(s->type, "Temperature") == 0);

	// Hardware thresholds: each is optional.
	if (vc_sensor_get_u64(s, "high_fatal", &s->high_fatal) == FPGA_OK) {
		s->flags |= FPGAD_SENSOR_VC_HIGH_FATAL_VALID;
		s->high_fatal -= temperature ?
			VC_DEGREES_ADJUST_TEMP :
			(s->high_fatal * VC_PERCENT_ADJUST_PWR) / 100;
	} else {
		s->flags &= ~FPGAD_SENSOR_VC_HIGH_FATAL_VALID;
	}

	if (vc_sensor_get_u64(s, "high_warn", &s->high_warn) == FPGA_OK) {
		s->flags |= FPGAD_SENSOR_VC_HIGH_WARN_VALID;
	} else {
		s->flags &= ~FPGAD_SENSOR_VC_HIGH_WARN_VALID;
	}

	if (vc_sensor_get_u64(s, "low_fatal", &s->low_fatal) == FPGA_OK) {
		s->flags |= FPGAD_SENSOR_VC_LOW_FATAL_VALID;
		s->low_fatal += temperature ?
			VC_DEGREES_ADJUST_TEMP :
			(s->low_fatal * VC_PERCENT_ADJUST_PWR) / 100;
	} else {
		s->flags &= ~FPGAD_SENSOR_VC_LOW_FATAL_VALID;
	}

	if (vc_sensor_get_u64(s, "low_warn", &s->low_warn) == FPGA_OK) {
		s->flags |= FPGAD_SENSOR_VC_LOW_WARN_VALID;
	} else {
		s->flags &= ~FPGAD_SENSOR_VC_LOW_WARN_VALID;
	}

	// Look for the first usable user override with a matching id.
	for (idx = 0 ; idx < vc->num_config_sensors && !override ; ++idx) {
		vc_config_sensor *candidate = &vc->config_sensors[idx];

		if (!(candidate->flags & FPGAD_SENSOR_VC_IGNORE) &&
		    candidate->id == s->id)
			override = candidate;
	}

	if (!override)
		return FPGA_OK;

	if (override->flags & FPGAD_SENSOR_VC_HIGH_FATAL_VALID) {
		// Keep the adjusted hardware limit when the user asks
		// for something higher than the hardware allows.
		if (!(s->flags & FPGAD_SENSOR_VC_HIGH_FATAL_VALID) ||
		    override->high_fatal <= s->high_fatal)
			s->high_fatal = override->high_fatal;

		s->flags |= FPGAD_SENSOR_VC_HIGH_FATAL_VALID;
	} else {
		s->flags &= ~FPGAD_SENSOR_VC_HIGH_FATAL_VALID;
	}

	if (override->flags & FPGAD_SENSOR_VC_HIGH_WARN_VALID) {
		if (!(s->flags & FPGAD_SENSOR_VC_HIGH_WARN_VALID) ||
		    override->high_warn <= s->high_warn)
			s->high_warn = override->high_warn;

		s->flags |= FPGAD_SENSOR_VC_HIGH_WARN_VALID;
	} else {
		s->flags &= ~FPGAD_SENSOR_VC_HIGH_WARN_VALID;
	}

	if (override->flags & FPGAD_SENSOR_VC_LOW_FATAL_VALID) {
		if (!(s->flags & FPGAD_SENSOR_VC_LOW_FATAL_VALID) ||
		    override->low_fatal >= s->low_fatal)
			s->low_fatal = override->low_fatal;

		s->flags |= FPGAD_SENSOR_VC_LOW_FATAL_VALID;
	} else {
		s->flags &= ~FPGAD_SENSOR_VC_LOW_FATAL_VALID;
	}

	if (override->flags & FPGAD_SENSOR_VC_LOW_WARN_VALID) {
		if (!(s->flags & FPGAD_SENSOR_VC_LOW_WARN_VALID) ||
		    override->low_warn >= s->low_warn)
			s->low_warn = override->low_warn;

		s->flags |= FPGAD_SENSOR_VC_LOW_WARN_VALID;
	} else {
		s->flags &= ~FPGAD_SENSOR_VC_LOW_WARN_VALID;
	}

	return FPGA_OK;
}

/*
 * Try to add the sensor called `name` (an object below the device's
 * group object) to the next free slot of vc->sensors.
 *
 * The slot only becomes part of the device (num_sensors is
 * incremented) when the sensor object, its "value" object and all
 * mandatory attributes could be obtained; otherwise whatever was
 * acquired for the slot is released again.
 *
 * Returns FPGA_OK when the sensor was added, FPGA_EXCEPTION when the
 * sensor table is already full, or the error of the failing step.
 */
STATIC fpga_result vc_enum_sensor(vc_device *vc,
				  const char *name)
{
	fpga_result status;
	vc_sensor *slot;

	if (vc->num_sensors == MAX_VC_SENSORS) {
		LOG("exceeded max number of sensors.\n");
		return FPGA_EXCEPTION;
	}

	slot = vc->sensors + vc->num_sensors;

	status = fpgaObjectGetObject(vc->group_object,
				     name,
				     &slot->sensor_object,
				     0);
	if (status != FPGA_OK)
		return status;

	status = fpgaObjectGetObject(slot->sensor_object,
				     "value",
				     &slot->value_object,
				     0);
	if (status != FPGA_OK) {
		LOG("failed to get value object for %s.\n", name);
		fpgaDestroyObject(&slot->sensor_object);
		return status;
	}

	status = vc_sensor_get(vc, slot);
	if (status != FPGA_OK) {
		LOG("warning: sensor attribute enumeration failed.\n");
		vc_destroy_sensor(slot);
		return status;
	}

	// Everything succeeded: commit the slot.
	++vc->num_sensors;
	return status;
}

/*
 * Enumerate all sensors of the device and allocate the state bit sets.
 *
 * The group object is looked up by glob below the device token, then
 * "sensor0" .. "sensor<MAX_VC_SENSORS-1>" are probed; names that cannot
 * be enumerated are simply skipped.
 *
 * The bit sets are indexed by sensor id, so they are sized to hold the
 * largest id among the enumerated sensors (ids 0..max inclusive need
 * max/8 + 1 bytes).
 *
 * On any failure everything acquired by this call (group object,
 * sensors, bit sets) is released again, so the function can be retried
 * without leaking and without appending duplicate sensors.
 *
 * Returns FPGA_OK on success, FPGA_NOT_FOUND when no sensor could be
 * enumerated, FPGA_NO_MEMORY when a bit set could not be allocated, or
 * the error of the group object lookup.
 */
STATIC fpga_result vc_enum_sensors(vc_device *vc)
{
	fpga_result res;
	char name[SYSFS_PATH_MAX];
	uint64_t max_id = 0;
	size_t bytes;
	uint32_t n;
	int i;

	res = fpgaTokenGetObject(vc->base_device->token,
				 "spi-altera.*.auto/spi_master/spi*/spi*.*",
				 &vc->group_object,
				 FPGA_OBJECT_GLOB);
	if (res)
		return res;

	for (i = 0 ; i < MAX_VC_SENSORS ; ++i) {
		snprintf(name, sizeof(name), "sensor%d", i);
		vc_enum_sensor(vc, name);
	}

	if (vc->num_sensors == 0) {
		res = FPGA_NOT_FOUND;
		goto out_destroy;
	}

	// Do not assume the last sensor carries the largest id.
	for (n = 0 ; n < vc->num_sensors ; ++n) {
		if (vc->sensors[n].id > max_id)
			max_id = vc->sensors[n].id;
	}
	vc->max_sensor_id = max_id;

	// Bit max_id lives in byte max_id / 8, hence the + 1.
	bytes = (size_t)(max_id / 8) + 1;

	vc->state_tripped = calloc(bytes, 1);
	vc->state_last = calloc(bytes, 1);

	if (vc->state_tripped && vc->state_last)
		return FPGA_OK;

	res = FPGA_NO_MEMORY;

out_destroy:
	vc_destroy_sensors(vc);
	return res;
}

STATIC fpga_result vc_disable_aer(vc_device *vc)
{
	fpga_token token;
	fpga_result res;
	fpga_properties prop = NULL;
	char path[PATH_MAX];
	char rlpath[PATH_MAX];
	char *p;
	char cmd[256];
	char output[256];
	FILE *fp;
	size_t sz;

	uint16_t seg = 0;
	uint8_t bus = 0;
	uint8_t dev = 0;
	uint8_t fn = 0;

	token = vc->base_device->token;

	res = fpgaGetProperties(token, &prop);
	if (res != FPGA_OK) {
		LOG("failed to get fpga properties.\n");
		return res;
	}

	if ((fpgaPropertiesGetSegment(prop, &seg) != FPGA_OK) ||
	    (fpgaPropertiesGetBus(prop, &bus) != FPGA_OK) ||
	    (fpgaPropertiesGetDevice(prop, &dev) != FPGA_OK) ||
	    (fpgaPropertiesGetFunction(prop, &fn) != FPGA_OK)) {
		LOG("failed to get PCI attributes.\n");
		fpgaDestroyProperties(&prop);
		return FPGA_EXCEPTION;
	}

	fpgaDestroyProperties(&prop);

	snprintf(path, sizeof(path),
			"/sys/bus/pci/devices/%04x:%02x:%02x.%d",
			(int)seg, (int)bus, (int)dev, (int)fn);

	memset(rlpath, 0, sizeof(rlpath));

	if (readlink(path, rlpath, sizeof(rlpath)) < 0) {
		LOG("readlink \"%s\" failed.\n", path);
		return FPGA_EXCEPTION;
	}

	// (rlpath)
	//                    1111111111
	//          01234567890123456789
	// ../../../devices/pci0000:ae/0000:ae:00.0/0000:af:00.0/
	// 0000:b0:09.0/0000:b2:00.0

	p = strstr(rlpath, "devices/pci");

	p += 19;
	*(p + 12) = '\0';

	// Save the current ECAP_AER values.

	snprintf(cmd, sizeof(cmd),
		      "setpci -s %s ECAP_AER+0x08.L", p);

	fp = popen(cmd, "r");
	if (!fp) {
		LOG("failed to read ECAP_AER+0x08 for %s\n", p);
		return FPGA_EXCEPTION;
	}

	sz = fread(output, 1, sizeof(output), fp);

	if (sz >= sizeof(output))
		sz = sizeof(output) - 1;
	output[sz] = '\0';

	pclose(fp);

	vc->previous_ecap_aer[0] = strtoul(output, NULL, 16);

	LOG("saving previous ECAP_AER+0x08 value 0x%08x for %s\n",
	    vc->previous_ecap_aer[0], p);


	snprintf(cmd, sizeof(cmd),
		      "setpci -s %s ECAP_AER+0x14.L", p);

	fp = popen(cmd, "r");
	if (!fp) {
		LOG("failed to read ECAP_AER+0x14 for %s\n", p);
		return FPGA_EXCEPTION;
	}

	sz = fread(output, 1, sizeof(output), fp);

	if (sz >= sizeof(output))
		sz = sizeof(output) - 1;
	output[sz] = '\0';

	pclose(fp);

	vc->previous_ecap_aer[1] = strtoul(output, NULL, 16);

	LOG("saving previous ECAP_AER+0x14 value 0x%08x for %s\n",
	    vc->previous_ecap_aer[1], p);


	// Disable AER.

	snprintf(cmd, sizeof(cmd),
		      "setpci -s %s ECAP_AER+0x08.L=0xffffffff", p);

	fp = popen(cmd, "r");
	if (!fp) {
		LOG("failed to write ECAP_AER+0x08 for %s\n", p);
		return FPGA_EXCEPTION;
	}

	pclose(fp);

	snprintf(cmd, sizeof(cmd),
		      "setpci -s %s ECAP_AER+0x14.L=0xffffffff", p);

	fp = popen(cmd, "r");
	if (!fp) {
		LOG("failed to write ECAP_AER+0x14 for %s\n", p);
		return FPGA_EXCEPTION;
	}

	pclose(fp);

	return FPGA_OK;
}

STATIC fpga_result vc_enable_aer(vc_device *vc)
{
	fpga_token token;
	fpga_result res;
	fpga_properties prop = NULL;
	char path[PATH_MAX];
	char rlpath[PATH_MAX];
	char *p;
	char cmd[256];
	FILE *fp;

	uint16_t seg = 0;
	uint8_t bus = 0;
	uint8_t dev = 0;
	uint8_t fn = 0;

	token = vc->base_device->token;

	res = fpgaGetProperties(token, &prop);
	if (res != FPGA_OK) {
		LOG("failed to get fpga properties.\n");
		return res;
	}

	if ((fpgaPropertiesGetSegment(prop, &seg) != FPGA_OK) ||
	    (fpgaPropertiesGetBus(prop, &bus) != FPGA_OK) ||
	    (fpgaPropertiesGetDevice(prop, &dev) != FPGA_OK) ||
	    (fpgaPropertiesGetFunction(prop, &fn) != FPGA_OK)) {
		LOG("failed to get PCI attributes.\n");
		fpgaDestroyProperties(&prop);
		return FPGA_EXCEPTION;
	}

	fpgaDestroyProperties(&prop);

	snprintf(path, sizeof(path),
			"/sys/bus/pci/devices/%04x:%02x:%02x.%d",
			(int)seg, (int)bus, (int)dev, (int)fn);

	memset(rlpath, 0, sizeof(rlpath));

	if (readlink(path, rlpath, sizeof(rlpath)) < 0) {
		LOG("readlink \"%s\" failed.\n", path);
		return FPGA_EXCEPTION;
	}

	// (rlpath)
	//                    1111111111
	//          01234567890123456789
	// ../../../devices/pci0000:ae/0000:ae:00.0/0000:af:00.0/
	// 0000:b0:09.0/0000:b2:00.0

	p = strstr(rlpath, "devices/pci");

	p += 19;
	*(p + 12) = '\0';

	// Write the saved ECAP_AER values to enable AER.

	snprintf(cmd, sizeof(cmd),
		      "setpci -s %s ECAP_AER+0x08.L=0x%08x",
		      p, vc->previous_ecap_aer[0]);

	fp = popen(cmd, "r");
	if (!fp) {
		LOG("failed to write ECAP_AER+0x08 for %s\n", p);
		return FPGA_EXCEPTION;
	}

	pclose(fp);

	LOG("restored previous ECAP_AER+0x08 value 0x%08x for %s\n",
	    vc->previous_ecap_aer[0], p);


	snprintf(cmd, sizeof(cmd),
		      "setpci -s %s ECAP_AER+0x14.L=0x%08x",
		      p, vc->previous_ecap_aer[1]);

	fp = popen(cmd, "r");
	if (!fp) {
		LOG("failed to write ECAP_AER+0x14 for %s\n", p);
		return FPGA_EXCEPTION;
	}

	pclose(fp);

	LOG("restored previous ECAP_AER+0x14 value 0x%08x for %s\n",
	    vc->previous_ecap_aer[1], p);

	return FPGA_OK;
}

/*
 * Poll every enumerated sensor once and react to threshold crossings.
 *
 * For each sensor that is not ignored:
 *  - the current value is re-read; after
 *    FPGAD_SENSOR_VC_MAX_READ_ERRORS failed reads the sensor is flagged
 *    to be ignored from then on;
 *  - if the value is outside a valid warning threshold, a
 *    power/thermal event is sent, the sensor is recorded as tripped,
 *    and on the normal -> tripped transition AER is disabled (once);
 *  - if the sensor was tripped in the previous poll and is no longer
 *    outside any warning threshold, the transition back to normal is
 *    logged.
 *
 * When at least one sensor returned to normal and no sensor remains
 * tripped, AER is re-enabled.
 *
 * At the end the "tripped" set becomes the "last" set for the next
 * poll and the "tripped" set is cleared.
 *
 * Returns true while at least one non-ignored sensor has a valid
 * high or low warning threshold; false tells the caller to go back to
 * sensor enumeration.
 */
STATIC bool vc_monitor_sensors(vc_device *vc)
{
	uint32_t i;
	uint32_t monitoring = 0;
	bool negative_trans = false;
	bool res = true;

	for (i = 0 ; i < vc->num_sensors ; ++i) {
		vc_sensor *s = &vc->sensors[i];

		if (s->flags & FPGAD_SENSOR_VC_IGNORE)
			continue;

		if (s->flags & (FPGAD_SENSOR_VC_HIGH_WARN_VALID|
				FPGAD_SENSOR_VC_LOW_WARN_VALID))
			++monitoring;

		if (fpgaObjectRead64(s->value_object,
				     &s->value,
				     FPGA_OBJECT_SYNC) != FPGA_OK) {
			if (++s->read_errors >=
				FPGAD_SENSOR_VC_MAX_READ_ERRORS)
				s->flags |= FPGAD_SENSOR_VC_IGNORE;
			continue;
		}

		if (HIGH_WARN(s) || LOW_WARN(s)) {
			opae_api_send_EVENT_POWER_THERMAL(vc->base_device);
			BIT_SET_SET(vc->state_tripped, s->id);
			if (!BIT_IS_SET(vc->state_last, s->id)) {
				LOG("sensor '%s' warning.\n", s->name);
				if (!vc->aer_disabled) {
					if (FPGA_OK == vc_disable_aer(vc))
						vc->aer_disabled = true;
				}
			}
		} else if (BIT_IS_SET(vc->state_last, s->id)) {
			// Only a sensor that is no longer outside any
			// warning threshold is "back to normal". A
			// sensor above its high threshold is trivially
			// within its low threshold and must not be
			// reported as recovered.
			negative_trans = true;
			LOG("sensor '%s' back to normal.\n", s->name);
		}

		if (BIT_IS_SET(vc->state_last, s->id) &&
		    BIT_IS_SET(vc->state_tripped, s->id)) {
			LOG_MOD(vc->tripped_count,
				"sensor '%s' still tripped.\n", s->name);
		}
	}

	if (negative_trans) {
		for (i = 0 ; i < vc->num_sensors ; ++i) {
			if (BIT_IS_SET(vc->state_tripped, vc->sensors[i].id))
				break;
		}

		if (i == vc->num_sensors) {
			// no remaining tripped sensors
			vc->tripped_count = 0;
			if (vc->aer_disabled) {
				if (FPGA_OK == vc_enable_aer(vc))
					vc->aer_disabled = false;
			}
		}
	}

	/*
	** Are we still monitoring any sensor that has a valid
	** high/low warn threshold? If not, then this fn should
	** return false so that we wait for sensor enumeration
	** in the caller. It's possible that we're ignoring all
	** of the sensors because they became unavailable due to
	** driver removal, eg.
	*/
	if (!monitoring)
		res = false;

	// Roll the state over for the next poll. The tripped bits are
	// cleared one sensor at a time: only bits belonging to known
	// sensor ids can be set, and this does not depend on how many
	// bytes a size formula says the bit set has.
	for (i = 0 ; i < vc->num_sensors ; ++i) {
		vc_sensor *s = &vc->sensors[i];
		if (BIT_IS_SET(vc->state_tripped, s->id))
			BIT_SET_SET(vc->state_last, s->id);
		else
			BIT_SET_CLR(vc->state_last, s->id);
		BIT_SET_CLR(vc->state_tripped, s->id);
	}

	return res;
}

STATIC int cool_down = 30;

STATIC void *monitor_fme_vc_thread(void *arg)
{
	fpgad_monitored_device *d =
		(fpgad_monitored_device *)arg;

	vc_device *vc = (vc_device *)d->thread_context;

	uint32_t enum_retries = 0;
	uint8_t *save_state_last = NULL;

	while (vc_threads_running) {

		while (vc_enum_sensors(vc) != FPGA_OK) {
			LOG_MOD(enum_retries, "waiting to enumerate sensors.\n");
			if (!vc_threads_running)
				return NULL;
			sleep(1);
		}

		if (save_state_last) {
			free(vc->state_last);
			vc->state_last = save_state_last;
			save_state_last = NULL;
		}

		while (vc_monitor_sensors(vc)) {
			usleep(d->config->poll_interval_usec);

			if (!vc_threads_running) {
				vc_destroy_sensors(vc);
				return NULL;
			}
		}

		save_state_last = vc->state_last;
		vc->state_last = NULL;

		vc_destroy_sensors(vc);

	}

	return NULL;
}

STATIC int vc_parse_config(vc_device *vc, const char *cfg)
{
	json_object *root;
	enum json_tokener_error j_err = json_tokener_success;
	json_object *j_cool_down = NULL;
	json_object *j_config_sensors_enabled = NULL;
	json_object *j_sensors = NULL;
	int res = 1;
	int sensor_entries;
	int i;

	root = json_tokener_parse_verbose(cfg, &j_err);
	if (!root) {
		LOG("error parsing config: %s\n",
		    json_tokener_error_desc(j_err));
		return 1;
	}

	if (!json_object_object_get_ex(root,
				       "cool-down",
				       &j_cool_down)) {
		LOG("failed to find cool-down key in config.\n");
		goto out_put;
	}

	if (!json_object_is_type(j_cool_down, json_type_int)) {
		LOG("cool-down key not integer.\n");
		goto out_put;
	}

	cool_down = json_object_get_int(j_cool_down);
	if (cool_down < 0)
		cool_down = 30;

	LOG("set cool-down period to %d seconds.\n", cool_down);

	res = 0;

	if (!json_object_object_get_ex(root,
				       "config-sensors-enabled",
				       &j_config_sensors_enabled)) {
		LOG("failed to find config-sensors-enabled key in config.\n"
		    "Skipping user sensor config.\n");
		goto out_put;
	}

	if (!json_object_is_type(j_config_sensors_enabled, json_type_boolean)) {
		LOG("config-sensors-enabled key not Boolean.\n"
		    "Skipping user sensor config.\n");
		goto out_put;
	}

	if (!json_object_get_boolean(j_config_sensors_enabled)) {
		LOG("config-sensors-enabled key set to false.\n"
		    "Skipping user sensor config.\n");
		goto out_put;
	}

	if (!json_object_object_get_ex(root,
				       "sensors",
				       &j_sensors)) {
		LOG("failed to find sensors key in config.\n"
		    "Skipping user sensor config.\n");
		goto out_put;
	}

	if (!json_object_is_type(j_sensors, json_type_array)) {
		LOG("sensors key not array.\n"
		    "Skipping user sensor config.\n");
		goto out_put;
	}

	sensor_entries = json_object_array_length(j_sensors);
	if (!sensor_entries) {
		LOG("sensors key is empty array.\n"
		    "Skipping user sensor config.\n");
		goto out_put;
	}

	vc->config_sensors = calloc(sensor_entries, sizeof(vc_config_sensor));
	if (!vc->config_sensors) {
		LOG("calloc failed. Skipping user sensor config.\n");
		goto out_put;
	}

	vc->num_config_sensors = (uint32_t)sensor_entries;

	for (i = 0 ; i < sensor_entries ; ++i) {
		json_object *j_sensor_sub_i = json_object_array_get_idx(j_sensors, i);
		json_object *j_id;
		json_object *j_high_fatal;
		json_object *j_high_warn;
		json_object *j_low_fatal;
		json_object *j_low_warn;

		if (!json_object_object_get_ex(j_sensor_sub_i,
					       "id",
					       &j_id)) {
			LOG("failed to find id key in config sensors[%d].\n"
			    "Skipping entry %d.\n", i, i);
			vc->config_sensors[i].id = MAX_VC_SENSORS;
			vc->config_sensors[i].flags = FPGAD_SENSOR_VC_IGNORE;
			continue;
		}

		if (!json_object_is_type(j_id, json_type_int)) {
			LOG("sensors[%d].id key not int.\n"
			    "Skipping entry %d.\n", i, i);
			vc->config_sensors[i].id = MAX_VC_SENSORS;
			vc->config_sensors[i].flags = FPGAD_SENSOR_VC_IGNORE;
			continue;
		}

		vc->config_sensors[i].id = json_object_get_int(j_id);

		if (json_object_object_get_ex(j_sensor_sub_i,
					      "high-fatal",
					      &j_high_fatal)) {
			if (json_object_is_type(j_high_fatal,
						json_type_double)) {
				vc->config_sensors[i].high_fatal =
				(uint64_t)(json_object_get_double(j_high_fatal)
					* 1000.0);
				vc->config_sensors[i].flags |=
					FPGAD_SENSOR_VC_HIGH_FATAL_VALID;
				LOG("user sensor%u high-fatal: %f\n",
				    vc->config_sensors[i].id,
				    json_object_get_double(j_high_fatal));
			}
		}

		if (json_object_object_get_ex(j_sensor_sub_i,
					      "high-warn",
					      &j_high_warn)) {
			if (json_object_is_type(j_high_warn,
						json_type_double)) {
				vc->config_sensors[i].high_warn =
				(uint64_t)(json_object_get_double(j_high_warn)
					* 1000.0);
				vc->config_sensors[i].flags |=
					FPGAD_SENSOR_VC_HIGH_WARN_VALID;
				LOG("user sensor%u high-warn: %f\n",
				    vc->config_sensors[i].id,
				    json_object_get_double(j_high_warn));
			}
		}

		if (json_object_object_get_ex(j_sensor_sub_i,
					      "low-fatal",
					      &j_low_fatal)) {
			if (json_object_is_type(j_low_fatal,
						json_type_double)) {
				vc->config_sensors[i].low_fatal =
				(uint64_t)(json_object_get_double(j_low_fatal)
					* 1000.0);
				vc->config_sensors[i].flags |=
					FPGAD_SENSOR_VC_LOW_FATAL_VALID;
				LOG("user sensor%u low-fatal: %f\n",
				    vc->config_sensors[i].id,
				    json_object_get_double(j_low_fatal));
			}
		}

		if (json_object_object_get_ex(j_sensor_sub_i,
					      "low-warn",
					      &j_low_warn)) {
			if (json_object_is_type(j_low_warn,
						json_type_double)) {
				vc->config_sensors[i].low_warn =
				(uint64_t)(json_object_get_double(j_low_warn)
					* 1000.0);
				vc->config_sensors[i].flags |=
					FPGAD_SENSOR_VC_LOW_WARN_VALID;
				LOG("user sensor%u low-warn: %f\n",
				    vc->config_sensors[i].id,
				    json_object_get_double(j_low_warn));
			}
		}
	}

out_put:
	json_object_put(root);
	return res;
}

/*
 * Plugin entry point: configure monitoring for device d from the JSON
 * configuration string cfg.
 *
 * The plugin is always registered as a thread-type plugin. Only
 * FPGA_DEVICE objects are monitored: for those a vc_device context is
 * allocated, attached to d->thread_context and the thread callbacks
 * are installed. Other object types are not monitored.
 *
 * Returns 0 on success. Returns non-zero when d is not an FPGA_DEVICE,
 * when the context cannot be allocated, or when the configuration
 * cannot be parsed; in the failure cases d->thread_context is left
 * without a context so that fpgad_plugin_destroy() has nothing to
 * release.
 */
int fpgad_plugin_configure(fpgad_monitored_device *d,
			   const char *cfg)
{
	int res = 1;
	vc_device *vc;

	vc_threads_running = true;

	d->type = FPGAD_PLUGIN_TYPE_THREAD;

	if (d->object_type == FPGA_DEVICE) {

		d->thread_fn = monitor_fme_vc_thread;
		d->thread_stop_fn = stop_vc_threads;

		vc = calloc(1, sizeof(vc_device));
		if (!vc)
			return res;

		vc->base_device = d;
		d->thread_context = vc;

		LOG("monitoring vid=0x%04x did=0x%04x (%s)\n",
			d->supported->vendor_id,
			d->supported->device_id,
			d->object_type == FPGA_ACCELERATOR ?
			"accelerator" : "device");

		res = vc_parse_config(vc, cfg);
		if (res) {
			// Do not leave a pointer to freed memory
			// behind in the device.
			d->thread_context = NULL;
			free(vc);
		}

	}

	// Not currently monitoring the Port device

	return res;
}

/*
 * Plugin exit point: log that monitoring of d stops and release the
 * vc_device context attached to d->thread_context, if there is one.
 */
void fpgad_plugin_destroy(fpgad_monitored_device *d)
{
	vc_device *ctx;

	LOG("stop monitoring vid=0x%04x did=0x%04x (%s)\n",
			d->supported->vendor_id,
			d->supported->device_id,
			d->object_type == FPGA_ACCELERATOR ?
			"accelerator" : "device");

	if (!d->thread_context)
		return;

	ctx = (vc_device *)d->thread_context;
	vc_destroy_device(ctx);
	free(d->thread_context);
	d->thread_context = NULL;
}