|
Packit |
13e616 |
/*
|
|
Packit |
13e616 |
* Copyright (c) 2007 The Regents of the University of California.
|
|
Packit |
13e616 |
* Copyright (c) 2007-2009 Voltaire, Inc. All rights reserved.
|
|
Packit |
13e616 |
* Copyright (c) 2009,2010 HNR Consulting. All rights reserved.
|
|
Packit |
13e616 |
* Copyright (c) 2013 Lawrence Livermore National Security. All rights reserved.
|
|
Packit |
13e616 |
* Copyright (c) 2011-2014 Mellanox Technologies LTD. All rights reserved.
|
|
Packit |
13e616 |
*
|
|
Packit |
13e616 |
* This software is available to you under a choice of one of two
|
|
Packit |
13e616 |
* licenses. You may choose to be licensed under the terms of the GNU
|
|
Packit |
13e616 |
* General Public License (GPL) Version 2, available from the file
|
|
Packit |
13e616 |
* COPYING in the main directory of this source tree, or the
|
|
Packit |
13e616 |
* OpenIB.org BSD license below:
|
|
Packit |
13e616 |
*
|
|
Packit |
13e616 |
* Redistribution and use in source and binary forms, with or
|
|
Packit |
13e616 |
* without modification, are permitted provided that the following
|
|
Packit |
13e616 |
* conditions are met:
|
|
Packit |
13e616 |
*
|
|
Packit |
13e616 |
* - Redistributions of source code must retain the above
|
|
Packit |
13e616 |
* copyright notice, this list of conditions and the following
|
|
Packit |
13e616 |
* disclaimer.
|
|
Packit |
13e616 |
*
|
|
Packit |
13e616 |
* - Redistributions in binary form must reproduce the above
|
|
Packit |
13e616 |
* copyright notice, this list of conditions and the following
|
|
Packit |
13e616 |
* disclaimer in the documentation and/or other materials
|
|
Packit |
13e616 |
* provided with the distribution.
|
|
Packit |
13e616 |
*
|
|
Packit |
13e616 |
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
|
Packit |
13e616 |
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
|
Packit |
13e616 |
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
|
Packit |
13e616 |
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
|
Packit |
13e616 |
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
|
Packit |
13e616 |
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
|
Packit |
13e616 |
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
Packit |
13e616 |
* SOFTWARE.
|
|
Packit |
13e616 |
*
|
|
Packit |
13e616 |
*/
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
/*
|
|
Packit |
13e616 |
* Abstract:
|
|
Packit |
13e616 |
* Implementation of osm_perfmgr_t.
|
|
Packit |
13e616 |
* This object implements an IBA performance manager.
|
|
Packit |
13e616 |
*
|
|
Packit |
13e616 |
* Author:
|
|
Packit |
13e616 |
* Ira Weiny, LLNL
|
|
Packit |
13e616 |
*/
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
#if HAVE_CONFIG_H
|
|
Packit |
13e616 |
# include <config.h>
|
|
Packit |
13e616 |
#endif /* HAVE_CONFIG_H */
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
#ifdef ENABLE_OSM_PERF_MGR
|
|
Packit |
13e616 |
#include <stdlib.h>
|
|
Packit |
13e616 |
#include <stdint.h>
|
|
Packit |
13e616 |
#include <string.h>
|
|
Packit |
13e616 |
#include <poll.h>
|
|
Packit |
13e616 |
#include <errno.h>
|
|
Packit |
13e616 |
#include <sys/time.h>
|
|
Packit |
13e616 |
#include <netinet/in.h>
|
|
Packit |
13e616 |
#include <float.h>
|
|
Packit |
13e616 |
#include <arpa/inet.h>
|
|
Packit |
13e616 |
#include <iba/ib_types.h>
|
|
Packit |
13e616 |
#include <complib/cl_debug.h>
|
|
Packit |
13e616 |
#include <complib/cl_thread.h>
|
|
Packit |
13e616 |
#include <opensm/osm_file_ids.h>
|
|
Packit |
13e616 |
#define FILE_ID OSM_FILE_PERFMGR_C
|
|
Packit |
13e616 |
#include <vendor/osm_vendor_api.h>
|
|
Packit |
13e616 |
#include <opensm/osm_perfmgr.h>
|
|
Packit |
13e616 |
#include <opensm/osm_log.h>
|
|
Packit |
13e616 |
#include <opensm/osm_node.h>
|
|
Packit |
13e616 |
#include <opensm/osm_opensm.h>
|
|
Packit |
13e616 |
#include <opensm/osm_helper.h>
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
#define PERFMGR_INITIAL_TID_VALUE 0xcafe
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
#ifdef ENABLE_OSM_PERF_MGR_PROFILE
|
|
Packit |
13e616 |
/*
 * Running statistics on PerfMgr MAD round-trip times, in microseconds.
 *
 * fastest_us and slowest_us start at sentinel values so that the first
 * recorded sample replaces them.  Note that DBL_MIN is the smallest
 * positive normalized double (not the most negative value); that is
 * sufficient here because elapsed times are never negative.
 */
struct {
	double fastest_us;	/* shortest round trip recorded */
	double slowest_us;	/* longest round trip recorded */
	double avg_us;		/* running mean of all samples */
	uint64_t num;		/* number of samples folded into avg_us */
} perfmgr_mad_stats = {
	.fastest_us = DBL_MAX,
	.slowest_us = DBL_MIN,
	.avg_us = 0,
	.num = 0
};
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
/* diff must be something which can fit in a susecond_t */
|
|
Packit |
13e616 |
static inline void update_mad_stats(struct timeval *diff)
|
|
Packit |
13e616 |
{
|
|
Packit |
13e616 |
double new = (diff->tv_sec * 1000000) + diff->tv_usec;
|
|
Packit |
13e616 |
if (new < perfmgr_mad_stats.fastest_us)
|
|
Packit |
13e616 |
perfmgr_mad_stats.fastest_us = new;
|
|
Packit |
13e616 |
if (new > perfmgr_mad_stats.slowest_us)
|
|
Packit |
13e616 |
perfmgr_mad_stats.slowest_us = new;
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
perfmgr_mad_stats.avg_us =
|
|
Packit |
13e616 |
((perfmgr_mad_stats.avg_us * perfmgr_mad_stats.num) + new)
|
|
Packit |
13e616 |
/ (perfmgr_mad_stats.num + 1);
|
|
Packit |
13e616 |
perfmgr_mad_stats.num++;
|
|
Packit |
13e616 |
}
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
static inline void clear_mad_stats(void)
|
|
Packit |
13e616 |
{
|
|
Packit |
13e616 |
perfmgr_mad_stats.fastest_us = DBL_MAX;
|
|
Packit |
13e616 |
perfmgr_mad_stats.slowest_us = DBL_MIN;
|
|
Packit |
13e616 |
perfmgr_mad_stats.avg_us = 0;
|
|
Packit |
13e616 |
perfmgr_mad_stats.num = 0;
|
|
Packit |
13e616 |
}
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
/*
 * Compute diff = after - before.
 *
 * The result is accumulated in a local and stored at the end, so after
 * and diff can be the same struct.  A negative microsecond difference is
 * resolved by borrowing one second.
 */
static inline void diff_time(struct timeval *before, struct timeval *after,
			     struct timeval *diff)
{
	struct timeval delta;

	delta.tv_sec = after->tv_sec - before->tv_sec;
	delta.tv_usec = after->tv_usec - before->tv_usec;

	if (delta.tv_usec < 0) {
		/* borrow a second to keep tv_usec non-negative */
		delta.tv_sec -= 1;
		delta.tv_usec += 1000000;
	}

	diff->tv_sec = delta.tv_sec;
	diff->tv_usec = delta.tv_usec;
}
|
|
Packit |
13e616 |
#endif
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
/**********************************************************************
|
|
Packit |
13e616 |
* Internal helper functions
|
|
Packit |
13e616 |
**********************************************************************/
|
|
Packit |
13e616 |
/*
 * Set up the PerfMgr's monitored-node bookkeeping: the monitored map,
 * an empty removal list, and the sig_query event.
 */
static void init_monitored_nodes(osm_perfmgr_t * pm)
{
	pm->remove_list = NULL;
	cl_qmap_init(&pm->monitored_map);

	/* the event must be constructed before it is initialized */
	cl_event_construct(&pm->sig_query);
	cl_event_init(&pm->sig_query, FALSE);
}
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
/*
 * Push a monitored node onto the head of the PerfMgr's removal list.
 * The node is not touched otherwise; remove_marked_nodes() later walks
 * the list.
 */
static void mark_for_removal(osm_perfmgr_t * pm, monitored_node_t * node)
{
	/*
	 * remove_list is NULL when the list is empty, so linking to the
	 * current head also terminates a single-element list correctly.
	 */
	node->next = pm->remove_list;
	pm->remove_list = node;
}
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
/*
 * Drain the removal list.  Each listed node is taken out of the
 * monitored map, either deleted from the PerfMgr database (when
 * pm->rm_nodes is set) or marked inactive there, and then freed together
 * with its name and per-port remote names.
 */
static void remove_marked_nodes(osm_perfmgr_t * pm)
{
	monitored_node_t *doomed;
	monitored_node_t *rest;
	unsigned idx;

	for (doomed = pm->remove_list; doomed; doomed = pm->remove_list) {
		/* remember the tail before the node is freed */
		rest = doomed->next;

		cl_qmap_remove_item(&pm->monitored_map,
				    (cl_map_item_t *) doomed);

		if (pm->rm_nodes)
			perfmgr_db_delete_entry(pm->db, doomed->guid);
		else
			perfmgr_db_mark_active(pm->db, doomed->guid, FALSE);

		/* free(NULL) is a no-op, so no guards are needed */
		free(doomed->name);

		/* port 0 is only walked when esp0 is set */
		idx = doomed->esp0 ? 0 : 1;
		while (idx < doomed->num_ports) {
			free(doomed->port[idx].remote_name);
			idx++;
		}

		free(doomed);
		pm->remove_list = rest;
	}
}
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
/*
 * Account for one completed (answered or failed) query.
 *
 * Decrements the outstanding-query counter; when it reads zero and the
 * sweep is in the Post Processing state, moves the sweep state to Sleep
 * under pm->lock.  The sig_query event is signalled on every call.
 */
static inline void decrement_outstanding_queries(osm_perfmgr_t * pm)
{
	cl_atomic_dec(&pm->outstanding_queries);

	if (pm->outstanding_queries == 0) {
		cl_spinlock_acquire(&pm->lock);
		if (pm->sweep_state == PERFMGR_SWEEP_POST_PROCESSING) {
			pm->sweep_state = PERFMGR_SWEEP_SLEEP;
			OSM_LOG(pm->log, OSM_LOG_INFO,
				"PM sweep state exiting Post Processing\n");
		}
		cl_spinlock_release(&pm->lock);
	}

	/* signal unconditionally, whatever the counter value */
	cl_event_signal(&pm->sig_query);
}
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
/**********************************************************************
|
|
Packit |
13e616 |
* Receive the MAD from the vendor layer and post it for processing by
|
|
Packit |
13e616 |
* the dispatcher
|
|
Packit |
13e616 |
**********************************************************************/
|
|
Packit |
13e616 |
static void perfmgr_mad_recv_callback(osm_madw_t * p_madw, void *bind_context,
|
|
Packit |
13e616 |
osm_madw_t * p_req_madw)
|
|
Packit |
13e616 |
{
|
|
Packit |
13e616 |
osm_perfmgr_t *pm = (osm_perfmgr_t *) bind_context;
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
OSM_LOG_ENTER(pm->log);
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
CL_ASSERT(p_madw);
|
|
Packit |
13e616 |
CL_ASSERT(p_req_madw != NULL);
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
osm_madw_copy_context(p_madw, p_req_madw);
|
|
Packit |
13e616 |
osm_mad_pool_put(pm->mad_pool, p_req_madw);
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
decrement_outstanding_queries(pm);
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
/* post this message for later processing. */
|
|
Packit |
13e616 |
if (cl_disp_post(pm->pc_disp_h, OSM_MSG_MAD_PORT_COUNTERS,
|
|
Packit |
13e616 |
p_madw, NULL, NULL) != CL_SUCCESS) {
|
|
Packit |
13e616 |
OSM_LOG(pm->log, OSM_LOG_ERROR, "ERR 5401: "
|
|
Packit |
13e616 |
"PerfMgr Dispatcher post failed\n");
|
|
Packit |
13e616 |
osm_mad_pool_put(pm->mad_pool, p_madw);
|
|
Packit |
13e616 |
}
|
|
Packit |
13e616 |
OSM_LOG_EXIT(pm->log);
|
|
Packit |
13e616 |
}
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
/**********************************************************************
|
|
Packit |
13e616 |
* Process MAD send errors
|
|
Packit |
13e616 |
**********************************************************************/
|
|
Packit |
13e616 |
static void perfmgr_mad_send_err_callback(void *bind_context,
|
|
Packit |
13e616 |
osm_madw_t * p_madw)
|
|
Packit |
13e616 |
{
|
|
Packit |
13e616 |
osm_perfmgr_t *pm = (osm_perfmgr_t *) bind_context;
|
|
Packit |
13e616 |
osm_madw_context_t *context = &p_madw->context;
|
|
Packit |
13e616 |
uint64_t node_guid = context->perfmgr_context.node_guid;
|
|
Packit |
13e616 |
uint8_t port = context->perfmgr_context.port;
|
|
Packit |
13e616 |
cl_map_item_t *p_node;
|
|
Packit |
13e616 |
monitored_node_t *p_mon_node;
|
|
Packit |
13e616 |
ib_net16_t orig_lid;
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
OSM_LOG_ENTER(pm->log);
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
/*
|
|
Packit |
13e616 |
* get the monitored node struct to have the printable name
|
|
Packit |
13e616 |
* for log messages
|
|
Packit |
13e616 |
*/
|
|
Packit |
13e616 |
if ((p_node = cl_qmap_get(&pm->monitored_map, node_guid)) ==
|
|
Packit |
13e616 |
cl_qmap_end(&pm->monitored_map)) {
|
|
Packit |
13e616 |
OSM_LOG(pm->log, OSM_LOG_ERROR, "ERR 5415: GUID 0x%016"
|
|
Packit |
13e616 |
PRIx64 " not found in monitored map\n", node_guid);
|
|
Packit |
13e616 |
goto Exit;
|
|
Packit |
13e616 |
}
|
|
Packit |
13e616 |
p_mon_node = (monitored_node_t *) p_node;
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
OSM_LOG(pm->log, OSM_LOG_ERROR, "ERR 5402: %s (0x%" PRIx64
|
|
Packit |
13e616 |
") port %u LID %u TID 0x%" PRIx64 "\n",
|
|
Packit |
13e616 |
p_mon_node->name, p_mon_node->guid, port,
|
|
Packit |
13e616 |
cl_ntoh16(p_madw->mad_addr.dest_lid),
|
|
Packit |
13e616 |
cl_ntoh64(p_madw->p_mad->trans_id));
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
if (pm->subn->opt.perfmgr_redir && p_madw->status == IB_TIMEOUT) {
|
|
Packit |
13e616 |
/* First, find the node in the monitored map */
|
|
Packit |
13e616 |
cl_plock_acquire(&pm->osm->lock);
|
|
Packit |
13e616 |
/* Now, validate port number */
|
|
Packit |
13e616 |
if (port >= p_mon_node->num_ports) {
|
|
Packit |
13e616 |
cl_plock_release(&pm->osm->lock);
|
|
Packit |
13e616 |
OSM_LOG(pm->log, OSM_LOG_ERROR, "ERR 5416: "
|
|
Packit |
13e616 |
"Invalid port num %u for %s (GUID 0x%016"
|
|
Packit |
13e616 |
PRIx64 ") num ports %u\n", port,
|
|
Packit |
13e616 |
p_mon_node->name, p_mon_node->guid,
|
|
Packit |
13e616 |
p_mon_node->num_ports);
|
|
Packit |
13e616 |
goto Exit;
|
|
Packit |
13e616 |
}
|
|
Packit |
13e616 |
/* Clear redirection info for this port except orig_lid */
|
|
Packit |
13e616 |
orig_lid = p_mon_node->port[port].orig_lid;
|
|
Packit |
13e616 |
memset(&p_mon_node->port[port], 0, sizeof(monitored_port_t));
|
|
Packit |
13e616 |
p_mon_node->port[port].orig_lid = orig_lid;
|
|
Packit |
13e616 |
p_mon_node->port[port].valid = TRUE;
|
|
Packit |
13e616 |
cl_plock_release(&pm->osm->lock);
|
|
Packit |
13e616 |
}
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
Exit:
|
|
Packit |
13e616 |
osm_mad_pool_put(pm->mad_pool, p_madw);
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
decrement_outstanding_queries(pm);
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
OSM_LOG_EXIT(pm->log);
|
|
Packit |
13e616 |
}
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
/**********************************************************************
|
|
Packit |
13e616 |
* Bind the PerfMgr to the vendor layer for MAD sends/receives
|
|
Packit |
13e616 |
**********************************************************************/
|
|
Packit |
13e616 |
ib_api_status_t osm_perfmgr_bind(osm_perfmgr_t * pm, ib_net64_t port_guid)
|
|
Packit |
13e616 |
{
|
|
Packit |
13e616 |
osm_bind_info_t bind_info;
|
|
Packit |
13e616 |
ib_api_status_t status = IB_SUCCESS;
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
OSM_LOG_ENTER(pm->log);
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
if (pm->bind_handle != OSM_BIND_INVALID_HANDLE) {
|
|
Packit |
13e616 |
OSM_LOG(pm->log, OSM_LOG_ERROR,
|
|
Packit |
13e616 |
"ERR 5403: Multiple binds not allowed\n");
|
|
Packit |
13e616 |
status = IB_ERROR;
|
|
Packit |
13e616 |
goto Exit;
|
|
Packit |
13e616 |
}
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
bind_info.port_guid = pm->port_guid = port_guid;
|
|
Packit |
13e616 |
bind_info.mad_class = IB_MCLASS_PERF;
|
|
Packit |
13e616 |
bind_info.class_version = 1;
|
|
Packit |
13e616 |
bind_info.is_responder = FALSE;
|
|
Packit |
13e616 |
bind_info.is_report_processor = FALSE;
|
|
Packit |
13e616 |
bind_info.is_trap_processor = FALSE;
|
|
Packit |
13e616 |
bind_info.recv_q_size = OSM_PM_DEFAULT_QP1_RCV_SIZE;
|
|
Packit |
13e616 |
bind_info.send_q_size = OSM_PM_DEFAULT_QP1_SEND_SIZE;
|
|
Packit |
13e616 |
bind_info.timeout = pm->subn->opt.transaction_timeout;
|
|
Packit |
13e616 |
bind_info.retries = pm->subn->opt.transaction_retries;
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
OSM_LOG(pm->log, OSM_LOG_VERBOSE,
|
|
Packit |
13e616 |
"Binding to port GUID 0x%" PRIx64 "\n", cl_ntoh64(port_guid));
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
pm->bind_handle = osm_vendor_bind(pm->vendor, &bind_info, pm->mad_pool,
|
|
Packit |
13e616 |
perfmgr_mad_recv_callback,
|
|
Packit |
13e616 |
perfmgr_mad_send_err_callback, pm);
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
if (pm->bind_handle == OSM_BIND_INVALID_HANDLE) {
|
|
Packit |
13e616 |
status = IB_ERROR;
|
|
Packit |
13e616 |
OSM_LOG(pm->log, OSM_LOG_ERROR,
|
|
Packit |
13e616 |
"ERR 5404: Vendor specific bind failed (%s)\n",
|
|
Packit |
13e616 |
ib_get_err_str(status));
|
|
Packit |
13e616 |
}
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
Exit:
|
|
Packit |
13e616 |
OSM_LOG_EXIT(pm->log);
|
|
Packit |
13e616 |
return status;
|
|
Packit |
13e616 |
}
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
/**********************************************************************
|
|
Packit |
13e616 |
* Unbind the PerfMgr from the vendor layer for MAD sends/receives
|
|
Packit |
13e616 |
**********************************************************************/
|
|
Packit |
13e616 |
static void perfmgr_mad_unbind(osm_perfmgr_t * pm)
|
|
Packit |
13e616 |
{
|
|
Packit |
13e616 |
OSM_LOG_ENTER(pm->log);
|
|
Packit |
13e616 |
if (pm->bind_handle == OSM_BIND_INVALID_HANDLE) {
|
|
Packit |
13e616 |
OSM_LOG(pm->log, OSM_LOG_ERROR, "ERR 5405: No previous bind\n");
|
|
Packit |
13e616 |
goto Exit;
|
|
Packit |
13e616 |
}
|
|
Packit |
13e616 |
osm_vendor_unbind(pm->bind_handle);
|
|
Packit |
13e616 |
Exit:
|
|
Packit |
13e616 |
OSM_LOG_EXIT(pm->log);
|
|
Packit |
13e616 |
}
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
/**********************************************************************
|
|
Packit |
13e616 |
* Given a monitored node and a port, return the qp
|
|
Packit |
13e616 |
**********************************************************************/
|
|
Packit |
13e616 |
static ib_net32_t get_qp(monitored_node_t * mon_node, uint8_t port)
|
|
Packit |
13e616 |
{
|
|
Packit |
13e616 |
ib_net32_t qp = IB_QP1;
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
if (mon_node && mon_node->num_ports && port < mon_node->num_ports &&
|
|
Packit |
13e616 |
mon_node->port[port].redirection && mon_node->port[port].qp)
|
|
Packit |
13e616 |
qp = mon_node->port[port].qp;
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
return qp;
|
|
Packit |
13e616 |
}
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
/*
 * Return the base LID used for a port of the given node: switches use
 * the LID of port 0, CAs and routers use the LID of the port itself,
 * and any other node type yields 0.
 */
static ib_net16_t get_base_lid(osm_node_t * p_node, uint8_t port)
{
	if (p_node->node_info.node_type == IB_NODE_TYPE_SWITCH)
		return osm_node_get_base_lid(p_node, 0);

	if (p_node->node_info.node_type == IB_NODE_TYPE_CA ||
	    p_node->node_info.node_type == IB_NODE_TYPE_ROUTER)
		return osm_node_get_base_lid(p_node, port);

	return 0;
}
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
/**********************************************************************
|
|
Packit |
13e616 |
* Given a node, a port, and an optional monitored node,
|
|
Packit |
13e616 |
* return the lid appropriate to query that port
|
|
Packit |
13e616 |
**********************************************************************/
|
|
Packit |
13e616 |
static ib_net16_t get_lid(osm_node_t * p_node, uint8_t port,
|
|
Packit |
13e616 |
monitored_node_t * mon_node)
|
|
Packit |
13e616 |
{
|
|
Packit |
13e616 |
if (mon_node && mon_node->num_ports && port < mon_node->num_ports &&
|
|
Packit |
13e616 |
mon_node->port[port].lid)
|
|
Packit |
13e616 |
return mon_node->port[port].lid;
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
return get_base_lid(p_node, port);
|
|
Packit |
13e616 |
}
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
/**********************************************************************
|
|
Packit |
13e616 |
* Build a Performance Management class MAD
|
|
Packit |
13e616 |
**********************************************************************/
|
|
Packit |
13e616 |
static osm_madw_t *perfmgr_build_mad(osm_perfmgr_t * perfmgr,
|
|
Packit |
13e616 |
ib_net16_t dest_lid,
|
|
Packit |
13e616 |
uint8_t sl,
|
|
Packit |
13e616 |
ib_net32_t dest_qp,
|
|
Packit |
13e616 |
uint16_t pkey_ix,
|
|
Packit |
13e616 |
uint8_t mad_method,
|
|
Packit |
13e616 |
ib_net16_t attr_id,
|
|
Packit |
13e616 |
osm_madw_context_t * p_context,
|
|
Packit |
13e616 |
ib_perfmgt_mad_t ** p_pm_mad)
|
|
Packit |
13e616 |
{
|
|
Packit |
13e616 |
ib_perfmgt_mad_t *pm_mad = NULL;
|
|
Packit |
13e616 |
osm_madw_t *p_madw = NULL;
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
OSM_LOG_ENTER(perfmgr->log);
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
p_madw = osm_mad_pool_get(perfmgr->mad_pool, perfmgr->bind_handle,
|
|
Packit |
13e616 |
MAD_BLOCK_SIZE, NULL);
|
|
Packit |
13e616 |
if (p_madw == NULL)
|
|
Packit |
13e616 |
return NULL;
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
pm_mad = osm_madw_get_perfmgt_mad_ptr(p_madw);
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
/* build the mad */
|
|
Packit |
13e616 |
pm_mad->header.base_ver = 1;
|
|
Packit |
13e616 |
pm_mad->header.mgmt_class = IB_MCLASS_PERF;
|
|
Packit |
13e616 |
pm_mad->header.class_ver = 1;
|
|
Packit |
13e616 |
pm_mad->header.method = mad_method;
|
|
Packit |
13e616 |
pm_mad->header.status = 0;
|
|
Packit |
13e616 |
pm_mad->header.class_spec = 0;
|
|
Packit |
13e616 |
pm_mad->header.trans_id =
|
|
Packit |
13e616 |
cl_hton64((uint64_t) cl_atomic_inc(&perfmgr->trans_id) &
|
|
Packit |
13e616 |
(uint64_t) (0xFFFFFFFF));
|
|
Packit |
13e616 |
if (perfmgr->trans_id == 0)
|
|
Packit |
13e616 |
pm_mad->header.trans_id =
|
|
Packit |
13e616 |
cl_hton64((uint64_t) cl_atomic_inc(&perfmgr->trans_id) &
|
|
Packit |
13e616 |
(uint64_t) (0xFFFFFFFF));
|
|
Packit |
13e616 |
pm_mad->header.attr_id = attr_id;
|
|
Packit |
13e616 |
pm_mad->header.resv = 0;
|
|
Packit |
13e616 |
pm_mad->header.attr_mod = 0;
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
p_madw->mad_addr.dest_lid = dest_lid;
|
|
Packit |
13e616 |
p_madw->mad_addr.addr_type.gsi.remote_qp = dest_qp;
|
|
Packit |
13e616 |
p_madw->mad_addr.addr_type.gsi.remote_qkey =
|
|
Packit |
13e616 |
cl_hton32(IB_QP1_WELL_KNOWN_Q_KEY);
|
|
Packit |
13e616 |
p_madw->mad_addr.addr_type.gsi.pkey_ix = pkey_ix;
|
|
Packit |
13e616 |
p_madw->mad_addr.addr_type.gsi.service_level = sl;
|
|
Packit |
13e616 |
p_madw->mad_addr.addr_type.gsi.global_route = FALSE;
|
|
Packit |
13e616 |
p_madw->resp_expected = TRUE;
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
if (p_context)
|
|
Packit |
13e616 |
p_madw->context = *p_context;
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
if (p_pm_mad)
|
|
Packit |
13e616 |
*p_pm_mad = pm_mad;
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
OSM_LOG_EXIT(perfmgr->log);
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
return (p_madw);
|
|
Packit |
13e616 |
}
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
/**********************************************************************
|
|
Packit |
13e616 |
* Send a Performance Management class MAD
|
|
Packit |
13e616 |
**********************************************************************/
|
|
Packit |
13e616 |
static ib_api_status_t perfmgr_send_mad(osm_perfmgr_t *perfmgr,
|
|
Packit |
13e616 |
osm_madw_t * const p_madw)
|
|
Packit |
13e616 |
{
|
|
Packit |
13e616 |
cl_status_t sts;
|
|
Packit |
13e616 |
ib_api_status_t status = osm_vendor_send(perfmgr->bind_handle, p_madw,
|
|
Packit |
13e616 |
TRUE);
|
|
Packit |
13e616 |
if (status == IB_SUCCESS) {
|
|
Packit |
13e616 |
/* pause thread if there are too many outstanding requests */
|
|
Packit |
13e616 |
cl_atomic_inc(&(perfmgr->outstanding_queries));
|
|
Packit |
13e616 |
while (perfmgr->outstanding_queries >
|
|
Packit |
13e616 |
(int32_t)perfmgr->max_outstanding_queries) {
|
|
Packit |
13e616 |
cl_spinlock_acquire(&perfmgr->lock);
|
|
Packit |
13e616 |
if (perfmgr->sweep_state == PERFMGR_SWEEP_SLEEP) {
|
|
Packit |
13e616 |
perfmgr->sweep_state = PERFMGR_SWEEP_POST_PROCESSING;
|
|
Packit |
13e616 |
OSM_LOG(perfmgr->log, OSM_LOG_INFO,
|
|
Packit |
13e616 |
"PM sweep state going into Post Processing\n");
|
|
Packit |
13e616 |
} else if (perfmgr->sweep_state == PERFMGR_SWEEP_ACTIVE)
|
|
Packit |
13e616 |
perfmgr->sweep_state = PERFMGR_SWEEP_SUSPENDED;
|
|
Packit |
13e616 |
cl_spinlock_release(&perfmgr->lock);
|
|
Packit |
13e616 |
wait:
|
|
Packit |
13e616 |
sts = cl_event_wait_on(&perfmgr->sig_query,
|
|
Packit |
13e616 |
EVENT_NO_TIMEOUT, TRUE);
|
|
Packit |
13e616 |
if (sts != CL_SUCCESS)
|
|
Packit |
13e616 |
goto wait;
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
cl_spinlock_acquire(&perfmgr->lock);
|
|
Packit |
13e616 |
if (perfmgr->sweep_state == PERFMGR_SWEEP_SUSPENDED)
|
|
Packit |
13e616 |
perfmgr->sweep_state = PERFMGR_SWEEP_ACTIVE;
|
|
Packit |
13e616 |
cl_spinlock_release(&perfmgr->lock);
|
|
Packit |
13e616 |
}
|
|
Packit |
13e616 |
}
|
|
Packit |
13e616 |
return (status);
|
|
Packit |
13e616 |
}
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
/**********************************************************************
|
|
Packit |
13e616 |
* Form and send the PortCounters MAD for a single port
|
|
Packit |
13e616 |
**********************************************************************/
|
|
Packit |
13e616 |
static ib_api_status_t perfmgr_send_pc_mad(osm_perfmgr_t * perfmgr,
|
|
Packit |
13e616 |
ib_net16_t dest_lid,
|
|
Packit |
13e616 |
ib_net32_t dest_qp, uint16_t pkey_ix,
|
|
Packit |
13e616 |
uint8_t port, uint8_t mad_method,
|
|
Packit |
13e616 |
uint16_t counter_select,
|
|
Packit |
13e616 |
uint8_t counter_select2,
|
|
Packit |
13e616 |
osm_madw_context_t * p_context,
|
|
Packit |
13e616 |
uint8_t sl)
|
|
Packit |
13e616 |
{
|
|
Packit |
13e616 |
ib_api_status_t status = IB_SUCCESS;
|
|
Packit |
13e616 |
ib_port_counters_t *port_counter = NULL;
|
|
Packit |
13e616 |
ib_perfmgt_mad_t *pm_mad = NULL;
|
|
Packit |
13e616 |
osm_madw_t *p_madw = NULL;
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
OSM_LOG_ENTER(perfmgr->log);
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
p_context->perfmgr_context.mad_attr_id = IB_MAD_ATTR_PORT_CNTRS;
|
|
Packit |
13e616 |
p_madw = perfmgr_build_mad(perfmgr, dest_lid, sl, dest_qp, pkey_ix,
|
|
Packit |
13e616 |
mad_method, IB_MAD_ATTR_PORT_CNTRS, p_context,
|
|
Packit |
13e616 |
&pm_mad);
|
|
Packit |
13e616 |
if (p_madw == NULL)
|
|
Packit |
13e616 |
return IB_INSUFFICIENT_MEMORY;
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
port_counter = (ib_port_counters_t *) & pm_mad->data;
|
|
Packit |
13e616 |
memset(port_counter, 0, sizeof(*port_counter));
|
|
Packit |
13e616 |
port_counter->port_select = port;
|
|
Packit |
13e616 |
port_counter->counter_select = cl_hton16(counter_select);
|
|
Packit |
13e616 |
port_counter->counter_select2 = counter_select2;
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
status = perfmgr_send_mad(perfmgr, p_madw);
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
OSM_LOG_EXIT(perfmgr->log);
|
|
Packit |
13e616 |
return status;
|
|
Packit |
13e616 |
}
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
/**********************************************************************
|
|
Packit |
13e616 |
* sweep the node_guid_tbl and collect the node guids to be tracked
|
|
Packit |
13e616 |
**********************************************************************/
|
|
Packit |
13e616 |
static void collect_guids(cl_map_item_t * p_map_item, void *context)
|
|
Packit |
13e616 |
{
|
|
Packit |
13e616 |
osm_node_t *node = (osm_node_t *) p_map_item;
|
|
Packit |
13e616 |
uint64_t node_guid = cl_ntoh64(node->node_info.node_guid);
|
|
Packit |
13e616 |
osm_perfmgr_t *pm = (osm_perfmgr_t *) context;
|
|
Packit |
13e616 |
monitored_node_t *mon_node = NULL;
|
|
Packit |
13e616 |
uint32_t num_ports;
|
|
Packit |
13e616 |
unsigned port;
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
OSM_LOG_ENTER(pm->log);
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
if (cl_qmap_get(&pm->monitored_map, node_guid) ==
|
|
Packit |
13e616 |
cl_qmap_end(&pm->monitored_map)) {
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
if (pm->ignore_cas &&
|
|
Packit |
13e616 |
(node->node_info.node_type == IB_NODE_TYPE_CA))
|
|
Packit |
13e616 |
goto Exit;
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
/* if not already in map add it */
|
|
Packit |
13e616 |
num_ports = osm_node_get_num_physp(node);
|
|
Packit |
13e616 |
mon_node = malloc(sizeof(*mon_node) +
|
|
Packit |
13e616 |
sizeof(monitored_port_t) * num_ports);
|
|
Packit |
13e616 |
if (!mon_node) {
|
|
Packit |
13e616 |
OSM_LOG(pm->log, OSM_LOG_ERROR, "PerfMgr: ERR 5406: "
|
|
Packit |
13e616 |
"malloc failed: not handling node %s"
|
|
Packit |
13e616 |
"(GUID 0x%" PRIx64 ")\n", node->print_desc,
|
|
Packit |
13e616 |
node_guid);
|
|
Packit |
13e616 |
goto Exit;
|
|
Packit |
13e616 |
}
|
|
Packit |
13e616 |
memset(mon_node, 0,
|
|
Packit |
13e616 |
sizeof(*mon_node) + sizeof(monitored_port_t) * num_ports);
|
|
Packit |
13e616 |
mon_node->guid = node_guid;
|
|
Packit |
13e616 |
mon_node->name = strdup(node->print_desc);
|
|
Packit |
13e616 |
mon_node->num_ports = num_ports;
|
|
Packit |
13e616 |
mon_node->node_type = node->node_info.node_type;
|
|
Packit |
13e616 |
/* check for enhanced switch port 0 */
|
|
Packit |
13e616 |
mon_node->esp0 = (node->sw &&
|
|
Packit |
13e616 |
ib_switch_info_is_enhanced_port0(&node->sw->
|
|
Packit |
13e616 |
switch_info));
|
|
Packit |
13e616 |
for (port = mon_node->esp0 ? 0 : 1; port < num_ports; port++) {
|
|
Packit |
13e616 |
monitored_port_t *mon_port = &mon_node->port[port];
|
|
Packit |
13e616 |
osm_physp_t *p_physp = &node->physp_table[port];
|
|
Packit |
13e616 |
osm_physp_t *p_remote_physp = p_physp->p_remote_physp;
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
mon_port->orig_lid = 0;
|
|
Packit |
13e616 |
mon_port->valid = FALSE;
|
|
Packit |
13e616 |
if (osm_physp_is_valid(p_physp)) {
|
|
Packit |
13e616 |
mon_port->orig_lid = get_base_lid(node, port);
|
|
Packit |
13e616 |
mon_port->valid = TRUE;
|
|
Packit |
13e616 |
}
|
|
Packit |
13e616 |
mon_port->remote_valid = FALSE;
|
|
Packit |
13e616 |
mon_port->remote_name = NULL;
|
|
Packit |
13e616 |
if (p_remote_physp && osm_physp_is_valid(p_remote_physp)) {
|
|
Packit |
13e616 |
osm_node_t *p_remote_node = p_remote_physp->p_node;
|
|
Packit |
13e616 |
mon_port->remote_valid = TRUE;
|
|
Packit |
13e616 |
mon_port->remote_guid = p_remote_node->node_info.node_guid;
|
|
Packit |
13e616 |
mon_port->remote_name = strdup(p_remote_node->print_desc);
|
|
Packit |
13e616 |
mon_port->remote_port = p_remote_physp->port_num;
|
|
Packit |
13e616 |
}
|
|
Packit |
13e616 |
}
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
cl_qmap_insert(&pm->monitored_map, node_guid,
|
|
Packit |
13e616 |
(cl_map_item_t *) mon_node);
|
|
Packit |
13e616 |
}
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
Exit:
|
|
Packit |
13e616 |
OSM_LOG_EXIT(pm->log);
|
|
Packit |
13e616 |
}
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
/**********************************************************************
|
|
Packit |
13e616 |
* Form and send the ClassPortInfo MAD for a single port
|
|
Packit |
13e616 |
**********************************************************************/
|
|
Packit |
13e616 |
static ib_api_status_t perfmgr_send_cpi_mad(osm_perfmgr_t * pm,
|
|
Packit |
13e616 |
ib_net16_t dest_lid,
|
|
Packit |
13e616 |
ib_net32_t dest_qp,
|
|
Packit |
13e616 |
uint16_t pkey_ix,
|
|
Packit |
13e616 |
uint8_t port,
|
|
Packit |
13e616 |
osm_madw_context_t * p_context,
|
|
Packit |
13e616 |
uint8_t sl)
|
|
Packit |
13e616 |
{
|
|
Packit |
13e616 |
ib_api_status_t status = IB_SUCCESS;
|
|
Packit |
13e616 |
osm_madw_t *p_madw = NULL;
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
OSM_LOG_ENTER(pm->log);
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
p_context->perfmgr_context.mad_attr_id = IB_MAD_ATTR_CLASS_PORT_INFO;
|
|
Packit |
13e616 |
p_madw = perfmgr_build_mad(pm, dest_lid, sl, dest_qp,
|
|
Packit |
13e616 |
pkey_ix, IB_MAD_METHOD_GET,
|
|
Packit |
13e616 |
IB_MAD_ATTR_CLASS_PORT_INFO, p_context,
|
|
Packit |
13e616 |
NULL);
|
|
Packit |
13e616 |
if (p_madw == NULL)
|
|
Packit |
13e616 |
return IB_INSUFFICIENT_MEMORY;
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
status = perfmgr_send_mad(pm, p_madw);
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
OSM_LOG_EXIT(pm->log);
|
|
Packit |
13e616 |
return status;
|
|
Packit |
13e616 |
}
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
/**********************************************************************
|
|
Packit |
13e616 |
* return if some form of PortCountersExtended (PCE || PCE NoIETF) are supported
|
|
Packit |
13e616 |
**********************************************************************/
|
|
Packit |
13e616 |
static inline boolean_t pce_supported(monitored_node_t *mon_node, uint8_t port)
|
|
Packit |
13e616 |
{
|
|
Packit |
13e616 |
monitored_port_t *mon_port = &(mon_node->port[port]);
|
|
Packit |
13e616 |
return (mon_port->cpi_valid
|
|
Packit |
13e616 |
&& (mon_port->cap_mask & IB_PM_EXT_WIDTH_SUPPORTED
|
|
Packit |
13e616 |
|| mon_port->cap_mask & IB_PM_EXT_WIDTH_NOIETF_SUP));
|
|
Packit |
13e616 |
}
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
/**********************************************************************
|
|
Packit |
13e616 |
* return if CapMask.PortCountersXmitWaitSupported is set
|
|
Packit |
13e616 |
**********************************************************************/
|
|
Packit |
13e616 |
static inline boolean_t xmit_wait_supported(monitored_node_t *mon_node, uint8_t port)
|
|
Packit |
13e616 |
{
|
|
Packit |
13e616 |
monitored_port_t *mon_port = &(mon_node->port[port]);
|
|
Packit |
13e616 |
return (mon_port->cpi_valid
|
|
Packit |
13e616 |
&& (mon_port->cap_mask & IB_PM_PC_XMIT_WAIT_SUP));
|
|
Packit |
13e616 |
}
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
/**********************************************************************
|
|
Packit |
13e616 |
* return if "full" PortCountersExtended (IETF) is indicated
|
|
Packit |
13e616 |
**********************************************************************/
|
|
Packit |
13e616 |
static inline boolean_t ietf_supported(monitored_node_t *mon_node, uint8_t port)
|
|
Packit |
13e616 |
{
|
|
Packit |
13e616 |
monitored_port_t *mon_port = &(mon_node->port[port]);
|
|
Packit |
13e616 |
return (mon_port->cpi_valid
|
|
Packit |
13e616 |
&& (mon_port->cap_mask & IB_PM_EXT_WIDTH_SUPPORTED));
|
|
Packit |
13e616 |
}
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
/**********************************************************************
|
|
Packit |
13e616 |
* Form and send the PortCountersExtended MAD for a single port
|
|
Packit |
13e616 |
**********************************************************************/
|
|
Packit |
13e616 |
static ib_api_status_t perfmgr_send_pce_mad(osm_perfmgr_t * perfmgr,
|
|
Packit |
13e616 |
ib_net16_t dest_lid,
|
|
Packit |
13e616 |
ib_net32_t dest_qp,
|
|
Packit |
13e616 |
uint16_t pkey_ix,
|
|
Packit |
13e616 |
uint8_t port, uint8_t mad_method,
|
|
Packit |
13e616 |
osm_madw_context_t * p_context,
|
|
Packit |
13e616 |
uint8_t sl)
|
|
Packit |
13e616 |
{
|
|
Packit |
13e616 |
ib_api_status_t status = IB_SUCCESS;
|
|
Packit |
13e616 |
ib_port_counters_ext_t *port_counter_ext = NULL;
|
|
Packit |
13e616 |
ib_perfmgt_mad_t *pm_mad = NULL;
|
|
Packit |
13e616 |
osm_madw_t *p_madw = NULL;
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
OSM_LOG_ENTER(perfmgr->log);
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
p_context->perfmgr_context.mad_attr_id = IB_MAD_ATTR_PORT_CNTRS_EXT;
|
|
Packit |
13e616 |
p_madw = perfmgr_build_mad(perfmgr, dest_lid, sl, dest_qp, pkey_ix,
|
|
Packit |
13e616 |
mad_method, IB_MAD_ATTR_PORT_CNTRS_EXT, p_context,
|
|
Packit |
13e616 |
&pm_mad);
|
|
Packit |
13e616 |
if (p_madw == NULL)
|
|
Packit |
13e616 |
return IB_INSUFFICIENT_MEMORY;
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
port_counter_ext = (ib_port_counters_ext_t *) & pm_mad->data;
|
|
Packit |
13e616 |
memset(port_counter_ext, 0, sizeof(*port_counter_ext));
|
|
Packit |
13e616 |
port_counter_ext->port_select = port;
|
|
Packit |
13e616 |
port_counter_ext->counter_select = cl_hton16(0x00FF);
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
status = perfmgr_send_mad(perfmgr, p_madw);
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
OSM_LOG_EXIT(perfmgr->log);
|
|
Packit |
13e616 |
return status;
|
|
Packit |
13e616 |
}
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
/**********************************************************************
|
|
Packit |
13e616 |
* query the Port Counters of all the nodes in the subnet
|
|
Packit |
13e616 |
**********************************************************************/
|
|
Packit |
13e616 |
static void perfmgr_query_counters(cl_map_item_t * p_map_item, void *context)
|
|
Packit |
13e616 |
{
|
|
Packit |
13e616 |
ib_api_status_t status = IB_SUCCESS;
|
|
Packit |
13e616 |
osm_perfmgr_t *pm = context;
|
|
Packit |
13e616 |
osm_node_t *node = NULL;
|
|
Packit |
13e616 |
monitored_node_t *mon_node = (monitored_node_t *) p_map_item;
|
|
Packit |
13e616 |
osm_madw_context_t mad_context;
|
|
Packit |
13e616 |
uint64_t node_guid = 0;
|
|
Packit |
13e616 |
ib_net32_t remote_qp;
|
|
Packit |
13e616 |
uint8_t port, num_ports = 0;
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
OSM_LOG_ENTER(pm->log);
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
cl_plock_acquire(&pm->osm->lock);
|
|
Packit |
13e616 |
node = osm_get_node_by_guid(pm->subn, cl_hton64(mon_node->guid));
|
|
Packit |
13e616 |
if (!node) {
|
|
Packit |
13e616 |
OSM_LOG(pm->log, OSM_LOG_ERROR,
|
|
Packit |
13e616 |
"ERR 5407: Node \"%s\" (guid 0x%" PRIx64
|
|
Packit |
13e616 |
") no longer exists so removing from PerfMgr monitoring\n",
|
|
Packit |
13e616 |
mon_node->name, mon_node->guid);
|
|
Packit |
13e616 |
mark_for_removal(pm, mon_node);
|
|
Packit |
13e616 |
goto Exit;
|
|
Packit |
13e616 |
}
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
num_ports = osm_node_get_num_physp(node);
|
|
Packit |
13e616 |
node_guid = cl_ntoh64(node->node_info.node_guid);
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
/* make sure there is a database object ready to store this info */
|
|
Packit |
13e616 |
if (perfmgr_db_create_entry(pm->db, node_guid, mon_node->esp0,
|
|
Packit |
13e616 |
num_ports, node->print_desc) !=
|
|
Packit |
13e616 |
PERFMGR_EVENT_DB_SUCCESS) {
|
|
Packit |
13e616 |
OSM_LOG(pm->log, OSM_LOG_ERROR,
|
|
Packit |
13e616 |
"ERR 5408: DB create entry failed for 0x%"
|
|
Packit |
13e616 |
PRIx64 " (%s) : %s\n", node_guid, node->print_desc,
|
|
Packit |
13e616 |
strerror(errno));
|
|
Packit |
13e616 |
goto Exit;
|
|
Packit |
13e616 |
}
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
perfmgr_db_mark_active(pm->db, node_guid, TRUE);
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
/* issue the query for each port */
|
|
Packit |
13e616 |
for (port = mon_node->esp0 ? 0 : 1; port < num_ports; port++) {
|
|
Packit |
13e616 |
ib_net16_t lid;
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
if (!osm_node_get_physp_ptr(node, port))
|
|
Packit |
13e616 |
continue;
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
if (!mon_node->port[port].valid)
|
|
Packit |
13e616 |
continue;
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
lid = get_lid(node, port, mon_node);
|
|
Packit |
13e616 |
if (lid == 0) {
|
|
Packit |
13e616 |
OSM_LOG(pm->log, OSM_LOG_DEBUG, "WARN: node 0x%" PRIx64
|
|
Packit |
13e616 |
" port %d (%s): port out of range, skipping\n",
|
|
Packit |
13e616 |
cl_ntoh64(node->node_info.node_guid), port,
|
|
Packit |
13e616 |
node->print_desc);
|
|
Packit |
13e616 |
continue;
|
|
Packit |
13e616 |
}
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
remote_qp = get_qp(mon_node, port);
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
mad_context.perfmgr_context.node_guid = node_guid;
|
|
Packit |
13e616 |
mad_context.perfmgr_context.port = port;
|
|
Packit |
13e616 |
mad_context.perfmgr_context.mad_method = IB_MAD_METHOD_GET;
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
if (pm->query_cpi && !mon_node->port[port].cpi_valid) {
|
|
Packit |
13e616 |
status = perfmgr_send_cpi_mad(pm, lid, remote_qp,
|
|
Packit |
13e616 |
mon_node->port[port].pkey_ix,
|
|
Packit |
13e616 |
port, &mad_context,
|
|
Packit |
13e616 |
0); /* FIXME SL != 0 */
|
|
Packit |
13e616 |
if (status != IB_SUCCESS)
|
|
Packit |
13e616 |
OSM_LOG(pm->log, OSM_LOG_ERROR, "ERR 5410: "
|
|
Packit |
13e616 |
"Failed to issue ClassPortInfo query "
|
|
Packit |
13e616 |
"for node 0x%" PRIx64
|
|
Packit |
13e616 |
" port %d (%s)\n",
|
|
Packit |
13e616 |
node->node_info.node_guid, port,
|
|
Packit |
13e616 |
node->print_desc);
|
|
Packit |
13e616 |
if (mon_node->node_type == IB_NODE_TYPE_SWITCH)
|
|
Packit |
13e616 |
goto Exit; /* only need to issue 1 CPI query
|
|
Packit |
13e616 |
for switches */
|
|
Packit |
13e616 |
} else {
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
#ifdef ENABLE_OSM_PERF_MGR_PROFILE
|
|
Packit |
13e616 |
gettimeofday(&mad_context.perfmgr_context.query_start, NULL);
|
|
Packit |
13e616 |
#endif
|
|
Packit |
13e616 |
OSM_LOG(pm->log, OSM_LOG_VERBOSE, "Getting stats for node 0x%"
|
|
Packit |
13e616 |
PRIx64 " port %d (lid %u) (%s)\n",
|
|
Packit |
13e616 |
node_guid, port, cl_ntoh16(lid),
|
|
Packit |
13e616 |
node->print_desc);
|
|
Packit |
13e616 |
status = perfmgr_send_pc_mad(pm, lid, remote_qp,
|
|
Packit |
13e616 |
mon_node->port[port].pkey_ix,
|
|
Packit |
13e616 |
port, IB_MAD_METHOD_GET,
|
|
Packit |
13e616 |
0xffff,
|
|
Packit |
13e616 |
1,
|
|
Packit |
13e616 |
&mad_context,
|
|
Packit |
13e616 |
0); /* FIXME SL != 0 */
|
|
Packit |
13e616 |
if (status != IB_SUCCESS)
|
|
Packit |
13e616 |
OSM_LOG(pm->log, OSM_LOG_ERROR, "ERR 5409: "
|
|
Packit |
13e616 |
"Failed to issue port counter query for node 0x%"
|
|
Packit |
13e616 |
PRIx64 " port %d (%s)\n",
|
|
Packit |
13e616 |
node->node_info.node_guid, port,
|
|
Packit |
13e616 |
node->print_desc);
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
if (pce_supported(mon_node, port)) {
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
#if ENABLE_OSM_PERF_MGR_PROFILE
|
|
Packit |
13e616 |
gettimeofday(&mad_context.perfmgr_context.query_start, NULL);
|
|
Packit |
13e616 |
#endif
|
|
Packit |
13e616 |
status = perfmgr_send_pce_mad(pm, lid, remote_qp,
|
|
Packit |
13e616 |
mon_node->port[port].pkey_ix,
|
|
Packit |
13e616 |
port,
|
|
Packit |
13e616 |
IB_MAD_METHOD_GET,
|
|
Packit |
13e616 |
&mad_context,
|
|
Packit |
13e616 |
0); /* FIXME SL != 0 */
|
|
Packit |
13e616 |
if (status != IB_SUCCESS)
|
|
Packit |
13e616 |
OSM_LOG(pm->log, OSM_LOG_ERROR,
|
|
Packit |
13e616 |
"ERR 5417: Failed to issue "
|
|
Packit |
13e616 |
"port counter query for "
|
|
Packit |
13e616 |
"node 0x%" PRIx64 " port "
|
|
Packit |
13e616 |
"%d (%s)\n",
|
|
Packit |
13e616 |
node->node_info.node_guid,
|
|
Packit |
13e616 |
port,
|
|
Packit |
13e616 |
node->print_desc);
|
|
Packit |
13e616 |
}
|
|
Packit |
13e616 |
}
|
|
Packit |
13e616 |
}
|
|
Packit |
13e616 |
Exit:
|
|
Packit |
13e616 |
cl_plock_release(&pm->osm->lock);
|
|
Packit |
13e616 |
OSM_LOG_EXIT(pm->log);
|
|
Packit |
13e616 |
}
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
/**********************************************************************
|
|
Packit |
13e616 |
* Discovery stuff
|
|
Packit |
13e616 |
* This code should not be here, but merged with main OpenSM
|
|
Packit |
13e616 |
**********************************************************************/
|
|
Packit |
13e616 |
extern int wait_for_pending_transactions(osm_stats_t * stats);
|
|
Packit |
13e616 |
extern void osm_drop_mgr_process(IN osm_sm_t * sm);
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
/*
 * Probe one directed-route hop away from the SM port with NodeInfo Get
 * requests.
 *
 * For a CA or router a single request goes out through the node's
 * local port.  For a switch one request is sent through every port
 * (starting at port 0) that has a physp object and whose port state is
 * above IB_LINK_DOWN.
 *
 * Returns -1 when the SM port object cannot be found.  Otherwise the
 * status of the last osm_req_get() issued is returned; this is
 * IB_SUCCESS when no request was issued, including the unknown node
 * type case, which is only logged.
 */
static int sweep_hop_1(osm_sm_t * sm)
{
	uint8_t path_array[IB_SUBNET_PATH_HOPS_MAX];
	osm_madw_context_t context;
	osm_dr_path_t hop_1_path;
	ib_api_status_t status = IB_SUCCESS;
	osm_physp_t *p_ext_physp;
	osm_port_t *p_sm_port;
	osm_node_t *p_sm_node;
	uint8_t port_num;
	uint8_t port_cnt;

	p_sm_port = osm_get_port_by_guid(sm->p_subn,
					 sm->p_subn->sm_port_guid);
	if (p_sm_port == NULL) {
		OSM_LOG(sm->p_log, OSM_LOG_ERROR,
			"ERR 5481: No SM port object\n");
		return -1;
	}

	p_sm_node = p_sm_port->p_node;
	port_num = ib_node_info_get_local_port_num(&p_sm_node->node_info);

	OSM_LOG(sm->p_log, OSM_LOG_DEBUG,
		"Probing hop 1 on local port %u\n", port_num);

	memset(path_array, 0, sizeof(path_array));

	/* What gets probed depends on the kind of node hosting the SM;
	 * the node types handled here are CA, router and switch. */
	switch (osm_node_get_type(p_sm_node)) {
	case IB_NODE_TYPE_CA:
	case IB_NODE_TYPE_ROUTER:
		memset(&context, 0, sizeof(context));
		context.ni_context.node_guid =
		    osm_node_get_node_guid(p_sm_node);
		context.ni_context.port_num = port_num;

		/* single hop, leaving through the local port */
		path_array[1] = port_num;
		osm_dr_path_init(&hop_1_path, 1, path_array);

		CL_PLOCK_ACQUIRE(sm->p_lock);
		status = osm_req_get(sm, &hop_1_path, IB_MAD_ATTR_NODE_INFO,
				     0, TRUE, 0, 0, CL_DISP_MSGID_NONE,
				     &context);
		CL_PLOCK_RELEASE(sm->p_lock);

		if (status != IB_SUCCESS)
			OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 5482: "
				"Request for NodeInfo failed\n");
		break;

	case IB_NODE_TYPE_SWITCH:
		/* Walk every port of the switch and send a NodeInfo
		 * request out of each one that is not down. */
		port_cnt = osm_node_get_num_physp(p_sm_node);
		for (port_num = 0; port_num < port_cnt; port_num++) {
			p_ext_physp =
			    osm_node_get_physp_ptr(p_sm_node, port_num);
			if (p_ext_physp == NULL)
				continue;

			/* skip ports whose state is DOWN (or lower) */
			if (ib_port_info_get_port_state
			    (&p_ext_physp->port_info) <= IB_LINK_DOWN)
				continue;

			memset(&context, 0, sizeof(context));
			context.ni_context.node_guid =
			    osm_node_get_node_guid(p_sm_node);
			context.ni_context.port_num = port_num;

			path_array[1] = port_num;
			osm_dr_path_init(&hop_1_path, 1, path_array);

			CL_PLOCK_ACQUIRE(sm->p_lock);
			status = osm_req_get(sm, &hop_1_path,
					     IB_MAD_ATTR_NODE_INFO, 0, TRUE,
					     0, 0, CL_DISP_MSGID_NONE,
					     &context);
			CL_PLOCK_RELEASE(sm->p_lock);

			if (status != IB_SUCCESS)
				OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 5484: "
					"Request for NodeInfo failed\n");
		}
		break;

	default:
		OSM_LOG(sm->p_log, OSM_LOG_ERROR,
			"ERR 5483: Unknown node type %d\n",
			osm_node_get_type(p_sm_node));
	}

	return status;
}
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
/*
 * Report whether the SM port should be treated as down.
 *
 * Returns 1 when no SM port GUID is set, when no port object exists
 * for that GUID, or when the port's physical state is IB_LINK_DOWN.
 * Returns 0 for a switch whose port 0 is not an enhanced port 0 (base
 * SP0) and for any port whose state is not IB_LINK_DOWN.
 *
 * The port object is only dereferenced while sm->p_lock is held, so it
 * cannot go away between the lookup and the reads of its node and
 * physp.
 */
static unsigned is_sm_port_down(osm_sm_t * sm)
{
	ib_net64_t port_guid;
	osm_port_t *p_port;
	unsigned is_down;

	port_guid = sm->p_subn->sm_port_guid;
	if (port_guid == 0)
		return 1;

	CL_PLOCK_ACQUIRE(sm->p_lock);
	p_port = osm_get_port_by_guid(sm->p_subn, port_guid);
	if (!p_port) {
		CL_PLOCK_RELEASE(sm->p_lock);
		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 5485: "
			"SM port with GUID:%016" PRIx64 " is unknown\n",
			cl_ntoh64(port_guid));
		return 1;
	}

	if (p_port->p_node->sw &&
	    !ib_switch_info_is_enhanced_port0(&p_port->p_node->sw->switch_info))
		is_down = 0;	/* base SP0 */
	else
		is_down = osm_physp_get_port_state(p_port->p_physp) ==
		    IB_LINK_DOWN;

	CL_PLOCK_RELEASE(sm->p_lock);

	return is_down;
}
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
/*
 * Send a NodeInfo Get over a directed route initialised with a hop
 * count of 0 and an all-zero path.
 *
 * Returns -1 when the SM MAD controller has no valid bind handle,
 * otherwise the status of osm_req_get() (a failure is also logged).
 */
static int sweep_hop_0(osm_sm_t * sm)
{
	uint8_t path_array[IB_SUBNET_PATH_HOPS_MAX];
	osm_dr_path_t local_path;
	osm_bind_handle_t bind_handle;
	ib_api_status_t rc;

	memset(path_array, 0, sizeof(path_array));

	bind_handle = osm_sm_mad_ctrl_get_bind_handle(&sm->mad_ctrl);
	if (bind_handle == OSM_BIND_INVALID_HANDLE) {
		OSM_LOG(sm->p_log, OSM_LOG_DEBUG, "No bound ports\n");
		return -1;
	}

	osm_dr_path_init(&local_path, 0, path_array);

	/* the request is issued with the SM lock held */
	CL_PLOCK_ACQUIRE(sm->p_lock);
	rc = osm_req_get(sm, &local_path, IB_MAD_ATTR_NODE_INFO, 0,
			 TRUE, 0, 0, CL_DISP_MSGID_NONE, NULL);
	CL_PLOCK_RELEASE(sm->p_lock);

	if (rc != IB_SUCCESS)
		OSM_LOG(sm->p_log, OSM_LOG_ERROR,
			"ERR 5486: Request for NodeInfo failed\n");

	return rc;
}
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
/*
 * Map callback: clear the discovery bookkeeping of one node.
 * Zeroes the node's discovery_count and its physp_discovered table
 * (physp_tbl_size entries).  The cxt argument is not used.
 */
static void reset_node_count(cl_map_item_t * p_map_item, void *cxt)
{
	osm_node_t *node = (osm_node_t *) p_map_item;
	size_t tbl_bytes = sizeof(uint8_t) * node->physp_tbl_size;

	node->discovery_count = 0;
	memset(node->physp_discovered, 0, tbl_bytes);
}
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
/*
 * Map callback: zero the discovery_count of one port.
 * The cxt argument is not used.
 */
static void reset_port_count(cl_map_item_t * p_map_item, void *cxt)
{
	((osm_port_t *) p_map_item)->discovery_count = 0;
}
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
/*
 * Map callback: clear the need_update flag of one switch.
 * The cxt argument is not used.
 */
static void reset_switch_count(cl_map_item_t * p_map_item, void *cxt)
{
	((osm_switch_t *) p_map_item)->need_update = 0;
}
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
/*
 * Run a minimal discovery pass: reset the per-node, per-port and
 * per-switch discovery bookkeeping, probe hop 0, and, unless the SM
 * port is down, probe hop 1; finally let the drop manager run.
 *
 * Returns the non-zero result of sweep_hop_0()/sweep_hop_1() when one
 * of them fails.  In every other case, including an early exit because
 * wait_for_pending_transactions() returned non-zero, the result is 0.
 * The drop manager is skipped on every early exit.
 */
static int perfmgr_discovery(osm_opensm_t * osm)
{
	int rc;

	/* start from a clean slate, under the global lock */
	CL_PLOCK_ACQUIRE(&osm->lock);
	cl_qmap_apply_func(&osm->subn.node_guid_tbl, reset_node_count, NULL);
	cl_qmap_apply_func(&osm->subn.port_guid_tbl, reset_port_count, NULL);
	cl_qmap_apply_func(&osm->subn.sw_guid_tbl, reset_switch_count, NULL);
	CL_PLOCK_RELEASE(&osm->lock);

	osm->subn.in_sweep_hop_0 = TRUE;

	rc = sweep_hop_0(&osm->sm);
	if (rc)
		return rc;

	if (wait_for_pending_transactions(&osm->stats))
		return rc;

	if (is_sm_port_down(&osm->sm)) {
		/* nothing to probe beyond the local port */
		OSM_LOG(&osm->log, OSM_LOG_VERBOSE, "SM port is down\n");
	} else {
		osm->subn.in_sweep_hop_0 = FALSE;

		rc = sweep_hop_1(&osm->sm);
		if (rc)
			return rc;

		if (wait_for_pending_transactions(&osm->stats))
			return rc;
	}

	osm_drop_mgr_process(&osm->sm);

	return rc;
}
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
/**********************************************************************
|
|
Packit |
13e616 |
* Main PerfMgr processor - query the performance counters
|
|
Packit |
13e616 |
**********************************************************************/
|
|
Packit |
13e616 |
void osm_perfmgr_process(osm_perfmgr_t * pm)
|
|
Packit |
13e616 |
{
|
|
Packit |
13e616 |
#ifdef ENABLE_OSM_PERF_MGR_PROFILE
|
|
Packit |
13e616 |
struct timeval before, after;
|
|
Packit |
13e616 |
#endif
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
if (pm->state != PERFMGR_STATE_ENABLED)
|
|
Packit |
13e616 |
return;
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
cl_spinlock_acquire(&pm->lock);
|
|
Packit |
13e616 |
if (pm->sweep_state == PERFMGR_SWEEP_ACTIVE ||
|
|
Packit |
13e616 |
pm->sweep_state == PERFMGR_SWEEP_SUSPENDED ||
|
|
Packit |
13e616 |
pm->sweep_state == PERFMGR_SWEEP_POST_PROCESSING) {
|
|
Packit |
13e616 |
cl_spinlock_release(&pm->lock);
|
|
Packit |
13e616 |
OSM_LOG(pm->log, OSM_LOG_INFO,
|
|
Packit |
13e616 |
"PM sweep state %d, skipping sweep\n",
|
|
Packit |
13e616 |
pm->sweep_state);
|
|
Packit |
13e616 |
return;
|
|
Packit |
13e616 |
}
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
pm->sweep_state = PERFMGR_SWEEP_ACTIVE;
|
|
Packit |
13e616 |
cl_spinlock_release(&pm->lock);
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
if (pm->subn->sm_state == IB_SMINFO_STATE_STANDBY ||
|
|
Packit |
13e616 |
pm->subn->sm_state == IB_SMINFO_STATE_NOTACTIVE)
|
|
Packit |
13e616 |
perfmgr_discovery(pm->subn->p_osm);
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
/* if redirection enabled, determine local port */
|
|
Packit |
13e616 |
if (pm->subn->opt.perfmgr_redir && pm->local_port == -1) {
|
|
Packit |
13e616 |
osm_node_t *p_node;
|
|
Packit |
13e616 |
osm_port_t *p_port;
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
CL_PLOCK_ACQUIRE(pm->sm->p_lock);
|
|
Packit |
13e616 |
p_port = osm_get_port_by_guid(pm->subn, pm->port_guid);
|
|
Packit |
13e616 |
if (p_port) {
|
|
Packit |
13e616 |
p_node = p_port->p_node;
|
|
Packit |
13e616 |
CL_ASSERT(p_node);
|
|
Packit |
13e616 |
pm->local_port =
|
|
Packit |
13e616 |
ib_node_info_get_local_port_num(&p_node->node_info);
|
|
Packit |
13e616 |
} else
|
|
Packit |
13e616 |
OSM_LOG(pm->log, OSM_LOG_ERROR,
|
|
Packit |
13e616 |
"ERR 5487: No PerfMgr port object for "
|
|
Packit |
13e616 |
"port GUID 0x%" PRIx64 "\n",
|
|
Packit |
13e616 |
cl_ntoh64(pm->port_guid));
|
|
Packit |
13e616 |
CL_PLOCK_RELEASE(pm->sm->p_lock);
|
|
Packit |
13e616 |
}
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
#ifdef ENABLE_OSM_PERF_MGR_PROFILE
|
|
Packit |
13e616 |
gettimeofday(&before, NULL);
|
|
Packit |
13e616 |
#endif
|
|
Packit |
13e616 |
/* With the global lock held, collect the node guids */
|
|
Packit |
13e616 |
/* FIXME we should be able to track SA notices
|
|
Packit |
13e616 |
* and not have to sweep the node_guid_tbl each pass
|
|
Packit |
13e616 |
*/
|
|
Packit |
13e616 |
OSM_LOG(pm->log, OSM_LOG_VERBOSE, "Gathering PerfMgr stats\n");
|
|
Packit |
13e616 |
cl_plock_acquire(&pm->osm->lock);
|
|
Packit |
13e616 |
cl_qmap_apply_func(&pm->subn->node_guid_tbl, collect_guids, pm);
|
|
Packit |
13e616 |
cl_plock_release(&pm->osm->lock);
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
/* then for each node query their counters */
|
|
Packit |
13e616 |
cl_qmap_apply_func(&pm->monitored_map, perfmgr_query_counters, pm);
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
/* clean out any nodes found to be removed during the sweep */
|
|
Packit |
13e616 |
remove_marked_nodes(pm);
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
#ifdef ENABLE_OSM_PERF_MGR_PROFILE
|
|
Packit |
13e616 |
gettimeofday(&after, NULL);
|
|
Packit |
13e616 |
diff_time(&before, &after, &after);
|
|
Packit |
13e616 |
osm_log_v2(pm->log, OSM_LOG_INFO, FILE_ID,
|
|
Packit |
13e616 |
"PerfMgr total sweep time : %ld.%06ld s\n"
|
|
Packit |
13e616 |
" fastest mad : %g us\n"
|
|
Packit |
13e616 |
" slowest mad : %g us\n"
|
|
Packit |
13e616 |
" average mad : %g us\n",
|
|
Packit |
13e616 |
after.tv_sec, after.tv_usec, perfmgr_mad_stats.fastest_us,
|
|
Packit |
13e616 |
perfmgr_mad_stats.slowest_us, perfmgr_mad_stats.avg_us);
|
|
Packit |
13e616 |
clear_mad_stats();
|
|
Packit |
13e616 |
#endif
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
cl_spinlock_acquire(&pm->lock);
|
|
Packit |
13e616 |
pm->sweep_state = PERFMGR_SWEEP_SLEEP;
|
|
Packit |
13e616 |
cl_spinlock_release(&pm->lock);
|
|
Packit |
13e616 |
}
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
/**********************************************************************
|
|
Packit |
13e616 |
* PerfMgr timer - loop continuously and signal SM to run PerfMgr
|
|
Packit |
13e616 |
* processor if enabled
|
|
Packit |
13e616 |
**********************************************************************/
|
|
Packit |
13e616 |
static void perfmgr_sweep(void *arg)
|
|
Packit |
13e616 |
{
|
|
Packit |
13e616 |
osm_perfmgr_t *pm = arg;
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
osm_sm_signal(pm->sm, OSM_SIGNAL_PERFMGR_SWEEP);
|
|
Packit |
13e616 |
cl_timer_start(&pm->sweep_timer, pm->sweep_time_s * 1000);
|
|
Packit |
13e616 |
}
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
/*
 * Shut the PerfMgr down: stop the sweep timer, unregister the
 * dispatcher handle (pc_disp_h) and unbind from the MAD layer, in that
 * order.  Nothing is destroyed here; see osm_perfmgr_destroy().
 */
void osm_perfmgr_shutdown(osm_perfmgr_t * pm)
{
	OSM_LOG_ENTER(pm->log);

	/* no further sweeps get scheduled once the timer is stopped */
	cl_timer_stop(&pm->sweep_timer);

	cl_disp_unregister(pm->pc_disp_h);

	perfmgr_mad_unbind(pm);

	OSM_LOG_EXIT(pm->log);
}
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
/*
 * Release the PerfMgr resources handled here: the performance
 * database first, then the sweep timer.
 */
void osm_perfmgr_destroy(osm_perfmgr_t * pm)
{
	OSM_LOG_ENTER(pm->log);

	perfmgr_db_destroy(pm->db);

	cl_timer_destroy(&pm->sweep_timer);

	OSM_LOG_EXIT(pm->log);
}
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
/**********************************************************************
|
|
Packit |
13e616 |
* Detect if someone else on the network could have cleared the counters
|
|
Packit |
13e616 |
* without us knowing. This is easy to detect because the counters never
|
|
Packit |
13e616 |
* wrap but are "sticky".
|
|
Packit |
13e616 |
*
|
|
Packit |
13e616 |
* The one time this will not work is if the port is getting errors fast
|
|
Packit |
13e616 |
* enough to have the reading overtake the previous reading. In this case,
|
|
Packit |
13e616 |
* counters will be missed.
|
|
Packit |
13e616 |
**********************************************************************/
|
|
Packit |
13e616 |
static void perfmgr_check_oob_clear(osm_perfmgr_t * pm,
|
|
Packit |
13e616 |
monitored_node_t * mon_node, uint8_t port,
|
|
Packit |
13e616 |
perfmgr_db_err_reading_t * cr)
|
|
Packit |
13e616 |
{
|
|
Packit |
13e616 |
perfmgr_db_err_reading_t prev_err;
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
if (perfmgr_db_get_prev_err(pm->db, mon_node->guid, port, &prev_err)
|
|
Packit |
13e616 |
!= PERFMGR_EVENT_DB_SUCCESS) {
|
|
Packit |
13e616 |
OSM_LOG(pm->log, OSM_LOG_VERBOSE, "Failed to find previous "
|
|
Packit |
13e616 |
"error reading for %s (guid 0x%" PRIx64 ") port %u\n",
|
|
Packit |
13e616 |
mon_node->name, mon_node->guid, port);
|
|
Packit |
13e616 |
return;
|
|
Packit |
13e616 |
}
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
OSM_LOG(pm->log, OSM_LOG_DEBUG,
|
|
Packit |
13e616 |
"Errors vs previous node %s (0x%" PRIx64 ") port %u\n"
|
|
Packit |
13e616 |
"SE: %"PRIu64" ?< %"PRIu64"\n"
|
|
Packit |
13e616 |
"LE: %"PRIu64" ?< %"PRIu64"\n"
|
|
Packit |
13e616 |
"LD: %"PRIu64" ?< %"PRIu64"\n"
|
|
Packit |
13e616 |
"RE: %"PRIu64" ?< %"PRIu64"\n"
|
|
Packit |
13e616 |
"RPE: %"PRIu64" ?< %"PRIu64"\n"
|
|
Packit |
13e616 |
"SRE: %"PRIu64" ?< %"PRIu64"\n"
|
|
Packit |
13e616 |
"XD: %"PRIu64" ?< %"PRIu64"\n"
|
|
Packit |
13e616 |
"XCE: %"PRIu64" ?< %"PRIu64"\n"
|
|
Packit |
13e616 |
"RCE: %"PRIu64" ?< %"PRIu64"\n"
|
|
Packit |
13e616 |
"LI: %"PRIu64" ?< %"PRIu64"\n"
|
|
Packit |
13e616 |
"BO: %"PRIu64" ?< %"PRIu64"\n"
|
|
Packit |
13e616 |
"VL15: %"PRIu64" ?< %"PRIu64"\n"
|
|
Packit |
13e616 |
"XW: %"PRIu64" ?< %"PRIu64"\n"
|
|
Packit |
13e616 |
,
|
|
Packit |
13e616 |
mon_node->name, mon_node->guid, port,
|
|
Packit |
13e616 |
cr->symbol_err_cnt, prev_err.symbol_err_cnt,
|
|
Packit |
13e616 |
cr->link_err_recover, prev_err.link_err_recover,
|
|
Packit |
13e616 |
cr->link_downed, prev_err.link_downed,
|
|
Packit |
13e616 |
cr->rcv_err, prev_err.rcv_err,
|
|
Packit |
13e616 |
cr->rcv_rem_phys_err, prev_err.rcv_rem_phys_err,
|
|
Packit |
13e616 |
cr->rcv_switch_relay_err, prev_err.rcv_switch_relay_err,
|
|
Packit |
13e616 |
cr->xmit_discards, prev_err.xmit_discards,
|
|
Packit |
13e616 |
cr->xmit_constraint_err, prev_err.xmit_constraint_err,
|
|
Packit |
13e616 |
cr->rcv_constraint_err, prev_err.rcv_constraint_err,
|
|
Packit |
13e616 |
cr->link_integrity, prev_err.link_integrity,
|
|
Packit |
13e616 |
cr->buffer_overrun, prev_err.buffer_overrun,
|
|
Packit |
13e616 |
cr->vl15_dropped, prev_err.vl15_dropped,
|
|
Packit |
13e616 |
cr->xmit_wait, prev_err.xmit_wait);
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
if (cr->symbol_err_cnt < prev_err.symbol_err_cnt ||
|
|
Packit |
13e616 |
cr->link_err_recover < prev_err.link_err_recover ||
|
|
Packit |
13e616 |
cr->link_downed < prev_err.link_downed ||
|
|
Packit |
13e616 |
cr->rcv_err < prev_err.rcv_err ||
|
|
Packit |
13e616 |
cr->rcv_rem_phys_err < prev_err.rcv_rem_phys_err ||
|
|
Packit |
13e616 |
cr->rcv_switch_relay_err < prev_err.rcv_switch_relay_err ||
|
|
Packit |
13e616 |
cr->xmit_discards < prev_err.xmit_discards ||
|
|
Packit |
13e616 |
cr->xmit_constraint_err < prev_err.xmit_constraint_err ||
|
|
Packit |
13e616 |
cr->rcv_constraint_err < prev_err.rcv_constraint_err ||
|
|
Packit |
13e616 |
cr->link_integrity < prev_err.link_integrity ||
|
|
Packit |
13e616 |
cr->buffer_overrun < prev_err.buffer_overrun ||
|
|
Packit |
13e616 |
cr->vl15_dropped < prev_err.vl15_dropped ||
|
|
Packit |
13e616 |
cr->xmit_wait < prev_err.xmit_wait) {
|
|
Packit |
13e616 |
OSM_LOG(pm->log, OSM_LOG_ERROR, "PerfMgr: ERR 540A: "
|
|
Packit |
13e616 |
"Detected an out of band error clear "
|
|
Packit |
13e616 |
"on %s (0x%" PRIx64 ") port %u\n",
|
|
Packit |
13e616 |
mon_node->name, mon_node->guid, port);
|
|
Packit |
13e616 |
perfmgr_db_clear_prev_err(pm->db, mon_node->guid, port);
|
|
Packit |
13e616 |
}
|
|
Packit |
13e616 |
}
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
/**********************************************************************
 * The counter_overflow_N() helpers return 1 if the value is "close" to
 * overflowing an N-bit counter and 0 otherwise.
 * "close" is defined at 25% for now; the 4-bit variant below uses a
 * fixed threshold of 10.
 **********************************************************************/
static int counter_overflow_4(uint8_t val)
{
	const uint8_t threshold = 10;

	return val >= threshold ? 1 : 0;
}
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
/*
 * Return 1 when an 8-bit counter value is within the top quarter of
 * its range, i.e. at or above UINT8_MAX - UINT8_MAX / 4; 0 otherwise.
 */
static int counter_overflow_8(uint8_t val)
{
	const uint8_t threshold = UINT8_MAX - (UINT8_MAX / 4);

	return val >= threshold ? 1 : 0;
}
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
/*
 * Return 1 when a 16-bit counter, given in network byte order, is at
 * or above UINT16_MAX - UINT16_MAX / 4 once converted with
 * cl_ntoh16(); 0 otherwise.
 */
static int counter_overflow_16(ib_net16_t val)
{
	const uint16_t threshold = UINT16_MAX - (UINT16_MAX / 4);

	return cl_ntoh16(val) >= threshold ? 1 : 0;
}
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
/*
 * 32-bit counter variant.  The argument is passed through cl_ntoh32()
 * before being compared against the top quarter of the 32-bit range.
 */
static int counter_overflow_32(ib_net32_t val)
{
	uint32_t host_val = cl_ntoh32(val);

	if (host_val >= (UINT32_MAX - (UINT32_MAX / 4)))
		return 1;
	return 0;
}
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
/*
 * 64-bit counter variant.  The argument is passed through cl_ntoh64()
 * before being compared against the top quarter of the 64-bit range.
 */
static int counter_overflow_64(ib_net64_t val)
{
	uint64_t host_val = cl_ntoh64(val);

	if (host_val >= (UINT64_MAX - (UINT64_MAX / 4)))
		return 1;
	return 0;
}
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
/**********************************************************************
|
|
Packit |
13e616 |
* Check if the port counters have overflowed and if so issue a clear
|
|
Packit |
13e616 |
* MAD to the port
|
|
Packit |
13e616 |
**********************************************************************/
|
|
Packit |
13e616 |
static void perfmgr_check_overflow(osm_perfmgr_t * pm,
|
|
Packit |
13e616 |
monitored_node_t * mon_node, int16_t pkey_ix,
|
|
Packit |
13e616 |
uint8_t port, ib_port_counters_t * pc,
|
|
Packit |
13e616 |
boolean_t xmit_wait_sup)
|
|
Packit |
13e616 |
{
|
|
Packit |
13e616 |
osm_madw_context_t mad_context;
|
|
Packit |
13e616 |
ib_api_status_t status;
|
|
Packit |
13e616 |
ib_net32_t remote_qp;
|
|
Packit |
13e616 |
uint16_t counter_select;
|
|
Packit |
13e616 |
uint8_t counter_select2;
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
OSM_LOG_ENTER(pm->log);
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
if (counter_overflow_16(pc->symbol_err_cnt) ||
|
|
Packit |
13e616 |
counter_overflow_8(pc->link_err_recover) ||
|
|
Packit |
13e616 |
counter_overflow_8(pc->link_downed) ||
|
|
Packit |
13e616 |
counter_overflow_16(pc->rcv_err) ||
|
|
Packit |
13e616 |
counter_overflow_16(pc->rcv_rem_phys_err) ||
|
|
Packit |
13e616 |
counter_overflow_16(pc->rcv_switch_relay_err) ||
|
|
Packit |
13e616 |
counter_overflow_16(pc->xmit_discards) ||
|
|
Packit |
13e616 |
counter_overflow_8(pc->xmit_constraint_err) ||
|
|
Packit |
13e616 |
counter_overflow_8(pc->rcv_constraint_err) ||
|
|
Packit |
13e616 |
counter_overflow_4(PC_LINK_INT(pc->link_int_buffer_overrun)) ||
|
|
Packit |
13e616 |
counter_overflow_4(PC_BUF_OVERRUN(pc->link_int_buffer_overrun)) ||
|
|
Packit |
13e616 |
counter_overflow_16(pc->vl15_dropped) ||
|
|
Packit |
13e616 |
(xmit_wait_sup && counter_overflow_32(pc->xmit_wait)) ||
|
|
Packit |
13e616 |
(!pce_supported(mon_node, port) &&
|
|
Packit |
13e616 |
(counter_overflow_32(pc->xmit_data) ||
|
|
Packit |
13e616 |
counter_overflow_32(pc->rcv_data) ||
|
|
Packit |
13e616 |
counter_overflow_32(pc->xmit_pkts) ||
|
|
Packit |
13e616 |
counter_overflow_32(pc->rcv_pkts)))) {
|
|
Packit |
13e616 |
osm_node_t *p_node = NULL;
|
|
Packit |
13e616 |
ib_net16_t lid = 0;
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
if (!mon_node->port[port].valid)
|
|
Packit |
13e616 |
goto Exit;
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
osm_log_v2(pm->log, OSM_LOG_VERBOSE, FILE_ID,
|
|
Packit |
13e616 |
"PerfMgr: Counter overflow: %s (0x%" PRIx64
|
|
Packit |
13e616 |
") port %d; clearing counters\n",
|
|
Packit |
13e616 |
mon_node->name, mon_node->guid, port);
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
cl_plock_acquire(&pm->osm->lock);
|
|
Packit |
13e616 |
p_node =
|
|
Packit |
13e616 |
osm_get_node_by_guid(pm->subn, cl_hton64(mon_node->guid));
|
|
Packit |
13e616 |
lid = get_lid(p_node, port, mon_node);
|
|
Packit |
13e616 |
cl_plock_release(&pm->osm->lock);
|
|
Packit |
13e616 |
if (lid == 0) {
|
|
Packit |
13e616 |
OSM_LOG(pm->log, OSM_LOG_ERROR, "PerfMgr: ERR 540C: "
|
|
Packit |
13e616 |
"Failed to clear counters for %s (0x%"
|
|
Packit |
13e616 |
PRIx64 ") port %d; failed to get lid\n",
|
|
Packit |
13e616 |
mon_node->name, mon_node->guid, port);
|
|
Packit |
13e616 |
goto Exit;
|
|
Packit |
13e616 |
}
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
remote_qp = get_qp(NULL, port);
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
mad_context.perfmgr_context.node_guid = mon_node->guid;
|
|
Packit |
13e616 |
mad_context.perfmgr_context.port = port;
|
|
Packit |
13e616 |
mad_context.perfmgr_context.mad_method = IB_MAD_METHOD_SET;
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
/* apparently some HW uses the same counters for the 32 and 64
|
|
Packit |
13e616 |
* bit versions and a clear of them in the PortCounters
|
|
Packit |
13e616 |
* attribute also clears the ExtendedPortCounters equivalant
|
|
Packit |
13e616 |
* counters
|
|
Packit |
13e616 |
*/
|
|
Packit |
13e616 |
if (pce_supported(mon_node, port))
|
|
Packit |
13e616 |
counter_select = 0x0fff;
|
|
Packit |
13e616 |
else
|
|
Packit |
13e616 |
counter_select = 0xffff;
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
if (xmit_wait_sup)
|
|
Packit |
13e616 |
counter_select2 = 1;
|
|
Packit |
13e616 |
else
|
|
Packit |
13e616 |
counter_select2 = 0;
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
status = perfmgr_send_pc_mad(pm, lid, remote_qp, pkey_ix,
|
|
Packit |
13e616 |
port, IB_MAD_METHOD_SET,
|
|
Packit |
13e616 |
counter_select,
|
|
Packit |
13e616 |
counter_select2,
|
|
Packit |
13e616 |
&mad_context,
|
|
Packit |
13e616 |
0); /* FIXME SL != 0 */
|
|
Packit |
13e616 |
if (status != IB_SUCCESS)
|
|
Packit |
13e616 |
OSM_LOG(pm->log, OSM_LOG_ERROR, "PerfMgr: ERR 5411: "
|
|
Packit |
13e616 |
"Failed to send clear counters MAD for %s (0x%"
|
|
Packit |
13e616 |
PRIx64 ") port %d\n",
|
|
Packit |
13e616 |
mon_node->name, mon_node->guid, port);
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
perfmgr_db_clear_prev_err(pm->db, mon_node->guid, port);
|
|
Packit |
13e616 |
if (!pce_supported(mon_node, port))
|
|
Packit |
13e616 |
perfmgr_db_clear_prev_dc(pm->db, mon_node->guid, port);
|
|
Packit |
13e616 |
}
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
Exit:
|
|
Packit |
13e616 |
OSM_LOG_EXIT(pm->log);
|
|
Packit |
13e616 |
}
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
/**********************************************************************
|
|
Packit |
13e616 |
* Check if the port counters have overflowed and if so issue a clear
|
|
Packit |
13e616 |
* MAD to the port
|
|
Packit |
13e616 |
**********************************************************************/
|
|
Packit |
13e616 |
static void perfmgr_check_pce_overflow(osm_perfmgr_t * pm,
|
|
Packit |
13e616 |
monitored_node_t * mon_node,
|
|
Packit |
13e616 |
int16_t pkey_ix,
|
|
Packit |
13e616 |
uint8_t port,
|
|
Packit |
13e616 |
ib_port_counters_ext_t * pc)
|
|
Packit |
13e616 |
{
|
|
Packit |
13e616 |
osm_madw_context_t mad_context;
|
|
Packit |
13e616 |
ib_api_status_t status;
|
|
Packit |
13e616 |
ib_net32_t remote_qp;
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
OSM_LOG_ENTER(pm->log);
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
if (counter_overflow_64(pc->xmit_data) ||
|
|
Packit |
13e616 |
counter_overflow_64(pc->rcv_data) ||
|
|
Packit |
13e616 |
counter_overflow_64(pc->xmit_pkts) ||
|
|
Packit |
13e616 |
counter_overflow_64(pc->rcv_pkts) ||
|
|
Packit |
13e616 |
(ietf_supported(mon_node, port) &&
|
|
Packit |
13e616 |
(counter_overflow_64(pc->unicast_xmit_pkts) ||
|
|
Packit |
13e616 |
counter_overflow_64(pc->unicast_rcv_pkts) ||
|
|
Packit |
13e616 |
counter_overflow_64(pc->multicast_xmit_pkts) ||
|
|
Packit |
13e616 |
counter_overflow_64(pc->multicast_rcv_pkts)))) {
|
|
Packit |
13e616 |
osm_node_t *p_node = NULL;
|
|
Packit |
13e616 |
ib_net16_t lid = 0;
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
if (!mon_node->port[port].valid)
|
|
Packit |
13e616 |
goto Exit;
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
osm_log(pm->log, OSM_LOG_VERBOSE,
|
|
Packit |
13e616 |
"PerfMgr: PortCountersExtended overflow: %s (0x%"
|
|
Packit |
13e616 |
PRIx64 ") port %d; clearing counters\n",
|
|
Packit |
13e616 |
mon_node->name, mon_node->guid, port);
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
cl_plock_acquire(&pm->osm->lock);
|
|
Packit |
13e616 |
p_node =
|
|
Packit |
13e616 |
osm_get_node_by_guid(pm->subn, cl_hton64(mon_node->guid));
|
|
Packit |
13e616 |
lid = get_lid(p_node, port, mon_node);
|
|
Packit |
13e616 |
cl_plock_release(&pm->osm->lock);
|
|
Packit |
13e616 |
if (lid == 0) {
|
|
Packit |
13e616 |
OSM_LOG(pm->log, OSM_LOG_ERROR, "PerfMgr: ERR 5418: "
|
|
Packit |
13e616 |
"Failed to clear counters for %s (0x%"
|
|
Packit |
13e616 |
PRIx64 ") port %d; failed to get lid\n",
|
|
Packit |
13e616 |
mon_node->name, mon_node->guid, port);
|
|
Packit |
13e616 |
goto Exit;
|
|
Packit |
13e616 |
}
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
remote_qp = get_qp(NULL, port);
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
mad_context.perfmgr_context.node_guid = mon_node->guid;
|
|
Packit |
13e616 |
mad_context.perfmgr_context.port = port;
|
|
Packit |
13e616 |
mad_context.perfmgr_context.mad_method = IB_MAD_METHOD_SET;
|
|
Packit |
13e616 |
/* clear port counters */
|
|
Packit |
13e616 |
status = perfmgr_send_pce_mad(pm, lid, remote_qp, pkey_ix,
|
|
Packit |
13e616 |
port, IB_MAD_METHOD_SET,
|
|
Packit |
13e616 |
&mad_context,
|
|
Packit |
13e616 |
0); /* FIXME SL != 0 */
|
|
Packit |
13e616 |
if (status != IB_SUCCESS)
|
|
Packit |
13e616 |
OSM_LOG(pm->log, OSM_LOG_ERROR, "PerfMgr: ERR 5419: "
|
|
Packit |
13e616 |
"Failed to send clear counters MAD for %s (0x%"
|
|
Packit |
13e616 |
PRIx64 ") port %d\n",
|
|
Packit |
13e616 |
mon_node->name, mon_node->guid, port);
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
perfmgr_db_clear_prev_dc(pm->db, mon_node->guid, port);
|
|
Packit |
13e616 |
}
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
Exit:
|
|
Packit |
13e616 |
OSM_LOG_EXIT(pm->log);
|
|
Packit |
13e616 |
}
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
/**********************************************************************
|
|
Packit |
13e616 |
* Check values for logging of errors
|
|
Packit |
13e616 |
**********************************************************************/
|
|
Packit |
13e616 |
static void perfmgr_log_errors(osm_perfmgr_t * pm,
|
|
Packit |
13e616 |
monitored_node_t * mon_node, uint8_t port,
|
|
Packit |
13e616 |
perfmgr_db_err_reading_t * reading)
|
|
Packit |
13e616 |
{
|
|
Packit |
13e616 |
perfmgr_db_err_reading_t prev_read;
|
|
Packit |
13e616 |
perfmgr_db_err_t err =
|
|
Packit |
13e616 |
perfmgr_db_get_prev_err(pm->db, mon_node->guid, port, &prev_read);
|
|
Packit |
13e616 |
uint64_t cur, prev;
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
if (err != PERFMGR_EVENT_DB_SUCCESS) {
|
|
Packit |
13e616 |
OSM_LOG(pm->log, OSM_LOG_VERBOSE, "Failed to find previous "
|
|
Packit |
13e616 |
"reading for %s (0x%" PRIx64 ") port %u\n",
|
|
Packit |
13e616 |
mon_node->name, mon_node->guid, port);
|
|
Packit |
13e616 |
return;
|
|
Packit |
13e616 |
}
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
#define LOG_ERR_CNT(errname, errnum, counter_name) \
|
|
Packit |
13e616 |
if (reading->counter_name > prev_read.counter_name) { \
|
|
Packit |
13e616 |
if (mon_node->port[port].remote_valid == TRUE) \
|
|
Packit |
13e616 |
OSM_LOG(pm->log, OSM_LOG_ERROR, "ERR %s: " \
|
|
Packit |
13e616 |
"%s : %" PRIu64 " : node " \
|
|
Packit |
13e616 |
"\"%s\" (NodeGUID: 0x%" PRIx64 ") : port %u " \
|
|
Packit |
13e616 |
"connected to \"%s\" (NodeGUID: 0x%" PRIx64 ") : port %u\n", \
|
|
Packit |
13e616 |
errnum, errname, \
|
|
Packit |
13e616 |
reading->counter_name - prev_read.counter_name, \
|
|
Packit |
13e616 |
mon_node->name, mon_node->guid, port, \
|
|
Packit |
13e616 |
mon_node->port[port].remote_name, \
|
|
Packit |
13e616 |
mon_node->port[port].remote_guid, \
|
|
Packit |
13e616 |
mon_node->port[port].remote_port); \
|
|
Packit |
13e616 |
else \
|
|
Packit |
13e616 |
OSM_LOG(pm->log, OSM_LOG_ERROR, "ERR %s: " \
|
|
Packit |
13e616 |
"%s : %" PRIu64 " : node " \
|
|
Packit |
13e616 |
"\"%s\" (NodeGUID: 0x%" PRIx64 ") : port %u\n", \
|
|
Packit |
13e616 |
errnum, errname, \
|
|
Packit |
13e616 |
reading->counter_name - prev_read.counter_name, \
|
|
Packit |
13e616 |
mon_node->name, mon_node->guid, port); \
|
|
Packit |
13e616 |
}
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
LOG_ERR_CNT("SymbolErrorCounter", "5431", symbol_err_cnt);
|
|
Packit |
13e616 |
LOG_ERR_CNT("LinkErrorRecoveryCounter", "5432", link_err_recover);
|
|
Packit |
13e616 |
LOG_ERR_CNT("LinkDownedCounter", "5433", link_downed);
|
|
Packit |
13e616 |
LOG_ERR_CNT("PortRcvErrors", "5434", rcv_err);
|
|
Packit |
13e616 |
LOG_ERR_CNT("PortRcvRemotePhysicalErrors", "5435", rcv_rem_phys_err);
|
|
Packit |
13e616 |
LOG_ERR_CNT("PortRcvSwitchRelayErrors", "5436", rcv_switch_relay_err);
|
|
Packit |
13e616 |
LOG_ERR_CNT("PortXmitDiscards", "5437", xmit_discards);
|
|
Packit |
13e616 |
LOG_ERR_CNT("PortXmitConstraintErrors", "5438", xmit_constraint_err);
|
|
Packit |
13e616 |
LOG_ERR_CNT("PortRcvConstraintErrors", "5439", rcv_constraint_err);
|
|
Packit |
13e616 |
LOG_ERR_CNT("LocalLinkIntegrityErrors", "543A", link_integrity);
|
|
Packit |
13e616 |
LOG_ERR_CNT("ExcessiveBufferOverrunErrors", "543B", buffer_overrun);
|
|
Packit |
13e616 |
LOG_ERR_CNT("VL15Dropped", "543C", vl15_dropped);
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
cur = reading->xmit_wait;
|
|
Packit |
13e616 |
prev = prev_read.xmit_wait;
|
|
Packit |
13e616 |
if (pm->xmit_wait_log && cur > prev &&
|
|
Packit |
13e616 |
(cur - prev) >= pm->xmit_wait_threshold) {
|
|
Packit |
13e616 |
OSM_LOG(pm->log, OSM_LOG_ERROR, "ERR 543D: XmitWait : %" PRIu64
|
|
Packit |
13e616 |
" : node \"%s\" (NodeGUID: 0x%" PRIx64 ") : port %u\n",
|
|
Packit |
13e616 |
cur - prev, mon_node->name, mon_node->guid, port);
|
|
Packit |
13e616 |
}
|
|
Packit |
13e616 |
}
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
/**********************************************************************
 * Look up a redirect P_Key in the P_Key table of the PerfMgr port.
 *
 * pm   - PerfMgr object; pm->port_guid selects the port to inspect
 * pkey - P_Key to look up; its base value (ib_pkey_get_base()) is used
 *        as the map key
 *
 * Returns block * IB_NUM_PKEY_ELEMENTS_IN_BLOCK + index of the matching
 * table entry, or -1 when the port object is missing, its physical port
 * is invalid, the P_Key is not in the table, or the block/index lookup
 * fails.
 *
 * All subnet objects are accessed with pm->sm->p_lock held; the lock is
 * released on every path before anything is logged.
 **********************************************************************/
static int16_t validate_redir_pkey(osm_perfmgr_t *pm, ib_net16_t pkey)
{
	int16_t pkey_ix = -1;
	osm_port_t *p_port;
	osm_pkey_tbl_t *p_pkey_tbl;
	ib_net16_t *p_orig_pkey;
	uint16_t block;
	uint8_t index;

	OSM_LOG_ENTER(pm->log);

	CL_PLOCK_ACQUIRE(pm->sm->p_lock);
	p_port = osm_get_port_by_guid(pm->subn, pm->port_guid);
	if (!p_port) {
		CL_PLOCK_RELEASE(pm->sm->p_lock);
		OSM_LOG(pm->log, OSM_LOG_ERROR,
			"ERR 541E: No PerfMgr port object\n");
		goto Exit;
	}

	if (!p_port->p_physp || !osm_physp_is_valid(p_port->p_physp)) {
		CL_PLOCK_RELEASE(pm->sm->p_lock);
		OSM_LOG(pm->log, OSM_LOG_ERROR,
			"ERR 5420: Local PerfMgt port physp invalid\n");
		goto Exit;
	}

	/*
	 * The table is a member of the physical port, so its address is
	 * never NULL once p_physp has been checked; no NULL test is
	 * needed here.
	 */
	p_pkey_tbl = &p_port->p_physp->pkeys;

	p_orig_pkey = cl_map_get(&p_pkey_tbl->keys, ib_pkey_get_base(pkey));
	if (!p_orig_pkey) {
		CL_PLOCK_RELEASE(pm->sm->p_lock);
		OSM_LOG(pm->log, OSM_LOG_VERBOSE,
			"PKey 0x%x not found for PerfMgr port\n",
			cl_ntoh16(pkey));
		goto Exit;
	}

	if (osm_pkey_tbl_get_block_and_idx(p_pkey_tbl, p_orig_pkey,
					   &block, &index) != IB_SUCCESS) {
		CL_PLOCK_RELEASE(pm->sm->p_lock);
		OSM_LOG(pm->log, OSM_LOG_ERROR,
			"ERR 541F: Failed to obtain P_Key 0x%04x "
			"block and index for PerfMgr port\n",
			cl_ntoh16(pkey));
		goto Exit;
	}

	CL_PLOCK_RELEASE(pm->sm->p_lock);
	pkey_ix = block * IB_NUM_PKEY_ELEMENTS_IN_BLOCK + index;

Exit:
	OSM_LOG_EXIT(pm->log);
	return pkey_ix;
}
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
static boolean_t handle_redirect(osm_perfmgr_t *pm,
|
|
Packit |
13e616 |
ib_class_port_info_t *cpi,
|
|
Packit |
13e616 |
monitored_node_t *p_mon_node,
|
|
Packit |
13e616 |
uint8_t port,
|
|
Packit |
13e616 |
osm_madw_context_t *mad_context)
|
|
Packit |
13e616 |
{
|
|
Packit |
13e616 |
char gid_str[INET6_ADDRSTRLEN];
|
|
Packit |
13e616 |
ib_api_status_t status;
|
|
Packit |
13e616 |
boolean_t valid = TRUE;
|
|
Packit |
13e616 |
int16_t pkey_ix = 0;
|
|
Packit |
13e616 |
uint8_t mad_method;
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
OSM_LOG(pm->log, OSM_LOG_VERBOSE,
|
|
Packit |
13e616 |
"Redirection to LID %u GID %s QP 0x%x received\n",
|
|
Packit |
13e616 |
cl_ntoh16(cpi->redir_lid),
|
|
Packit |
13e616 |
inet_ntop(AF_INET6, cpi->redir_gid.raw, gid_str,
|
|
Packit |
13e616 |
sizeof gid_str), cl_ntoh32(cpi->redir_qp));
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
if (!pm->subn->opt.perfmgr_redir) {
|
|
Packit |
13e616 |
OSM_LOG(pm->log, OSM_LOG_VERBOSE,
|
|
Packit |
13e616 |
"Redirection requested but disabled\n");
|
|
Packit |
13e616 |
valid = FALSE;
|
|
Packit |
13e616 |
}
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
/* valid redirection ? */
|
|
Packit |
13e616 |
if (cpi->redir_lid == 0) {
|
|
Packit |
13e616 |
if (!ib_gid_is_notzero(&cpi->redir_gid)) {
|
|
Packit |
13e616 |
OSM_LOG(pm->log, OSM_LOG_VERBOSE,
|
|
Packit |
13e616 |
"Invalid redirection "
|
|
Packit |
13e616 |
"(both redirect LID and GID are zero)\n");
|
|
Packit |
13e616 |
valid = FALSE;
|
|
Packit |
13e616 |
}
|
|
Packit |
13e616 |
}
|
|
Packit |
13e616 |
if (cpi->redir_qp == 0) {
|
|
Packit |
13e616 |
OSM_LOG(pm->log, OSM_LOG_VERBOSE, "Invalid RedirectQP\n");
|
|
Packit |
13e616 |
valid = FALSE;
|
|
Packit |
13e616 |
}
|
|
Packit |
13e616 |
if (cpi->redir_pkey == 0) {
|
|
Packit |
13e616 |
OSM_LOG(pm->log, OSM_LOG_VERBOSE, "Invalid RedirectP_Key\n");
|
|
Packit |
13e616 |
valid = FALSE;
|
|
Packit |
13e616 |
}
|
|
Packit |
13e616 |
if (cpi->redir_qkey != IB_QP1_WELL_KNOWN_Q_KEY) {
|
|
Packit |
13e616 |
OSM_LOG(pm->log, OSM_LOG_VERBOSE, "Invalid RedirectQ_Key\n");
|
|
Packit |
13e616 |
valid = FALSE;
|
|
Packit |
13e616 |
}
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
pkey_ix = validate_redir_pkey(pm, cpi->redir_pkey);
|
|
Packit |
13e616 |
if (pkey_ix == -1) {
|
|
Packit |
13e616 |
OSM_LOG(pm->log, OSM_LOG_VERBOSE,
|
|
Packit |
13e616 |
"Index for Pkey 0x%x not found\n",
|
|
Packit |
13e616 |
cl_ntoh16(cpi->redir_pkey));
|
|
Packit |
13e616 |
valid = FALSE;
|
|
Packit |
13e616 |
}
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
if (cpi->redir_lid == 0) {
|
|
Packit |
13e616 |
/* GID redirection: get PathRecord information */
|
|
Packit |
13e616 |
OSM_LOG(pm->log, OSM_LOG_VERBOSE,
|
|
Packit |
13e616 |
"GID redirection not currently supported\n");
|
|
Packit |
13e616 |
goto Exit;
|
|
Packit |
13e616 |
}
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
if (!valid)
|
|
Packit |
13e616 |
goto Exit;
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
/* LID redirection support (easier than GID redirection) */
|
|
Packit |
13e616 |
cl_plock_acquire(&pm->osm->lock);
|
|
Packit |
13e616 |
p_mon_node->port[port].redirection = TRUE;
|
|
Packit |
13e616 |
p_mon_node->port[port].valid = valid;
|
|
Packit |
13e616 |
memcpy(&p_mon_node->port[port].gid, &cpi->redir_gid,
|
|
Packit |
13e616 |
sizeof(ib_gid_t));
|
|
Packit |
13e616 |
p_mon_node->port[port].lid = cpi->redir_lid;
|
|
Packit |
13e616 |
p_mon_node->port[port].qp = cpi->redir_qp;
|
|
Packit |
13e616 |
p_mon_node->port[port].pkey = cpi->redir_pkey;
|
|
Packit |
13e616 |
if (pkey_ix != -1)
|
|
Packit |
13e616 |
p_mon_node->port[port].pkey_ix = pkey_ix;
|
|
Packit |
13e616 |
cl_plock_release(&pm->osm->lock);
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
/* either */
|
|
Packit |
13e616 |
if (pm->query_cpi)
|
|
Packit |
13e616 |
{
|
|
Packit |
13e616 |
/* issue a CPI query to the redirected location */
|
|
Packit |
13e616 |
mad_method = IB_MAD_METHOD_GET;
|
|
Packit |
13e616 |
p_mon_node->port[port].cpi_valid = FALSE;
|
|
Packit |
13e616 |
status = perfmgr_send_cpi_mad(pm, cpi->redir_lid,
|
|
Packit |
13e616 |
cpi->redir_qp, pkey_ix,
|
|
Packit |
13e616 |
port, mad_context,
|
|
Packit |
13e616 |
0); /* FIXME SL != 0 */
|
|
Packit |
13e616 |
} else {
|
|
Packit |
13e616 |
/* reissue the original query to the redirected location */
|
|
Packit |
13e616 |
uint8_t counter_select2;
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
if (xmit_wait_supported(p_mon_node, port))
|
|
Packit |
13e616 |
counter_select2 = 1;
|
|
Packit |
13e616 |
else
|
|
Packit |
13e616 |
counter_select2 = 0;
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
mad_method = mad_context->perfmgr_context.mad_method;
|
|
Packit |
13e616 |
if (mad_context->perfmgr_context.mad_attr_id
|
|
Packit |
13e616 |
== IB_MAD_ATTR_PORT_CNTRS) {
|
|
Packit |
13e616 |
status = perfmgr_send_pc_mad(pm, cpi->redir_lid,
|
|
Packit |
13e616 |
cpi->redir_qp,
|
|
Packit |
13e616 |
pkey_ix, port,
|
|
Packit |
13e616 |
mad_method,
|
|
Packit |
13e616 |
0xffff,
|
|
Packit |
13e616 |
counter_select2,
|
|
Packit |
13e616 |
mad_context,
|
|
Packit |
13e616 |
0); /* FIXME SL != 0 */
|
|
Packit |
13e616 |
} else {
|
|
Packit |
13e616 |
status = perfmgr_send_pce_mad(pm, cpi->redir_lid,
|
|
Packit |
13e616 |
cpi->redir_qp,
|
|
Packit |
13e616 |
pkey_ix, port,
|
|
Packit |
13e616 |
mad_method,
|
|
Packit |
13e616 |
mad_context,
|
|
Packit |
13e616 |
0); /* FIXME SL != 0 */
|
|
Packit |
13e616 |
}
|
|
Packit |
13e616 |
}
|
|
Packit |
13e616 |
if (status != IB_SUCCESS)
|
|
Packit |
13e616 |
OSM_LOG(pm->log, OSM_LOG_ERROR, "ERR 5414: "
|
|
Packit |
13e616 |
"Failed to send redirected MAD "
|
|
Packit |
13e616 |
"with method 0x%x for node %s "
|
|
Packit |
13e616 |
"(NodeGuid 0x%" PRIx64 ") port %d\n",
|
|
Packit |
13e616 |
mad_method, p_mon_node->name, p_mon_node->guid, port);
|
|
Packit |
13e616 |
Exit:
|
|
Packit |
13e616 |
return (valid);
|
|
Packit |
13e616 |
}
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
/**********************************************************************
|
|
Packit |
13e616 |
* Detect if someone else on the network could have cleared the counters
|
|
Packit |
13e616 |
* without us knowing. This is easy to detect because the counters never
|
|
Packit |
13e616 |
* wrap but are "sticky" PortCountersExtended version.
|
|
Packit |
13e616 |
*
|
|
Packit |
13e616 |
* The one time this will not work is if the port is getting errors fast
|
|
Packit |
13e616 |
* enough to have the reading overtake the previous reading. In this case,
|
|
Packit |
13e616 |
* counters will be missed.
|
|
Packit |
13e616 |
**********************************************************************/
|
|
Packit |
13e616 |
static void perfmgr_check_data_cnt_oob_clear(osm_perfmgr_t * pm,
|
|
Packit |
13e616 |
monitored_node_t * mon_node,
|
|
Packit |
13e616 |
uint8_t port,
|
|
Packit |
13e616 |
perfmgr_db_data_cnt_reading_t * dc)
|
|
Packit |
13e616 |
{
|
|
Packit |
13e616 |
perfmgr_db_data_cnt_reading_t prev_dc;
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
if (perfmgr_db_get_prev_dc(pm->db, mon_node->guid, port, &prev_dc)
|
|
Packit |
13e616 |
!= PERFMGR_EVENT_DB_SUCCESS) {
|
|
Packit |
13e616 |
OSM_LOG(pm->log, OSM_LOG_VERBOSE,
|
|
Packit |
13e616 |
"Failed to find previous data count "
|
|
Packit |
13e616 |
"reading for %s (0x%" PRIx64 ") port %u\n",
|
|
Packit |
13e616 |
mon_node->name, mon_node->guid, port);
|
|
Packit |
13e616 |
return;
|
|
Packit |
13e616 |
}
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
OSM_LOG(pm->log, OSM_LOG_DEBUG,
|
|
Packit |
13e616 |
"Data vs previous node %s (0x%" PRIx64 ") port %u\n"
|
|
Packit |
13e616 |
"TX: %"PRIu64" ?< %"PRIu64"\n"
|
|
Packit |
13e616 |
"RX: %"PRIu64" ?< %"PRIu64"\n"
|
|
Packit |
13e616 |
"TXP: %"PRIu64" ?< %"PRIu64"\n"
|
|
Packit |
13e616 |
"RXP: %"PRIu64" ?< %"PRIu64"\n"
|
|
Packit |
13e616 |
"UTXP: %"PRIu64" ?< %"PRIu64"\n"
|
|
Packit |
13e616 |
"URXP: %"PRIu64" ?< %"PRIu64"\n"
|
|
Packit |
13e616 |
"MTXP: %"PRIu64" ?< %"PRIu64"\n"
|
|
Packit |
13e616 |
"MRXP: %"PRIu64" ?< %"PRIu64"\n"
|
|
Packit |
13e616 |
,
|
|
Packit |
13e616 |
mon_node->name, mon_node->guid, port,
|
|
Packit |
13e616 |
dc->xmit_data, prev_dc.xmit_data,
|
|
Packit |
13e616 |
dc->rcv_data, prev_dc.rcv_data,
|
|
Packit |
13e616 |
dc->xmit_pkts, prev_dc.xmit_pkts,
|
|
Packit |
13e616 |
dc->rcv_pkts, prev_dc.rcv_pkts,
|
|
Packit |
13e616 |
dc->unicast_xmit_pkts, prev_dc.unicast_xmit_pkts,
|
|
Packit |
13e616 |
dc->unicast_rcv_pkts, prev_dc.unicast_rcv_pkts,
|
|
Packit |
13e616 |
dc->multicast_xmit_pkts, prev_dc.multicast_xmit_pkts,
|
|
Packit |
13e616 |
dc->multicast_rcv_pkts, prev_dc.multicast_rcv_pkts);
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
if (dc->xmit_data < prev_dc.xmit_data ||
|
|
Packit |
13e616 |
dc->rcv_data < prev_dc.rcv_data ||
|
|
Packit |
13e616 |
dc->xmit_pkts < prev_dc.xmit_pkts ||
|
|
Packit |
13e616 |
dc->rcv_pkts < prev_dc.rcv_pkts ||
|
|
Packit |
13e616 |
(ietf_supported(mon_node, port) &&
|
|
Packit |
13e616 |
(dc->unicast_xmit_pkts < prev_dc.unicast_xmit_pkts ||
|
|
Packit |
13e616 |
dc->unicast_rcv_pkts < prev_dc.unicast_rcv_pkts ||
|
|
Packit |
13e616 |
dc->multicast_xmit_pkts < prev_dc.multicast_xmit_pkts ||
|
|
Packit |
13e616 |
dc->multicast_rcv_pkts < prev_dc.multicast_rcv_pkts))) {
|
|
Packit |
13e616 |
OSM_LOG(pm->log, OSM_LOG_ERROR,
|
|
Packit |
13e616 |
"PerfMgr: ERR 540B: Detected an out of band data counter "
|
|
Packit |
13e616 |
"clear on node %s (0x%" PRIx64 ") port %u\n",
|
|
Packit |
13e616 |
mon_node->name, mon_node->guid, port);
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
perfmgr_db_clear_prev_dc(pm->db, mon_node->guid, port);
|
|
Packit |
13e616 |
}
|
|
Packit |
13e616 |
}
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
/**********************************************************************
|
|
Packit |
13e616 |
* The dispatcher uses a thread pool which will call this function when
|
|
Packit |
13e616 |
* there is a thread available to process the mad received on the wire
|
|
Packit |
13e616 |
**********************************************************************/
|
|
Packit |
13e616 |
static void pc_recv_process(void *context, void *data)
|
|
Packit |
13e616 |
{
|
|
Packit |
13e616 |
osm_perfmgr_t *pm = context;
|
|
Packit |
13e616 |
osm_madw_t *p_madw = data;
|
|
Packit |
13e616 |
osm_madw_context_t *mad_context = &p_madw->context;
|
|
Packit |
13e616 |
ib_mad_t *p_mad = osm_madw_get_mad_ptr(p_madw);
|
|
Packit |
13e616 |
uint64_t node_guid = mad_context->perfmgr_context.node_guid;
|
|
Packit |
13e616 |
uint8_t port = mad_context->perfmgr_context.port;
|
|
Packit |
13e616 |
perfmgr_db_err_reading_t err_reading;
|
|
Packit |
13e616 |
perfmgr_db_data_cnt_reading_t data_reading;
|
|
Packit |
13e616 |
cl_map_item_t *p_node;
|
|
Packit |
13e616 |
monitored_node_t *p_mon_node;
|
|
Packit |
13e616 |
ib_class_port_info_t *cpi = NULL;
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
OSM_LOG_ENTER(pm->log);
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
/*
|
|
Packit |
13e616 |
* get the monitored node struct to have the printable name
|
|
Packit |
13e616 |
* for log messages
|
|
Packit |
13e616 |
*/
|
|
Packit |
13e616 |
if ((p_node = cl_qmap_get(&pm->monitored_map, node_guid)) ==
|
|
Packit |
13e616 |
cl_qmap_end(&pm->monitored_map)) {
|
|
Packit |
13e616 |
OSM_LOG(pm->log, OSM_LOG_ERROR, "ERR 5412: GUID 0x%016"
|
|
Packit |
13e616 |
PRIx64 " not found in monitored map\n", node_guid);
|
|
Packit |
13e616 |
goto Exit;
|
|
Packit |
13e616 |
}
|
|
Packit |
13e616 |
p_mon_node = (monitored_node_t *) p_node;
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
OSM_LOG(pm->log, OSM_LOG_VERBOSE,
|
|
Packit |
13e616 |
"Processing received MAD status 0x%x context 0x%"
|
|
Packit |
13e616 |
PRIx64 " port %u\n", cl_ntoh16(p_mad->status), node_guid, port);
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
CL_ASSERT(p_mad->attr_id == IB_MAD_ATTR_PORT_CNTRS ||
|
|
Packit |
13e616 |
p_mad->attr_id == IB_MAD_ATTR_PORT_CNTRS_EXT ||
|
|
Packit |
13e616 |
p_mad->attr_id == IB_MAD_ATTR_CLASS_PORT_INFO);
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
cl_plock_acquire(&pm->osm->lock);
|
|
Packit |
13e616 |
/* validate port number */
|
|
Packit |
13e616 |
if (port >= p_mon_node->num_ports) {
|
|
Packit |
13e616 |
cl_plock_release(&pm->osm->lock);
|
|
Packit |
13e616 |
OSM_LOG(pm->log, OSM_LOG_ERROR, "ERR 5413: "
|
|
Packit |
13e616 |
"Invalid port num %d for GUID 0x%016"
|
|
Packit |
13e616 |
PRIx64 " num ports %d\n", port, node_guid,
|
|
Packit |
13e616 |
p_mon_node->num_ports);
|
|
Packit |
13e616 |
goto Exit;
|
|
Packit |
13e616 |
}
|
|
Packit |
13e616 |
cl_plock_release(&pm->osm->lock);
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
/* capture CLASS_PORT_INFO data */
|
|
Packit |
13e616 |
if (p_mad->attr_id == IB_MAD_ATTR_CLASS_PORT_INFO) {
|
|
Packit |
13e616 |
boolean_t cpi_valid = TRUE;
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
cpi = (ib_class_port_info_t *) &
|
|
Packit |
13e616 |
(osm_madw_get_perfmgt_mad_ptr(p_madw)->data);
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
/* Response could be redirection (IBM eHCA PMA does this) */
|
|
Packit |
13e616 |
if (p_mad->status & IB_MAD_STATUS_REDIRECT)
|
|
Packit |
13e616 |
cpi_valid = handle_redirect(pm, cpi, p_mon_node, port,
|
|
Packit |
13e616 |
mad_context);
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
if (pm->query_cpi && cpi_valid) {
|
|
Packit |
13e616 |
cl_plock_acquire(&pm->osm->lock);
|
|
Packit |
13e616 |
if (p_mon_node->node_type == IB_NODE_TYPE_SWITCH) {
|
|
Packit |
13e616 |
unsigned i;
|
|
Packit |
13e616 |
for (i = p_mon_node->esp0 ? 0 : 1;
|
|
Packit |
13e616 |
i < p_mon_node->num_ports;
|
|
Packit |
13e616 |
i++) {
|
|
Packit |
13e616 |
p_mon_node->port[i].cap_mask = cpi->cap_mask;
|
|
Packit |
13e616 |
p_mon_node->port[i].cpi_valid = cpi_valid;
|
|
Packit |
13e616 |
}
|
|
Packit |
13e616 |
} else {
|
|
Packit |
13e616 |
p_mon_node->port[port].cap_mask = cpi->cap_mask;
|
|
Packit |
13e616 |
p_mon_node->port[port].cpi_valid = cpi_valid;
|
|
Packit |
13e616 |
}
|
|
Packit |
13e616 |
cl_plock_release(&pm->osm->lock);
|
|
Packit |
13e616 |
}
|
|
Packit |
13e616 |
goto Exit;
|
|
Packit |
13e616 |
}
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
if (p_mad->attr_id == IB_MAD_ATTR_PORT_CNTRS_EXT) {
|
|
Packit |
13e616 |
ib_port_counters_ext_t *ext_wire_read =
|
|
Packit |
13e616 |
(ib_port_counters_ext_t *)
|
|
Packit |
13e616 |
&osm_madw_get_perfmgt_mad_ptr(p_madw)->data;
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
/* convert wire data to perfmgr data counter reading */
|
|
Packit |
13e616 |
perfmgr_db_fill_data_cnt_read_pce(ext_wire_read, &data_reading,
|
|
Packit |
13e616 |
ietf_supported(p_mon_node,
|
|
Packit |
13e616 |
port));
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
/* add counter */
|
|
Packit |
13e616 |
if (mad_context->perfmgr_context.mad_method
|
|
Packit |
13e616 |
== IB_MAD_METHOD_GET) {
|
|
Packit |
13e616 |
/* detect an out of band clear on the port */
|
|
Packit |
13e616 |
perfmgr_check_data_cnt_oob_clear(pm, p_mon_node, port,
|
|
Packit |
13e616 |
&data_reading);
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
perfmgr_db_add_dc_reading(pm->db, node_guid, port,
|
|
Packit |
13e616 |
&data_reading,
|
|
Packit |
13e616 |
ietf_supported(p_mon_node,
|
|
Packit |
13e616 |
port));
|
|
Packit |
13e616 |
} else {
|
|
Packit |
13e616 |
perfmgr_db_clear_prev_dc(pm->db, node_guid, port);
|
|
Packit |
13e616 |
}
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
perfmgr_check_pce_overflow(pm, p_mon_node,
|
|
Packit |
13e616 |
p_mon_node->port[port].pkey_ix,
|
|
Packit |
13e616 |
port, ext_wire_read);
|
|
Packit |
13e616 |
} else {
|
|
Packit |
13e616 |
boolean_t pce_sup = pce_supported(p_mon_node, port);
|
|
Packit |
13e616 |
boolean_t xmit_wait_sup = xmit_wait_supported(p_mon_node, port);
|
|
Packit |
13e616 |
ib_port_counters_t *wire_read =
|
|
Packit |
13e616 |
(ib_port_counters_t *)
|
|
Packit |
13e616 |
&osm_madw_get_perfmgt_mad_ptr(p_madw)->data;
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
perfmgr_db_fill_err_read(wire_read, &err_reading, xmit_wait_sup);
|
|
Packit |
13e616 |
if (!pce_sup)
|
|
Packit |
13e616 |
perfmgr_db_fill_data_cnt_read_pc(wire_read, &data_reading);
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
if (mad_context->perfmgr_context.mad_method == IB_MAD_METHOD_GET) {
|
|
Packit |
13e616 |
/* detect an out of band clear on the port */
|
|
Packit |
13e616 |
perfmgr_check_oob_clear(pm, p_mon_node, port, &err_reading);
|
|
Packit |
13e616 |
if (!pce_sup)
|
|
Packit |
13e616 |
perfmgr_check_data_cnt_oob_clear(pm, p_mon_node, port,
|
|
Packit |
13e616 |
&data_reading);
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
/* log errors from this reading */
|
|
Packit |
13e616 |
if (pm->subn->opt.perfmgr_log_errors)
|
|
Packit |
13e616 |
perfmgr_log_errors(pm, p_mon_node, port, &err_reading);
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
perfmgr_db_add_err_reading(pm->db, node_guid, port,
|
|
Packit |
13e616 |
&err_reading);
|
|
Packit |
13e616 |
if (!pce_sup)
|
|
Packit |
13e616 |
perfmgr_db_add_dc_reading(pm->db, node_guid, port,
|
|
Packit |
13e616 |
&data_reading, 0);
|
|
Packit |
13e616 |
} else {
|
|
Packit |
13e616 |
perfmgr_db_clear_prev_err(pm->db, node_guid, port);
|
|
Packit |
13e616 |
if (!pce_sup)
|
|
Packit |
13e616 |
perfmgr_db_clear_prev_dc(pm->db, node_guid, port);
|
|
Packit |
13e616 |
}
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
perfmgr_check_overflow(pm, p_mon_node, p_mon_node->port[port].pkey_ix,
|
|
Packit |
13e616 |
port, wire_read, xmit_wait_sup);
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
}
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
#ifdef ENABLE_OSM_PERF_MGR_PROFILE
|
|
Packit |
13e616 |
do {
|
|
Packit |
13e616 |
struct timeval proc_time;
|
|
Packit |
13e616 |
gettimeofday(&proc_time, NULL);
|
|
Packit |
13e616 |
diff_time(&p_madw->context.perfmgr_context.query_start,
|
|
Packit |
13e616 |
&proc_time, &proc_time);
|
|
Packit |
13e616 |
update_mad_stats(&proc_time);
|
|
Packit |
13e616 |
} while (0);
|
|
Packit |
13e616 |
#endif
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
Exit:
|
|
Packit |
13e616 |
osm_mad_pool_put(pm->mad_pool, p_madw);
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
OSM_LOG_EXIT(pm->log);
|
|
Packit |
13e616 |
}
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
/**********************************************************************
|
|
Packit |
13e616 |
* Initialize the PerfMgr object
|
|
Packit |
13e616 |
**********************************************************************/
|
|
Packit |
13e616 |
ib_api_status_t osm_perfmgr_init(osm_perfmgr_t * pm, osm_opensm_t * osm,
|
|
Packit |
13e616 |
const osm_subn_opt_t * p_opt)
|
|
Packit |
13e616 |
{
|
|
Packit |
13e616 |
ib_api_status_t status;
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
OSM_LOG_ENTER(&osm->log);
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
OSM_LOG(&osm->log, OSM_LOG_VERBOSE, "Initializing PerfMgr\n");
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
memset(pm, 0, sizeof(*pm));
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
pm->subn = &osm->subn;
|
|
Packit |
13e616 |
pm->sm = &osm->sm;
|
|
Packit |
13e616 |
pm->log = &osm->log;
|
|
Packit |
13e616 |
pm->mad_pool = &osm->mad_pool;
|
|
Packit |
13e616 |
pm->vendor = osm->p_vendor;
|
|
Packit |
13e616 |
pm->trans_id = PERFMGR_INITIAL_TID_VALUE;
|
|
Packit |
13e616 |
pm->state =
|
|
Packit |
13e616 |
p_opt->perfmgr ? PERFMGR_STATE_ENABLED : PERFMGR_STATE_DISABLE;
|
|
Packit |
13e616 |
pm->sweep_state = PERFMGR_SWEEP_SLEEP;
|
|
Packit |
13e616 |
cl_spinlock_init(&pm->lock);
|
|
Packit |
13e616 |
pm->sweep_time_s = p_opt->perfmgr_sweep_time_s;
|
|
Packit |
13e616 |
pm->max_outstanding_queries = p_opt->perfmgr_max_outstanding_queries;
|
|
Packit |
13e616 |
pm->ignore_cas = p_opt->perfmgr_ignore_cas;
|
|
Packit |
13e616 |
pm->osm = osm;
|
|
Packit |
13e616 |
pm->local_port = -1;
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
status = cl_timer_init(&pm->sweep_timer, perfmgr_sweep, pm);
|
|
Packit |
13e616 |
if (status != IB_SUCCESS)
|
|
Packit |
13e616 |
goto Exit;
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
status = IB_INSUFFICIENT_RESOURCES;
|
|
Packit |
13e616 |
pm->db = perfmgr_db_construct(pm);
|
|
Packit |
13e616 |
if (!pm->db) {
|
|
Packit |
13e616 |
pm->state = PERFMGR_STATE_NO_DB;
|
|
Packit |
13e616 |
goto Exit;
|
|
Packit |
13e616 |
}
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
pm->pc_disp_h = cl_disp_register(&osm->disp, OSM_MSG_MAD_PORT_COUNTERS,
|
|
Packit |
13e616 |
pc_recv_process, pm);
|
|
Packit |
13e616 |
if (pm->pc_disp_h == CL_DISP_INVALID_HANDLE) {
|
|
Packit |
13e616 |
perfmgr_db_destroy(pm->db);
|
|
Packit |
13e616 |
goto Exit;
|
|
Packit |
13e616 |
}
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
init_monitored_nodes(pm);
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
if (pm->state == PERFMGR_STATE_ENABLED)
|
|
Packit |
13e616 |
cl_timer_start(&pm->sweep_timer, pm->sweep_time_s * 1000);
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
pm->rm_nodes = p_opt->perfmgr_rm_nodes;
|
|
Packit |
13e616 |
pm->query_cpi = p_opt->perfmgr_query_cpi;
|
|
Packit |
13e616 |
pm->xmit_wait_log = p_opt->perfmgr_xmit_wait_log;
|
|
Packit |
13e616 |
pm->xmit_wait_threshold = p_opt->perfmgr_xmit_wait_threshold;
|
|
Packit |
13e616 |
status = IB_SUCCESS;
|
|
Packit |
13e616 |
Exit:
|
|
Packit |
13e616 |
OSM_LOG_EXIT(pm->log);
|
|
Packit |
13e616 |
return status;
|
|
Packit |
13e616 |
}
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
/**********************************************************************
|
|
Packit |
13e616 |
* Clear the counters from the db
|
|
Packit |
13e616 |
**********************************************************************/
|
|
Packit |
13e616 |
void osm_perfmgr_clear_counters(osm_perfmgr_t * pm)
|
|
Packit |
13e616 |
{
|
|
Packit |
13e616 |
/**
|
|
Packit |
13e616 |
* FIXME todo issue clear on the fabric?
|
|
Packit |
13e616 |
*/
|
|
Packit |
13e616 |
perfmgr_db_clear_counters(pm->db);
|
|
Packit |
13e616 |
osm_log_v2(pm->log, OSM_LOG_INFO, FILE_ID, "PerfMgr counters cleared\n");
|
|
Packit |
13e616 |
}
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
/*******************************************************************
|
|
Packit |
13e616 |
* Dump the DB information to the file specified
|
|
Packit |
13e616 |
*******************************************************************/
|
|
Packit |
13e616 |
void osm_perfmgr_dump_counters(osm_perfmgr_t * pm, perfmgr_db_dump_t dump_type)
|
|
Packit |
13e616 |
{
|
|
Packit |
13e616 |
char path[256];
|
|
Packit |
13e616 |
char *file_name;
|
|
Packit |
13e616 |
if (pm->subn->opt.event_db_dump_file)
|
|
Packit |
13e616 |
file_name = pm->subn->opt.event_db_dump_file;
|
|
Packit |
13e616 |
else {
|
|
Packit |
13e616 |
snprintf(path, sizeof(path), "%s/%s",
|
|
Packit |
13e616 |
pm->subn->opt.dump_files_dir,
|
|
Packit |
13e616 |
OSM_PERFMGR_DEFAULT_DUMP_FILE);
|
|
Packit |
13e616 |
file_name = path;
|
|
Packit |
13e616 |
}
|
|
Packit |
13e616 |
if (perfmgr_db_dump(pm->db, file_name, dump_type) != 0)
|
|
Packit |
13e616 |
OSM_LOG(pm->log, OSM_LOG_ERROR, "Failed to dump file %s : %s",
|
|
Packit |
13e616 |
file_name, strerror(errno));
|
|
Packit |
13e616 |
}
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
/*******************************************************************
|
|
Packit |
13e616 |
* Print the DB information to the fp specified
|
|
Packit |
13e616 |
*******************************************************************/
|
|
Packit |
13e616 |
void osm_perfmgr_print_counters(osm_perfmgr_t * pm, char *nodename, FILE * fp,
|
|
Packit |
13e616 |
char *port, int err_only)
|
|
Packit |
13e616 |
{
|
|
Packit |
13e616 |
if (nodename) {
|
|
Packit |
13e616 |
char *end = NULL;
|
|
Packit |
13e616 |
uint64_t guid = strtoull(nodename, &end, 0);
|
|
Packit |
13e616 |
if (nodename + strlen(nodename) != end)
|
|
Packit |
13e616 |
perfmgr_db_print_by_name(pm->db, nodename, fp, port,
|
|
Packit |
13e616 |
err_only);
|
|
Packit |
13e616 |
else
|
|
Packit |
13e616 |
perfmgr_db_print_by_guid(pm->db, guid, fp, port,
|
|
Packit |
13e616 |
err_only);
|
|
Packit |
13e616 |
} else
|
|
Packit |
13e616 |
perfmgr_db_print_all(pm->db, fp, err_only);
|
|
Packit |
13e616 |
}
|
|
Packit |
13e616 |
|
|
Packit |
13e616 |
/*******************************************************************
 * Pass a node name update for node_guid on to the PerfMgr database.
 * Nothing is done when pm has no database (pm->db is NULL).
 *******************************************************************/
void osm_perfmgr_update_nodename(osm_perfmgr_t *pm, uint64_t node_guid,
				 char *nodename)
{
	if (!pm->db)
		return;

	perfmgr_db_update_name(pm->db, node_guid, nodename);
}
|
|
Packit |
13e616 |
#endif /* ENABLE_OSM_PERF_MGR */
|