/* Query SA for Switch Cost Matrix data * and print it to a file. Monitor for any * changes and update. * */ // core API #include // swcost query #include // fabric change notice #include #include typedef struct opa_switch{ STL_LID lid; const char * name; struct opa_switch *next; }opa_switch; FILE * matrix_file; void free_cost(uint16_t ***cost, int num_rows){ int i; if (!*cost) return; for(i = 0; i < num_rows; ++i) { if ((*cost)[i]) { free((*cost)[i]); (*cost)[i] = NULL; } } free(*cost); *cost = NULL; } void free_switch_list(opa_switch **switchlist_head) { if (!*switchlist_head) return; opa_switch * temp = *switchlist_head, *temp2; while(temp){ temp2 = temp->next; free(temp); temp = temp2; } *switchlist_head = NULL; } /* Helper function to get the associated name for a particular lid */ const char * get_name(STL_LID lid, STL_NODE_RECORD *node_records, int num_node_records) { int i; for (i = 0; i < num_node_records; ++i) { if (node_records[i].RID.LID == lid) return (const char*) node_records[i].NodeDesc.NodeString; } return ""; } /* Print Cost Matrix using the Switch Names */ void print_matrix(uint16_t **cost, opa_switch *switchlist_head) { fprintf(matrix_file, "%64s", ""); opa_switch * iter = switchlist_head; while(iter){ // Node Descriptions are up to 64 bytes fprintf(matrix_file, "%64s", iter->name); iter = iter->next; } fprintf(matrix_file, "\n"); iter = switchlist_head; opa_switch * iter2; while(iter) { fprintf(matrix_file, "%-64s", iter->name); iter2 = switchlist_head; while(iter2) { if(cost[iter->lid]) fprintf(matrix_file, "%64d" , cost[iter->lid][iter2->lid]); iter2 = iter2->next; } iter = iter->next; fprintf(matrix_file, "\n"); } fprintf(matrix_file, "\n\n\n"); } int main(int argc, char **argv) { OMGT_STATUS_T status = OMGT_STATUS_SUCCESS; int exitcode = 0; struct omgt_port *port = NULL; omgt_sa_selector_t selector; int num_classportinfo_records, num_fabricinfo_records, num_cost_records, num_switch_records; int num_nodes = 0; STL_CLASS_PORT_INFO *classportinfo_records = NULL; STL_FABRICINFO_RECORD *fabricinfo_records = NULL; STL_SWITCH_COST_RECORD *cost_records = NULL; STL_NODE_RECORD *switch_records = NULL; STL_NOTICE *notice = NULL; size_t notice_len = 0; struct omgt_port *context = NULL; uint16_t **cost = NULL; int i,j; if (argc < 2) { fprintf(stderr, "Usage: %s \n", argv[0]); exitcode = 1; goto done; } matrix_file = fopen(argv[1], "w"); if (!matrix_file) { fprintf(stderr, "could not open output file for writing\n"); exitcode = 1; goto done; } // create a session status = omgt_open_port_by_num(&port, 1, 1, NULL); if (OMGT_STATUS_SUCCESS != status) { fprintf(stderr, "failed to open port\n"); exitcode = 1; goto close_file; } // Determine if SA has Switch Cost Record capabiility selector.InputType = InputTypeNoInput; status = omgt_sa_get_classportinfo_records(port, &selector, &num_classportinfo_records, &classportinfo_records); if (OMGT_STATUS_SUCCESS != status || num_classportinfo_records != 1) { fprintf(stderr, "failed to execute classportinfo record query. MADStatus=0x%x\n", omgt_get_sa_mad_status(port)); exitcode = 1; goto close_port; } if (! (classportinfo_records[0].CapMask && STL_SA_CAPABILITY2_SWCOSTRECORD_SUPPORT)) { fprintf(stderr, "SA does not support switchcost records\n"); exitcode = 1; goto close_port; } // Register for Cost Matrix Change Trap if ((status = omgt_sa_register_trap(port, STL_TRAP_COST_MATRIX_CHANGE, port)) != OMGT_STATUS_SUCCESS) { fprintf(stderr, " Error: Could not register for Trap %u: %s (%u)\n", STL_TRAP_COST_MATRIX_CHANGE, omgt_status_totext(status), status); exitcode = 1; goto close_port; } opa_switch *head_switch = NULL, *temp_switch = NULL; while(!exitcode){ selector.InputType = InputTypeNoInput; /* Query SA for fabric information to determine how big to make * our own cost matrix storage */ status = omgt_sa_get_fabric_info_records(port, &selector, &num_fabricinfo_records, &fabricinfo_records); if (OMGT_STATUS_SUCCESS != status || num_fabricinfo_records < 1) { fprintf(stderr, "failed to get fabricinfo. MADStatus=0x%x\n", omgt_get_sa_mad_status(port)); exitcode = 1; goto cleanup; } /* Using total number of nodes will allocate more space than necessary, * but allows simpler processing of cost data */ num_nodes = fabricinfo_records[0].NumSwitches + fabricinfo_records[0].NumHFIs; if ((cost = calloc(1, num_nodes * sizeof(uint16_t*))) == NULL) { fprintf(stderr, "failed to allocate memory\n"); exitcode = 1; goto cleanup; } /* Query SA for switch records so we can have the names of the switches * as the cost records are defined with LIDs */ selector.InputType = InputTypeNodeType; // select records by type selector.InputValue.NodeRecord.NodeType = IBA_NODE_SWITCH; // select only Switches status = omgt_sa_get_node_records(port, &selector, &num_switch_records, &switch_records); if (OMGT_STATUS_SUCCESS != status) { fprintf(stderr, "failed to execute node record query. MADStatus=0x%x\n", omgt_get_sa_mad_status(port)); exitcode = 1; goto cleanup; } /* Create a linked list of switches in this fabric */ for (i = 0; i < num_switch_records; ++i) { opa_switch * next_switch; if ((next_switch = malloc(sizeof(opa_switch))) == NULL) { fprintf(stderr, "failed to allocate memory\n"); exitcode = 1; goto cleanup; } next_switch->lid = switch_records[i].RID.LID; next_switch->name = get_name(switch_records[i].RID.LID, switch_records, num_switch_records); next_switch->next = NULL; if (temp_switch) temp_switch->next = next_switch; else head_switch = next_switch; temp_switch = next_switch; } temp_switch = NULL; // reset selector to appropriate type for cost query selector.InputType = InputTypeNoInput; status = omgt_sa_get_switchcost_records(port, &selector, &num_cost_records, &cost_records); if (OMGT_STATUS_SUCCESS != status) { fprintf(stderr, "failed to execute cost record query. MADStatus=0x%x\n", omgt_get_sa_mad_status(port)); exitcode = 1; goto cleanup; } /* Copy returned Cost matrix data into our own format * When requesting the entire matrix, only the lower * diagonal is returned as it is symmetric */ int slid, dlid; STL_SWITCH_COST_RECORD cost_record; for (i = 0; i < num_cost_records; ++i) { cost_record = cost_records[i]; slid = cost_record.SLID; /* Cost records contain 64 entries, but all may not be filled * Checking the DLID is > 0 ensures the entry is valid */ for (j = 0; j < STL_SWITCH_COST_NUM_ENTRIES && cost_record.Cost[j].DLID > 0; ++j) { dlid = cost_record.Cost[j].DLID; // Initialize all entries in cost to zero. // The entries of interest will be overwritten with // actual cost values from the SA if (!cost[slid]) { if ((cost[slid] = calloc(1, num_nodes * sizeof(uint16_t))) == NULL) { fprintf(stderr, "failed to allocate memory\n"); exitcode = 1; goto cleanup; } } if (!cost[dlid]) { if ((cost[dlid] = calloc(1, num_nodes * sizeof(uint16_t))) == NULL) { fprintf(stderr, "failed to allocate memory\n"); exitcode = 1; goto cleanup; } } // add this cost to matrix for slid/dlid cost[slid][dlid] = cost[dlid][slid] = cost_record.Cost[j].value; } } print_matrix(cost, head_switch); printf("\nMonitoring for any changes...\n"); fflush(matrix_file); // monitor cluster for changes, -1 = indefinite wait time if ((status = omgt_sa_get_notice_report(port, ¬ice, ¬ice_len, (void **)&context, -1)) != OMGT_STATUS_SUCCESS) { fprintf(stderr, "Error: Could not wait for Notice: %s (%u)\n", omgt_status_totext(status), status); exitcode = 1; goto cleanup; } // we only registered for Cost Matrix Change notice, if we got something else it's an error if (notice->Attributes.Generic.TrapNumber != STL_TRAP_COST_MATRIX_CHANGE) { fprintf(stderr, "Unhandled Trap Received: %u\n", notice->Attributes.Generic.TrapNumber); exitcode = 1; goto cleanup; } cleanup: if (notice) { free(notice); notice = NULL; } if (fabricinfo_records) { omgt_sa_free_records(fabricinfo_records); fabricinfo_records = NULL; } if (cost_records) { omgt_sa_free_records(cost_records); cost_records = NULL; } if (switch_records) { omgt_sa_free_records(switch_records); switch_records = NULL; } free_switch_list(&head_switch); free_cost(&cost, num_nodes); } //end while if ((status = omgt_sa_unregister_trap(port, STL_TRAP_COST_MATRIX_CHANGE)) != OMGT_STATUS_SUCCESS) { fprintf(stderr, "Error: Could not unregister for Trap %u: %s (%u)\n", STL_TRAP_COST_MATRIX_CHANGE, omgt_status_totext(status), status); if (!exitcode) exitcode = 1; } close_port: if (classportinfo_records) omgt_sa_free_records(classportinfo_records); //close our session omgt_close_port(port); close_file: fclose(matrix_file); done: return exitcode; }