/* This sample demonstrates the use of the SA swcost query * to select a set of nodes/switches for a job * */ // core API #include // swcost query #include // "Edge" Switch typedef struct { STL_LID lid; // identify switches by LID. Easiest when using cost records STL_NODE_RECORD *nodes [48]; // list of hosts attached to this switch int last_host_index; // index in previous list of last host } opa_switch; /* Helper function to retrieve the LID * of the neighbor of the supplied node * Used to determine edge switches, those with hosts attached */ STL_LID get_neighbor (STL_NODE_RECORD *node_record, STL_LINK_RECORD *link_records, int num_link_records) { STL_LID lid1, lid2; int i; for (i = 0; i < num_link_records; ++i) { STL_LINK_RECORD *link_record = &link_records[i]; lid1 = link_record->RID.FromLID; lid2 = link_record->ToLID; if (node_record->RID.LID == lid1) { return lid2; } else if (node_record->RID.LID == lid2) { return lid1; } } return -1; } /* Helper function to determine if list of lids contains * a specific lid * */ int contains (STL_LID * array, STL_LID lid, int array_size) { int i; for (i = 0; i < array_size; ++i){ if (array[i] == lid) return 1; } return 0; } int main(int argc, char **argv) { OMGT_STATUS_T status = OMGT_STATUS_SUCCESS; int exitcode = 0; struct omgt_port *port = NULL; int num_host_records, num_link_records, num_cost_records, num_fabricinfo_records, num_classportinfo_records; STL_LINK_RECORD *link_records = NULL; STL_NODE_RECORD *host_records = NULL; STL_FABRICINFO_RECORD *fabricinfo_records = NULL; STL_SWITCH_COST_RECORD *cost_records = NULL; STL_CLASS_PORT_INFO *classportinfo_records = NULL; opa_switch *edge_switches = NULL; opa_switch **used_switches = NULL; STL_LID *visited_switch_lids = NULL; int last_switch_index = 0; int i, j; int requested_hosts = 0, hosts_needed = 0; int DEBUG = 0; //set to turn on some additional output if (argc > 1) { requested_hosts = strtoul(argv[1], NULL, 0); if (DEBUG) printf("Requested Hosts: %u\n", requested_hosts); } else { fprintf(stderr, "Usage: %s \n", argv[0]); exitcode = 1; goto done; } // create a session status = omgt_open_port_by_num(&port, 1, 1, NULL); if (OMGT_STATUS_SUCCESS != status) { fprintf(stderr, "failed to open port\n"); exitcode = 1; goto done; } omgt_sa_selector_t selector; selector.InputType = InputTypeNoInput; /* Initial set-up, request topology information from SA * * We request FabricInfo, Link, and (HFI) Node records. */ status = omgt_sa_get_fabric_info_records(port, &selector, &num_fabricinfo_records, &fabricinfo_records); if (OMGT_STATUS_SUCCESS != status || num_fabricinfo_records < 1) { fprintf(stderr, "failed to get fabricinfo. MADStatus=0x%x\n", omgt_get_sa_mad_status(port)); exitcode = 1; goto cleanup; } if (fabricinfo_records[0].NumSwitches < 1) { fprintf(stderr, "No switches in fabric\n"); exitcode = 1; goto cleanup; } edge_switches = malloc(sizeof(opa_switch) * fabricinfo_records[0].NumSwitches); // list of switches with hosts used_switches = malloc(sizeof(opa_switch *) * fabricinfo_records[0].NumSwitches); // list of switches used in job visited_switch_lids = malloc(sizeof(STL_LID) * fabricinfo_records[0].NumSwitches); // list of switch lids visited during cost decisions if (!edge_switches || !used_switches || ! visited_switch_lids){ fprintf(stderr, "failed to allocate memory.\n"); exitcode = 1; goto cleanup; } status = omgt_sa_get_link_records(port, &selector, &num_link_records, &link_records); if (OMGT_STATUS_SUCCESS != status) { fprintf(stderr, "failed to execute link record query. MADStatus=0x%x\n", omgt_get_sa_mad_status(port)); exitcode = 1; goto cleanup; } selector.InputType = InputTypeNodeType; // select records by type selector.InputValue.NodeRecord.NodeType = IBA_NODE_CHANNEL_ADAPTER; // select only HFIs (Channel Adapter) status = omgt_sa_get_node_records(port, &selector, &num_host_records, &host_records); if (OMGT_STATUS_SUCCESS != status) { fprintf(stderr, "failed to execute node record query. MADStatus=0x%x\n", omgt_get_sa_mad_status(port)); exitcode = 1; goto cleanup; } if (DEBUG) printf("hosts: %u sws: %u\n", fabricinfo_records[0].NumHFIs, fabricinfo_records[0].NumSwitches); if (requested_hosts > fabricinfo_records[0].NumHFIs) { fprintf(stderr, "Error: Requested number of hosts exceeds number of hosts available\n"); exitcode = 1; goto cleanup; } /* Determine the edge switches in the fabric along with associated hosts*/ for (i = 0; i < num_host_records; ++i) { STL_NODE_RECORD *host_record = &host_records[i]; STL_LID neighbor_lid = get_neighbor(host_record, link_records, num_link_records); if (-1 == neighbor_lid){ fprintf(stderr, "Node missing neighbor\n"); exitcode = 1; goto cleanup; } int in_list = 0; int swIndex = 0; //check if switch in list already for (j = 0; j < last_switch_index; ++j) { if (neighbor_lid == edge_switches[j].lid) { in_list = 1; swIndex = j; break; } } if (!in_list) { edge_switches[last_switch_index++].lid = neighbor_lid; edge_switches[last_switch_index - 1].nodes[0] = host_record; } else { //add this host to switch's host list edge_switches[swIndex].nodes[++edge_switches[swIndex].last_host_index] = host_record; } } if (DEBUG) { printf("Edge Switches:\n"); for (i = 0; i < last_switch_index; ++i) { printf("0x%x\n", edge_switches[i].lid); for (j = 0; j <= edge_switches[i].last_host_index; ++j) { printf("---Host %s\n", edge_switches[i].nodes[j]->NodeDesc.NodeString); } } printf("\n\n"); } /* Determine which hosts to use, attempt to use hosts on a single switch*/ int max_hosts_switch_index = 0, max_hosts = 0; for (i = 0; i < last_switch_index; ++i) { // Save the switch with the max number of hosts for later // use if we need hosts on multiple switches if (edge_switches[i].last_host_index > max_hosts) { max_hosts = edge_switches[i].last_host_index; max_hosts_switch_index = i; } if (edge_switches[i].last_host_index >= requested_hosts - 1) { printf("Job Plan: \nSwitch %u\n", edge_switches[i].lid); printf("-----------------------\n"); for (j = 0; j < requested_hosts; ++j) { printf("---Host %s\n", edge_switches[i].nodes[j]->NodeDesc.NodeString); } goto cleanup; } } //===================PATH COSTS============================ /* There were no switches with the requisite number of hosts, * so use Switch Cost Records to use hosts on least cost * paths. We must check the capability of the SA before using * this feature * * * We can request the "full" cost matrix which includes costs * among all switches (actually only the upper half of the matrix) * OR * We can request one row of the cost matrix. In the following we * request the row corresponding to our previously saved switch. This * gets us the costs from the saved switch to every other switch */ selector.InputType = InputTypeNoInput; status = omgt_sa_get_classportinfo_records(port, &selector, &num_classportinfo_records, &classportinfo_records); if (OMGT_STATUS_SUCCESS != status || num_classportinfo_records != 1) { fprintf(stderr, "failed to execute classportinfo record query. MADStatus=0x%x\n", omgt_get_sa_mad_status(port)); exitcode = 1; goto cleanup; } if (! (classportinfo_records[0].CapMask && STL_SA_CAPABILITY2_SWCOSTRECORD_SUPPORT)) { fprintf(stderr, "SA does not support switchcost records\n"); exitcode = 1; goto cleanup; } selector.InputType = InputTypeLid; // Costs can be looked up by LID selector.InputValue.SwitchCostRecord.Lid = edge_switches[max_hosts_switch_index].lid; status = omgt_sa_get_switchcost_records(port, &selector, &num_cost_records, &cost_records); if (OMGT_STATUS_SUCCESS != status) { fprintf(stderr, "failed to execute cost record query. MADStatus=0x%x\n", omgt_get_sa_mad_status(port)); exitcode = 1; goto cleanup; } hosts_needed = requested_hosts - (max_hosts + 1); used_switches[0] = &edge_switches[max_hosts_switch_index]; // keep track of edge switches we're using STL_LID switch_lid; visited_switch_lids[0] = used_switches[0]->lid; // keep track of switches we've already considered int num_switches = 1, num_lids = 1; while(hosts_needed > 0 && num_switches < last_switch_index) { int min_cost = -1; for (i = 0; i < num_cost_records; ++i) { for (j = 0; j < STL_SWITCH_COST_NUM_ENTRIES; ++j) { if (((min_cost < 0) || (cost_records[i].Cost[j].value < min_cost)) && !contains(visited_switch_lids, cost_records[i].Cost[j].DLID, num_lids) && (cost_records[i].Cost[j].DLID > 0)) { //we should also check that this is an edge switch, this is done below min_cost = cost_records[i].Cost[j].value; switch_lid = cost_records[i].Cost[j].DLID; } } } if (min_cost > 0){ // look up next switch, only include edge switches for (i = 0; i < last_switch_index; ++i) { if (edge_switches[i].lid == switch_lid) { used_switches[num_switches] = &edge_switches[i]; num_switches++; hosts_needed -= (edge_switches[i].last_host_index + 1); break; } } visited_switch_lids[num_lids] = switch_lid; //mark this switch as visited num_lids++; } } if (hosts_needed > 0) { fprintf(stderr, "Error: Could not allocate job hosts\n"); exitcode = 1; goto cleanup; } hosts_needed = requested_hosts; printf("Job Plan: \n"); printf("-----------------------\n"); for (i = 0; i < num_switches; ++i){ printf("Switch %u\n", used_switches[i]->lid); for (j = 0; j <= used_switches[i]->last_host_index; ++j) { printf("---Host %s\n", used_switches[i]->nodes[j]->NodeDesc.NodeString); if (--hosts_needed <= 0) goto cleanup; } } cleanup: if (edge_switches) free(edge_switches); if (used_switches) free(used_switches); if (visited_switch_lids) free(visited_switch_lids); // free our result buffers if (fabricinfo_records) omgt_sa_free_records(fabricinfo_records); if (classportinfo_records) omgt_sa_free_records(classportinfo_records); if (cost_records) omgt_sa_free_records(cost_records); if (host_records) omgt_sa_free_records(host_records); // close our session omgt_close_port(port); done: return exitcode; }