Blob Blame History Raw
/* This sample demonstrates the use of the SA swcost query
 * to select a set of nodes/switches for a job
 *
 */
// core API
#include <opamgt.h>
// swcost query
#include <opamgt_sa.h>

// "Edge" Switch
typedef struct {
	STL_LID lid; // identify switches by LID. Easiest when using cost records
	STL_NODE_RECORD *nodes [48]; // list of hosts attached to this switch
	int last_host_index; // index in previous list of last host
} opa_switch;


/* Helper function to retrieve the LID
 * of the neighbor of the supplied node
 * Used to determine edge switches, those with hosts attached
 */
STL_LID get_neighbor (STL_NODE_RECORD *node_record, STL_LINK_RECORD *link_records, int num_link_records)
{
	STL_LID lid1, lid2;
	int i;
	for (i = 0; i < num_link_records; ++i) {
		STL_LINK_RECORD *link_record = &link_records[i];
		lid1 = link_record->RID.FromLID;
		lid2 = link_record->ToLID;

		if (node_record->RID.LID == lid1) {
			return lid2;
		} else if (node_record->RID.LID == lid2) {
			return lid1;
		}
	}

	return -1;
}

/* Helper function to determine if list of lids contains
 * a specific lid
 * */
int contains (STL_LID * array, STL_LID lid, int array_size)
{
	int i;
	for (i = 0; i < array_size; ++i){
		if (array[i] == lid) return 1;
	}
	return 0;
}

int main(int argc, char **argv)
{
	OMGT_STATUS_T status = OMGT_STATUS_SUCCESS;
	int exitcode = 0;

	struct omgt_port *port = NULL;
	int num_host_records, num_link_records, num_cost_records, num_fabricinfo_records, num_classportinfo_records;

	STL_LINK_RECORD *link_records = NULL;
	STL_NODE_RECORD *host_records = NULL;
	STL_FABRICINFO_RECORD *fabricinfo_records = NULL;
	STL_SWITCH_COST_RECORD *cost_records = NULL;
	STL_CLASS_PORT_INFO *classportinfo_records = NULL;

	opa_switch *edge_switches = NULL;
	opa_switch **used_switches = NULL;
	STL_LID *visited_switch_lids = NULL;


	int last_switch_index = 0;
	int i, j;
	int requested_hosts = 0, hosts_needed = 0;
	int DEBUG = 0; //set to turn on some additional output

	if (argc > 1) {
		requested_hosts = strtoul(argv[1], NULL, 0);
		if (DEBUG) printf("Requested Hosts: %u\n", requested_hosts);
	} else {
		fprintf(stderr, "Usage: %s <num_hosts>\n", argv[0]);
		exitcode = 1;
		goto done;
	}

	// create a session
	status = omgt_open_port_by_num(&port, 1, 1, NULL);
	if (OMGT_STATUS_SUCCESS != status) {
		fprintf(stderr, "failed to open port\n");
		exitcode = 1;
		goto done;
	}

	omgt_sa_selector_t selector;
	selector.InputType = InputTypeNoInput;

	/* Initial set-up, request topology information from SA
	 *
	 * We request FabricInfo, Link, and (HFI) Node records.
	 */
	status = omgt_sa_get_fabric_info_records(port, &selector, &num_fabricinfo_records, &fabricinfo_records);
	if (OMGT_STATUS_SUCCESS != status || num_fabricinfo_records < 1) {
		fprintf(stderr, "failed to get fabricinfo. MADStatus=0x%x\n", omgt_get_sa_mad_status(port));
		exitcode = 1;
		goto cleanup;
	}

	if (fabricinfo_records[0].NumSwitches < 1) {
		fprintf(stderr, "No switches in fabric\n");
		exitcode = 1;
		goto cleanup;
	}

	edge_switches = malloc(sizeof(opa_switch) * fabricinfo_records[0].NumSwitches); // list of switches with hosts
	used_switches = malloc(sizeof(opa_switch *) * fabricinfo_records[0].NumSwitches); // list of switches used in job
	visited_switch_lids = malloc(sizeof(STL_LID) * fabricinfo_records[0].NumSwitches); // list of switch lids visited during cost decisions

	if (!edge_switches || !used_switches || ! visited_switch_lids){
		fprintf(stderr, "failed to allocate memory.\n");
		exitcode = 1;
		goto cleanup;
	}

	status = omgt_sa_get_link_records(port, &selector, &num_link_records, &link_records);
	if (OMGT_STATUS_SUCCESS != status) {
		fprintf(stderr, "failed to execute link record query. MADStatus=0x%x\n", omgt_get_sa_mad_status(port));
		exitcode = 1;
		goto cleanup;
	}

	selector.InputType = InputTypeNodeType; // select records by type
	selector.InputValue.NodeRecord.NodeType = IBA_NODE_CHANNEL_ADAPTER; // select only HFIs (Channel Adapter)
	status = omgt_sa_get_node_records(port, &selector, &num_host_records, &host_records);
	if (OMGT_STATUS_SUCCESS != status) {
		fprintf(stderr, "failed to execute node record query. MADStatus=0x%x\n", omgt_get_sa_mad_status(port));
		exitcode = 1;
		goto cleanup;
	}

	if (DEBUG) printf("hosts: %u sws: %u\n", fabricinfo_records[0].NumHFIs, fabricinfo_records[0].NumSwitches);
	if (requested_hosts > fabricinfo_records[0].NumHFIs) {
		fprintf(stderr, "Error: Requested number of hosts exceeds number of hosts available\n");
		exitcode = 1;
		goto cleanup;
	}

	/* Determine the edge switches in the fabric along with associated hosts*/
	for (i = 0; i < num_host_records; ++i) {
		STL_NODE_RECORD *host_record = &host_records[i];

		STL_LID neighbor_lid = get_neighbor(host_record, link_records, num_link_records);
		if (-1 == neighbor_lid){
			fprintf(stderr, "Node missing neighbor\n");
			exitcode = 1;
			goto cleanup;
		}
		int in_list = 0;
		int swIndex = 0;
		//check if switch in list already
		for (j = 0; j < last_switch_index; ++j) {
			if (neighbor_lid == edge_switches[j].lid) {
				in_list = 1;
				swIndex = j;
				break;
			}
		}
		if (!in_list) {
			edge_switches[last_switch_index++].lid = neighbor_lid;
			edge_switches[last_switch_index - 1].nodes[0] = host_record;

		} else {
			//add this host to switch's host list
			edge_switches[swIndex].nodes[++edge_switches[swIndex].last_host_index] = host_record;
		}
	}

	if (DEBUG) {
		printf("Edge Switches:\n");
		for (i = 0; i < last_switch_index; ++i) {
			printf("0x%x\n", edge_switches[i].lid);
			for (j = 0; j <= edge_switches[i].last_host_index; ++j) {
				printf("---Host %s\n", edge_switches[i].nodes[j]->NodeDesc.NodeString);
			}
		}
		printf("\n\n");
	}


	/* Determine which hosts to use, attempt to use hosts on a single switch*/
	int max_hosts_switch_index = 0, max_hosts = 0;
	for (i = 0; i < last_switch_index; ++i) {
		// Save the switch with the max number of hosts for later
		// use if we need hosts on multiple switches
		if (edge_switches[i].last_host_index > max_hosts) {
			max_hosts = edge_switches[i].last_host_index;
			max_hosts_switch_index = i;
		}
		if (edge_switches[i].last_host_index >= requested_hosts - 1) {
			printf("Job Plan: \nSwitch %u\n", edge_switches[i].lid);
			printf("-----------------------\n");
			for (j = 0; j < requested_hosts; ++j) {
				printf("---Host %s\n", edge_switches[i].nodes[j]->NodeDesc.NodeString);
			}
			goto cleanup;
		}
	}

	//===================PATH COSTS============================
	/* There were no switches with the requisite number of hosts,
	 * so use Switch Cost Records to use hosts on least cost
	 * paths. We must check the capability of the SA before using
	 * this feature
	 *
	 *
	 * We can request the "full" cost matrix which includes costs
	 * among all switches (actually only the upper half of the matrix)
	 * OR
	 * We can request one row of the cost matrix. In the following we
	 * request the row corresponding to our previously saved switch. This
	 * gets us the costs from the saved switch to every other switch
	 */

	selector.InputType = InputTypeNoInput;
	status = omgt_sa_get_classportinfo_records(port, &selector, &num_classportinfo_records, &classportinfo_records);
	if (OMGT_STATUS_SUCCESS != status || num_classportinfo_records != 1) {
		fprintf(stderr, "failed to execute classportinfo record query. MADStatus=0x%x\n", omgt_get_sa_mad_status(port));
		exitcode = 1;
		goto cleanup;
	}

	if (! (classportinfo_records[0].CapMask && STL_SA_CAPABILITY2_SWCOSTRECORD_SUPPORT)) {
		fprintf(stderr, "SA does not support switchcost records\n");
		exitcode = 1;
		goto cleanup;
	}

	selector.InputType = InputTypeLid; // Costs can be looked up by LID
	selector.InputValue.SwitchCostRecord.Lid = edge_switches[max_hosts_switch_index].lid;

	status = omgt_sa_get_switchcost_records(port, &selector, &num_cost_records, &cost_records);
	if (OMGT_STATUS_SUCCESS != status) {
		fprintf(stderr, "failed to execute cost record query. MADStatus=0x%x\n", omgt_get_sa_mad_status(port));
		exitcode = 1;
		goto cleanup;
	}

	hosts_needed = requested_hosts - (max_hosts + 1);
	used_switches[0] = &edge_switches[max_hosts_switch_index]; // keep track of edge switches we're using
	STL_LID switch_lid;
	visited_switch_lids[0] = used_switches[0]->lid;  // keep track of switches we've already considered
	int num_switches = 1, num_lids = 1;
	while(hosts_needed > 0 && num_switches < last_switch_index) {
		int min_cost = -1;
		for (i = 0; i < num_cost_records; ++i) {
			for (j = 0; j < STL_SWITCH_COST_NUM_ENTRIES; ++j) {
				if (((min_cost < 0) || (cost_records[i].Cost[j].value < min_cost)) &&
						!contains(visited_switch_lids, cost_records[i].Cost[j].DLID, num_lids) &&
						(cost_records[i].Cost[j].DLID > 0)) {
					//we should also check that this is an edge switch, this is done below
					min_cost = cost_records[i].Cost[j].value;
					switch_lid = cost_records[i].Cost[j].DLID;
				}
			}
		}

		if (min_cost > 0){
			// look up next switch, only include edge switches
			for (i = 0; i < last_switch_index; ++i) {
				if (edge_switches[i].lid == switch_lid) {
					used_switches[num_switches] = &edge_switches[i];
					num_switches++;

					hosts_needed -= (edge_switches[i].last_host_index + 1);
					break;
				}
			}
			visited_switch_lids[num_lids] = switch_lid; //mark this switch as visited
			num_lids++;
		}
	}

	if (hosts_needed > 0) {
		fprintf(stderr, "Error: Could not allocate job hosts\n");
		exitcode = 1;
		goto cleanup;
	}

	hosts_needed = requested_hosts;
	printf("Job Plan: \n");
	printf("-----------------------\n");
	for (i = 0; i < num_switches; ++i){
		printf("Switch %u\n", used_switches[i]->lid);
		for (j = 0; j <= used_switches[i]->last_host_index; ++j) {
			printf("---Host %s\n", used_switches[i]->nodes[j]->NodeDesc.NodeString);
			if (--hosts_needed <= 0) goto cleanup;
		}
	}


cleanup:
	if (edge_switches) free(edge_switches);
	if (used_switches) free(used_switches);
	if (visited_switch_lids) free(visited_switch_lids);

	// free our result buffers
	if (fabricinfo_records) omgt_sa_free_records(fabricinfo_records);
	if (classportinfo_records) omgt_sa_free_records(classportinfo_records);
	if (cost_records) omgt_sa_free_records(cost_records);
	if (host_records) omgt_sa_free_records(host_records);

	// close our session
	omgt_close_port(port);

done:
	return exitcode;
}