Blame psm_context.c

Packit 961e70
/*
Packit 961e70
Packit 961e70
  This file is provided under a dual BSD/GPLv2 license.  When using or
Packit 961e70
  redistributing this file, you may do so under either license.
Packit 961e70
Packit 961e70
  GPL LICENSE SUMMARY
Packit 961e70
Packit 961e70
  Copyright(c) 2015 Intel Corporation.
Packit 961e70
Packit 961e70
  This program is free software; you can redistribute it and/or modify
Packit 961e70
  it under the terms of version 2 of the GNU General Public License as
Packit 961e70
  published by the Free Software Foundation.
Packit 961e70
Packit 961e70
  This program is distributed in the hope that it will be useful, but
Packit 961e70
  WITHOUT ANY WARRANTY; without even the implied warranty of
Packit 961e70
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
Packit 961e70
  General Public License for more details.
Packit 961e70
Packit 961e70
  Contact Information:
Packit 961e70
  Intel Corporation, www.intel.com
Packit 961e70
Packit 961e70
  BSD LICENSE
Packit 961e70
Packit 961e70
  Copyright(c) 2015 Intel Corporation.
Packit 961e70
Packit 961e70
  Redistribution and use in source and binary forms, with or without
Packit 961e70
  modification, are permitted provided that the following conditions
Packit 961e70
  are met:
Packit 961e70
Packit 961e70
    * Redistributions of source code must retain the above copyright
Packit 961e70
      notice, this list of conditions and the following disclaimer.
Packit 961e70
    * Redistributions in binary form must reproduce the above copyright
Packit 961e70
      notice, this list of conditions and the following disclaimer in
Packit 961e70
      the documentation and/or other materials provided with the
Packit 961e70
      distribution.
Packit 961e70
    * Neither the name of Intel Corporation nor the names of its
Packit 961e70
      contributors may be used to endorse or promote products derived
Packit 961e70
      from this software without specific prior written permission.
Packit 961e70
Packit 961e70
  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
Packit 961e70
  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
Packit 961e70
  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
Packit 961e70
  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
Packit 961e70
  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
Packit 961e70
  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
Packit 961e70
  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
Packit 961e70
  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
Packit 961e70
  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
Packit 961e70
  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
Packit 961e70
  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
Packit 961e70
Packit 961e70
*/
Packit 961e70
Packit 961e70
/* Copyright (c) 2003-2015 Intel Corporation. All rights reserved. */
Packit 961e70
Packit 961e70
#include <sys/types.h>
#include <sys/stat.h>
#include <errno.h>
#include <limits.h>
#include <stdlib.h>
#include "psm_user.h"
#include "psm2_hal.h"
Packit 961e70
Packit 961e70
static int psmi_get_hfi_selection_algorithm(void);
Packit 961e70
Packit 961e70
/*
 * Switch interrupt-driven ("urgent") polling on or off for a context.
 *
 * When the requested state already matches what
 * psmi_hal_has_sw_status(PSM_HAL_PSMI_RUNTIME_INTR_ENABLED) reports, nothing
 * is done.  Otherwise the HAL poll type is changed and, only if that
 * succeeds, the software status flag is updated to match.
 *
 * Returns PSM2_OK on success or when no change was needed, and
 * PSM2_EP_NO_RESOURCES when psmi_hal_poll_type() reports a non-zero result.
 */
psm2_error_t psmi_context_interrupt_set(psmi_context_t *context, int enable)
{
	const int want_intr = (enable != 0);
	const int have_intr =
		(psmi_hal_has_sw_status(PSM_HAL_PSMI_RUNTIME_INTR_ENABLED) != 0);
	int poll_type = 0;
	int rc;

	/* Already in the requested state. */
	if (want_intr == have_intr)
		return PSM2_OK;

	if (want_intr)
		poll_type = PSMI_HAL_POLL_TYPE_URGENT;

	rc = psmi_hal_poll_type(poll_type, context->psm_hw_ctxt);
	if (rc != 0)
		return PSM2_EP_NO_RESOURCES;

	/* Record the new state only after the HAL accepted it. */
	if (want_intr) {
		psmi_hal_add_sw_status(PSM_HAL_PSMI_RUNTIME_INTR_ENABLED);
	} else {
		psmi_hal_sub_sw_status(PSM_HAL_PSMI_RUNTIME_INTR_ENABLED);
	}

	return PSM2_OK;
}
Packit 961e70
Packit 961e70
/*
 * Report whether interrupt-driven polling is enabled, as given by
 * psmi_hal_has_sw_status(PSM_HAL_PSMI_RUNTIME_INTR_ENABLED).
 * The context argument is not consulted.
 */
int psmi_context_interrupt_isenabled(psmi_context_t *context)
{
	const int intr_enabled =
		psmi_hal_has_sw_status(PSM_HAL_PSMI_RUNTIME_INTR_ENABLED);

	return intr_enabled;
}
Packit 961e70
Packit 961e70
/* Returns 1 when every active unit has as many free contexts as it has
 * contexts, and 0 as soon as one active unit does not.  A result of 1 is
 * taken as an indication that no jobs are currently running.
 *
 * Units for which either count is not positive are not compared at all.
 * NOTE(review): this also skips a unit reporting 0 free contexts, which
 * looks like a fully used unit -- confirm that this is intended.
 *
 * Note that this check is inherently racy: it may run concurrently in two
 * or more processes, and the observation made here happens before the
 * decision on which context to assign, which in turn happens before the
 * context is actually assigned.  Once a context is finally assigned, the
 * free-context count observed below changes.
 */
static int psmi_all_active_units_have_max_freecontexts(int nunits)
{
	int unit = 0;

	while (unit < nunits) {
		if (psmi_hal_get_unit_active(unit) > 0) {
			const int free_ctxts = psmi_hal_get_num_free_contexts(unit);
			const int total_ctxts = psmi_hal_get_num_contexts(unit);

			if (free_ctxts > 0 && total_ctxts > 0 &&
			    free_ctxts != total_ctxts)
				return 0;
		}
		unit++;
	}

	return 1;
}
Packit 961e70
Packit 961e70
/* Returns the non-negative integer value of environment variable `env`.
 *
 * Returns 0 when the variable is unset or empty, when it does not begin
 * with a decimal number, when the number is negative, or when it does not
 * fit in an int.  Characters following a leading number are ignored, as
 * atoi() would ignore them.  errno is left unchanged. */
static int psmi_get_envvar(const char *env)
{
	const char *env_val = getenv(env);
	char *end = NULL;
	long parsed;
	int saved_errno;
	int out_of_range;

	if (env_val == NULL || *env_val == '\0')
		return 0;

	/* strtol() rather than atoi(): atoi() has undefined behavior when the
	 * value cannot be represented, strtol() reports it through errno. */
	saved_errno = errno;
	errno = 0;
	parsed = strtol(env_val, &end, 10);
	out_of_range = (errno == ERANGE);
	errno = saved_errno;

	if (end == env_val || out_of_range)
		return 0;
	if (parsed < 0 || parsed > INT_MAX)
		return 0;

	return (int)parsed;
}
Packit 961e70
Packit 961e70
/* returns the 8-bit hash value of an uuid. */
Packit 961e70
static inline
Packit 961e70
uint8_t
Packit 961e70
psmi_get_uuid_hash(psm2_uuid_t const uuid)
Packit 961e70
{
Packit 961e70
	int i;
Packit 961e70
	uint8_t hashed_uuid = 0;
Packit 961e70
Packit 961e70
	for (i=0; i < sizeof(psm2_uuid_t); ++i)
Packit 961e70
		hashed_uuid ^= *((uint8_t const *)uuid + i);
Packit 961e70
Packit 961e70
	return hashed_uuid;
Packit 961e70
}
Packit 961e70
Packit 961e70
/*
 * Returns the NUMA node id that numa_node_of_cpu() reports for the CPU
 * returned by sched_getcpu(), or -EINVAL when either lookup yields a
 * negative value.
 *
 * Declared with an explicit (void) parameter list so the definition is a
 * real prototype; it takes no arguments.
 */
int psmi_get_current_proc_location(void)
{
	int core_id, node_id;

	core_id = sched_getcpu();
	if (core_id < 0)
		return -EINVAL;

	node_id = numa_node_of_cpu(core_id);
	if (node_id < 0)
		return -EINVAL;

	return node_id;
}
Packit 961e70
Packit 961e70
static void
Packit 961e70
psmi_spread_hfi_selection(psm2_uuid_t const job_key, long *unit_start,
Packit 961e70
			     long *unit_end, int nunits)
Packit 961e70
{
Packit 961e70
	/* if the number of ranks on the host is 1 and ... */
Packit 961e70
	if ((psmi_get_envvar("MPI_LOCALNRANKS") == 1) &&
Packit 961e70
		/*
Packit 961e70
		 * All of the active units have free contexts equal the
Packit 961e70
		 * number of contexts.
Packit 961e70
		 */
Packit 961e70
	    psmi_all_active_units_have_max_freecontexts(nunits)) {
Packit 961e70
		/* we start looking at unit 0, and end at nunits-1: */
Packit 961e70
		*unit_start = 0;
Packit 961e70
		*unit_end = nunits - 1;
Packit 961e70
	} else {
Packit 961e70
		/* else, we are going to look at:
Packit 961e70
		   (a hash of the job key plus the local rank id) mod nunits. */
Packit 961e70
Packit 961e70
		*unit_start = (psmi_get_envvar("MPI_LOCALRANKID") +
Packit 961e70
			psmi_get_uuid_hash(job_key)) % nunits;
Packit 961e70
		if (*unit_start > 0)
Packit 961e70
			*unit_end = *unit_start - 1;
Packit 961e70
		else
Packit 961e70
			*unit_end = nunits-1;
Packit 961e70
	}
Packit 961e70
}
Packit 961e70
Packit 961e70
static int
Packit 961e70
psmi_create_and_open_affinity_shm(psm2_uuid_t const job_key)
Packit 961e70
{
Packit 961e70
	int shm_fd, ret;
Packit 961e70
	int first_to_create = 0;
Packit 961e70
	size_t shm_name_len = 256;
Packit 961e70
	shared_affinity_ptr = NULL;
Packit 961e70
	affinity_shm_name = NULL;
Packit 961e70
	affinity_shm_name = (char *) psmi_malloc(PSMI_EP_NONE, UNDEFINED, shm_name_len);
Packit 961e70
Packit 961e70
	psmi_assert_always(affinity_shm_name != NULL);
Packit 961e70
	snprintf(affinity_shm_name, shm_name_len,
Packit 961e70
		 AFFINITY_SHM_BASENAME".%d",
Packit 961e70
		 psmi_get_uuid_hash(job_key));
Packit 961e70
	shm_fd = shm_open(affinity_shm_name, O_RDWR | O_CREAT | O_EXCL,
Packit 961e70
			  S_IRUSR | S_IWUSR);
Packit 961e70
	if ((shm_fd < 0) && (errno == EEXIST)) {
Packit 961e70
		shm_fd = shm_open(affinity_shm_name, O_RDWR, S_IRUSR | S_IWUSR);
Packit 961e70
		if (shm_fd < 0) {
Packit 961e70
			_HFI_VDBG("Cannot open affinity shared mem fd:%s, errno=%d\n",
Packit 961e70
				  affinity_shm_name, errno);
Packit 961e70
			return shm_fd;
Packit 961e70
		}
Packit 961e70
	} else if (shm_fd > 0) {
Packit 961e70
		first_to_create = 1;
Packit 961e70
	} else {
Packit 961e70
		_HFI_VDBG("Cannot create affinity shared mem fd:%s, errno=%d\n",
Packit 961e70
			  affinity_shm_name, errno);
Packit 961e70
	}
Packit 961e70
Packit 961e70
	ret = ftruncate(shm_fd, AFFINITY_SHMEMSIZE);
Packit Service 7ed5cc
	if ( ret < 0 ) {
Packit Service 7ed5cc
		_HFI_VDBG("Cannot truncate affinity shared mem fd:%s, errno=%d\n",
Packit Service 7ed5cc
			affinity_shm_name, errno);
Packit Service 7ed5cc
		if (shm_fd >= 0) close(shm_fd);
Packit 961e70
		return ret;
Packit Service 7ed5cc
	}
Packit 961e70
Packit 961e70
	shared_affinity_ptr = (uint64_t *) mmap(NULL, AFFINITY_SHMEMSIZE, PROT_READ | PROT_WRITE,
Packit 961e70
					MAP_SHARED, shm_fd, 0);
Packit 961e70
	if (shared_affinity_ptr == MAP_FAILED) {
Packit 961e70
		_HFI_VDBG("Cannot mmap affinity shared memory. errno=%d\n",
Packit 961e70
			  errno);
Packit 961e70
		close(shm_fd);
Packit 961e70
		return -1;
Packit 961e70
	}
Packit 961e70
	close(shm_fd);
Packit 961e70
Packit 961e70
	psmi_affinity_shared_file_opened = 1;
Packit 961e70
Packit 961e70
	if (first_to_create) {
Packit 961e70
		_HFI_VDBG("Creating shm to store HFI affinity per socket\n");
Packit 961e70
Packit 961e70
		memset(shared_affinity_ptr, 0, AFFINITY_SHMEMSIZE);
Packit 961e70
Packit 961e70
		/*
Packit 961e70
		 * Once shm object is initialized, unlock others to be able to
Packit 961e70
		 * use it.
Packit 961e70
		 */
Packit 961e70
		psmi_sem_post(sem_affinity_shm_rw, sem_affinity_shm_rw_name);
Packit 961e70
	} else {
Packit 961e70
		_HFI_VDBG("Opening shm object to read/write HFI affinity per socket\n");
Packit 961e70
	}
Packit 961e70
Packit 961e70
	/*
Packit 961e70
	 * Start critical section to increment reference count when creating
Packit 961e70
	 * or opening shm object. Decrement of ref count will be done before
Packit 961e70
	 * closing the shm.
Packit 961e70
	 */
Packit 961e70
	if (psmi_sem_timedwait(sem_affinity_shm_rw, sem_affinity_shm_rw_name)) {
Packit 961e70
		_HFI_VDBG("Could not enter critical section to update shm refcount\n");
Packit 961e70
		return -1;
Packit 961e70
	}
Packit 961e70
Packit 961e70
	shared_affinity_ptr[AFFINITY_SHM_REF_COUNT_LOCATION] += 1;
Packit 961e70
Packit 961e70
	/* End critical section */
Packit 961e70
	psmi_sem_post(sem_affinity_shm_rw, sem_affinity_shm_rw_name);
Packit 961e70
Packit 961e70
	return 0;
Packit 961e70
}
Packit 961e70
Packit 961e70
/*
Packit 961e70
 * Spread HFI selection between units if we find more than one within a socket.
Packit 961e70
 */
Packit 961e70
static void
Packit 961e70
psmi_spread_hfi_within_socket(long *unit_start, long *unit_end, int node_id,
Packit 961e70
			      int *saved_hfis, int found, psm2_uuid_t const job_key)
Packit 961e70
{
Packit 961e70
	int ret, shm_location;
Packit 961e70
Packit 961e70
	/*
Packit 961e70
	 * Take affinity lock and open shared memory region to be able to
Packit 961e70
	 * accurately determine which HFI to pick for this process. If any
Packit 961e70
	 * issues, bail by picking first known HFI.
Packit 961e70
	 */
Packit 961e70
	if (!psmi_affinity_semaphore_open)
Packit 961e70
		goto spread_hfi_fallback;
Packit 961e70
Packit 961e70
	ret = psmi_create_and_open_affinity_shm(job_key);
Packit 961e70
	if (ret < 0)
Packit 961e70
		goto spread_hfi_fallback;
Packit 961e70
Packit 961e70
	shm_location = AFFINITY_SHM_HFI_INDEX_LOCATION + node_id;
Packit 961e70
	if (shm_location > AFFINITY_SHMEMSIZE)
Packit 961e70
		goto spread_hfi_fallback;
Packit 961e70
Packit 961e70
	/* Start critical section to read/write shm object */
Packit 961e70
	if (psmi_sem_timedwait(sem_affinity_shm_rw, sem_affinity_shm_rw_name)) {
Packit 961e70
		_HFI_VDBG("Could not enter critical section to update HFI index\n");
Packit 961e70
		goto spread_hfi_fallback;
Packit 961e70
	}
Packit 961e70
Packit 961e70
	*unit_start = *unit_end = shared_affinity_ptr[shm_location];
Packit 961e70
	shared_affinity_ptr[shm_location] =
Packit 961e70
		(shared_affinity_ptr[shm_location] + 1) % found;
Packit 961e70
	_HFI_VDBG("Selected HFI index= %ld, Next HFI=%ld, node = %d, local rank=%d, found=%d.\n",
Packit 961e70
		  *unit_start, shared_affinity_ptr[shm_location], node_id,
Packit 961e70
		  psmi_get_envvar("MPI_LOCALRANKID"), found);
Packit 961e70
Packit 961e70
	/* End Critical Section */
Packit 961e70
	psmi_sem_post(sem_affinity_shm_rw, sem_affinity_shm_rw_name);
Packit 961e70
Packit 961e70
	return;
Packit 961e70
Packit 961e70
spread_hfi_fallback:
Packit 961e70
	*unit_start = *unit_end = saved_hfis[0];
Packit 961e70
}
Packit 961e70
Packit 961e70
static void
Packit 961e70
psmi_create_affinity_semaphores(psm2_uuid_t const job_key)
Packit 961e70
{
Packit 961e70
	int ret;
Packit 961e70
	sem_affinity_shm_rw_name = NULL;
Packit 961e70
	size_t sem_len = 256;
Packit 961e70
Packit 961e70
	/*
Packit 961e70
	 * If already opened, no need to do anything else.
Packit 961e70
	 * This could be true for Multi-EP cases where a different thread has
Packit 961e70
	 * already created the semaphores. We don't need separate locks here as
Packit 961e70
	 * we are protected by the overall "psmi_creation_lock" which each
Packit 961e70
	 * thread will take in psm2_ep_open()
Packit 961e70
	 */
Packit 961e70
	if (psmi_affinity_semaphore_open)
Packit 961e70
		return;
Packit 961e70
Packit 961e70
	sem_affinity_shm_rw_name = (char *) psmi_malloc(PSMI_EP_NONE, UNDEFINED, sem_len);
Packit 961e70
	psmi_assert_always(sem_affinity_shm_rw_name != NULL);
Packit 961e70
	snprintf(sem_affinity_shm_rw_name, sem_len,
Packit 961e70
		 SEM_AFFINITY_SHM_RW_BASENAME".%d",
Packit 961e70
		 psmi_get_uuid_hash(job_key));
Packit 961e70
Packit 961e70
	ret = psmi_init_semaphore(&sem_affinity_shm_rw, sem_affinity_shm_rw_name,
Packit 961e70
				  S_IRUSR | S_IWUSR, 0);
Packit 961e70
	if (ret) {
Packit 961e70
		_HFI_VDBG("Cannot initialize semaphore: %s for read-write access to shm object.\n",
Packit 961e70
			  sem_affinity_shm_rw_name);
Packit 961e70
		sem_close(sem_affinity_shm_rw);
Packit 961e70
		psmi_free(sem_affinity_shm_rw_name);
Packit 961e70
		sem_affinity_shm_rw_name = NULL;
Packit 961e70
		return;
Packit 961e70
	}
Packit 961e70
Packit 961e70
	_HFI_VDBG("Semaphore: %s created for read-write access to shm object.\n",
Packit 961e70
		  sem_affinity_shm_rw_name);
Packit 961e70
Packit 961e70
	psmi_affinity_semaphore_open = 1;
Packit 961e70
Packit 961e70
	return;
Packit 961e70
}
Packit 961e70
Packit 961e70
static
Packit 961e70
psm2_error_t
Packit 961e70
psmi_compute_start_and_end_unit(long unit_param,int nunitsactive,int nunits,
Packit 961e70
				psm2_uuid_t const job_key,
Packit 961e70
				long *unit_start,long *unit_end)
Packit 961e70
{
Packit 961e70
	unsigned short hfi_sel_alg = PSMI_UNIT_SEL_ALG_ACROSS;
Packit 961e70
	int node_id, unit_id, found = 0;
Packit 961e70
	int saved_hfis[nunits];
Packit 961e70
Packit 961e70
	/* if the user did not set HFI_UNIT then ... */
Packit 961e70
	if (unit_param == HFI_UNIT_ID_ANY)
Packit 961e70
	{
Packit 961e70
		/* Get the actual selection algorithm from the environment: */
Packit 961e70
		hfi_sel_alg = psmi_get_hfi_selection_algorithm();
Packit 961e70
		/* If round-robin is selection algorithm and ... */
Packit 961e70
		if ((hfi_sel_alg == PSMI_UNIT_SEL_ALG_ACROSS) &&
Packit 961e70
		    /* there are more than 1 active units then ... */
Packit 961e70
		    (nunitsactive > 1))
Packit 961e70
		{
Packit 961e70
			/*
Packit 961e70
			 * Pick first HFI we find on same root complex
Packit 961e70
			 * as current task. If none found, fall back to
Packit 961e70
			 * load-balancing algorithm.
Packit 961e70
			 */
Packit 961e70
			node_id = psmi_get_current_proc_location();
Packit 961e70
			if (node_id >= 0) {
Packit 961e70
				for (unit_id = 0; unit_id < nunits; unit_id++) {
Packit 961e70
					if (psmi_hal_get_unit_active(unit_id) <= 0)
Packit 961e70
						continue;
Packit 961e70
Packit 961e70
					int node_id_i;
Packit 961e70
Packit 961e70
					if (!psmi_hal_get_node_id(unit_id, &node_id_i)) {
Packit 961e70
						if (node_id_i == node_id) {
Packit 961e70
							saved_hfis[found] = unit_id;
Packit 961e70
							found++;
Packit 961e70
						}
Packit 961e70
					}
Packit 961e70
				}
Packit 961e70
Packit 961e70
				if (found > 1) {
Packit 961e70
					psmi_create_affinity_semaphores(job_key);
Packit 961e70
					psmi_spread_hfi_within_socket(unit_start, unit_end,
Packit 961e70
								      node_id, saved_hfis,
Packit 961e70
								      found, job_key);
Packit 961e70
				} else if (found == 1) {
Packit 961e70
					*unit_start = *unit_end = saved_hfis[0];
Packit 961e70
				}
Packit 961e70
			}
Packit 961e70
Packit 961e70
			if (node_id < 0 || !found) {
Packit 961e70
				psmi_spread_hfi_selection(job_key, unit_start,
Packit 961e70
							  unit_end, nunits);
Packit 961e70
			}
Packit 961e70
		} else if ((hfi_sel_alg == PSMI_UNIT_SEL_ALG_ACROSS_ALL) &&
Packit 961e70
			 (nunitsactive > 1)) {
Packit 961e70
				psmi_spread_hfi_selection(job_key, unit_start,
Packit 961e70
							  unit_end, nunits);
Packit 961e70
		}
Packit 961e70
		else {
Packit 961e70
			*unit_start = 0;
Packit 961e70
			*unit_end = nunits - 1;
Packit 961e70
		}
Packit 961e70
	} else if (unit_param >= 0) {
Packit 961e70
		/* the user specified HFI_UNIT, we use it. */
Packit 961e70
		*unit_start = *unit_end = unit_param;
Packit 961e70
	} else {
Packit 961e70
		psmi_handle_error(NULL, PSM2_EP_DEVICE_FAILURE,
Packit 961e70
				 "PSM2 can't open unit: %ld for reading and writing",
Packit 961e70
				 unit_param);
Packit 961e70
		return PSM2_EP_DEVICE_FAILURE;
Packit 961e70
	}
Packit 961e70
Packit 961e70
	return PSM2_OK;
Packit 961e70
}
Packit 961e70
Packit 961e70
psm2_error_t
Packit 961e70
psmi_context_open(const psm2_ep_t ep, long unit_param, long port,
Packit 961e70
		  psm2_uuid_t const job_key, int64_t timeout_ns,
Packit 961e70
		  psmi_context_t *context)
Packit 961e70
{
Packit 961e70
	long open_timeout = 0, unit_start, unit_end, unit_id, unit_id_prev;
Packit 961e70
	psm2_error_t err = PSM2_OK;
Packit 961e70
	int nunits = psmi_hal_get_num_units(), nunitsactive=0;
Packit 961e70
Packit 961e70
	/*
Packit 961e70
	 * If shared contexts are enabled, try our best to schedule processes
Packit 961e70
	 * across one or many devices
Packit 961e70
	 */
Packit 961e70
Packit 961e70
	/* if no units, then no joy. */
Packit 961e70
	if (nunits <= 0)
Packit 961e70
	{
Packit 961e70
		err = psmi_handle_error(NULL, PSM2_EP_DEVICE_FAILURE,
Packit 961e70
					"PSM2 no hfi units are available");
Packit 961e70
		goto ret;
Packit 961e70
	}
Packit 961e70
Packit 961e70
	/* Calculate the number of active units: */
Packit 961e70
	for (unit_id=0;unit_id < nunits;unit_id++)
Packit 961e70
	{
Packit 961e70
		if (psmi_hal_get_unit_active(unit_id) > 0)
Packit 961e70
			nunitsactive++;
Packit 961e70
	}
Packit 961e70
	/* if no active units, then no joy. */
Packit 961e70
	if (nunitsactive == 0)
Packit 961e70
	{
Packit 961e70
		err = psmi_handle_error(NULL, PSM2_EP_DEVICE_FAILURE,
Packit 961e70
					"PSM2 no hfi units are active");
Packit 961e70
		goto ret;
Packit 961e70
	}
Packit 961e70
	if (timeout_ns > 0)
Packit 961e70
		open_timeout = (long)(timeout_ns / MSEC_ULL);
Packit 961e70
Packit 961e70
Packit 961e70
	unit_start = 0; unit_end = nunits - 1;
Packit 961e70
	err = psmi_compute_start_and_end_unit(unit_param, nunitsactive,
Packit 961e70
					      nunits, job_key,
Packit 961e70
					      &unit_start, &unit_end);
Packit 961e70
	if (err != PSM2_OK)
Packit 961e70
		return err;
Packit 961e70
Packit 961e70
	/* this is the start of a loop that starts at unit_start and goes to unit_end.
Packit 961e70
	   but note that the way the loop computes the loop control variable is by
Packit 961e70
	   an expression involving the mod operator. */
Packit 961e70
	int success = 0;
Packit 961e70
	unit_id_prev = unit_id = unit_start;
Packit 961e70
	do
Packit 961e70
	{
Packit 961e70
		/* close previous opened unit fd before attempting open of current unit. */
Packit 961e70
		if (psmi_hal_get_fd(context->psm_hw_ctxt) > 0) {
Packit 961e70
			psmi_hal_close_context(&context->psm_hw_ctxt);
Packit 961e70
			context->psm_hw_ctxt = 0;
Packit 961e70
		}
Packit 961e70
Packit 961e70
		/* if the unit_id is not active, go to next one. */
Packit 961e70
		if (psmi_hal_get_unit_active(unit_id) <= 0) {
Packit 961e70
			unit_id_prev = unit_id;
Packit 961e70
			unit_id = (unit_id + 1) % nunits;
Packit 961e70
			continue;
Packit 961e70
		}
Packit 961e70
Packit 961e70
		/* open this unit. */
Packit 961e70
		int rv = psmi_hal_context_open(unit_id, port, open_timeout,
Packit 961e70
					       ep, job_key, context,
Packit 961e70
					       psmi_hal_has_sw_status(PSM_HAL_PSMI_RUNTIME_RX_THREAD_STARTED),
Packit 961e70
					       HAL_CONTEXT_OPEN_RETRY_MAX);
Packit 961e70
Packit 961e70
		/* go to next unit if failed to open. */
Packit 961e70
		if (rv || context->psm_hw_ctxt == NULL) {
Packit 961e70
			unit_id_prev = unit_id;
Packit 961e70
			unit_id = (unit_id + 1) % nunits;
Packit 961e70
			continue;
Packit 961e70
		}
Packit 961e70
Packit 961e70
		success = 1;
Packit 961e70
		break;
Packit 961e70
Packit 961e70
	} while (unit_id_prev != unit_end);
Packit 961e70
Packit 961e70
	if (!success)
Packit 961e70
	{
Packit 961e70
		err = psmi_handle_error(NULL, PSM2_EP_DEVICE_FAILURE,
Packit 961e70
					"PSM2 can't open hfi unit: %ld",unit_param);
Packit 961e70
		goto bail;
Packit 961e70
	}
Packit 961e70
Packit 961e70
	context->ep = (psm2_ep_t) ep;
Packit 961e70
Packit 961e70
	/* Check backward compatibility bits here and save the info */
Packit 961e70
	if (psmi_hal_has_cap(PSM_HAL_CAP_GPUDIRECT_OT))
Packit Service 7ed5cc
	{
Packit Service 7ed5cc
#ifdef PSM_CUDA
Packit 961e70
		is_driver_gpudirect_enabled = 1;
Packit Service 7ed5cc
#else
Packit Service 7ed5cc
		psmi_handle_error(PSMI_EP_NORETURN, PSM2_INTERNAL_ERR, "FATAL ERROR: "
Packit Service 7ed5cc
				  "CUDA version of hfi1 driver is loaded with non-CUDA version of "
Packit Service 7ed5cc
				  "psm2 library.\n");
Packit Service 7ed5cc
#endif
Packit Service 7ed5cc
	}
Packit Service 7ed5cc
#ifdef PSM_CUDA
Packit Service 7ed5cc
	else
Packit Service 7ed5cc
		fprintf(stderr,"WARNING: running CUDA version of libpsm2 with non CUDA version of hfi1 driver.\n");
Packit 961e70
#endif
Packit 961e70
	_HFI_VDBG("hfi_userinit() passed.\n");
Packit 961e70
Packit 961e70
	/* Fetch hw parameters from HAL (that were obtained during opening the context above. */
Packit 961e70
Packit 961e70
	int lid           = psmi_hal_get_lid(context->psm_hw_ctxt);
Packit 961e70
	ep->unit_id       = psmi_hal_get_unit_id(context->psm_hw_ctxt);
Packit 961e70
	ep->portnum       = psmi_hal_get_port_num(context->psm_hw_ctxt);
Packit 961e70
	ep->gid_lo        = psmi_hal_get_gid_lo(context->psm_hw_ctxt);
Packit 961e70
	ep->gid_hi        = psmi_hal_get_gid_hi(context->psm_hw_ctxt);
Packit 961e70
	int ctxt          = psmi_hal_get_context(context->psm_hw_ctxt);
Packit 961e70
	int subctxt       = psmi_hal_get_subctxt(context->psm_hw_ctxt);
Packit 961e70
	uint32_t hfi_type = psmi_hal_get_hfi_type(context->psm_hw_ctxt);
Packit 961e70
	context->ep       = (psm2_ep_t) ep;
Packit 961e70
Packit 961e70
	/* Construct epid for this Endpoint */
Packit 961e70
Packit 961e70
	switch (PSMI_EPID_VERSION) {
Packit 961e70
		case PSMI_EPID_V1:
Packit 961e70
			context->epid = PSMI_EPID_PACK_V1(lid, ctxt,
Packit 961e70
								subctxt,
Packit 961e70
								ep->unit_id,
Packit 961e70
								PSMI_EPID_VERSION, 0x3ffffff);
Packit 961e70
			break;
Packit 961e70
		case PSMI_EPID_V2:
Packit 961e70
			context->epid = PSMI_EPID_PACK_V2(lid, ctxt,
Packit 961e70
								subctxt,
Packit 961e70
								PSMI_EPID_IPS_SHM, /*Not a only-shm epid */
Packit 961e70
								PSMI_EPID_VERSION, ep->gid_hi);
Packit 961e70
			break;
Packit 961e70
		default:
Packit 961e70
			/* Epid version is greater than max supportd version. */
Packit 961e70
			psmi_assert_always(PSMI_EPID_VERSION <= PSMI_EPID_V2);
Packit 961e70
			break;
Packit 961e70
	}
Packit 961e70
Packit 961e70
	_HFI_VDBG
Packit 961e70
	    ("construct epid: lid %d ctxt %d subctxt %d hcatype %d mtu %d\n",
Packit 961e70
	     lid, ctxt,
Packit 961e70
	     subctxt, hfi_type, ep->mtu);
Packit 961e70
Packit 961e70
	goto ret;
Packit 961e70
Packit 961e70
bail:
Packit 961e70
	_HFI_PRDBG("open failed: unit_id: %ld, err: %d (%s)\n", unit_id, err, strerror(errno));
Packit 961e70
	if (psmi_hal_get_fd(context->psm_hw_ctxt) > 0)
Packit 961e70
		psmi_hal_close_context(&context->psm_hw_ctxt);
Packit 961e70
ret:
Packit 961e70
Packit 961e70
	_HFI_VDBG("psmi_context_open() return %d\n", err);
Packit 961e70
	return err;
Packit 961e70
}
Packit 961e70
Packit 961e70
/*
 * Close the hardware context held by `context` when psmi_hal_get_fd()
 * reports a positive descriptor for it.  Always returns PSM2_OK.
 */
psm2_error_t psmi_context_close(psmi_context_t *context)
{
	const int has_open_fd = (psmi_hal_get_fd(context->psm_hw_ctxt) > 0);

	if (has_open_fd) {
		psmi_hal_close_context(&context->psm_hw_ctxt);
	}

	return PSM2_OK;
}
Packit 961e70
Packit 961e70
/*
Packit 961e70
 * This function works whether a context is initialized or not in a psm2_ep.
Packit 961e70
 *
Packit 961e70
 * Returns one of
Packit 961e70
 *
Packit 961e70
 * PSM2_OK: Port status is ok (or context not initialized yet but still "ok")
Packit 961e70
 * PSM2_OK_NO_PROGRESS: Cable pulled
Packit 961e70
 * PSM2_EP_NO_NETWORK: No network, no lid, ...
Packit 961e70
 * PSM2_EP_DEVICE_FAILURE: Chip failures, rxe/txe parity, etc.
Packit 961e70
 * The message follows the per-port status
Packit 961e70
 * As of 7322-ready driver, need to check port-specific qword for IB
Packit 961e70
 * as well as older unit-only.  For now, we don't have the port interface
Packit 961e70
 * defined, so just check port 0 qword for spi_status
Packit 961e70
 */
Packit 961e70
psm2_error_t psmi_context_check_status(const psmi_context_t *contexti)
Packit 961e70
{
Packit 961e70
	psm2_error_t err = PSM2_OK;
Packit 961e70
	psmi_context_t *context = (psmi_context_t *) contexti;
Packit 961e70
	char *errmsg = NULL;
Packit 961e70
	uint64_t status = psmi_hal_get_hw_status(context->psm_hw_ctxt);
Packit 961e70
Packit 961e70
	/* Fatal chip-related errors */
Packit 961e70
	if (!(status & PSM_HAL_HW_STATUS_CHIP_PRESENT) ||
Packit 961e70
	    !(status & PSM_HAL_HW_STATUS_INITTED) ||
Packit 961e70
	    (status & PSM_HAL_HW_STATUS_HWERROR)) {
Packit 961e70
Packit 961e70
		err = PSM2_EP_DEVICE_FAILURE;
Packit 961e70
		if (err != context->status_lasterr) {	/* report once */
Packit 961e70
			volatile char *errmsg_sp="no err msg";
Packit 961e70
Packit 961e70
			psmi_hal_get_hw_status_freezemsg(&errmsg_sp,
Packit 961e70
							 context->psm_hw_ctxt);
Packit 961e70
Packit 961e70
			if (*errmsg_sp)
Packit 961e70
				psmi_handle_error(context->ep, err,
Packit 961e70
						  "Hardware problem: %s",
Packit 961e70
						  errmsg_sp);
Packit 961e70
			else {
Packit 961e70
				if (status & PSM_HAL_HW_STATUS_HWERROR)
Packit 961e70
					errmsg = "Hardware error";
Packit 961e70
				else
Packit 961e70
					errmsg = "Hardware not found";
Packit 961e70
Packit 961e70
				psmi_handle_error(context->ep, err,
Packit 961e70
						  "%s", errmsg);
Packit 961e70
			}
Packit 961e70
		}
Packit 961e70
	}
Packit 961e70
	/* Fatal network-related errors with timeout: */
Packit 961e70
	else if (!(status & PSM_HAL_HW_STATUS_IB_CONF) ||
Packit 961e70
		 !(status & PSM_HAL_HW_STATUS_IB_READY)) {
Packit 961e70
		err = PSM2_EP_NO_NETWORK;
Packit 961e70
		if (err != context->status_lasterr) {	/* report once */
Packit 961e70
			context->networkLostTime = time(NULL);
Packit 961e70
		}
Packit 961e70
		else
Packit 961e70
		{
Packit 961e70
			time_t now = time(NULL);
Packit 961e70
			static const double seventySeconds = 70.0;
Packit 961e70
Packit 961e70
			/* The linkup time duration for a system should allow the time needed
Packit 961e70
			   to complete 3 LNI passes which is:
Packit 961e70
			   50 seconds for a passive copper channel
Packit 961e70
			   65 seconds for optical channel.
Packit 961e70
			   (we add 5 seconds of margin.) */
Packit 961e70
			if (difftime(now,context->networkLostTime) > seventySeconds)
Packit 961e70
			{
Packit 961e70
				volatile char *errmsg_sp="no err msg";
Packit 961e70
Packit 961e70
				psmi_hal_get_hw_status_freezemsg(&errmsg_sp,
Packit 961e70
								 context->psm_hw_ctxt);
Packit 961e70
Packit 961e70
				psmi_handle_error(context->ep, err, "%s",
Packit 961e70
						  *errmsg_sp ? errmsg_sp :
Packit 961e70
						  "Network down");
Packit 961e70
			}
Packit 961e70
		}
Packit 961e70
	}
Packit 961e70
Packit 961e70
	if (err == PSM2_OK && context->status_lasterr != PSM2_OK)
Packit 961e70
		context->status_lasterr = PSM2_OK;	/* clear error */
Packit 961e70
	else if (err != PSM2_OK)
Packit 961e70
		context->status_lasterr = err;	/* record error */
Packit 961e70
Packit 961e70
	return err;
Packit 961e70
}
Packit 961e70
Packit 961e70
static
Packit 961e70
int psmi_get_hfi_selection_algorithm(void)
Packit 961e70
{
Packit 961e70
	union psmi_envvar_val env_hfi1_alg;
Packit 961e70
	int hfi1_alg = PSMI_UNIT_SEL_ALG_ACROSS;
Packit 961e70
Packit 961e70
	/* If a specific unit is set in the environment, use that one. */
Packit 961e70
	psmi_getenv("HFI_SELECTION_ALG",
Packit 961e70
		    "HFI Device Selection Algorithm to use. Round Robin (Default) "
Packit 961e70
		    ", Packed or Round Robin All.",
Packit 961e70
		    PSMI_ENVVAR_LEVEL_USER, PSMI_ENVVAR_TYPE_STR,
Packit 961e70
		    (union psmi_envvar_val)"Round Robin", &env_hfi1_alg);
Packit 961e70
Packit 961e70
	if (!strcasecmp(env_hfi1_alg.e_str, "Round Robin"))
Packit 961e70
		hfi1_alg = PSMI_UNIT_SEL_ALG_ACROSS;
Packit 961e70
	else if (!strcasecmp(env_hfi1_alg.e_str, "Packed"))
Packit 961e70
		hfi1_alg = PSMI_UNIT_SEL_ALG_WITHIN;
Packit 961e70
	else if (!strcasecmp(env_hfi1_alg.e_str, "Round Robin All"))
Packit 961e70
		hfi1_alg = PSMI_UNIT_SEL_ALG_ACROSS_ALL;
Packit 961e70
	else {
Packit 961e70
		_HFI_ERROR
Packit 961e70
		    ("Unknown HFI selection algorithm %s. Defaulting to Round Robin "
Packit 961e70
		     "allocation of HFIs.\n", env_hfi1_alg.e_str);
Packit 961e70
		hfi1_alg = PSMI_UNIT_SEL_ALG_ACROSS;
Packit 961e70
	}
Packit 961e70
Packit 961e70
	return hfi1_alg;
Packit 961e70
}