/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
/*
* (C) 2001 by Argonne National Laboratory.
* See COPYRIGHT in top-level directory.
*/
#include "mpiimpl.h"
/* style:PMPIuse:PMPI_Get_processor_name:2 sig:0 */
/* style:PMPIuse:PMPI_Recv:2 sig:0 */
/* style:PMPIuse:PMPI_Ssend:2 sig:0 */
/* style: allow:printf:1 sig:0 */
/* For getpid */
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif
/*
=== BEGIN_MPI_T_CVAR_INFO_BLOCK ===
cvars:
- name : MPIR_CVAR_PROCTABLE_SIZE
category : DEBUGGER
type : int
default : 64
class : device
verbosity : MPI_T_VERBOSITY_USER_BASIC
scope : MPI_T_SCOPE_ALL_EQ
description : >-
Size of the "MPIR" debugger interface proctable (process table).
- name : MPIR_CVAR_PROCTABLE_PRINT
category : DEBUGGER
type : boolean
default : false
class : device
verbosity : MPI_T_VERBOSITY_USER_BASIC
scope : MPI_T_SCOPE_ALL_EQ
description : >-
If true, dump the proctable entries at MPII_Wait_for_debugger-time.
=== END_MPI_T_CVAR_INFO_BLOCK ===
*/
/* There are two versions of the debugger startup:
1. The debugger starts mpiexec - then mpiexec provides the MPIR_proctable
information
2. The debugger attaches to an MPI process which contains the
MPIR_proctable and related variables
This file is intended to provide both as an option. The macros that
control the code for these are
MPICH_STARTER_MPIEXEC
MPICH_STARTER_RANK0
*/
#define MPICH_STARTER_MPIEXEC
/* #define MPICH_STARTER_RANK0 */
#ifdef MPICH_STARTER_RANK0
#define MPIU_PROCTABLE_NEEDED 1
#define MPIU_BREAKPOINT_NEEDED 1
#endif
/* If MPIR_Breakpoint is not defined and called, the message queue information
will not be properly displayed by the debugger. */
/* I believe this was caused by a poor choice in the dll_mpich.c file */
/* #define MPIU_BREAKPOINT_NEEDED 1 */
#ifdef MPIU_BREAKPOINT_NEEDED
/* We prototype this routine here because it is only used in this file. It
is not static so that the debugger can find it (the debugger will set a
breakpoint at this routine */
void *MPIR_Breakpoint(void);
#endif
/*
* This file contains information and routines used to simplify the interface
* to a debugger. This follows the description in "A Standard Interface
* for Debugger Access to Message Queue Information in MPI", by Jim Cownie
* and William Gropp.
*
* This file should be compiled with debug information (-g)
*/
/*
* In addition to the discussion in the paper "A Standard Interface for Debugger
* Access to Message Queue Inforation in MPI" and the more recent paper "An
* Interface to Support the Identification of Dynamic {MPI} 2 Processes for
* Scalable Parallel Debugging", there are a few features that have become
* defacto standard. These include the "proctable" (a relic of the way
* that p4 represented processes that was used in the ch_p4 device in
* MPICH1), a debugger state (has MPI started or exited), and a routine that
* has the sole purpose of serving as a break point for a debugger.
* Specifically, these extensions are:
*
* void * MPIR_Breakpoint(void)
*
* This routine should be called at any point where control should be
* offered to the debugger. Typical spots are in MPI_Init/MPI_Init_thread
* after initialization is completed and in MPI_Abort before exiting.
*
* MPIR_Debugger_set_aborting(const char *msg)
*
* This routine should be called when MPI is exiting (either in finalize
* or abort. If a message is provided, it will call MPIR_Breakpoint.
* This routine sets the variables MPIR_debug_state and MPIR_debug_abort_string.
*
* In MPICH1, the variables MPIR_debug_state, MPIR_debug_abort_string,
* MPIR_being_debugged, and MPIR_debug_gate where exported globally.
* In MPICH, while these are global variables (so that the debugger can
* find them easily), they are not explicitly exported or referenced outside
* of a few routines. In particular, MPID_Abort uses MPIR_Debugger_set_aborting
* instead of directly accessing these variables.
*/
/* The following is used to tell a debugger the location of the shared
library that the debugger can load in order to access information about
the parallel program, such as message queues */
#ifdef HAVE_DEBUGGER_SUPPORT
#ifdef MPICH_INFODLL_LOC
char MPIR_dll_name[] = MPICH_INFODLL_LOC;
#endif
#endif
/*
* The following variables are used to interact with the debugger.
*
* MPIR_debug_state
* Values are 0 (before MPI_Init), 1 (after MPI_init), and 2 (Aborting).
* MPIR_debug_gate
* The debugger will set this to 1 when the debugger attaches
* to the process to tell the process to proceed.
* MPIR_being_debugged
* Set to 1 if the process is started or attached under the debugger
* MPIR_debug_abort_string
* String that the debugger can display on an abort.
*/
volatile int MPIR_debug_state = 0;
volatile int MPIR_debug_gate = 0;
volatile int MPIR_being_debugged = 0;
const char *MPIR_debug_abort_string = 0;
/* Values for the debug_state, this seems to be all we need at the moment
* but that may change...
*/
#define MPIR_DEBUG_SPAWNED 1
#define MPIR_DEBUG_ABORTING 2
#ifdef MPIU_PROCTABLE_NEEDED
/*
* MPIR_PROCDESC is used to pass information to the debugger about
* all of the processes.
*/
typedef struct {
char *host_name; /* Valid name for inet_addr */
char *executable_name; /* The name of the image */
int pid; /* The process id */
} MPIR_PROCDESC;
MPIR_PROCDESC *MPIR_proctable = 0;
int MPIR_proctable_size = 1;
static int MPIR_FreeProctable(void *);
#endif /* MPIR_proctable definition */
/* Other symbols:
* MPIR_i_am_starter - Indicates that this process is not an MPI process
* (for example, the forker mpiexec?)
* MPIR_acquired_pre_main -
* MPIR_partial_attach_ok -
*/
/* Forward references */
static void SendqInit(void);
static int SendqFreePool(void *);
/*
* If MPICH is built with the --enable-debugger option, MPI_Init and
* MPI_Init_thread will call MPII_Wait_for_debugger. This ensures both that
* the debugger can gather information on the MPI job before the MPI_Init
* returns to the user and that the necessary symbols for providing
* information such as message queues is available.
*
* In addition, the environment variable MPIEXEC_DEBUG, if set, will cause
* all MPI processes to wait in this routine until the variable
* MPIR_debug_gate is set to 1.
*/
void MPII_Wait_for_debugger(void)
{
#ifdef MPIU_PROCTABLE_NEEDED
int rank = MPIR_Process.comm_world->rank;
int size = MPIR_Process.comm_world->local_size;
int i, maxsize;
/* FIXME: In MPICH, the executables may not have the information
* on the other processes; this is part of the Process Manager Interface
* (PMI). We need another way to provide this information to
* a debugger */
/* The process manager probably has all of this data - the MPI2
* debugger interface API provides (at least originally) a way
* to access this. */
/* Also, to avoid scaling problems, we only populate the first 64
* entries (default) */
maxsize = MPIR_CVAR_PROCTABLE_SIZE;
if (maxsize > size)
maxsize = size;
if (rank == 0) {
char hostname[MPI_MAX_PROCESSOR_NAME + 1];
int hostlen;
int val;
MPIR_proctable = (MPIR_PROCDESC *) MPL_malloc(size * sizeof(MPIR_PROCDESC), MPL_MEM_DEBUG);
for (i = 0; i < size; i++) {
/* Initialize the proctable */
MPIR_proctable[i].host_name = 0;
MPIR_proctable[i].executable_name = 0;
MPIR_proctable[i].pid = -1;
}
PMPI_Get_processor_name(hostname, &hostlen);
MPIR_proctable[0].host_name = (char *) MPL_strdup(hostname);
MPIR_proctable[0].executable_name = 0;
MPIR_proctable[0].pid = getpid();
for (i = 1; i < maxsize; i++) {
int msg[2];
PMPI_Recv(msg, 2, MPI_INT, i, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
MPIR_proctable[i].pid = msg[1];
MPIR_proctable[i].host_name = (char *) MPL_malloc(msg[0] + 1, MPL_MEM_DEBUG);
PMPI_Recv(MPIR_proctable[i].host_name, msg[0] + 1, MPI_CHAR,
i, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
MPIR_proctable[i].host_name[msg[0]] = 0;
}
MPIR_proctable_size = size;
/* Debugging hook */
if (MPIR_CVAR_PROCTABLE_PRINT) {
for (i = 0; i < maxsize; i++) {
printf("PT[%d].pid = %d, .host_name = %s\n",
i, MPIR_proctable[i].pid, MPIR_proctable[i].host_name);
}
fflush(stdout);
}
MPIR_Add_finalize(MPIR_FreeProctable, MPIR_proctable, 0);
} else {
char hostname[MPI_MAX_PROCESSOR_NAME + 1];
int hostlen;
int mypid = getpid();
int msg[2];
if (rank < maxsize) {
PMPI_Get_processor_name(hostname, &hostlen);
msg[0] = hostlen;
msg[1] = mypid;
/* Deliver to the root process the proctable information */
PMPI_Ssend(msg, 2, MPI_INT, 0, 0, MPI_COMM_WORLD);
PMPI_Ssend(hostname, hostlen, MPI_CHAR, 0, 0, MPI_COMM_WORLD);
}
}
#endif /* MPIU_PROCTABLE_NEEDED */
/* Put the breakpoint after setting up the proctable */
MPIR_debug_state = MPIR_DEBUG_SPAWNED;
#ifdef MPIU_BREAKPOINT_NEEDED
(void) MPIR_Breakpoint();
#endif
/* After we exit the MPIR_Breakpoint routine, the debugger may have
* set variables such as MPIR_being_debugged */
/* Initialize the sendq support */
SendqInit();
if (getenv("MPIEXEC_DEBUG")) {
while (!MPIR_debug_gate);
}
}
#ifdef MPIU_BREAKPOINT_NEEDED
/*
* This routine is a special dummy routine that is used to provide a
* location for a debugger to set a breakpoint on, allowing a user (and the
* debugger) to attach to MPI processes after MPI_Init succeeds but before
* MPI_Init returns control to the user. It may also be called when MPI aborts,
* also to allow a debugger to regain control of an application.
*
* This routine can also initialize any datastructures that are required
*
*/
void *MPIR_Breakpoint(void)
{
MPL_DBG_MSG(MPIR_DBG_OTHER, VERBOSE, "In MPIR_Breakpoint");
return 0;
}
#endif
/*
* Call this routine to signal to the debugger that the application is aborting.
* If there is an abort message, call the MPIR_Breakpoint routine (which
* allows a tool such as a debugger to gain control.
*/
void MPIR_Debugger_set_aborting(const char *msg)
{
MPIR_debug_abort_string = (char *) msg;
MPIR_debug_state = MPIR_DEBUG_ABORTING;
#ifdef MPIU_BREAKPOINT_NEEDED
if (msg)
MPIR_Breakpoint();
#endif
}
/* ------------------------------------------------------------------------- */
/*
* Manage the send queue.
*
* The send queue is needed only by the debugger. The communication
* device has a separate notion of send queue, which are the operations
* that it needs to complete, independent of whether the user has called
* MPI_Wait/Test/etc on the request.
*
* This implementation uses a simple linked list of user-visible requests
* (more specifically, requests created with MPI_Isend, MPI_Issend, or
* MPI_Irsend).
*
* FIXME: We should exploit this to allow Finalize to report on
* send requests that were never completed.
*/
/* We need to save the tag and rank since this information may not
be included in the request. Saving the context_id also simplifies
matching these entries with a communicator */
typedef struct MPIR_Sendq {
MPIR_Request *sreq;
int tag, rank, context_id;
struct MPIR_Sendq *next;
struct MPIR_Sendq *prev;
} MPIR_Sendq;
MPIR_Sendq *MPIR_Sendq_head = 0;
/* Keep a pool of previous sendq elements to speed allocation of queue
elements */
static MPIR_Sendq *pool = 0;
/* This routine is used to establish a queue of send requests to allow the
debugger easier access to the active requests. Some devices may be able
to provide this information without requiring this separate queue. */
void MPII_Sendq_remember(MPIR_Request * req, int rank, int tag, int context_id)
{
#if defined HAVE_DEBUGGER_SUPPORT
MPIR_Sendq *p;
/* TODO: We reuse the global lock for the per-vni granularity here instead of a
* theoritically more scalable approach of creating a separate lock. Whether
* this brief-global critical section would perturbate debugging is unknown;
* investigation is needed before attempting to optimize this case. */
MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
MPID_THREAD_CS_ENTER(POBJ, req->pobj_mutex);
if (pool) {
p = pool;
pool = p->next;
} else {
p = (MPIR_Sendq *) MPL_malloc(sizeof(MPIR_Sendq), MPL_MEM_DEBUG);
if (!p) {
/* Just ignore it */
if (MPIR_REQUEST_KIND__SEND == req->kind)
req->u.send.dbg_next = NULL;
else if (MPIR_REQUEST_KIND__PREQUEST_SEND == req->kind)
req->u.persist.dbg_next = NULL;
goto fn_exit;
}
}
p->sreq = req;
p->tag = tag;
p->rank = rank;
p->context_id = context_id;
p->next = MPIR_Sendq_head;
p->prev = NULL;
MPIR_Sendq_head = p;
if (p->next)
p->next->prev = p;
if (MPIR_REQUEST_KIND__SEND == req->kind)
req->u.send.dbg_next = p;
else if (MPIR_REQUEST_KIND__PREQUEST_SEND == req->kind)
req->u.persist.dbg_next = p;
fn_exit:
MPID_THREAD_CS_EXIT(POBJ, req->pobj_mutex);
MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
#endif /* HAVE_DEBUGGER_SUPPORT */
}
void MPII_Sendq_forget(MPIR_Request * req)
{
#if defined HAVE_DEBUGGER_SUPPORT
MPIR_Sendq *p, *prev;
MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
MPID_THREAD_CS_ENTER(POBJ, req->pobj_mutex);
if (MPIR_REQUEST_KIND__SEND == req->kind)
p = req->u.send.dbg_next;
else if (MPIR_REQUEST_KIND__PREQUEST_SEND == req->kind)
p = req->u.persist.dbg_next;
if (!p) {
/* Just ignore it */
MPID_THREAD_CS_EXIT(POBJ, req->pobj_mutex);
MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
return;
}
prev = p->prev;
if (prev != NULL)
prev->next = p->next;
else
MPIR_Sendq_head = p->next;
if (p->next != NULL)
p->next->prev = prev;
/* Return this element to the pool */
p->next = pool;
pool = p;
MPID_THREAD_CS_EXIT(POBJ, req->pobj_mutex);
MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
#endif /* HAVE_DEBUGGER_SUPPORT */
}
static int SendqFreePool(void *d)
{
MPIR_Sendq *p;
/* Free the pool */
p = pool;
while (p) {
pool = p->next;
MPL_free(p);
p = pool;
}
/* Free the list of pending sends */
p = MPIR_Sendq_head;
while (p) {
MPIR_Sendq_head = p->next;
MPL_free(p);
p = MPIR_Sendq_head;
}
return 0;
}
static void SendqInit(void)
{
int i;
MPIR_Sendq *p;
/* Preallocated a few send requests */
for (i = 0; i < 10; i++) {
p = (MPIR_Sendq *) MPL_malloc(sizeof(MPIR_Sendq), MPL_MEM_DEBUG);
if (!p) {
/* Just ignore it */
break;
}
p->next = pool;
pool = p;
}
/* Make sure the pool is deleted */
MPIR_Add_finalize(SendqFreePool, 0, 0);
}
/* Manage the known communicators */
/* Provide a list of all active communicators. This is used only by the
debugger message queue interface */
typedef struct MPIR_Comm_list {
int sequence_number; /* Used to detect changes in the list */
MPIR_Comm *head; /* Head of the list */
} MPIR_Comm_list;
MPIR_Comm_list MPIR_All_communicators = { 0, 0 };
void MPII_CommL_remember(MPIR_Comm * comm_ptr)
{
MPL_DBG_MSG_P(MPIR_DBG_COMM, VERBOSE, "Adding communicator %p to remember list", comm_ptr);
MPL_DBG_MSG_P(MPIR_DBG_COMM, VERBOSE,
"Remember list structure address is %p", &MPIR_All_communicators);
MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
MPID_THREAD_CS_ENTER(POBJ, MPIR_THREAD_POBJ_COMM_MUTEX(comm_ptr));
if (comm_ptr == MPIR_All_communicators.head) {
MPL_internal_error_printf("Internal error: communicator is already on free list\n");
return;
}
comm_ptr->comm_next = MPIR_All_communicators.head;
MPIR_All_communicators.head = comm_ptr;
MPIR_All_communicators.sequence_number++;
MPL_DBG_MSG_P(MPIR_DBG_COMM, VERBOSE, "master head is %p", MPIR_All_communicators.head);
MPID_THREAD_CS_EXIT(POBJ, MPIR_THREAD_POBJ_COMM_MUTEX(comm_ptr));
MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
}
void MPII_CommL_forget(MPIR_Comm * comm_ptr)
{
MPIR_Comm *p, *prev;
MPL_DBG_MSG_P(MPIR_DBG_COMM, VERBOSE,
"Forgetting communicator %p from remember list", comm_ptr);
MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
MPID_THREAD_CS_ENTER(POBJ, MPIR_THREAD_POBJ_COMM_MUTEX(comm_ptr));
p = MPIR_All_communicators.head;
prev = 0;
while (p) {
if (p == comm_ptr) {
if (prev)
prev->comm_next = p->comm_next;
else
MPIR_All_communicators.head = p->comm_next;
break;
}
if (p == p->comm_next) {
MPL_internal_error_printf("Mangled pointers to communicators - next is itself for %p\n",
p);
break;
}
prev = p;
p = p->comm_next;
}
/* Record a change to the list */
MPIR_All_communicators.sequence_number++;
MPID_THREAD_CS_EXIT(POBJ, MPIR_THREAD_POBJ_COMM_MUTEX(comm_ptr));
MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
}
#ifdef MPIU_PROCTABLE_NEEDED
/* This routine is the finalize callback used to free the procable */
static int MPIR_FreeProctable(void *ptable)
{
int i;
MPIR_PROCDESC *proctable = (MPIR_PROCDESC *) ptable;
for (i = 0; i < MPIR_proctable_size; i++) {
if (proctable[i].host_name) {
MPL_free(proctable[i].host_name);
}
}
MPL_free(proctable);
return 0;
}
#endif /* MPIU_PROCTABLE_NEEDED */
/*
* There is an MPI-2 process table interface which has been defined; this
* provides a more scalable, distributed description of the process table.
*
*
*/