Blob Blame History Raw
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
/*
 *  (C) 2001 by Argonne National Laboratory.
 *      See COPYRIGHT in top-level directory.
 */

#include "mpiimpl.h"

/* style:PMPIuse:PMPI_Get_processor_name:2 sig:0 */
/* style:PMPIuse:PMPI_Recv:2 sig:0 */
/* style:PMPIuse:PMPI_Ssend:2 sig:0 */
/* style: allow:printf:1 sig:0 */

/* For getpid */
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif

/*
=== BEGIN_MPI_T_CVAR_INFO_BLOCK ===

cvars:
    - name        : MPIR_CVAR_PROCTABLE_SIZE
      category    : DEBUGGER
      type        : int
      default     : 64
      class       : device
      verbosity   : MPI_T_VERBOSITY_USER_BASIC
      scope       : MPI_T_SCOPE_ALL_EQ
      description : >-
        Size of the "MPIR" debugger interface proctable (process table).

    - name        : MPIR_CVAR_PROCTABLE_PRINT
      category    : DEBUGGER
      type        : boolean
      default     : false
      class       : device
      verbosity   : MPI_T_VERBOSITY_USER_BASIC
      scope       : MPI_T_SCOPE_ALL_EQ
      description : >-
        If true, dump the proctable entries at MPII_Wait_for_debugger-time.

=== END_MPI_T_CVAR_INFO_BLOCK ===
*/

/* There are two versions of the debugger startup:
   1. The debugger starts mpiexec - then mpiexec provides the MPIR_proctable
      information
   2. The debugger attaches to an MPI process which contains the
      MPIR_proctable and related variables

   This file is intended to provide both as an option.  The macros that
   control the code for these are

   MPICH_STARTER_MPIEXEC
   MPICH_STARTER_RANK0
 */
#define MPICH_STARTER_MPIEXEC
/* #define MPICH_STARTER_RANK0 */

#ifdef MPICH_STARTER_RANK0
#define MPIU_PROCTABLE_NEEDED 1
#define MPIU_BREAKPOINT_NEEDED 1
#endif

/* If MPIR_Breakpoint is not defined and called, the message queue information
   will not be properly displayed by the debugger. */
/* I believe this was caused by a poor choice in the dll_mpich.c file */
/* #define MPIU_BREAKPOINT_NEEDED 1 */

#ifdef MPIU_BREAKPOINT_NEEDED
/* We prototype this routine here because it is only used in this file.  It
   is not static so that the debugger can find it (the debugger will set a
   breakpoint at this routine). */
void *MPIR_Breakpoint(void);
#endif

/*
 * This file contains information and routines used to simplify the interface
 * to a debugger.  This follows the description in "A Standard Interface
 * for Debugger Access to Message Queue Information in MPI", by Jim Cownie
 * and William Gropp.
 *
 * This file should be compiled with debug information (-g)
 */

/*
 * In addition to the discussion in the paper "A Standard Interface for Debugger
 * Access to Message Queue Information in MPI" and the more recent paper "An
 * Interface to Support the Identification of Dynamic MPI 2 Processes for
 * Scalable Parallel Debugging", there are a few features that have become
 * a de facto standard.  These include the "proctable" (a relic of the way
 * that p4 represented processes that was used in the ch_p4 device in
 * MPICH1), a debugger state (has MPI started or exited), and a routine that
 * has the sole purpose of serving as a break point for a debugger.
 * Specifically, these extensions are:
 *
 *  void * MPIR_Breakpoint(void)
 *
 * This routine should be called at any point where control should be
 * offered to the debugger.  Typical spots are in MPI_Init/MPI_Init_thread
 * after initialization is completed and in MPI_Abort before exiting.
 *
 * MPIR_Debugger_set_aborting(const char *msg)
 *
 * This routine should be called when MPI is exiting (either in finalize
 * or abort).  If a message is provided, it will call MPIR_Breakpoint.
 * This routine sets the variables MPIR_debug_state and MPIR_debug_abort_string.
 *
 * In MPICH1, the variables MPIR_debug_state, MPIR_debug_abort_string,
 * MPIR_being_debugged, and MPIR_debug_gate were exported globally.
 * In MPICH, while these are global variables (so that the debugger can
 * find them easily), they are not explicitly exported or referenced outside
 * of a few routines.  In particular, MPID_Abort uses MPIR_Debugger_set_aborting
 * instead of directly accessing these variables.
 */

/* The following is used to tell a debugger the location of the shared
   library that the debugger can load in order to access information about
   the parallel program, such as message queues */
#ifdef HAVE_DEBUGGER_SUPPORT
#ifdef MPICH_INFODLL_LOC
char MPIR_dll_name[] = MPICH_INFODLL_LOC;
#endif
#endif

/*
 * The following variables are used to interact with the debugger.
 *
 * MPIR_debug_state
 *    Values are 0 (before MPI_Init), 1 (after MPI_Init), and 2 (Aborting).
 * MPIR_debug_gate
 *    The debugger will set this to 1 when the debugger attaches
 *    to the process to tell the process to proceed.
 * MPIR_being_debugged
 *    Set to 1 if the process is started or attached under the debugger
 * MPIR_debug_abort_string
 *    String that the debugger can display on an abort.
 */
/* Set to MPIR_DEBUG_SPAWNED by MPII_Wait_for_debugger and to
 * MPIR_DEBUG_ABORTING by MPIR_Debugger_set_aborting */
volatile int MPIR_debug_state = 0;
/* Polled (busy-wait) by MPII_Wait_for_debugger when MPIEXEC_DEBUG is set in
 * the environment; the code visible in this file never writes it */
volatile int MPIR_debug_gate = 0;
/* Neither read nor written by the code visible in this file */
volatile int MPIR_being_debugged = 0;
/* Set by MPIR_Debugger_set_aborting to its msg argument (which may be NULL);
 * the string itself is not copied */
const char *MPIR_debug_abort_string = 0;

/* Values for the debug_state, this seems to be all we need at the moment
 * but that may change...
 */
#define MPIR_DEBUG_SPAWNED   1
#define MPIR_DEBUG_ABORTING  2

#ifdef MPIU_PROCTABLE_NEEDED
/*
 * MPIR_PROCDESC is used to pass information to the debugger about
 * all of the processes.
 */
typedef struct {
    char *host_name;            /* Valid name for inet_addr; heap-allocated by
                                 * MPII_Wait_for_debugger, NULL if not filled in */
    char *executable_name;      /* The name of the image; the code in this file
                                 * always leaves it NULL */
    int pid;                    /* The process id; -1 if not filled in */
} MPIR_PROCDESC;
MPIR_PROCDESC *MPIR_proctable = 0;
int MPIR_proctable_size = 1;
static int MPIR_FreeProctable(void *);

#endif /* MPIR_proctable definition */

/* Other symbols:
 * MPIR_i_am_starter - Indicates that this process is not an MPI process
 *   (for example, the forker mpiexec?)
 * MPIR_acquired_pre_main -
 * MPIR_partial_attach_ok -
*/

/* Forward references */
static void SendqInit(void);
static int SendqFreePool(void *);

/*
 * If MPICH is built with the --enable-debugger option, MPI_Init and
 * MPI_Init_thread will call MPII_Wait_for_debugger.  This ensures both that
 * the debugger can gather information on the MPI job before the MPI_Init
 * returns to the user and that the necessary symbols for providing
 * information such as message queues is available.
 *
 * In addition, the environment variable MPIEXEC_DEBUG, if set, will cause
 * all MPI processes to wait in this routine until the variable
 * MPIR_debug_gate is set to 1.
 */
void MPII_Wait_for_debugger(void)
{
    /* Overview of the steps below:
     *   1. (only if MPIU_PROCTABLE_NEEDED) gather host name and pid of the
     *      first "maxsize" ranks on rank 0 into MPIR_proctable;
     *   2. mark the debug state as "spawned";
     *   3. (only if MPIU_BREAKPOINT_NEEDED) call MPIR_Breakpoint;
     *   4. set up the send-queue pool;
     *   5. if MPIEXEC_DEBUG is set in the environment, spin until
     *      MPIR_debug_gate becomes nonzero.
     */
#ifdef MPIU_PROCTABLE_NEEDED
    int rank = MPIR_Process.comm_world->rank;
    int size = MPIR_Process.comm_world->local_size;
    int i, maxsize;

    /* FIXME: In MPICH, the executables may not have the information
     * on the other processes; this is part of the Process Manager Interface
     * (PMI).  We need another way to provide this information to
     * a debugger */
    /* The process manager probably has all of this data - the MPI2
     * debugger interface API provides (at least originally) a way
     * to access this. */
    /* Also, to avoid scaling problems, we only populate the first 64
     * entries (default) */
    /* maxsize = min(MPIR_CVAR_PROCTABLE_SIZE, size): the number of ranks
     * (including rank 0) whose entries are actually filled in */
    maxsize = MPIR_CVAR_PROCTABLE_SIZE;
    if (maxsize > size)
        maxsize = size;

    if (rank == 0) {
        char hostname[MPI_MAX_PROCESSOR_NAME + 1];
        int hostlen;
        int val;

        /* The table always has one slot per rank, even though only the first
         * maxsize slots receive real data.
         * NOTE(review): the allocation result is not checked before use */
        MPIR_proctable = (MPIR_PROCDESC *) MPL_malloc(size * sizeof(MPIR_PROCDESC), MPL_MEM_DEBUG);
        for (i = 0; i < size; i++) {
            /* Initialize the proctable */
            MPIR_proctable[i].host_name = 0;
            MPIR_proctable[i].executable_name = 0;
            MPIR_proctable[i].pid = -1;
        }

        /* Rank 0 fills in its own entry directly */
        PMPI_Get_processor_name(hostname, &hostlen);
        MPIR_proctable[0].host_name = (char *) MPL_strdup(hostname);
        MPIR_proctable[0].executable_name = 0;
        MPIR_proctable[0].pid = getpid();

        /* Each remaining rank below maxsize sends two messages with tag 0:
         * first { name length, pid }, then the name characters.  The receive
         * order here must match the send order in the else-branch below. */
        for (i = 1; i < maxsize; i++) {
            int msg[2];
            PMPI_Recv(msg, 2, MPI_INT, i, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
            MPIR_proctable[i].pid = msg[1];
            /* One extra byte for the terminator written below.
             * NOTE(review): the allocation result is not checked before it
             * is used as a receive buffer */
            MPIR_proctable[i].host_name = (char *) MPL_malloc(msg[0] + 1, MPL_MEM_DEBUG);
            PMPI_Recv(MPIR_proctable[i].host_name, msg[0] + 1, MPI_CHAR,
                      i, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
            /* The sender transmits only msg[0] characters, so terminate the
             * string explicitly */
            MPIR_proctable[i].host_name[msg[0]] = 0;
        }

        /* The published size is the full rank count, not maxsize; entries at
         * index >= maxsize keep their initial values (NULL name, pid -1) */
        MPIR_proctable_size = size;
        /* Debugging hook */
        if (MPIR_CVAR_PROCTABLE_PRINT) {
            for (i = 0; i < maxsize; i++) {
                printf("PT[%d].pid = %d, .host_name = %s\n",
                       i, MPIR_proctable[i].pid, MPIR_proctable[i].host_name);
            }
            fflush(stdout);
        }
        /* Free the table (and the host names) at finalize time */
        MPIR_Add_finalize(MPIR_FreeProctable, MPIR_proctable, 0);
    } else {
        char hostname[MPI_MAX_PROCESSOR_NAME + 1];
        int hostlen;
        int mypid = getpid();
        int msg[2];
        /* Ranks at or beyond maxsize contribute nothing to the table */
        if (rank < maxsize) {
            PMPI_Get_processor_name(hostname, &hostlen);
            msg[0] = hostlen;
            msg[1] = mypid;

            /* Deliver to the root process the proctable information */
            /* Two synchronous sends: the header first, then hostlen
             * characters of the name (rank 0 adds the terminator) */
            PMPI_Ssend(msg, 2, MPI_INT, 0, 0, MPI_COMM_WORLD);
            PMPI_Ssend(hostname, hostlen, MPI_CHAR, 0, 0, MPI_COMM_WORLD);
        }
    }
#endif /* MPIU_PROCTABLE_NEEDED */

    /* Put the breakpoint after setting up the proctable */
    /* The state is updated before the breakpoint routine is called, so it is
     * already MPIR_DEBUG_SPAWNED when a debugger stops there */
    MPIR_debug_state = MPIR_DEBUG_SPAWNED;
#ifdef MPIU_BREAKPOINT_NEEDED
    (void) MPIR_Breakpoint();
#endif
    /* After we exit the MPIR_Breakpoint routine, the debugger may have
     * set variables such as MPIR_being_debugged */

    /* Initialize the sendq support */
    SendqInit();

    /* Busy-wait (no sleep or yield) until MPIR_debug_gate becomes nonzero.
     * The variable is volatile, so it is re-read on every iteration; nothing
     * in this file sets it, it is expected to be changed externally (by the
     * debugger). */
    if (getenv("MPIEXEC_DEBUG")) {
        while (!MPIR_debug_gate);
    }


}

#ifdef MPIU_BREAKPOINT_NEEDED
/*
 * This routine is a special dummy routine that is used to provide a
 * location for a debugger to set a breakpoint on, allowing a user (and the
 * debugger) to attach to MPI processes after MPI_Init succeeds but before
 * MPI_Init returns control to the user. It may also be called when MPI aborts,
 * also to allow a debugger to regain control of an application.
 *
 * This routine can also initialize any datastructures that are required
 *
 */
void *MPIR_Breakpoint(void)
{
    /* The body only emits a verbose debug message; the routine exists so
     * that a tool has a well-known symbol on which to place a breakpoint. */
    MPL_DBG_MSG(MPIR_DBG_OTHER, VERBOSE, "In MPIR_Breakpoint");

    /* Always a null pointer */
    return NULL;
}
#endif

/*
 * Call this routine to signal to the debugger that the application is aborting.
 * If there is an abort message, call the MPIR_Breakpoint routine (which
 * allows a tool such as a debugger to gain control).
 */
void MPIR_Debugger_set_aborting(const char *msg)
{
    /* Publish the message first, then flip the state, so that the abort
     * string is already in place once the state reads "aborting".  The
     * pointer is stored as-is; the text is not copied. */
    MPIR_debug_abort_string = msg;
    MPIR_debug_state = MPIR_DEBUG_ABORTING;

#ifdef MPIU_BREAKPOINT_NEEDED
    /* Only hand control to the debugger when there is something to show */
    if (msg != NULL) {
        MPIR_Breakpoint();
    }
#endif
}

/* ------------------------------------------------------------------------- */
/*
 * Manage the send queue.
 *
 * The send queue is needed only by the debugger.  The communication
 * device has a separate notion of send queue, which are the operations
 * that it needs to complete, independent of whether the user has called
 * MPI_Wait/Test/etc on the request.
 *
 * This implementation uses a simple linked list of user-visible requests
 * (more specifically, requests created with MPI_Isend, MPI_Issend, or
 * MPI_Irsend).
 *
 * FIXME: We should exploit this to allow Finalize to report on
 * send requests that were never completed.
 */

/* We need to save the tag and rank since this information may not
   be included in the request.  Saving the context_id also simplifies
   matching these entries with a communicator */
typedef struct MPIR_Sendq {
    MPIR_Request *sreq;         /* The send request this entry describes */
    int tag, rank, context_id;  /* Copies of the values given to
                                 * MPII_Sendq_remember */
    struct MPIR_Sendq *next;    /* Next entry on the active list, or next
                                 * free entry while the element is pooled */
    struct MPIR_Sendq *prev;    /* Previous entry on the active list (NULL at
                                 * the head); not maintained while pooled */
} MPIR_Sendq;

MPIR_Sendq *MPIR_Sendq_head = 0;
/* Keep a pool of previous sendq elements to speed allocation of queue
   elements */
static MPIR_Sendq *pool = 0;

/* This routine is used to establish a queue of send requests to allow the
   debugger easier access to the active requests.  Some devices may be able
   to provide this information without requiring this separate queue. */
void MPII_Sendq_remember(MPIR_Request * req, int rank, int tag, int context_id)
{
#if defined HAVE_DEBUGGER_SUPPORT
    MPIR_Sendq *entry;

    /* TODO: We reuse the global lock for the per-vni granularity here instead
     * of a theoretically more scalable approach of creating a separate lock.
     * Whether this brief global critical section would perturb debugging is
     * unknown; investigation is needed before attempting to optimize this
     * case. */
    MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
    MPID_THREAD_CS_ENTER(POBJ, req->pobj_mutex);

    /* Recycle a pooled element when one is available; otherwise allocate.
     * A failed allocation leaves entry NULL and the request untracked. */
    if (pool != NULL) {
        entry = pool;
        pool = entry->next;
    } else {
        entry = (MPIR_Sendq *) MPL_malloc(sizeof(MPIR_Sendq), MPL_MEM_DEBUG);
    }

    if (entry != NULL) {
        entry->sreq = req;
        entry->tag = tag;
        entry->rank = rank;
        entry->context_id = context_id;

        /* Push onto the front of the doubly linked active list */
        entry->prev = NULL;
        entry->next = MPIR_Sendq_head;
        if (MPIR_Sendq_head != NULL)
            MPIR_Sendq_head->prev = entry;
        MPIR_Sendq_head = entry;
    }

    /* Store the back-pointer in the request: the new entry on success, NULL
     * when no entry could be obtained.  Other request kinds are untouched. */
    if (req->kind == MPIR_REQUEST_KIND__SEND)
        req->u.send.dbg_next = entry;
    else if (req->kind == MPIR_REQUEST_KIND__PREQUEST_SEND)
        req->u.persist.dbg_next = entry;

    MPID_THREAD_CS_EXIT(POBJ, req->pobj_mutex);
    MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
#endif /* HAVE_DEBUGGER_SUPPORT */
}

/*
 * Remove the debugger send-queue entry attached to a request, if any, and
 * return the entry to the free pool.
 *
 * req - the request being completed or released.  Only requests of kind
 *       MPIR_REQUEST_KIND__SEND and MPIR_REQUEST_KIND__PREQUEST_SEND carry
 *       a send-queue entry; every other kind is ignored.
 *
 * The request's back-pointer is cleared, so calling this routine again for
 * the same request (without an intervening MPII_Sendq_remember) is a no-op.
 * Without debugger support the routine does nothing.
 */
void MPII_Sendq_forget(MPIR_Request * req)
{
#if defined HAVE_DEBUGGER_SUPPORT
    /* Must start out NULL: for request kinds that have no dbg_next field
     * neither branch below assigns it, and it is tested right afterwards */
    MPIR_Sendq *p = NULL;
    MPIR_Sendq *prev;

    MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
    MPID_THREAD_CS_ENTER(POBJ, req->pobj_mutex);

    /* Detach the entry from the request.  Clearing the back-pointer keeps
     * the request from referring to an element that is about to be pooled
     * (and possibly reused for a different request). */
    if (MPIR_REQUEST_KIND__SEND == req->kind) {
        p = req->u.send.dbg_next;
        req->u.send.dbg_next = NULL;
    } else if (MPIR_REQUEST_KIND__PREQUEST_SEND == req->kind) {
        p = req->u.persist.dbg_next;
        req->u.persist.dbg_next = NULL;
    }
    if (!p) {
        /* Nothing was remembered for this request; just ignore it */
        goto fn_exit;
    }

    /* Unlink from the doubly linked active list */
    prev = p->prev;
    if (prev != NULL)
        prev->next = p->next;
    else
        MPIR_Sendq_head = p->next;
    if (p->next != NULL)
        p->next->prev = prev;

    /* Return this element to the pool */
    p->next = pool;
    pool = p;

  fn_exit:
    MPID_THREAD_CS_EXIT(POBJ, req->pobj_mutex);
    MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
#endif /* HAVE_DEBUGGER_SUPPORT */
}

/* Finalize callback: release every pooled element and every element still
 * on the active send list.  The argument is unused.  Always returns 0. */
static int SendqFreePool(void *d)
{
    MPIR_Sendq *victim;

    /* Drain the free pool; the list head is advanced before each element
     * is released so it never points at freed memory */
    while ((victim = pool) != NULL) {
        pool = victim->next;
        MPL_free(victim);
    }

    /* Drain the list of sends that were never forgotten, the same way */
    while ((victim = MPIR_Sendq_head) != NULL) {
        MPIR_Sendq_head = victim->next;
        MPL_free(victim);
    }

    return 0;
}

/* Seed the free pool with a handful of elements and arrange for the pool
 * (and any leftover active entries) to be released at finalize time. */
static void SendqInit(void)
{
    /* Number of elements to preallocate */
    enum { SENDQ_PREALLOC_COUNT = 10 };
    int remaining = SENDQ_PREALLOC_COUNT;

    while (remaining-- > 0) {
        MPIR_Sendq *fresh = (MPIR_Sendq *) MPL_malloc(sizeof(MPIR_Sendq), MPL_MEM_DEBUG);
        if (fresh == NULL) {
            /* Out of memory: stop early and keep whatever was obtained */
            break;
        }
        fresh->next = pool;
        pool = fresh;
    }

    /* Registered unconditionally, even when nothing could be allocated */
    MPIR_Add_finalize(SendqFreePool, 0, 0);
}

/* Manage the known communicators */
/* Provide a list of all active communicators.  This is used only by the
   debugger message queue interface */
typedef struct MPIR_Comm_list {
    int sequence_number;        /* Used to detect changes in the list; bumped
                                 * when MPII_CommL_remember inserts an entry
                                 * and on every MPII_CommL_forget call */
    MPIR_Comm *head;            /* Head of the list; entries are chained
                                 * through their comm_next field */
} MPIR_Comm_list;

/* The list instance itself: initially empty, sequence number 0 */
MPIR_Comm_list MPIR_All_communicators = { 0, 0 };

/*
 * Add a communicator to the front of the list of all active communicators
 * and bump the list's sequence number.
 *
 * comm_ptr - the communicator to record.
 *
 * If comm_ptr is already the head of the list, an internal error is
 * reported and the list is left unchanged.  Only the head is checked; a
 * duplicate further down the list is not detected.  Both critical sections
 * are released on every path out of the routine.
 */
void MPII_CommL_remember(MPIR_Comm * comm_ptr)
{
    MPL_DBG_MSG_P(MPIR_DBG_COMM, VERBOSE, "Adding communicator %p to remember list", comm_ptr);
    MPL_DBG_MSG_P(MPIR_DBG_COMM, VERBOSE,
                  "Remember list structure address is %p", &MPIR_All_communicators);
    MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
    MPID_THREAD_CS_ENTER(POBJ, MPIR_THREAD_POBJ_COMM_MUTEX(comm_ptr));
    if (comm_ptr == MPIR_All_communicators.head) {
        MPL_internal_error_printf("Internal error: communicator is already on free list\n");
        /* Leave through the common exit so the locks taken above are
         * released rather than left held */
        goto fn_exit;
    }

    /* Push onto the front of the singly linked list */
    comm_ptr->comm_next = MPIR_All_communicators.head;
    MPIR_All_communicators.head = comm_ptr;
    /* Record a change to the list */
    MPIR_All_communicators.sequence_number++;
    MPL_DBG_MSG_P(MPIR_DBG_COMM, VERBOSE, "master head is %p", MPIR_All_communicators.head);

  fn_exit:
    MPID_THREAD_CS_EXIT(POBJ, MPIR_THREAD_POBJ_COMM_MUTEX(comm_ptr));
    MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
}

/*
 * Drop a communicator from the list of all active communicators.
 *
 * The list is searched linearly.  The search also stops, with an internal
 * error message, when an element whose comm_next refers to itself is met.
 * The sequence number is incremented whether or not comm_ptr was found.
 */
void MPII_CommL_forget(MPIR_Comm * comm_ptr)
{
    MPIR_Comm *cur, *before;

    MPL_DBG_MSG_P(MPIR_DBG_COMM, VERBOSE,
                  "Forgetting communicator %p from remember list", comm_ptr);
    MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
    MPID_THREAD_CS_ENTER(POBJ, MPIR_THREAD_POBJ_COMM_MUTEX(comm_ptr));

    for (cur = MPIR_All_communicators.head, before = NULL; cur != NULL;
         before = cur, cur = cur->comm_next) {
        if (cur == comm_ptr) {
            /* Found it: splice it out, fixing up either the list head or
             * the predecessor's link */
            if (before == NULL)
                MPIR_All_communicators.head = cur->comm_next;
            else
                before->comm_next = cur->comm_next;
            break;
        }
        if (cur->comm_next == cur) {
            /* A self-referencing link would make this loop spin forever */
            MPL_internal_error_printf("Mangled pointers to communicators - next is itself for %p\n",
                                      cur);
            break;
        }
    }

    /* Record a change to the list */
    MPIR_All_communicators.sequence_number++;
    MPID_THREAD_CS_EXIT(POBJ, MPIR_THREAD_POBJ_COMM_MUTEX(comm_ptr));
    MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
}

#ifdef MPIU_PROCTABLE_NEEDED
/* This routine is the finalize callback used to free the proctable */
static int MPIR_FreeProctable(void *ptable)
{
    MPIR_PROCDESC *entries = (MPIR_PROCDESC *) ptable;
    int idx = 0;

    /* Release each host name that was actually filled in; the entry count
     * comes from the global MPIR_proctable_size */
    while (idx < MPIR_proctable_size) {
        if (entries[idx].host_name != NULL) {
            MPL_free(entries[idx].host_name);
        }
        idx++;
    }

    /* Finally release the table itself */
    MPL_free(entries);

    return 0;
}
#endif /* MPIU_PROCTABLE_NEEDED */

/*
 * There is an MPI-2 process table interface which has been defined; this
 * provides a more scalable, distributed description of the process table.
 *
 *
 */