|
Packit Service |
c5cf8c |
pt2pt requirement
|
|
Packit Service |
c5cf8c |
|
|
Packit Service |
c5cf8c |
- need to specify blocking vs. non-blocking for most routines
|
|
Packit Service |
c5cf8c |
|
|
Packit Service |
c5cf8c |
|
|
Packit Service |
c5cf8c |
------------------------------------------------------------------------
|
|
Packit Service |
c5cf8c |
|
|
Packit Service |
c5cf8c |
MPI_Send_init(buf, count, datatype, dest, tag, comm, request, error)
|
|
Packit Service |
c5cf8c |
MPI_Bsend_init(buf, count, datatype, dest, tag, comm, request, error)
|
|
Packit Service |
c5cf8c |
MPI_Rsend_init(buf, count, datatype, dest, tag, comm, request, error)
|
|
Packit Service |
c5cf8c |
MPI_Ssend_init(buf, count, datatype, dest, tag, comm, request, error)
|
|
Packit Service |
c5cf8c |
MPI_Recv_init(buf, count, datatype, src, tag, comm, request, error)
|
|
Packit Service |
c5cf8c |
{
|
|
Packit Service |
c5cf8c |
request_p = MPIR_Request_alloc();
|
|
Packit Service |
c5cf8c |
|
|
Packit Service |
c5cf8c |
/* Fill in request structure based on parameters and type of operation */
|
|
Packit Service |
c5cf8c |
request_p->buf = buf;
|
|
Packit Service |
c5cf8c |
request_p->count = count;
|
|
Packit Service |
c5cf8c |
request_p->datatype = datatype;
|
|
Packit Service |
c5cf8c |
request_p->rank = dest/src;
|
|
Packit Service |
c5cf8c |
request_p->tag = tag;
|
|
Packit Service |
c5cf8c |
request_p->comm = comm;
|
|
Packit Service |
c5cf8c |
request_p->type = persistent | <type>;
|
|
Packit Service |
c5cf8c |
|
|
Packit Service |
c5cf8c |
*request = MPIR_Request_handle(request_p);
|
|
Packit Service |
c5cf8c |
}
|
|
Packit Service |
c5cf8c |
|
|
Packit Service |
c5cf8c |
|
|
Packit Service |
c5cf8c |
MPI_Start(request, error)
|
|
Packit Service |
c5cf8c |
{
|
|
Packit Service |
c5cf8c |
switch(request->type)
|
|
Packit Service |
c5cf8c |
{
|
|
Packit Service |
c5cf8c |
send:
|
|
Packit Service |
c5cf8c |
MPID_Isend(buf, count, datatype, dest, tag, comm, request_p,
|
|
Packit Service |
c5cf8c |
error);
|
|
Packit Service |
c5cf8c |
bsend:
|
|
Packit Service |
c5cf8c |
MPID_Ibsend(...)
|
|
Packit Service |
c5cf8c |
rsend:
|
|
Packit Service |
c5cf8c |
MPID_Irsend(...)
|
|
Packit Service |
c5cf8c |
ssend:
|
|
Packit Service |
c5cf8c |
MPID_Issend(...)
|
|
Packit Service |
c5cf8c |
recv:
|
|
Packit Service |
c5cf8c |
MPID_Irecv(...)
|
|
Packit Service |
c5cf8c |
}
|
|
Packit Service |
c5cf8c |
}
|
|
Packit Service |
c5cf8c |
|
|
Packit Service |
c5cf8c |
- persistent requests require copying parameters into the request structure.
|
|
Packit Service |
c5cf8c |
should we always fill in a request and simply pass the request as the only
|
|
Packit Service |
c5cf8c |
parameter? this would eliminate optimizations on machines where large
|
|
Packit Service |
c5cf8c |
numbers of parameters can be passed in registers, but the intel boxes will
|
|
Packit Service |
c5cf8c |
just end up pushing the parameters on the stack anyway...
|
|
Packit Service |
c5cf8c |
|
|
Packit Service |
c5cf8c |
- there is an optimization here that allows registered memory to be maintained
|
|
Packit Service |
c5cf8c |
as registered in the persistent case. to do this we will need to let the
|
|
Packit Service |
c5cf8c |
method know that we do/do not want the memory unregistered.
|
|
Packit Service |
c5cf8c |
|
|
Packit Service |
c5cf8c |
- need to store request type in request structure so that MPI_Start() can do
|
|
Packit Service |
c5cf8c |
the right thing (tm).
|
|
Packit Service |
c5cf8c |
|
|
Packit Service |
c5cf8c |
- we chose not to convert handles to structure pointers since the handles may
|
|
Packit Service |
c5cf8c |
contain quick access to common information avoiding pointer dereferences.
|
|
Packit Service |
c5cf8c |
in some cases, an associated structure may not even exist.
|
|
Packit Service |
c5cf8c |
|
|
Packit Service |
c5cf8c |
the implication here is that many of the non-persistent MPI_Xsend routines
|
|
Packit Service |
c5cf8c |
will do little work outside of calling an MPID function. Perhaps we should
|
|
Packit Service |
c5cf8c |
not have separate MPI functions in those cases but rather map the MPI
|
|
Packit Service |
c5cf8c |
functions direct to the MPID functions (through the use of macros or weak
|
|
Packit Service |
c5cf8c |
symbols).
|
|
Packit Service |
c5cf8c |
|
|
Packit Service |
c5cf8c |
------------------------------------------------------------------------
|
|
Packit Service |
c5cf8c |
|
|
Packit Service |
c5cf8c |
|
|
Packit Service |
c5cf8c |
MPI_Send(buf, count, datatype, dest, tag, comm, error)
|
|
Packit Service |
c5cf8c |
MPI_Bsend(buf, count, datatype, dest, tag, comm, error)
|
|
Packit Service |
c5cf8c |
MPI_Rsend(buf, count, datatype, dest, tag, comm, error)
|
|
Packit Service |
c5cf8c |
MPI_Ssend(buf, count, datatype, dest, tag, comm, error)
|
|
Packit Service |
c5cf8c |
{
|
|
Packit Service |
c5cf8c |
/* Map (comm,rank) handle to a virtual connection */
|
|
Packit Service |
c5cf8c |
MPID_Comm_get_connection(comm, rank, &vc);
|
|
Packit Service |
c5cf8c |
|
|
Packit Service |
c5cf8c |
/* If virtual connection is not bound to a real connection, then perform
|
|
Packit Service |
c5cf8c |
connection resolution. */
|
|
Packit Service |
c5cf8c |
|
|
Packit Service |
c5cf8c |
/* (atomically) If no other requests are queued on this connection, then send
|
|
Packit Service |
c5cf8c |
as much data as possible. If the entire message could not be sent
|
|
Packit Service |
c5cf8c |
"immediately" then queue the request for later processing. (We need a
|
|
Packit Service |
c5cf8c |
progress engine to ensure that later happens.) */
|
|
Packit Service |
c5cf8c |
/* Build up a segment unless the datatype is "trivial" */
|
|
Packit Service |
c5cf8c |
|
|
Packit Service |
c5cf8c |
|
|
Packit Service |
c5cf8c |
/* Wait until entire message is sent */
|
|
Packit Service |
c5cf8c |
}
|
|
Packit Service |
c5cf8c |
|
|
Packit Service |
c5cf8c |
- heterogeneity should be handled by the method. this allows methods which do not
|
|
Packit Service |
c5cf8c |
require conversions, such as shared memory, to be fully optimized.
|
|
Packit Service |
c5cf8c |
|
|
Packit Service |
c5cf8c |
- who should setup the segment and convert the buffer (buf, count, datatype) to
|
|
Packit Service |
c5cf8c |
one or more blocks of bytes? should that be a layer above the method or
|
|
Packit Service |
c5cf8c |
should it be the method itself?
|
|
Packit Service |
c5cf8c |
|
|
Packit Service |
c5cf8c |
a method may or may not need to use segments depending on its capabilities.
|
|
Packit Service |
c5cf8c |
|
|
Packit Service |
c5cf8c |
there should only be one implementation of the segment API which will be
|
|
Packit Service |
c5cf8c |
called by all of the method implementations.
|
|
Packit Service |
c5cf8c |
|
|
Packit Service |
c5cf8c |
- we noticed that the segment initialization code takes a (comm,rank) pair which
|
|
Packit Service |
c5cf8c |
will have to be dereferenced to a virtual connection in order to determine if
|
|
Packit Service |
c5cf8c |
data conversion is required. since we have already done the dereference, it
|
|
Packit Service |
c5cf8c |
would be ideal if the segment took an ADI3 implementation (MPID) specific
|
|
Packit Service |
c5cf8c |
connection type instead of a (comm,rank). Making this parameter type
|
|
Packit Service |
c5cf8c |
implementation specific implies that the segment interface is never called
|
|
Packit Service |
c5cf8c |
from the MPI layer or that the ADI3 interface provides a means of converting
|
|
Packit Service |
c5cf8c |
a (comm, rank) to a connection type.
|
|
Packit Service |
c5cf8c |
|
|
Packit Service |
c5cf8c |
- David suggested that we might be able to use the xfer interface for
|
|
Packit Service |
c5cf8c |
point-to-point messaging as well as for collective operations.
|
|
Packit Service |
c5cf8c |
|
|
Packit Service |
c5cf8c |
What should the xfer interface look like?
|
|
Packit Service |
c5cf8c |
|
|
Packit Service |
c5cf8c |
- David provided a write-up of the existing interface
|
|
Packit Service |
c5cf8c |
|
|
Packit Service |
c5cf8c |
- We questioned whether or not multiple receive blocks could be used to
|
|
Packit Service |
c5cf8c |
receive a message sent from a single send block. We decided that blocks
|
|
Packit Service |
c5cf8c |
define envelopes which match, where a single block defines an envelope (and
|
|
Packit Service |
c5cf8c |
payload) per destination and/or source. So, a message sent to a particular
|
|
Packit Service |
c5cf8c |
destination (from a single send block) must be received by a single receive
|
|
Packit Service |
c5cf8c |
block. In other words, the message cannot be broken across receive blocks.
|
|
Packit Service |
c5cf8c |
|
|
Packit Service |
c5cf8c |
- there is an asymmetry in the existing interface which allows multiple
|
|
Packit Service |
c5cf8c |
destinations but prevents multiple sources. the result of this is that
|
|
Packit Service |
c5cf8c |
scattering operations can be naturally described, but aggregation
|
|
Packit Service |
c5cf8c |
operations cannot. we believe that there are important cases where
|
|
Packit Service |
c5cf8c |
aggregation would benefit collective operations.
|
|
Packit Service |
c5cf8c |
|
|
Packit Service |
c5cf8c |
- to address this we believe that we should extend the interface to
|
|
Packit Service |
c5cf8c |
implement a many-to-one, in addition to the existing one-to-many
|
|
Packit Service |
c5cf8c |
interface. we hope we don't need the many-to-many...
|
|
Packit Service |
c5cf8c |
|
|
Packit Service |
c5cf8c |
- perhaps we should call these scatter_init and gather_init (etc)?
|
|
Packit Service |
c5cf8c |
|
|
Packit Service |
c5cf8c |
- Nick proposed that the interface be split up such that send requests would be
|
|
Packit Service |
c5cf8c |
separate from receive requests. This implies that there would be a
|
|
Packit Service |
c5cf8c |
xfer_send_init() and xfer_recv_init(). We later threw this out, as it
|
|
Packit Service |
c5cf8c |
didn't make a whole lot of sense with forwards existing in the recv case.
|
|
Packit Service |
c5cf8c |
|
|
Packit Service |
c5cf8c |
- Brian wondered about aggregating sends into a single receive and whether
|
|
Packit Service |
c5cf8c |
that could be used to reduce the overhead of message headers when
|
|
Packit Service |
c5cf8c |
forwarding. We think that this can be done below the xfer interface when
|
|
Packit Service |
c5cf8c |
converting into a dataflow-like structure (?)
|
|
Packit Service |
c5cf8c |
|
|
Packit Service |
c5cf8c |
- We think it may be necessary to describe dependencies, such as progress,
|
|
Packit Service |
c5cf8c |
completion and buffer. These dependencies are frighteningly close to
|
|
Packit Service |
c5cf8c |
dataflow...
|
|
Packit Service |
c5cf8c |
|
|
Packit Service |
c5cf8c |
- basically we see the xfer init...start calls as being converted into a set of
|
|
Packit Service |
c5cf8c |
comm. agent requests and a dependency graph. we see the dependencies as
|
|
Packit Service |
c5cf8c |
being possibly stored in a tabular format, so that ranges of the incoming
|
|
Packit Service |
c5cf8c |
stream can have different dependencies on them -- specifically this allows
|
|
Packit Service |
c5cf8c |
for progress dependencies on a range basis, which we see as a requirement.
|
|
Packit Service |
c5cf8c |
completion dependencies (of which there may be > 1) would be listed at the
|
|
Packit Service |
c5cf8c |
end of this table
|
|
Packit Service |
c5cf8c |
|
|
Packit Service |
c5cf8c |
the table describes what depends on THIS request, rather than the other way
|
|
Packit Service |
c5cf8c |
around. this is tailored to a notification system rather than some sort of
|
|
Packit Service |
c5cf8c |
search-for-ready approach (which would be a disaster).
|
|
Packit Service |
c5cf8c |
|
|
Packit Service |
c5cf8c |
- for dependencies BETWEEN blocks, we propose waiting on the first block to
|
|
Packit Service |
c5cf8c |
complete before starting the next block. you can still create blocks ahead
|
|
Packit Service |
c5cf8c |
of time if desired. otherwise blocks may be processed in parallel
|
|
Packit Service |
c5cf8c |
|
|
Packit Service |
c5cf8c |
- blocks follow the same envelope matching rules as posted mpi send/recvs
|
|
Packit Service |
c5cf8c |
(commit time order). this is the only "dependency" between blocks
|
|
Packit Service |
c5cf8c |
|
|
Packit Service |
c5cf8c |
reminder: envelope = (context (communicator), source_rank, tag)
|
|
Packit Service |
c5cf8c |
|
|
Packit Service |
c5cf8c |
QUESTION: what exactly are the semantics of a block? Sends to the same
|
|
Packit Service |
c5cf8c |
destination are definitely ordered. Sends to different destinations could
|
|
Packit Service |
c5cf8c |
proceed in parallel. Should they?
|
|
Packit Service |
c5cf8c |
|
|
Packit Service |
c5cf8c |
example:
|
|
Packit Service |
c5cf8c |
init
|
|
Packit Service |
c5cf8c |
rf(5)
|
|
Packit Service |
c5cf8c |
rf(4)
|
|
Packit Service |
c5cf8c |
r
|
|
Packit Service |
c5cf8c |
start
|
|
Packit Service |
c5cf8c |
|
|
Packit Service |
c5cf8c |
a transfer block defines 0 or 1 envelope/payloads for sources and 0 to N envelope/payloads for destinations, one per destination.
|
|
Packit Service |
c5cf8c |
|
|
Packit Service |
c5cf8c |
|
|
Packit Service |
c5cf8c |
|
|
Packit Service |
c5cf8c |
- The communication agent will need to process these requests and data
|
|
Packit Service |
c5cf8c |
dependencies. We see the agent having queues of requests similar in nature
|
|
Packit Service |
c5cf8c |
to the run queue within an operating system. (We aren't really sure what
|
|
Packit Service |
c5cf8c |
this means yet...) Queues might consist of the active queue, the wait queue,
|
|
Packit Service |
c5cf8c |
and the still-to-be-matched queue.
|
|
Packit Service |
c5cf8c |
|
|
Packit Service |
c5cf8c |
- the "try to send right away" code will look to see if there is anything in
|
|
Packit Service |
c5cf8c |
the active queue for the vc, and if not just put it in run queue and call
|
|
Packit Service |
c5cf8c |
the make progress function (whatever that is...)
|
|
Packit Service |
c5cf8c |
|
|
Packit Service |
c5cf8c |
- adaptive polling done at the agent level, perhaps with method supplied
|
|
Packit Service |
c5cf8c |
min/max/increments. comm. agent must track outstanding requests (as
|
|
Packit Service |
c5cf8c |
described above) in order to know WHAT to poll. we must also take into
|
|
Packit Service |
c5cf8c |
account that there might be incoming active message or error conditions, so
|
|
Packit Service |
c5cf8c |
we should poll all methods (and all vcs) periodically.
|
|
Packit Service |
c5cf8c |
|
|
Packit Service |
c5cf8c |
- We believe that an MPIR_Request might simply contain enough information for
|
|
Packit Service |
c5cf8c |
signalling that one or more CARs have completed. This implies that a
|
|
Packit Service |
c5cf8c |
MPIR_Request might consist of an integer counter of outstanding CARs. When
|
|
Packit Service |
c5cf8c |
the counter reached zero, the request is complete. David suggests making
|
|
Packit Service |
c5cf8c |
CARs and MPIR_Requests reside in the same physical structure so that in the
|
|
Packit Service |
c5cf8c |
MPI_Send/Recv() case, two logical allocations (one for MPIR_Request and CAR)
|
|
Packit Service |
c5cf8c |
are combined into one.
|
|
Packit Service |
c5cf8c |
|
|
Packit Service |
c5cf8c |
- operations within a block are prioritized by the order in which they are
|
|
Packit Service |
c5cf8c |
added to the block. operations may proceed in parallel so long as higher
|
|
Packit Service |
c5cf8c |
priority operations are not slowed down by lesser priority operations. a
|
|
Packit Service |
c5cf8c |
valid implementation is to serialize the operations thus guaranteeing that
|
|
Packit Service |
c5cf8c |
the current operation has all available resources at its disposal.
|
|
Packit Service |
c5cf8c |
|
|
Packit Service |
c5cf8c |
MPI_Isend(buf, count, datatype, dest, tag, comm, request, error)
|
|
Packit Service |
c5cf8c |
MPI_Ibsend(buf, count, datatype, dest, tag, comm, request, error)
|
|
Packit Service |
c5cf8c |
MPI_Irsend(buf, count, datatype, dest, tag, comm, request, error)
|
|
Packit Service |
c5cf8c |
MPI_Issend(buf, count, datatype, dest, tag, comm, request, error)
|
|
Packit Service |
c5cf8c |
{
|
|
Packit Service |
c5cf8c |
request_p = MPIR_Request_alloc();
|
|
Packit Service |
c5cf8c |
MPID_IXsend(buf, count, datatype, dest, tag, comm, request_p, error);
|
|
Packit Service |
c5cf8c |
*request = MPIR_Request_handle(request_p);
|
|
Packit Service |
c5cf8c |
}
|
|
Packit Service |
c5cf8c |
|
|
Packit Service |
c5cf8c |
MPI_Recv()
|
|
Packit Service |
c5cf8c |
MPI_Irecv()
|
|
Packit Service |
c5cf8c |
|
|
Packit Service |
c5cf8c |
- need to cover wild card receive!
|
|
Packit Service |
c5cf8c |
|
|
Packit Service |
c5cf8c |
MPI_Sendrecv()
|
|
Packit Service |
c5cf8c |
{
|
|
Packit Service |
c5cf8c |
/* KISS */
|
|
Packit Service |
c5cf8c |
MPI_Isend()
|
|
Packit Service |
c5cf8c |
MPI_Irecv()
|
|
Packit Service |
c5cf8c |
MPI_Waitall()
|
|
Packit Service |
c5cf8c |
}
|
|
Packit Service |
c5cf8c |
|
|
Packit Service |
c5cf8c |
|
|
Packit Service |
c5cf8c |
MPID_Send(buf, count, datatype, dest, tag, comm, group, error)
|
|
Packit Service |
c5cf8c |
MPID_Isend(buf, count, datatype, dest, tag, comm, request, error)
|
|
Packit Service |
c5cf8c |
|
|
Packit Service |
c5cf8c |
MPID_Bsend(buf, count, datatype, dest, tag, comm, error)
|
|
Packit Service |
c5cf8c |
MPID_Ibsend(buf, count, datatype, dest, tag, comm, request, error)
|
|
Packit Service |
c5cf8c |
|
|
Packit Service |
c5cf8c |
MPID_Rsend(buf, count, datatype, dest, tag, comm, error)
|
|
Packit Service |
c5cf8c |
MPID_Irsend(buf, count, datatype, dest, tag, comm, request, error)
|
|
Packit Service |
c5cf8c |
|
|
Packit Service |
c5cf8c |
MPID_Ssend(buf, count, datatype, dest, tag, comm, error)
|
|
Packit Service |
c5cf8c |
MPID_Issend(buf, count, datatype, dest, tag, comm, request, error)
|
|
Packit Service |
c5cf8c |
|
|
Packit Service |
c5cf8c |
|
|
Packit Service |
c5cf8c |
-----
|
|
Packit Service |
c5cf8c |
|
|
Packit Service |
c5cf8c |
Items which make life more difficult:
|
|
Packit Service |
c5cf8c |
|
|
Packit Service |
c5cf8c |
-
|