|
Packit |
0848f5 |
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
|
|
Packit |
0848f5 |
/*
|
|
Packit |
0848f5 |
* (C) 2010 by Argonne National Laboratory.
|
|
Packit |
0848f5 |
* See COPYRIGHT in top-level directory.
|
|
Packit |
0848f5 |
*/
|
|
Packit |
0848f5 |
|
|
Packit |
0848f5 |
/* This test measures the performance of many RMA operations to a single
   target process.
   It uses a number of operations (put or accumulate) to different
   locations in the target window.
   This is one of the ways that RMA may be used, and is used in the
   reference implementation of the graph500 benchmark.
*/
|
|
Packit |
0848f5 |
#include "mpi.h"
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
|
|
Packit |
0848f5 |
|
|
Packit |
0848f5 |
/* Largest number of RMA operations issued in one timed run.  The expansion
 * is parenthesized so that the macro behaves as a single value inside any
 * expression (e.g. x / MAX_COUNT). */
#define MAX_COUNT (65536 * 4)
/* Default upper bound on the number of ints moved by one RMA operation. */
#define MAX_RMA_SIZE 16
/* Number of repetitions timed for every (count, size) combination; also the
 * length of the timing arrays. */
#define MAX_RUNS 10
|
|
Packit |
0848f5 |
|
|
Packit |
0848f5 |
/* Synchronization methods that can be benchmarked.  The positive values are
 * distinct bits so that several methods can be OR-ed together; SYNC_ALL is
 * -1, which main() treats as "nothing was selected on the command line". */
typedef enum {
    SYNC_NONE = 0,
    SYNC_FENCE = 1,
    SYNC_LOCK = 2,
    SYNC_PSCW = 4,
    SYNC_ALL = -1
} sync_t;

/* RMA operations that can be benchmarked, encoded the same way as sync_t.
 * Note GET not yet implemented */
typedef enum {
    RMA_NONE = 0,
    RMA_PUT = 1,
    RMA_ACC = 2,
    RMA_GET = 4,
    RMA_ALL = -1
} rma_t;

/* Current selections; both start out as "all". */
sync_t syncChoice = SYNC_ALL;
rma_t rmaChoice = RMA_ALL;
|
|
Packit |
0848f5 |
|
|
Packit |
0848f5 |
/* Time stamps (as returned by MPI_Wtime) recorded for one timed run. */
typedef struct {
    double startOp;             /* just before the first RMA call is issued */
    double endOp;               /* just after the last RMA call returned */
    double endSync;             /* after the closing synchronization call */
} timing;

/* When non-zero, PrintResults writes one result line per count. */
static int verbose = 1;
/* Set by the -barrier option: adds an MPI_Barrier between the RMA calls and
 * the closing synchronization of every run. */
static int barrierSync = 0;
/* Set in main() to ten times the largest MPI_Wtick over all processes. */
static double tickThreshold = 0.0;
|
|
Packit |
0848f5 |
|
|
Packit |
0848f5 |
void PrintResults(int cnt, timing t[]);
|
|
Packit |
0848f5 |
void RunAccFence(MPI_Win win, int destRank, int cnt, int sz, timing t[]);
|
|
Packit |
0848f5 |
void RunAccLock(MPI_Win win, int destRank, int cnt, int sz, timing t[]);
|
|
Packit |
0848f5 |
void RunPutFence(MPI_Win win, int destRank, int cnt, int sz, timing t[]);
|
|
Packit |
0848f5 |
void RunPutLock(MPI_Win win, int destRank, int cnt, int sz, timing t[]);
|
|
Packit |
0848f5 |
void RunAccPSCW(MPI_Win win, int destRank, int cnt, int sz,
|
|
Packit |
0848f5 |
MPI_Group exposureGroup, MPI_Group accessGroup, timing t[]);
|
|
Packit |
0848f5 |
void RunPutPSCW(MPI_Win win, int destRank, int cnt, int sz,
|
|
Packit |
0848f5 |
MPI_Group exposureGroup, MPI_Group accessGroup, timing t[]);
|
|
Packit |
0848f5 |
|
|
Packit |
0848f5 |
int main(int argc, char *argv[])
|
|
Packit |
0848f5 |
{
|
|
Packit |
0848f5 |
int arraysize, i, cnt, sz, maxCount = MAX_COUNT, *arraybuffer;
|
|
Packit |
0848f5 |
int wrank, wsize, destRank, srcRank;
|
|
Packit |
0848f5 |
MPI_Win win;
|
|
Packit |
0848f5 |
MPI_Group wgroup, accessGroup, exposureGroup;
|
|
Packit |
0848f5 |
timing t[MAX_RUNS];
|
|
Packit |
0848f5 |
int maxSz = MAX_RMA_SIZE;
|
|
Packit |
0848f5 |
|
|
Packit |
0848f5 |
MPI_Init(&argc, &argv);
|
|
Packit |
0848f5 |
|
|
Packit |
0848f5 |
/* Determine clock accuracy */
|
|
Packit |
0848f5 |
tickThreshold = 10.0 * MPI_Wtick();
|
|
Packit |
0848f5 |
MPI_Allreduce(MPI_IN_PLACE, &tickThreshold, 1, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD);
|
|
Packit |
0848f5 |
|
|
Packit |
0848f5 |
for (i = 1; i < argc; i++) {
|
|
Packit |
0848f5 |
if (strcmp(argv[i], "-put") == 0) {
|
|
Packit |
0848f5 |
if (rmaChoice == RMA_ALL)
|
|
Packit |
0848f5 |
rmaChoice = RMA_NONE;
|
|
Packit |
0848f5 |
rmaChoice |= RMA_PUT;
|
|
Packit |
0848f5 |
}
|
|
Packit |
0848f5 |
else if (strcmp(argv[i], "-acc") == 0) {
|
|
Packit |
0848f5 |
if (rmaChoice == RMA_ALL)
|
|
Packit |
0848f5 |
rmaChoice = RMA_NONE;
|
|
Packit |
0848f5 |
rmaChoice |= RMA_ACC;
|
|
Packit |
0848f5 |
}
|
|
Packit |
0848f5 |
else if (strcmp(argv[i], "-fence") == 0) {
|
|
Packit |
0848f5 |
if (syncChoice == SYNC_ALL)
|
|
Packit |
0848f5 |
syncChoice = SYNC_NONE;
|
|
Packit |
0848f5 |
syncChoice |= SYNC_FENCE;
|
|
Packit |
0848f5 |
}
|
|
Packit |
0848f5 |
else if (strcmp(argv[i], "-lock") == 0) {
|
|
Packit |
0848f5 |
if (syncChoice == SYNC_ALL)
|
|
Packit |
0848f5 |
syncChoice = SYNC_NONE;
|
|
Packit |
0848f5 |
syncChoice |= SYNC_LOCK;
|
|
Packit |
0848f5 |
}
|
|
Packit |
0848f5 |
else if (strcmp(argv[i], "-pscw") == 0) {
|
|
Packit |
0848f5 |
if (syncChoice == SYNC_ALL)
|
|
Packit |
0848f5 |
syncChoice = SYNC_NONE;
|
|
Packit |
0848f5 |
syncChoice |= SYNC_PSCW;
|
|
Packit |
0848f5 |
}
|
|
Packit |
0848f5 |
else if (strcmp(argv[i], "-maxsz") == 0) {
|
|
Packit |
0848f5 |
i++;
|
|
Packit |
0848f5 |
maxSz = atoi(argv[i]);
|
|
Packit |
0848f5 |
}
|
|
Packit |
0848f5 |
else if (strcmp(argv[i], "-maxcount") == 0) {
|
|
Packit |
0848f5 |
i++;
|
|
Packit |
0848f5 |
maxCount = atoi(argv[i]);
|
|
Packit |
0848f5 |
}
|
|
Packit |
0848f5 |
else if (strcmp(argv[i], "-barrier") == 0) {
|
|
Packit |
0848f5 |
barrierSync = 1;
|
|
Packit |
0848f5 |
}
|
|
Packit |
0848f5 |
else {
|
|
Packit |
0848f5 |
fprintf(stderr, "Unrecognized argument %s\n", argv[i]);
|
|
Packit |
0848f5 |
fprintf(stderr,
|
|
Packit |
0848f5 |
"%s [ -put ] [ -acc ] [ -lock ] [ -fence ] [ -pscw ] [ -barrier ] [ -maxsz msgsize ]\n",
|
|
Packit |
0848f5 |
argv[0]);
|
|
Packit |
0848f5 |
MPI_Abort(MPI_COMM_WORLD, 1);
|
|
Packit |
0848f5 |
}
|
|
Packit |
0848f5 |
}
|
|
Packit |
0848f5 |
|
|
Packit |
0848f5 |
MPI_Comm_rank(MPI_COMM_WORLD, &wrank);
|
|
Packit |
0848f5 |
MPI_Comm_size(MPI_COMM_WORLD, &wsize);
|
|
Packit |
0848f5 |
destRank = wrank + 1;
|
|
Packit |
0848f5 |
while (destRank >= wsize)
|
|
Packit |
0848f5 |
destRank = destRank - wsize;
|
|
Packit |
0848f5 |
srcRank = wrank - 1;
|
|
Packit |
0848f5 |
if (srcRank < 0)
|
|
Packit |
0848f5 |
srcRank += wsize;
|
|
Packit |
0848f5 |
|
|
Packit |
0848f5 |
/* Create groups for PSCW */
|
|
Packit |
0848f5 |
MPI_Comm_group(MPI_COMM_WORLD, &wgroup);
|
|
Packit |
0848f5 |
MPI_Group_incl(wgroup, 1, &destRank, &accessGroup);
|
|
Packit |
0848f5 |
MPI_Group_incl(wgroup, 1, &srcRank, &exposureGroup);
|
|
Packit |
0848f5 |
MPI_Group_free(&wgroup);
|
|
Packit |
0848f5 |
|
|
Packit |
0848f5 |
arraysize = maxSz * MAX_COUNT;
|
|
Packit |
0848f5 |
arraybuffer = (int *) malloc(arraysize * sizeof(int));
|
|
Packit |
0848f5 |
if (!arraybuffer) {
|
|
Packit |
0848f5 |
fprintf(stderr, "Unable to allocate %d words\n", arraysize);
|
|
Packit |
0848f5 |
MPI_Abort(MPI_COMM_WORLD, 1);
|
|
Packit |
0848f5 |
}
|
|
Packit |
0848f5 |
|
|
Packit |
0848f5 |
MPI_Win_create(arraybuffer, arraysize * sizeof(int), (int) sizeof(int),
|
|
Packit |
0848f5 |
MPI_INFO_NULL, MPI_COMM_WORLD, &win);
|
|
Packit |
0848f5 |
|
|
Packit |
0848f5 |
/* FIXME: we need a test on performance consistency.
|
|
Packit |
0848f5 |
* The test needs to have both a relative growth limit and
|
|
Packit |
0848f5 |
* an absolute limit.
|
|
Packit |
0848f5 |
*/
|
|
Packit |
0848f5 |
|
|
Packit |
0848f5 |
if (maxCount > MAX_COUNT) {
|
|
Packit |
0848f5 |
fprintf(stderr, "MaxCount must not exceed %d\n", MAX_COUNT);
|
|
Packit |
0848f5 |
MPI_Abort(MPI_COMM_WORLD, 1);
|
|
Packit |
0848f5 |
}
|
|
Packit |
0848f5 |
|
|
Packit |
0848f5 |
if ((syncChoice & SYNC_FENCE) && (rmaChoice & RMA_ACC)) {
|
|
Packit |
0848f5 |
for (sz = 1; sz <= maxSz; sz = sz + sz) {
|
|
Packit |
0848f5 |
if (wrank == 0)
|
|
Packit |
0848f5 |
printf("Accumulate with fence, %d elements\n", sz);
|
|
Packit |
0848f5 |
cnt = 1;
|
|
Packit |
0848f5 |
while (cnt <= maxCount) {
|
|
Packit |
0848f5 |
RunAccFence(win, destRank, cnt, sz, t);
|
|
Packit |
0848f5 |
if (wrank == 0) {
|
|
Packit |
0848f5 |
PrintResults(cnt, t);
|
|
Packit |
0848f5 |
}
|
|
Packit |
0848f5 |
cnt = 2 * cnt;
|
|
Packit |
0848f5 |
}
|
|
Packit |
0848f5 |
}
|
|
Packit |
0848f5 |
}
|
|
Packit |
0848f5 |
|
|
Packit |
0848f5 |
if ((syncChoice & SYNC_LOCK) && (rmaChoice & RMA_ACC)) {
|
|
Packit |
0848f5 |
for (sz = 1; sz <= maxSz; sz = sz + sz) {
|
|
Packit |
0848f5 |
if (wrank == 0)
|
|
Packit |
0848f5 |
printf("Accumulate with lock, %d elements\n", sz);
|
|
Packit |
0848f5 |
cnt = 1;
|
|
Packit |
0848f5 |
while (cnt <= maxCount) {
|
|
Packit |
0848f5 |
RunAccLock(win, destRank, cnt, sz, t);
|
|
Packit |
0848f5 |
if (wrank == 0) {
|
|
Packit |
0848f5 |
PrintResults(cnt, t);
|
|
Packit |
0848f5 |
}
|
|
Packit |
0848f5 |
cnt = 2 * cnt;
|
|
Packit |
0848f5 |
}
|
|
Packit |
0848f5 |
}
|
|
Packit |
0848f5 |
}
|
|
Packit |
0848f5 |
|
|
Packit |
0848f5 |
if ((syncChoice & SYNC_FENCE) && (rmaChoice & RMA_PUT)) {
|
|
Packit |
0848f5 |
for (sz = 1; sz <= maxSz; sz = sz + sz) {
|
|
Packit |
0848f5 |
if (wrank == 0)
|
|
Packit |
0848f5 |
printf("Put with fence, %d elements\n", sz);
|
|
Packit |
0848f5 |
cnt = 1;
|
|
Packit |
0848f5 |
while (cnt <= maxCount) {
|
|
Packit |
0848f5 |
RunPutFence(win, destRank, cnt, sz, t);
|
|
Packit |
0848f5 |
if (wrank == 0) {
|
|
Packit |
0848f5 |
PrintResults(cnt, t);
|
|
Packit |
0848f5 |
}
|
|
Packit |
0848f5 |
cnt = 2 * cnt;
|
|
Packit |
0848f5 |
}
|
|
Packit |
0848f5 |
}
|
|
Packit |
0848f5 |
}
|
|
Packit |
0848f5 |
|
|
Packit |
0848f5 |
if ((syncChoice & SYNC_LOCK) && (rmaChoice & RMA_PUT)) {
|
|
Packit |
0848f5 |
for (sz = 1; sz <= maxSz; sz = sz + sz) {
|
|
Packit |
0848f5 |
if (wrank == 0)
|
|
Packit |
0848f5 |
printf("Put with lock, %d elements\n", sz);
|
|
Packit |
0848f5 |
cnt = 1;
|
|
Packit |
0848f5 |
while (cnt <= maxCount) {
|
|
Packit |
0848f5 |
RunPutLock(win, destRank, cnt, sz, t);
|
|
Packit |
0848f5 |
if (wrank == 0) {
|
|
Packit |
0848f5 |
PrintResults(cnt, t);
|
|
Packit |
0848f5 |
}
|
|
Packit |
0848f5 |
cnt = 2 * cnt;
|
|
Packit |
0848f5 |
}
|
|
Packit |
0848f5 |
}
|
|
Packit |
0848f5 |
}
|
|
Packit |
0848f5 |
|
|
Packit |
0848f5 |
if ((syncChoice & SYNC_PSCW) && (rmaChoice & RMA_PUT)) {
|
|
Packit |
0848f5 |
for (sz = 1; sz <= maxSz; sz = sz + sz) {
|
|
Packit |
0848f5 |
if (wrank == 0)
|
|
Packit |
0848f5 |
printf("Put with pscw, %d elements\n", sz);
|
|
Packit |
0848f5 |
cnt = 1;
|
|
Packit |
0848f5 |
while (cnt <= maxCount) {
|
|
Packit |
0848f5 |
RunPutPSCW(win, destRank, cnt, sz, exposureGroup, accessGroup, t);
|
|
Packit |
0848f5 |
if (wrank == 0) {
|
|
Packit |
0848f5 |
PrintResults(cnt, t);
|
|
Packit |
0848f5 |
}
|
|
Packit |
0848f5 |
cnt = 2 * cnt;
|
|
Packit |
0848f5 |
}
|
|
Packit |
0848f5 |
}
|
|
Packit |
0848f5 |
}
|
|
Packit |
0848f5 |
|
|
Packit |
0848f5 |
if ((syncChoice & SYNC_PSCW) && (rmaChoice & RMA_ACC)) {
|
|
Packit |
0848f5 |
for (sz = 1; sz <= maxSz; sz = sz + sz) {
|
|
Packit |
0848f5 |
if (wrank == 0)
|
|
Packit |
0848f5 |
printf("Accumulate with pscw, %d elements\n", sz);
|
|
Packit |
0848f5 |
cnt = 1;
|
|
Packit |
0848f5 |
while (cnt <= maxCount) {
|
|
Packit |
0848f5 |
RunAccPSCW(win, destRank, cnt, sz, exposureGroup, accessGroup, t);
|
|
Packit |
0848f5 |
if (wrank == 0) {
|
|
Packit |
0848f5 |
PrintResults(cnt, t);
|
|
Packit |
0848f5 |
}
|
|
Packit |
0848f5 |
cnt = 2 * cnt;
|
|
Packit |
0848f5 |
}
|
|
Packit |
0848f5 |
}
|
|
Packit |
0848f5 |
}
|
|
Packit |
0848f5 |
|
|
Packit |
0848f5 |
MPI_Win_free(&win);
|
|
Packit |
0848f5 |
|
|
Packit |
0848f5 |
MPI_Group_free(&accessGroup);
|
|
Packit |
0848f5 |
MPI_Group_free(&exposureGroup);
|
|
Packit |
0848f5 |
|
|
Packit |
0848f5 |
MPI_Finalize();
|
|
Packit |
0848f5 |
return 0;
|
|
Packit |
0848f5 |
}
|
|
Packit |
0848f5 |
|
|
Packit |
0848f5 |
|
|
Packit |
0848f5 |
/* Time cnt MPI_Accumulate (MPI_SUM) calls of sz ints each, bracketed by
 * MPI_Win_fence.
 *
 * win      - window to operate on
 * destRank - target rank of every accumulate
 * cnt      - number of accumulates per run; the i-th one targets
 *            displacement i * sz
 * sz       - number of ints per accumulate
 * t        - receives the time stamps of the MAX_RUNS repetitions
 *
 * The job is aborted if the origin buffer cannot be allocated. */
void RunAccFence(MPI_Win win, int destRank, int cnt, int sz, timing t[])
{
    int k, i, j;
    int nOrigin = (sz > 0) ? sz : 1;
    int *origin;

    /* Every operation reads sz ints from the origin buffer, so the buffer
     * must really hold that many elements (all set to 1). */
    origin = malloc((size_t) nOrigin * sizeof *origin);
    if (!origin) {
        fprintf(stderr, "Unable to allocate %d words\n", nOrigin);
        MPI_Abort(MPI_COMM_WORLD, 1);
        return;
    }
    for (i = 0; i < nOrigin; i++)
        origin[i] = 1;

    for (k = 0; k < MAX_RUNS; k++) {
        MPI_Barrier(MPI_COMM_WORLD);
        MPI_Win_fence(0, win);
        j = 0;
        t[k].startOp = MPI_Wtime();
        for (i = 0; i < cnt; i++) {
            MPI_Accumulate(origin, sz, MPI_INT, destRank, j, sz, MPI_INT, MPI_SUM, win);
            j += sz;
        }
        t[k].endOp = MPI_Wtime();
        if (barrierSync)
            MPI_Barrier(MPI_COMM_WORLD);
        MPI_Win_fence(0, win);
        t[k].endSync = MPI_Wtime();
    }

    free(origin);
}
|
|
Packit |
0848f5 |
|
|
Packit |
0848f5 |
/* Time cnt MPI_Accumulate (MPI_SUM) calls of sz ints each, issued while
 * holding a shared lock (MPI_Win_lock / MPI_Win_unlock) on destRank.
 *
 * win      - window to operate on
 * destRank - target rank of the lock and of every accumulate
 * cnt      - number of accumulates per run; the i-th one targets
 *            displacement i * sz
 * sz       - number of ints per accumulate
 * t        - receives the time stamps of the MAX_RUNS repetitions
 *
 * The job is aborted if the origin buffer cannot be allocated. */
void RunAccLock(MPI_Win win, int destRank, int cnt, int sz, timing t[])
{
    int k, i, j;
    int nOrigin = (sz > 0) ? sz : 1;
    int *origin;

    /* Every operation reads sz ints from the origin buffer, so the buffer
     * must really hold that many elements (all set to 1). */
    origin = malloc((size_t) nOrigin * sizeof *origin);
    if (!origin) {
        fprintf(stderr, "Unable to allocate %d words\n", nOrigin);
        MPI_Abort(MPI_COMM_WORLD, 1);
        return;
    }
    for (i = 0; i < nOrigin; i++)
        origin[i] = 1;

    for (k = 0; k < MAX_RUNS; k++) {
        MPI_Barrier(MPI_COMM_WORLD);
        MPI_Win_lock(MPI_LOCK_SHARED, destRank, 0, win);
        j = 0;
        t[k].startOp = MPI_Wtime();
        for (i = 0; i < cnt; i++) {
            MPI_Accumulate(origin, sz, MPI_INT, destRank, j, sz, MPI_INT, MPI_SUM, win);
            j += sz;
        }
        t[k].endOp = MPI_Wtime();
        if (barrierSync)
            MPI_Barrier(MPI_COMM_WORLD);
        MPI_Win_unlock(destRank, win);
        t[k].endSync = MPI_Wtime();
    }

    free(origin);
}
|
|
Packit |
0848f5 |
|
|
Packit |
0848f5 |
/* Time cnt MPI_Put calls of sz ints each, bracketed by MPI_Win_fence.
 *
 * win      - window to operate on
 * destRank - target rank of every put
 * cnt      - number of puts per run; the i-th one targets displacement
 *            i * sz
 * sz       - number of ints per put
 * t        - receives the time stamps of the MAX_RUNS repetitions
 *
 * The job is aborted if the origin buffer cannot be allocated. */
void RunPutFence(MPI_Win win, int destRank, int cnt, int sz, timing t[])
{
    int k, i, j;
    int nOrigin = (sz > 0) ? sz : 1;
    int *origin;

    /* Every operation reads sz ints from the origin buffer, so the buffer
     * must really hold that many elements (all set to 1). */
    origin = malloc((size_t) nOrigin * sizeof *origin);
    if (!origin) {
        fprintf(stderr, "Unable to allocate %d words\n", nOrigin);
        MPI_Abort(MPI_COMM_WORLD, 1);
        return;
    }
    for (i = 0; i < nOrigin; i++)
        origin[i] = 1;

    for (k = 0; k < MAX_RUNS; k++) {
        MPI_Barrier(MPI_COMM_WORLD);
        MPI_Win_fence(0, win);
        j = 0;
        t[k].startOp = MPI_Wtime();
        for (i = 0; i < cnt; i++) {
            MPI_Put(origin, sz, MPI_INT, destRank, j, sz, MPI_INT, win);
            j += sz;
        }
        t[k].endOp = MPI_Wtime();
        if (barrierSync)
            MPI_Barrier(MPI_COMM_WORLD);
        MPI_Win_fence(0, win);
        t[k].endSync = MPI_Wtime();
    }

    free(origin);
}
|
|
Packit |
0848f5 |
|
|
Packit |
0848f5 |
/* Time cnt MPI_Put calls of sz ints each, issued while holding a shared
 * lock (MPI_Win_lock / MPI_Win_unlock) on destRank.
 *
 * win      - window to operate on
 * destRank - target rank of the lock and of every put
 * cnt      - number of puts per run; the i-th one targets displacement
 *            i * sz
 * sz       - number of ints per put
 * t        - receives the time stamps of the MAX_RUNS repetitions
 *
 * The job is aborted if the origin buffer cannot be allocated. */
void RunPutLock(MPI_Win win, int destRank, int cnt, int sz, timing t[])
{
    int k, i, j;
    int nOrigin = (sz > 0) ? sz : 1;
    int *origin;

    /* Every operation reads sz ints from the origin buffer, so the buffer
     * must really hold that many elements (all set to 1). */
    origin = malloc((size_t) nOrigin * sizeof *origin);
    if (!origin) {
        fprintf(stderr, "Unable to allocate %d words\n", nOrigin);
        MPI_Abort(MPI_COMM_WORLD, 1);
        return;
    }
    for (i = 0; i < nOrigin; i++)
        origin[i] = 1;

    for (k = 0; k < MAX_RUNS; k++) {
        MPI_Barrier(MPI_COMM_WORLD);
        MPI_Win_lock(MPI_LOCK_SHARED, destRank, 0, win);
        j = 0;
        t[k].startOp = MPI_Wtime();
        for (i = 0; i < cnt; i++) {
            MPI_Put(origin, sz, MPI_INT, destRank, j, sz, MPI_INT, win);
            j += sz;
        }
        t[k].endOp = MPI_Wtime();
        if (barrierSync)
            MPI_Barrier(MPI_COMM_WORLD);
        MPI_Win_unlock(destRank, win);
        t[k].endSync = MPI_Wtime();
    }

    free(origin);
}
|
|
Packit |
0848f5 |
|
|
Packit |
0848f5 |
/* Time cnt MPI_Put calls of sz ints each using post/start/complete/wait
 * synchronization.
 *
 * win           - window to operate on
 * destRank      - target rank of every put
 * cnt           - number of puts per run; the i-th one targets
 *                 displacement i * sz
 * sz            - number of ints per put
 * exposureGroup - group passed to MPI_Win_post
 * accessGroup   - group passed to MPI_Win_start
 * t             - receives the time stamps of the MAX_RUNS repetitions
 *
 * The job is aborted if the origin buffer cannot be allocated. */
void RunPutPSCW(MPI_Win win, int destRank, int cnt, int sz,
                MPI_Group exposureGroup, MPI_Group accessGroup, timing t[])
{
    int k, i, j;
    int nOrigin = (sz > 0) ? sz : 1;
    int *origin;

    /* Every operation reads sz ints from the origin buffer, so the buffer
     * must really hold that many elements (all set to 1). */
    origin = malloc((size_t) nOrigin * sizeof *origin);
    if (!origin) {
        fprintf(stderr, "Unable to allocate %d words\n", nOrigin);
        MPI_Abort(MPI_COMM_WORLD, 1);
        return;
    }
    for (i = 0; i < nOrigin; i++)
        origin[i] = 1;

    for (k = 0; k < MAX_RUNS; k++) {
        MPI_Barrier(MPI_COMM_WORLD);
        MPI_Win_post(exposureGroup, 0, win);
        MPI_Win_start(accessGroup, 0, win);
        j = 0;
        t[k].startOp = MPI_Wtime();
        for (i = 0; i < cnt; i++) {
            MPI_Put(origin, sz, MPI_INT, destRank, j, sz, MPI_INT, win);
            j += sz;
        }
        t[k].endOp = MPI_Wtime();
        if (barrierSync)
            MPI_Barrier(MPI_COMM_WORLD);
        MPI_Win_complete(win);
        MPI_Win_wait(win);
        t[k].endSync = MPI_Wtime();
    }

    free(origin);
}
|
|
Packit |
0848f5 |
|
|
Packit |
0848f5 |
/* Time cnt MPI_Accumulate (MPI_SUM) calls of sz ints each using
 * post/start/complete/wait synchronization.
 *
 * win           - window to operate on
 * destRank      - target rank of every accumulate
 * cnt           - number of accumulates per run; the i-th one targets
 *                 displacement i * sz
 * sz            - number of ints per accumulate
 * exposureGroup - group passed to MPI_Win_post
 * accessGroup   - group passed to MPI_Win_start
 * t             - receives the time stamps of the MAX_RUNS repetitions
 *
 * The job is aborted if the origin buffer cannot be allocated. */
void RunAccPSCW(MPI_Win win, int destRank, int cnt, int sz,
                MPI_Group exposureGroup, MPI_Group accessGroup, timing t[])
{
    int k, i, j;
    int nOrigin = (sz > 0) ? sz : 1;
    int *origin;

    /* Every operation reads sz ints from the origin buffer, so the buffer
     * must really hold that many elements (all set to 1). */
    origin = malloc((size_t) nOrigin * sizeof *origin);
    if (!origin) {
        fprintf(stderr, "Unable to allocate %d words\n", nOrigin);
        MPI_Abort(MPI_COMM_WORLD, 1);
        return;
    }
    for (i = 0; i < nOrigin; i++)
        origin[i] = 1;

    for (k = 0; k < MAX_RUNS; k++) {
        MPI_Barrier(MPI_COMM_WORLD);
        MPI_Win_post(exposureGroup, 0, win);
        MPI_Win_start(accessGroup, 0, win);
        j = 0;
        t[k].startOp = MPI_Wtime();
        for (i = 0; i < cnt; i++) {
            MPI_Accumulate(origin, sz, MPI_INT, destRank, j, sz, MPI_INT, MPI_SUM, win);
            j += sz;
        }
        t[k].endOp = MPI_Wtime();
        if (barrierSync)
            MPI_Barrier(MPI_COMM_WORLD);
        MPI_Win_complete(win);
        MPI_Win_wait(win);
        t[k].endSync = MPI_Wtime();
    }

    free(origin);
}
|
|
Packit |
0848f5 |
|
|
Packit |
0848f5 |
/* Report the timings gathered for one operation count.
 *
 * cnt - number of RMA operations issued in each run
 * t   - time stamps of the MAX_RUNS runs
 *
 * Nothing is printed unless verbose is set.  The line written to stdout
 * holds, tab separated: count, op time, sync time, op time per operation,
 * sync time per operation, and operations per second of sync time (0 when
 * the sync time is not positive). */
void PrintResults(int cnt, timing t[])
{
    /* Use the minimum times because they are more stable - if timing
     * accuracy is an issue, use the min over multiple trials */
    double bestOp = 1e10, bestSync = 1e10;
    long opsPerSec = 0;
    int run;

    for (run = 0; run < MAX_RUNS; run++) {
        const double opTime = t[run].endOp - t[run].startOp;
        const double syncTime = t[run].endSync - t[run].endOp;

        bestOp = (opTime < bestOp) ? opTime : bestOp;
        bestSync = (syncTime < bestSync) ? syncTime : bestSync;
    }

    if (!verbose)
        return;

    if (bestSync > 0)
        opsPerSec = (long) (cnt / bestSync);

    /* count, op, sync, op/each, sync/each, rate */
    printf("%d\t%e\t%e\t%e\t%e\t%ld\n", cnt, bestOp, bestSync, bestOp / cnt, bestSync / cnt,
           opsPerSec);
}
|