|
Packit |
0848f5 |
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
/*
 *  (C) 2012 by Argonne National Laboratory.
 *      See COPYRIGHT in top-level directory.
 */
|
|
Packit |
0848f5 |
|
|
Packit |
0848f5 |
/*
 * Tests that packing a struct datatype that contains a vector type performs
 * comparably to packing the vector type directly and to a hand-written
 * packing loop, i.e., that the implementation exploits the vector type
 * inside the struct.
 *
 * If PACK_IS_NATIVE is defined, MPI_Pack stores exactly the same bytes as the
 * user would pack manually; in that case, the packed buffers are also
 * compared as a consistency check.
 */
|
|
Packit |
0848f5 |
|
|
Packit |
0848f5 |
#include "mpi.h"
|
|
Packit |
0848f5 |
#include <stdio.h>
|
|
Packit |
0848f5 |
#include <stdlib.h>
|
|
Packit |
0848f5 |
#include <string.h>
|
|
Packit |
0848f5 |
|
|
Packit |
0848f5 |
#include "mpitestconf.h"
|
|
Packit |
0848f5 |
#ifdef HAVE_STDINT_H
|
|
Packit |
0848f5 |
#include <stdint.h>
|
|
Packit |
0848f5 |
#endif
|
|
Packit |
0848f5 |
|
|
Packit |
0848f5 |
#ifdef MPICH
|
|
Packit |
0848f5 |
/* MPICH (as of 6/2012) packs the native bytes */
|
|
Packit |
0848f5 |
#define PACK_IS_NATIVE
|
|
Packit |
0848f5 |
#endif
|
|
Packit |
0848f5 |
|
|
Packit |
0848f5 |
|
|
Packit |
0848f5 |
/* Set to nonzero to print diagnostic output from main: the vsource address
 * check, the datatype internals (when PRINT_DATATYPE_INTERNALS is defined),
 * and the bytes packed together with the measured pack times. */
static int verbose = 0;
|
|
Packit |
0848f5 |
|
|
Packit |
0848f5 |
int main(int argc, char **argv)
|
|
Packit |
0848f5 |
{
|
|
Packit |
0848f5 |
int vcount, vstride;
|
|
Packit |
0848f5 |
int32_t counts[2];
|
|
Packit |
0848f5 |
int v2stride, typesize, packsize, i, position, errs = 0;
|
|
Packit |
0848f5 |
double *outbuf, *outbuf2;
|
|
Packit |
0848f5 |
double *vsource;
|
|
Packit |
0848f5 |
MPI_Datatype vtype, stype;
|
|
Packit |
0848f5 |
MPI_Aint lb, extent;
|
|
Packit |
0848f5 |
double t0, t1;
|
|
Packit |
0848f5 |
double tspack, tvpack, tmanual;
|
|
Packit |
0848f5 |
int ntry;
|
|
Packit |
0848f5 |
int blocklengths[2];
|
|
Packit |
0848f5 |
MPI_Aint displacements[2];
|
|
Packit |
0848f5 |
MPI_Datatype typesArray[2];
|
|
Packit |
0848f5 |
|
|
Packit |
0848f5 |
MPI_Init(&argc, &argv);
|
|
Packit |
0848f5 |
|
|
Packit |
0848f5 |
/* Create a struct consisting of a two 32-bit ints, followed by a
|
|
Packit |
0848f5 |
* vector of stride 3 but count 128k (less than a few MB of data area) */
|
|
Packit |
0848f5 |
vcount = 128000;
|
|
Packit |
0848f5 |
vstride = 3;
|
|
Packit |
0848f5 |
MPI_Type_vector(vcount, 1, vstride, MPI_DOUBLE, &vtype);
|
|
Packit |
0848f5 |
|
|
Packit |
0848f5 |
vsource = (double *) malloc((vcount + 1) * (vstride + 1) * sizeof(double));
|
|
Packit |
0848f5 |
if (!vsource) {
|
|
Packit |
0848f5 |
fprintf(stderr, "Unable to allocate vsource\n");
|
|
Packit |
0848f5 |
MPI_Abort(MPI_COMM_WORLD, 1);
|
|
Packit |
0848f5 |
}
|
|
Packit |
0848f5 |
for (i = 0; i < vcount * vstride; i++) {
|
|
Packit |
0848f5 |
vsource[i] = i;
|
|
Packit |
0848f5 |
}
|
|
Packit |
0848f5 |
blocklengths[0] = 2;
|
|
Packit |
0848f5 |
MPI_Get_address(&counts[0], &displacements[0]);
|
|
Packit |
0848f5 |
blocklengths[1] = 1;
|
|
Packit |
0848f5 |
MPI_Get_address(vsource, &displacements[1]);
|
|
Packit |
0848f5 |
if (verbose) {
|
|
Packit |
0848f5 |
printf("%p = %p?\n", vsource, (void *) displacements[1]);
|
|
Packit |
0848f5 |
}
|
|
Packit |
0848f5 |
typesArray[0] = MPI_INT32_T;
|
|
Packit |
0848f5 |
typesArray[1] = vtype;
|
|
Packit |
0848f5 |
MPI_Type_create_struct(2, blocklengths, displacements, typesArray, &stype);
|
|
Packit |
0848f5 |
MPI_Type_commit(&stype);
|
|
Packit |
0848f5 |
MPI_Type_commit(&vtype);
|
|
Packit |
0848f5 |
|
|
Packit |
0848f5 |
#if defined(MPICH) && defined(PRINT_DATATYPE_INTERNALS)
|
|
Packit |
0848f5 |
/* To use MPIDU_Datatype_debug to print the datatype internals,
|
|
Packit |
0848f5 |
* you must configure MPICH with --enable-g=log */
|
|
Packit |
0848f5 |
if (verbose) {
|
|
Packit |
0848f5 |
printf("Original struct datatype:\n");
|
|
Packit |
0848f5 |
MPIDU_Datatype_debug(stype, 10);
|
|
Packit |
0848f5 |
}
|
|
Packit |
0848f5 |
#endif
|
|
Packit |
0848f5 |
|
|
Packit |
0848f5 |
MPI_Pack_size(1, stype, MPI_COMM_WORLD, &packsize);
|
|
Packit |
0848f5 |
outbuf = (double *) malloc(packsize);
|
|
Packit |
0848f5 |
outbuf2 = (double *) malloc(packsize);
|
|
Packit |
0848f5 |
if (!outbuf) {
|
|
Packit |
0848f5 |
fprintf(stderr, "Unable to allocate %ld for outbuf\n", (long) packsize);
|
|
Packit |
0848f5 |
MPI_Abort(MPI_COMM_WORLD, 1);
|
|
Packit |
0848f5 |
}
|
|
Packit |
0848f5 |
if (!outbuf2) {
|
|
Packit |
0848f5 |
fprintf(stderr, "Unable to allocate %ld for outbuf2\n", (long) packsize);
|
|
Packit |
0848f5 |
MPI_Abort(MPI_COMM_WORLD, 1);
|
|
Packit |
0848f5 |
}
|
|
Packit |
0848f5 |
position = 0;
|
|
Packit |
0848f5 |
/* Warm up the code and data */
|
|
Packit |
0848f5 |
MPI_Pack(MPI_BOTTOM, 1, stype, outbuf, packsize, &position, MPI_COMM_WORLD);
|
|
Packit |
0848f5 |
|
|
Packit |
0848f5 |
tspack = 1e12;
|
|
Packit |
0848f5 |
for (ntry = 0; ntry < 5; ntry++) {
|
|
Packit |
0848f5 |
position = 0;
|
|
Packit |
0848f5 |
t0 = MPI_Wtime();
|
|
Packit |
0848f5 |
MPI_Pack(MPI_BOTTOM, 1, stype, outbuf, packsize, &position, MPI_COMM_WORLD);
|
|
Packit |
0848f5 |
t1 = MPI_Wtime() - t0;
|
|
Packit |
0848f5 |
if (t1 < tspack)
|
|
Packit |
0848f5 |
tspack = t1;
|
|
Packit |
0848f5 |
}
|
|
Packit |
0848f5 |
MPI_Type_free(&stype);
|
|
Packit |
0848f5 |
|
|
Packit |
0848f5 |
/* An equivalent packing, using the 2 ints and the vector separately */
|
|
Packit |
0848f5 |
tvpack = 1e12;
|
|
Packit |
0848f5 |
for (ntry = 0; ntry < 5; ntry++) {
|
|
Packit |
0848f5 |
position = 0;
|
|
Packit |
0848f5 |
t0 = MPI_Wtime();
|
|
Packit |
0848f5 |
MPI_Pack(counts, 2, MPI_INT32_T, outbuf, packsize, &position, MPI_COMM_WORLD);
|
|
Packit |
0848f5 |
MPI_Pack(vsource, 1, vtype, outbuf, packsize, &position, MPI_COMM_WORLD);
|
|
Packit |
0848f5 |
t1 = MPI_Wtime() - t0;
|
|
Packit |
0848f5 |
if (t1 < tvpack)
|
|
Packit |
0848f5 |
tvpack = t1;
|
|
Packit |
0848f5 |
}
|
|
Packit |
0848f5 |
MPI_Type_free(&vtype);
|
|
Packit |
0848f5 |
|
|
Packit |
0848f5 |
/* Note that we exploit the fact that the vector type contains vblock
|
|
Packit |
0848f5 |
* instances of a contiguous type of size 24, or a single block of 24*vblock
|
|
Packit |
0848f5 |
* bytes.
|
|
Packit |
0848f5 |
*/
|
|
Packit |
0848f5 |
tmanual = 1e12;
|
|
Packit |
0848f5 |
for (ntry = 0; ntry < 5; ntry++) {
|
|
Packit |
0848f5 |
const double *restrict ppe = (const double *) vsource;
|
|
Packit |
0848f5 |
double *restrict ppo = outbuf2;
|
|
Packit |
0848f5 |
int j;
|
|
Packit |
0848f5 |
t0 = MPI_Wtime();
|
|
Packit |
0848f5 |
position = 0;
|
|
Packit |
0848f5 |
*(int32_t *) ppo = counts[0];
|
|
Packit |
0848f5 |
*(((int32_t *) ppo) + 1) = counts[1];
|
|
Packit |
0848f5 |
ppo++;
|
|
Packit |
0848f5 |
/* Some hand optimization because this file is not normally
|
|
Packit |
0848f5 |
* compiled with optimization by the test suite */
|
|
Packit |
0848f5 |
j = vcount;
|
|
Packit |
0848f5 |
while (j) {
|
|
Packit |
0848f5 |
*ppo++ = *ppe;
|
|
Packit |
0848f5 |
ppe += vstride;
|
|
Packit |
0848f5 |
*ppo++ = *ppe;
|
|
Packit |
0848f5 |
ppe += vstride;
|
|
Packit |
0848f5 |
*ppo++ = *ppe;
|
|
Packit |
0848f5 |
ppe += vstride;
|
|
Packit |
0848f5 |
*ppo++ = *ppe;
|
|
Packit |
0848f5 |
ppe += vstride;
|
|
Packit |
0848f5 |
j -= 4;
|
|
Packit |
0848f5 |
}
|
|
Packit |
0848f5 |
position += (1 + vcount);
|
|
Packit |
0848f5 |
position *= sizeof(double);
|
|
Packit |
0848f5 |
t1 = MPI_Wtime() - t0;
|
|
Packit |
0848f5 |
if (t1 < tmanual)
|
|
Packit |
0848f5 |
tmanual = t1;
|
|
Packit |
0848f5 |
|
|
Packit |
0848f5 |
/* Check on correctness */
|
|
Packit |
0848f5 |
#ifdef PACK_IS_NATIVE
|
|
Packit |
0848f5 |
if (memcmp(outbuf, outbuf2, position) != 0) {
|
|
Packit |
0848f5 |
printf("Panic(manual) - pack buffers differ\n");
|
|
Packit |
0848f5 |
for (j = 0; j < 8; j++) {
|
|
Packit |
0848f5 |
printf("%d: %llx\t%llx\n", j, (long long unsigned) outbuf[j],
|
|
Packit |
0848f5 |
(long long unsigned) outbuf2[j]);
|
|
Packit |
0848f5 |
}
|
|
Packit |
0848f5 |
}
|
|
Packit |
0848f5 |
#endif
|
|
Packit |
0848f5 |
}
|
|
Packit |
0848f5 |
|
|
Packit |
0848f5 |
if (verbose) {
|
|
Packit |
0848f5 |
printf("Bytes packed = %d\n", position);
|
|
Packit |
0848f5 |
printf("MPI_Pack time = %e (struct), = %e (vector), manual pack time = %e\n",
|
|
Packit |
0848f5 |
tspack, tvpack, tmanual);
|
|
Packit |
0848f5 |
}
|
|
Packit |
0848f5 |
|
|
Packit |
0848f5 |
if (4 * tmanual < tspack) {
|
|
Packit |
0848f5 |
errs++;
|
|
Packit |
0848f5 |
printf("MPI_Pack time using struct with vector = %e, manual pack time = %e\n", tspack,
|
|
Packit |
0848f5 |
tmanual);
|
|
Packit |
0848f5 |
printf("MPI_Pack time should be less than 4 times the manual time\n");
|
|
Packit |
0848f5 |
printf("For most informative results, be sure to compile this test with optimization\n");
|
|
Packit |
0848f5 |
}
|
|
Packit |
0848f5 |
if (4 * tmanual < tvpack) {
|
|
Packit |
0848f5 |
errs++;
|
|
Packit |
0848f5 |
printf("MPI_Pack using vector = %e, manual pack time = %e\n", tvpack, tmanual);
|
|
Packit |
0848f5 |
printf("MPI_Pack time should be less than 4 times the manual time\n");
|
|
Packit |
0848f5 |
printf("For most informative results, be sure to compile this test with optimization\n");
|
|
Packit |
0848f5 |
}
|
|
Packit |
0848f5 |
if (4 * tvpack < tspack) {
|
|
Packit |
0848f5 |
errs++;
|
|
Packit |
0848f5 |
printf("MPI_Pack using a vector = %e, using a struct with vector = %e\n", tvpack, tspack);
|
|
Packit |
0848f5 |
printf
|
|
Packit |
0848f5 |
("MPI_Pack time using vector should be about the same as the struct containing the vector\n");
|
|
Packit |
0848f5 |
printf("For most informative results, be sure to compile this test with optimization\n");
|
|
Packit |
0848f5 |
}
|
|
Packit |
0848f5 |
|
|
Packit |
0848f5 |
if (errs) {
|
|
Packit |
0848f5 |
printf(" Found %d errors\n", errs);
|
|
Packit |
0848f5 |
}
|
|
Packit |
0848f5 |
else {
|
|
Packit |
0848f5 |
printf(" No Errors\n");
|
|
Packit |
0848f5 |
}
|
|
Packit |
0848f5 |
|
|
Packit |
0848f5 |
free(vsource);
|
|
Packit |
0848f5 |
free(outbuf);
|
|
Packit |
0848f5 |
free(outbuf2);
|
|
Packit |
0848f5 |
|
|
Packit |
0848f5 |
MPI_Finalize();
|
|
Packit |
0848f5 |
return 0;
|
|
Packit |
0848f5 |
}
|