/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ /* * Changes to this example * (C) 2001 by Argonne National Laboratory. * See COPYRIGHT in top-level directory. */ /* * This example is taken from MPI-The complete reference, Vol 1, * pages 222-224. * * Lines after the "--CUT HERE--" were added to make this into a complete * test program. */ /* Specify the maximum number of errors to report. */ #define MAX_ERRORS 10 #include "mpi.h" #include "mpitest.h" #include #include #define MAX_SIZE 64 MPI_Datatype transpose_type(int M, int m, int n, MPI_Datatype type); MPI_Datatype submatrix_type(int N, int m, int n, MPI_Datatype type); void Transpose(float *localA, float *localB, int M, int N, MPI_Comm comm); void Transpose(float *localA, float *localB, int M, int N, MPI_Comm comm) /* transpose MxN matrix A that is block distributed (1-D) on processes of comm onto block distributed matrix B */ { int i, j, extent, myrank, p, n[2], m[2]; int lasti, lastj; int *sendcounts, *recvcounts; int *sdispls, *rdispls; MPI_Datatype xtype[2][2], stype[2][2], *sendtypes, *recvtypes; MTestPrintfMsg(2, "M = %d, N = %d\n", M, N); /* compute parameters */ MPI_Comm_size(comm, &p); MPI_Comm_rank(comm, &myrank); extent = sizeof(float); /* allocate arrays */ sendcounts = (int *) malloc(p * sizeof(int)); recvcounts = (int *) malloc(p * sizeof(int)); sdispls = (int *) malloc(p * sizeof(int)); rdispls = (int *) malloc(p * sizeof(int)); sendtypes = (MPI_Datatype *) malloc(p * sizeof(MPI_Datatype)); recvtypes = (MPI_Datatype *) malloc(p * sizeof(MPI_Datatype)); /* compute block sizes */ m[0] = M / p; m[1] = M - (p - 1) * (M / p); n[0] = N / p; n[1] = N - (p - 1) * (N / p); /* compute types */ for (i = 0; i <= 1; i++) for (j = 0; j <= 1; j++) { xtype[i][j] = transpose_type(N, m[i], n[j], MPI_FLOAT); stype[i][j] = submatrix_type(M, m[i], n[j], MPI_FLOAT); } /* prepare collective operation arguments */ lasti = myrank == p - 1; for (j = 0; j < p; j++) { lastj = j == p - 1; sendcounts[j] = 1; sdispls[j] = j * n[0] * extent; sendtypes[j] = xtype[lasti][lastj]; recvcounts[j] = 1; rdispls[j] = j * m[0] * extent; recvtypes[j] = stype[lastj][lasti]; } /* communicate */ MTestPrintfMsg(2, "Begin Alltoallw...\n"); /* -- Note that the book incorrectly uses &localA and &localB * as arguments to MPI_Alltoallw */ MPI_Alltoallw(localA, sendcounts, sdispls, sendtypes, localB, recvcounts, rdispls, recvtypes, comm); MTestPrintfMsg(2, "Done with Alltoallw\n"); /* Free buffers */ free(sendcounts); free(recvcounts); free(sdispls); free(rdispls); free(sendtypes); free(recvtypes); /* Free datatypes */ for (i = 0; i <= 1; i++) for (j = 0; j <= 1; j++) { MPI_Type_free(&xtype[i][j]); MPI_Type_free(&stype[i][j]); } } /* Define an n x m submatrix in a n x M local matrix (this is the destination in the transpose matrix */ MPI_Datatype submatrix_type(int M, int m, int n, MPI_Datatype type) /* computes a datatype for an mxn submatrix within an MxN matrix with entries of type type */ { /* MPI_Datatype subrow; */ MPI_Datatype submatrix; /* The book, MPI: The Complete Reference, has the wrong type constructor * here. Since the stride in the vector type is relative to the input * type, the stride in the book's code is n times as long as is intended. * Since n may not exactly divide N, it is better to simply use the * blocklength argument in Type_vector */ /* * MPI_Type_contiguous(n, type, &subrow); * MPI_Type_vector(m, 1, N, subrow, &submatrix); */ MPI_Type_vector(n, m, M, type, &submatrix); MPI_Type_commit(&submatrix); /* Add a consistency test: the size of submatrix should be * n * m * sizeof(type) and the extent should be ((n-1)*M+m) * sizeof(type) */ { int tsize; MPI_Aint textent, lb; MPI_Type_size(type, &tsize); MPI_Type_get_extent(submatrix, &lb, &textent); if (textent != tsize * (M * (n - 1) + m)) { fprintf(stderr, "Submatrix extent is %ld, expected %ld (%d,%d,%d)\n", (long) textent, (long) (tsize * (M * (n - 1) + m)), M, n, m); } } return (submatrix); } /* Extract an m x n submatrix within an m x N matrix and transpose it. Assume storage by rows; the defined datatype accesses by columns */ MPI_Datatype transpose_type(int N, int m, int n, MPI_Datatype type) /* computes a datatype for the transpose of an mxn matrix with entries of type type */ { MPI_Datatype subrow, subrow1, submatrix; MPI_Aint lb, extent; MPI_Type_vector(m, 1, N, type, &subrow); MPI_Type_get_extent(type, &lb, &extent); MPI_Type_create_resized(subrow, 0, extent, &subrow1); MPI_Type_contiguous(n, subrow1, &submatrix); MPI_Type_commit(&submatrix); MPI_Type_free(&subrow); MPI_Type_free(&subrow1); /* Add a consistency test: the size of submatrix should be * n * m * sizeof(type) and the extent should be ((m-1)*N+n) * sizeof(type) */ { int tsize; MPI_Aint textent, llb; MPI_Type_size(type, &tsize); MPI_Type_get_true_extent(submatrix, &llb, &textent); if (textent != tsize * (N * (m - 1) + n)) { fprintf(stderr, "Transpose Submatrix extent is %ld, expected %ld (%d,%d,%d)\n", (long) textent, (long) (tsize * (N * (m - 1) + n)), N, n, m); } } return (submatrix); } /* -- CUT HERE -- */ int main(int argc, char *argv[]) { int gM, gN, lm, lmlast, ln, lnlast, i, j, errs = 0; int size, rank; float *localA, *localB; MPI_Comm comm; MTest_Init(&argc, &argv); comm = MPI_COMM_WORLD; MPI_Comm_size(comm, &size); MPI_Comm_rank(comm, &rank); gM = 20; gN = 30; /* Each block is lm x ln in size, except for the last process, * which has lmlast x lnlast */ lm = gM / size; lmlast = gM - (size - 1) * lm; ln = gN / size; lnlast = gN - (size - 1) * ln; /* Create the local matrices. * Initialize the input matrix so that the entries are * consequtive integers, by row, starting at 0. */ if (rank == size - 1) { localA = (float *) malloc(gN * lmlast * sizeof(float)); localB = (float *) malloc(gM * lnlast * sizeof(float)); for (i = 0; i < lmlast; i++) { for (j = 0; j < gN; j++) { localA[i * gN + j] = (float) (i * gN + j + rank * gN * lm); } } } else { localA = (float *) malloc(gN * lm * sizeof(float)); localB = (float *) malloc(gM * ln * sizeof(float)); for (i = 0; i < lm; i++) { for (j = 0; j < gN; j++) { localA[i * gN + j] = (float) (i * gN + j + rank * gN * lm); } } } MTestPrintfMsg(2, "Allocated local arrays\n"); /* Transpose */ Transpose(localA, localB, gM, gN, comm); /* check the transposed matrix * In the global matrix, the transpose has consequtive integers, * organized by columns. */ if (rank == size - 1) { for (i = 0; i < lnlast; i++) { for (j = 0; j < gM; j++) { int expected = i + gN * j + rank * ln; if ((int) localB[i * gM + j] != expected) { if (errs < MAX_ERRORS) printf("Found %d but expected %d\n", (int) localB[i * gM + j], expected); errs++; } } } } else { for (i = 0; i < ln; i++) { for (j = 0; j < gM; j++) { int expected = i + gN * j + rank * ln; if ((int) localB[i * gM + j] != expected) { if (errs < MAX_ERRORS) printf("Found %d but expected %d\n", (int) localB[i * gM + j], expected); errs++; } } } } /* Free storage */ free(localA); free(localB); MTest_Finalize(errs); return MTestReturnValue(errs); }