Blame src/mpi/topo/dims_create.c

Packit Service c5cf8c
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
Packit Service c5cf8c
/*
Packit Service c5cf8c
 *  (C) 2001 by Argonne National Laboratory.
Packit Service c5cf8c
 *      See COPYRIGHT in top-level directory.
Packit Service c5cf8c
 */
Packit Service c5cf8c
Packit Service c5cf8c
#include "mpiimpl.h"
Packit Service c5cf8c
Packit Service c5cf8c
/*
Packit Service c5cf8c
 * This CVAR is used for debugging support.  An alternative would be
Packit Service c5cf8c
 * to use the MPIU_DBG interface, which predates the CVAR interface,
Packit Service c5cf8c
 * and also provides different levels of debugging support.  In the
Packit Service c5cf8c
 * long run, the MPIU_DBG interface should be updated to make use of
Packit Service c5cf8c
 * CVARs.
Packit Service c5cf8c
 */
Packit Service c5cf8c
/*
Packit Service c5cf8c
=== BEGIN_MPI_T_CVAR_INFO_BLOCK ===
Packit Service c5cf8c
Packit Service c5cf8c
categories:
Packit Service c5cf8c
    - name        : DIMS
Packit Service c5cf8c
      description : Dims_create cvars
Packit Service c5cf8c
Packit Service c5cf8c
cvars:
Packit Service c5cf8c
    - name        : MPIR_CVAR_DIMS_VERBOSE
Packit Service c5cf8c
      category    : DIMS
Packit Service c5cf8c
      type        : boolean
Packit Service c5cf8c
      default     : false
Packit Service c5cf8c
      class       : none
Packit Service c5cf8c
      verbosity   : MPI_T_VERBOSITY_MPIDEV_DETAIL
Packit Service c5cf8c
      scope       : MPI_T_SCOPE_ALL_EQ
Packit Service c5cf8c
      description : >-
Packit Service c5cf8c
           If true, enable verbose output about the actions of the
Packit Service c5cf8c
           implementation of MPI_Dims_create.
Packit Service c5cf8c
Packit Service c5cf8c
=== END_MPI_T_CVAR_INFO_BLOCK ===
Packit Service c5cf8c
*/
Packit Service c5cf8c
Packit Service c5cf8c
/* -- Begin Profiling Symbol Block for routine MPI_Dims_create */
Packit Service c5cf8c
#if defined(HAVE_PRAGMA_WEAK)
Packit Service c5cf8c
#pragma weak MPI_Dims_create = PMPI_Dims_create
Packit Service c5cf8c
#elif defined(HAVE_PRAGMA_HP_SEC_DEF)
Packit Service c5cf8c
#pragma _HP_SECONDARY_DEF PMPI_Dims_create  MPI_Dims_create
Packit Service c5cf8c
#elif defined(HAVE_PRAGMA_CRI_DUP)
Packit Service c5cf8c
#pragma _CRI duplicate MPI_Dims_create as PMPI_Dims_create
Packit Service c5cf8c
#elif defined(HAVE_WEAK_ATTRIBUTE)
Packit Service c5cf8c
int MPI_Dims_create(int nnodes, int ndims, int dims[])
Packit Service c5cf8c
    __attribute__ ((weak, alias("PMPI_Dims_create")));
Packit Service c5cf8c
#endif
Packit Service c5cf8c
/* -- End Profiling Symbol Block */
Packit Service c5cf8c
Packit Service c5cf8c
Packit Service c5cf8c
Packit Service c5cf8c
/* Because we store factors with their multiplicities, a small array can
Packit Service c5cf8c
   store all of the factors for a large number (grows *faster* than n
Packit Service c5cf8c
   factorial). */
Packit Service c5cf8c
#define MAX_FACTORS 10
Packit Service c5cf8c
/* 2^20 is a millon */
Packit Service c5cf8c
#define MAX_DIMS    20
Packit Service c5cf8c
Packit Service c5cf8c
typedef struct Factors {
Packit Service c5cf8c
    int val, cnt;
Packit Service c5cf8c
} Factors;
Packit Service c5cf8c
Packit Service c5cf8c
/* These routines may be global if we are not using weak symbols */
Packit Service c5cf8c
PMPI_LOCAL int MPIR_Dims_create_init(void);
Packit Service c5cf8c
PMPI_LOCAL int MPIR_Dims_create_impl(int nnodes, int ndims, int dims[]);
Packit Service c5cf8c
Packit Service c5cf8c
/* Define MPICH_MPI_FROM_PMPI if weak symbols are not supported to build
Packit Service c5cf8c
   the MPI routines.  You can use USE_WEAK_SYMBOLS to see if MPICH is
Packit Service c5cf8c
   using weak symbols to implement the MPI routines. */
Packit Service c5cf8c
Packit Service c5cf8c
#ifndef MPICH_MPI_FROM_PMPI
Packit Service c5cf8c
#undef MPI_Dims_create
Packit Service c5cf8c
#define MPI_Dims_create PMPI_Dims_create
Packit Service c5cf8c
Packit Service c5cf8c
PMPI_LOCAL MPIR_T_pvar_timer_t PVAR_TIMER_dims_getdivs;
Packit Service c5cf8c
PMPI_LOCAL MPIR_T_pvar_timer_t PVAR_TIMER_dims_sort;
Packit Service c5cf8c
PMPI_LOCAL MPIR_T_pvar_timer_t PVAR_TIMER_dims_fact;
Packit Service c5cf8c
PMPI_LOCAL MPIR_T_pvar_timer_t PVAR_TIMER_dims_basefact;
Packit Service c5cf8c
PMPI_LOCAL MPIR_T_pvar_timer_t PVAR_TIMER_dims_div;
Packit Service c5cf8c
PMPI_LOCAL MPIR_T_pvar_timer_t PVAR_TIMER_dims_bal;
Packit Service c5cf8c
Packit Service c5cf8c
PMPI_LOCAL unsigned long long PVAR_COUNTER_dims_npruned;
Packit Service c5cf8c
PMPI_LOCAL unsigned long long PVAR_COUNTER_dims_ndivmade;
Packit Service c5cf8c
PMPI_LOCAL unsigned long long PVAR_COUNTER_dims_optbalcalls;
Packit Service c5cf8c
Packit Service c5cf8c
/* MPI_Dims_create and PMPI_Dims_create must see the same variable for this
Packit Service c5cf8c
   one-time initialization flag.  If this file must be compiled twice,
Packit Service c5cf8c
   this variable is defined here and as external for the other build. */
Packit Service c5cf8c
volatile int MPIR_DIMS_initPCVars = 1;
Packit Service c5cf8c
Packit Service c5cf8c
/* This routine is called once to define any PVARS and CVARS */
Packit Service c5cf8c
PMPI_LOCAL int MPIR_Dims_create_init(void)
Packit Service c5cf8c
{
Packit Service c5cf8c
Packit Service c5cf8c
    MPIR_T_PVAR_TIMER_REGISTER_STATIC(DIMS,
Packit Service c5cf8c
                                      MPI_DOUBLE,
Packit Service c5cf8c
                                      dims_getdivs,
Packit Service c5cf8c
                                      MPI_T_VERBOSITY_MPIDEV_DETAIL,
Packit Service c5cf8c
                                      MPI_T_BIND_NO_OBJECT,
Packit Service c5cf8c
                                      MPIR_T_PVAR_FLAG_READONLY,
Packit Service c5cf8c
                                      "DIMS", "Time to find divisors (seconds)");
Packit Service c5cf8c
    MPIR_T_PVAR_TIMER_REGISTER_STATIC(DIMS,
Packit Service c5cf8c
                                      MPI_DOUBLE,
Packit Service c5cf8c
                                      dims_sort,
Packit Service c5cf8c
                                      MPI_T_VERBOSITY_MPIDEV_DETAIL,
Packit Service c5cf8c
                                      MPI_T_BIND_NO_OBJECT,
Packit Service c5cf8c
                                      MPIR_T_PVAR_FLAG_READONLY,
Packit Service c5cf8c
                                      "DIMS", "Time to sort divisors (seconds)");
Packit Service c5cf8c
    MPIR_T_PVAR_TIMER_REGISTER_STATIC(DIMS,
Packit Service c5cf8c
                                      MPI_DOUBLE,
Packit Service c5cf8c
                                      dims_fact,
Packit Service c5cf8c
                                      MPI_T_VERBOSITY_MPIDEV_DETAIL,
Packit Service c5cf8c
                                      MPI_T_BIND_NO_OBJECT,
Packit Service c5cf8c
                                      MPIR_T_PVAR_FLAG_READONLY,
Packit Service c5cf8c
                                      "DIMS", "Time to find factors (seconds)");
Packit Service c5cf8c
    MPIR_T_PVAR_TIMER_REGISTER_STATIC(DIMS,
Packit Service c5cf8c
                                      MPI_DOUBLE,
Packit Service c5cf8c
                                      dims_basefact,
Packit Service c5cf8c
                                      MPI_T_VERBOSITY_MPIDEV_DETAIL,
Packit Service c5cf8c
                                      MPI_T_BIND_NO_OBJECT,
Packit Service c5cf8c
                                      MPIR_T_PVAR_FLAG_READONLY, "DIMS", "Time to ? (seconds)");
Packit Service c5cf8c
    MPIR_T_PVAR_TIMER_REGISTER_STATIC(DIMS,
Packit Service c5cf8c
                                      MPI_DOUBLE,
Packit Service c5cf8c
                                      dims_div,
Packit Service c5cf8c
                                      MPI_T_VERBOSITY_MPIDEV_DETAIL,
Packit Service c5cf8c
                                      MPI_T_BIND_NO_OBJECT,
Packit Service c5cf8c
                                      MPIR_T_PVAR_FLAG_READONLY,
Packit Service c5cf8c
                                      "DIMS", "Time spent in integer division (seconds)");
Packit Service c5cf8c
    MPIR_T_PVAR_TIMER_REGISTER_STATIC(DIMS,
Packit Service c5cf8c
                                      MPI_DOUBLE,
Packit Service c5cf8c
                                      dims_bal,
Packit Service c5cf8c
                                      MPI_T_VERBOSITY_MPIDEV_DETAIL,
Packit Service c5cf8c
                                      MPI_T_BIND_NO_OBJECT,
Packit Service c5cf8c
                                      MPIR_T_PVAR_FLAG_READONLY,
Packit Service c5cf8c
                                      "DIMS", "Time finding balanced decomposition (seconds)");
Packit Service c5cf8c
Packit Service c5cf8c
    MPIR_T_PVAR_COUNTER_REGISTER_STATIC(DIMS, MPI_UNSIGNED_LONG_LONG, dims_npruned, MPI_T_VERBOSITY_MPIDEV_DETAIL, MPI_T_BIND_NO_OBJECT, (MPIR_T_PVAR_FLAG_READONLY | MPIR_T_PVAR_FLAG_CONTINUOUS), "DIMS",     /* category name */
Packit Service c5cf8c
                                        "Number of divisors pruned from the search for a balanced decomposition");
Packit Service c5cf8c
    MPIR_T_PVAR_COUNTER_REGISTER_STATIC(DIMS, MPI_UNSIGNED_LONG_LONG, dims_ndivmade, MPI_T_VERBOSITY_MPIDEV_DETAIL, MPI_T_BIND_NO_OBJECT, (MPIR_T_PVAR_FLAG_READONLY | MPIR_T_PVAR_FLAG_CONTINUOUS), "DIMS",    /* category name */
Packit Service c5cf8c
                                        "Number of integer divisions performed");
Packit Service c5cf8c
    MPIR_T_PVAR_COUNTER_REGISTER_STATIC(DIMS, MPI_UNSIGNED_LONG_LONG, dims_optbalcalls, MPI_T_VERBOSITY_MPIDEV_DETAIL, MPI_T_BIND_NO_OBJECT, (MPIR_T_PVAR_FLAG_READONLY | MPIR_T_PVAR_FLAG_CONTINUOUS), "DIMS", /* category name */
Packit Service c5cf8c
                                        "Number of call to optbalance");
Packit Service c5cf8c
Packit Service c5cf8c
    return 0;
Packit Service c5cf8c
}
Packit Service c5cf8c
Packit Service c5cf8c
/* This includes all primes up to 46337 (sufficient for 2 billion ranks).
Packit Service c5cf8c
   If storage is a concern (MPICH was originally organized so that only
Packit Service c5cf8c
   code and data needed by applications would be loaded in order to keep
Packit Service c5cf8c
   both the size of the executable and any shared library loads small),
Packit Service c5cf8c
   this include could be organized include only primes up to 1000 (for
Packit Service c5cf8c
   a maximum of a million ranks) or 3400 (for a maximum of 10 million ranks),
Packit Service c5cf8c
   which will significantly reduce the number of elements included here.
Packit Service c5cf8c
*/
Packit Service c5cf8c
#include "primes.h"
Packit Service c5cf8c
Packit Service c5cf8c
/* Local only routines.  These should *not* have standard prefix */
Packit Service c5cf8c
static int factor_num(int, Factors[], int *);
Packit Service c5cf8c
static int ndivisors_from_factor(int nf, const Factors * factors);
Packit Service c5cf8c
static int factor_to_divisors(int nf, Factors * factors, int ndivs, int divs[]);
Packit Service c5cf8c
static void factor_to_dims_by_rr(int nf, Factors f[], int nd, int dims[]);
Packit Service c5cf8c
static int optbalance(int n, int idx, int nd, int ndivs,
Packit Service c5cf8c
                      const int divs[], int trydims[], int *curbal_p, int optdims[]);
Packit Service c5cf8c
Packit Service c5cf8c
/*
Packit Service c5cf8c
 * Factor "n", returning the prime factors and their counts in factors, in
Packit Service c5cf8c
 * increasing order.  Return the sum of the number of primes, including their
Packit Service c5cf8c
 * powers (e.g., 2^3 * 7^5 * 19 gives 9 divisors, the maximum that can be created
Packit Service c5cf8c
 * from this factorization)
Packit Service c5cf8c
 */
Packit Service c5cf8c
static int factor_num(int nn, Factors factors[], int *nprimes)
Packit Service c5cf8c
{
Packit Service c5cf8c
    int n = nn, val;
Packit Service c5cf8c
    int i, nfactors = 0, nall = 0;
Packit Service c5cf8c
    int cnt;
Packit Service c5cf8c
Packit Service c5cf8c
    /* Find the prime number that less than that value and try dividing
Packit Service c5cf8c
     * out the primes.  */
Packit Service c5cf8c
Packit Service c5cf8c
    /* First, get factors of 2 without division */
Packit Service c5cf8c
    if ((n & 0x1) == 0) {
Packit Service c5cf8c
        cnt = 1;
Packit Service c5cf8c
        n >>= 1;
Packit Service c5cf8c
        while ((n & 0x1) == 0) {
Packit Service c5cf8c
            cnt++;
Packit Service c5cf8c
            n >>= 1;
Packit Service c5cf8c
        }
Packit Service c5cf8c
        factors[0].cnt = cnt;
Packit Service c5cf8c
        factors[0].val = 2;
Packit Service c5cf8c
        nfactors = 1;
Packit Service c5cf8c
        nall = cnt;
Packit Service c5cf8c
    }
Packit Service c5cf8c
Packit Service c5cf8c
    /* An earlier version computed an approximation to the sqrt(n) to serve
Packit Service c5cf8c
     * as a stopping criteria.  However, tests showed that checking that the
Packit Service c5cf8c
     * square of the current prime is less than the remaining value (after
Packit Service c5cf8c
     * dividing out the primes found so far) is faster.
Packit Service c5cf8c
     */
Packit Service c5cf8c
    i = 1;
Packit Service c5cf8c
    do {
Packit Service c5cf8c
        int n2;
Packit Service c5cf8c
        val = primes[i];
Packit Service c5cf8c
        /* Note that we keep removing factors from n, so this test becomes
Packit Service c5cf8c
         * easier and easier to satisfy as we remove factors from n
Packit Service c5cf8c
         */
Packit Service c5cf8c
        if (val * val > n)
Packit Service c5cf8c
            break;
Packit Service c5cf8c
        n2 = n / val;
Packit Service c5cf8c
        if (n2 * val == n) {
Packit Service c5cf8c
            cnt = 1;
Packit Service c5cf8c
            n = n2;
Packit Service c5cf8c
            n2 = n / val;
Packit Service c5cf8c
            while (n2 * val == n) {
Packit Service c5cf8c
                cnt++;
Packit Service c5cf8c
                n = n2;
Packit Service c5cf8c
                n2 = n / val;
Packit Service c5cf8c
            }
Packit Service c5cf8c
            /* Test on nfactors >= MAX_FACTORS?  */
Packit Service c5cf8c
            /* --BEGIN ERROR HANDLING-- */
Packit Service c5cf8c
            if (nfactors + 1 == MAX_FACTORS) {
Packit Service c5cf8c
                /* Time to panic.  This should not happen, since the
Packit Service c5cf8c
                 * smallest number that could exceed this would
Packit Service c5cf8c
                 * be the product of the first 10 primes that are
Packit Service c5cf8c
                 * greater than one, which is 6,469,693,230 */
Packit Service c5cf8c
                return nfactors;
Packit Service c5cf8c
            }
Packit Service c5cf8c
            /* --END ERROR HANDLING-- */
Packit Service c5cf8c
            factors[nfactors].val = val;
Packit Service c5cf8c
            factors[nfactors++].cnt = cnt;
Packit Service c5cf8c
            nall += cnt;
Packit Service c5cf8c
            if (n == 1)
Packit Service c5cf8c
                break;
Packit Service c5cf8c
        }
Packit Service c5cf8c
        i++;
Packit Service c5cf8c
    } while (1);
Packit Service c5cf8c
    if (n != 1) {
Packit Service c5cf8c
        /* There was one factor > sqrt(n).  This factor must be prime.
Packit Service c5cf8c
         * Note if nfactors was 0, n was prime */
Packit Service c5cf8c
        factors[nfactors].val = n;
Packit Service c5cf8c
        factors[nfactors++].cnt = 1;
Packit Service c5cf8c
        nall++;
Packit Service c5cf8c
    }
Packit Service c5cf8c
    *nprimes = nall;
Packit Service c5cf8c
    return nfactors;
Packit Service c5cf8c
}
Packit Service c5cf8c
Packit Service c5cf8c
static int ndivisors_from_factor(int nf, const Factors * factors)
Packit Service c5cf8c
{
Packit Service c5cf8c
    int i, ndiv;
Packit Service c5cf8c
    ndiv = 1;
Packit Service c5cf8c
    for (i = 0; i < nf; i++) {
Packit Service c5cf8c
        ndiv *= (factors[i].cnt + 1);
Packit Service c5cf8c
    }
Packit Service c5cf8c
    ndiv -= 2;
Packit Service c5cf8c
Packit Service c5cf8c
    return ndiv;
Packit Service c5cf8c
}
Packit Service c5cf8c
Packit Service c5cf8c
#undef FCNAME
Packit Service c5cf8c
#define FCNAME "factor_to_divisors"
Packit Service c5cf8c
static int factor_to_divisors(int nf, Factors * factors, int ndiv, int divs[])
Packit Service c5cf8c
{
Packit Service c5cf8c
    int i, powers[MAX_FACTORS], curbase[MAX_FACTORS], nd, idx, val, mpi_errno;
Packit Service c5cf8c
Packit Service c5cf8c
    MPIR_T_PVAR_TIMER_START(DIMS, dims_getdivs);
Packit Service c5cf8c
    for (i = 0; i < nf; i++) {
Packit Service c5cf8c
        powers[i] = 0;
Packit Service c5cf8c
        curbase[i] = 1;
Packit Service c5cf8c
    }
Packit Service c5cf8c
Packit Service c5cf8c
    for (nd = 0; nd < ndiv; nd++) {
Packit Service c5cf8c
        /* Get the next power */
Packit Service c5cf8c
        for (idx = 0; idx < nf; idx++) {
Packit Service c5cf8c
            powers[idx]++;
Packit Service c5cf8c
            if (powers[idx] > factors[idx].cnt) {
Packit Service c5cf8c
                powers[idx] = 0;
Packit Service c5cf8c
                curbase[idx] = 1;
Packit Service c5cf8c
            } else {
Packit Service c5cf8c
                curbase[idx] *= factors[idx].val;
Packit Service c5cf8c
                break;
Packit Service c5cf8c
            }
Packit Service c5cf8c
        }
Packit Service c5cf8c
        /* Compute the divisor.  Note that we could keep the scan of values
Packit Service c5cf8c
         * from k to nf-1, then curscan[0] would always be the next value */
Packit Service c5cf8c
        val = 1;
Packit Service c5cf8c
        for (idx = 0; idx < nf; idx++)
Packit Service c5cf8c
            val *= curbase[idx];
Packit Service c5cf8c
        divs[nd] = val;
Packit Service c5cf8c
    }
Packit Service c5cf8c
    MPIR_T_PVAR_TIMER_END(DIMS, dims_getdivs);
Packit Service c5cf8c
Packit Service c5cf8c
    /* Values are not sorted - for example, 2(2),3 will give 2,4,3 as the
Packit Service c5cf8c
     * divisors */
Packit Service c5cf8c
    /* This code is used because it is significantly faster than using
Packit Service c5cf8c
     * the qsort routine.  In tests of several million dims_create
Packit Service c5cf8c
     * calls, this code took 1.02 seconds (with -O3) and 1.79 seconds
Packit Service c5cf8c
     * (without optimization) while qsort took 2.5 seconds.
Packit Service c5cf8c
     */
Packit Service c5cf8c
    if (nf > 1) {
Packit Service c5cf8c
        int gap, j, j1, j2, k, j1max, j2max;
Packit Service c5cf8c
        MPIR_CHKLMEM_DECL(1);
Packit Service c5cf8c
        int *divs2;
Packit Service c5cf8c
        int *restrict d1, *restrict d2;
Packit Service c5cf8c
Packit Service c5cf8c
        MPIR_CHKLMEM_MALLOC(divs2, int *, nd * sizeof(int), mpi_errno, "divs2", MPL_MEM_COMM);
Packit Service c5cf8c
Packit Service c5cf8c
        MPIR_T_PVAR_TIMER_START(DIMS, dims_sort);
Packit Service c5cf8c
        /* handling the first set of pairs separately saved about 20%;
Packit Service c5cf8c
         * done inplace */
Packit Service c5cf8c
        for (j = 0; j < nd - 1; j += 2) {
Packit Service c5cf8c
            if (divs[j] > divs[j + 1]) {
Packit Service c5cf8c
                int tmp = divs[j];
Packit Service c5cf8c
                divs[j] = divs[j + 1];
Packit Service c5cf8c
                divs[j + 1] = tmp;
Packit Service c5cf8c
            }
Packit Service c5cf8c
        }
Packit Service c5cf8c
        /* Using pointers d1 and d2 about 2x copying divs2 back into divs
Packit Service c5cf8c
         * at the end of each phase */
Packit Service c5cf8c
        d1 = divs;
Packit Service c5cf8c
        d2 = divs2;
Packit Service c5cf8c
        for (gap = 2; gap < nd; gap *= 2) {
Packit Service c5cf8c
            k = 0;
Packit Service c5cf8c
            for (j = 0; j + gap < nd; j += gap * 2) {
Packit Service c5cf8c
                j1 = j;
Packit Service c5cf8c
                j2 = j + gap;
Packit Service c5cf8c
                j1max = j2;
Packit Service c5cf8c
                j2max = j2 + gap;
Packit Service c5cf8c
                if (j2max > nd)
Packit Service c5cf8c
                    j2max = nd;
Packit Service c5cf8c
                while (j1 < j1max && j2 < j2max) {
Packit Service c5cf8c
                    if (d1[j1] < d1[j2]) {
Packit Service c5cf8c
                        d2[k++] = d1[j1++];
Packit Service c5cf8c
                    } else {
Packit Service c5cf8c
                        d2[k++] = d1[j2++];
Packit Service c5cf8c
                    }
Packit Service c5cf8c
                }
Packit Service c5cf8c
                /* Copy remainder */
Packit Service c5cf8c
                while (j1 < j1max)
Packit Service c5cf8c
                    d2[k++] = d1[j1++];
Packit Service c5cf8c
                while (j2 < j2max)
Packit Service c5cf8c
                    d2[k++] = d1[j2++];
Packit Service c5cf8c
            }
Packit Service c5cf8c
            /* May be some left over */
Packit Service c5cf8c
            while (j < nd)
Packit Service c5cf8c
                d2[k++] = d1[j++];
Packit Service c5cf8c
            /* swap pointers and start again */
Packit Service c5cf8c
            {
Packit Service c5cf8c
                int *dt = d1;
Packit Service c5cf8c
                d1 = d2;
Packit Service c5cf8c
                d2 = dt;
Packit Service c5cf8c
            }
Packit Service c5cf8c
        }
Packit Service c5cf8c
        MPIR_T_PVAR_TIMER_END(DIMS, dims_sort);
Packit Service c5cf8c
        /* Result must end up in divs */
Packit Service c5cf8c
        if (d1 != divs) {
Packit Service c5cf8c
            for (j1 = 0; j1 < nd; j1++)
Packit Service c5cf8c
                divs[j1] = d1[j1];
Packit Service c5cf8c
        }
Packit Service c5cf8c
        MPIR_CHKLMEM_FREEALL();
Packit Service c5cf8c
    }
Packit Service c5cf8c
    return nd;
Packit Service c5cf8c
  fn_fail:
Packit Service c5cf8c
    return mpi_errno;
Packit Service c5cf8c
}
Packit Service c5cf8c
Packit Service c5cf8c
Packit Service c5cf8c
/*
Packit Service c5cf8c
 * This is a modified round robin assignment.  The goal is to
Packit Service c5cf8c
 * get a good first guess at a good distribution.
Packit Service c5cf8c
 *
Packit Service c5cf8c
 * First, distribute factors to dims[0..nd-1].  The purpose is to get the
Packit Service c5cf8c
 * initial factors set and to ensure that the smallest dimension is > 1.
Packit Service c5cf8c
 * Second, distibute the remaining factors, starting with the largest, to
Packit Service c5cf8c
 * the elements of dims with the smallest index i such that
Packit Service c5cf8c
 *   dims[i-1] > dims[i]*val
Packit Service c5cf8c
 * or to dims[0] if no i satisfies.
Packit Service c5cf8c
 * For example, this will successfully distribute the factors of 100 in 3-d
Packit Service c5cf8c
 * as 5,5,4.  A pure round robin would give 10,5,2.
Packit Service c5cf8c
 */
Packit Service c5cf8c
static void factor_to_dims_by_rr(int nf, Factors f[], int nd, int dims[])
Packit Service c5cf8c
{
Packit Service c5cf8c
    int i, j, k, cnt, val;
Packit Service c5cf8c
Packit Service c5cf8c
    /* Initialize dims */
Packit Service c5cf8c
    for (i = 0; i < nd; i++)
Packit Service c5cf8c
        dims[i] = 1;
Packit Service c5cf8c
Packit Service c5cf8c
    k = 0;
Packit Service c5cf8c
    /* Start with the largest factors, move back to the smallest factor */
Packit Service c5cf8c
    for (i = nf - 1; i >= 0; i--) {
Packit Service c5cf8c
        val = f[i].val;
Packit Service c5cf8c
        cnt = f[i].cnt;
Packit Service c5cf8c
        for (j = 0; j < cnt; j++) {
Packit Service c5cf8c
            if (k < nd) {
Packit Service c5cf8c
                dims[k++] = val;
Packit Service c5cf8c
            } else {
Packit Service c5cf8c
                int kk;
Packit Service c5cf8c
                /* find first location where apply val is valid.
Packit Service c5cf8c
                 * Can always apply at dims[0] */
Packit Service c5cf8c
                for (kk = nd - 1; kk > 0; kk--) {
Packit Service c5cf8c
                    if (dims[kk] * val <= dims[kk - 1])
Packit Service c5cf8c
                        break;
Packit Service c5cf8c
                }
Packit Service c5cf8c
                dims[kk] *= val;
Packit Service c5cf8c
            }
Packit Service c5cf8c
        }
Packit Service c5cf8c
    }
Packit Service c5cf8c
}
Packit Service c5cf8c
Packit Service c5cf8c
Packit Service c5cf8c
/* need to set a minidx where it stops because the remaining
Packit Service c5cf8c
   values are known.  Then pass in the entire array.  This is needed
Packit Service c5cf8c
   to get the correct values for "ties" between the first and last values.
Packit Service c5cf8c
 */
Packit Service c5cf8c
#undef FC_NAME
Packit Service c5cf8c
#define FC_NAME "optbalance"
Packit Service c5cf8c
Packit Service c5cf8c
static int optbalance(int n, int idx, int nd, int ndivs, const int divs[],
Packit Service c5cf8c
                      int trydims[], int *curbal_p, int optdims[])
Packit Service c5cf8c
{
Packit Service c5cf8c
    int min = trydims[nd - 1], curbal = *curbal_p, testbal;
Packit Service c5cf8c
    int k, f, q, ff, i, ii, kk, nndivs, sf, mpi_errno = MPI_SUCCESS;
Packit Service c5cf8c
Packit Service c5cf8c
    MPIR_T_PVAR_COUNTER_INC(DIMS, dims_optbalcalls, 1);
Packit Service c5cf8c
    if (MPIR_CVAR_DIMS_VERBOSE) {
Packit Service c5cf8c
        MPL_msg_printf("Noptb: idx=%d, nd=%d, ndivs=%d, balance=%d\n", idx, nd, ndivs, curbal);
Packit Service c5cf8c
        MPL_msg_printf("Noptb:optdims: ");
Packit Service c5cf8c
        for (i = 0; i < nd; i++)
Packit Service c5cf8c
            MPL_msg_printf("%d%c", optdims[i], (i + 1 < nd) ? 'x' : '\n');
Packit Service c5cf8c
        MPL_msg_printf("Noptb:trydims: ");
Packit Service c5cf8c
        for (i = idx + 1; i < nd; i++)
Packit Service c5cf8c
            MPL_msg_printf("%d%c", trydims[i], (i + 1 < nd) ? 'x' : '\n');
Packit Service c5cf8c
    }
Packit Service c5cf8c
    if (idx > 1) {
Packit Service c5cf8c
        MPIR_CHKLMEM_DECL(1);
Packit Service c5cf8c
        int *newdivs;
Packit Service c5cf8c
        MPIR_CHKLMEM_MALLOC(newdivs, int *, ndivs * sizeof(int), mpi_errno, "divs", MPL_MEM_COMM);
Packit Service c5cf8c
        if (mpi_errno)
Packit Service c5cf8c
            return mpi_errno;
Packit Service c5cf8c
Packit Service c5cf8c
        /* At least 3 divisors to set (0...idx).  We try all choices
Packit Service c5cf8c
         * recursively, but stop looking when we can easily tell that
Packit Service c5cf8c
         * no additional cases can improve the current solution. */
Packit Service c5cf8c
        for (k = 0; k < ndivs; k++) {
Packit Service c5cf8c
            f = divs[k];
Packit Service c5cf8c
            if (MPIR_CVAR_DIMS_VERBOSE) {
Packit Service c5cf8c
                MPL_msg_printf("Noptb: try f=%d at dims[%d]\n", f, idx);
Packit Service c5cf8c
            }
Packit Service c5cf8c
            if (idx < nd - 1 && f - min > curbal) {
Packit Service c5cf8c
                if (MPIR_CVAR_DIMS_VERBOSE) {
Packit Service c5cf8c
                    MPL_msg_printf("f-min = %d, curbal = %d, skipping other divisors\n",
Packit Service c5cf8c
                                   f - min, curbal);
Packit Service c5cf8c
                }
Packit Service c5cf8c
                MPIR_T_PVAR_COUNTER_INC(DIMS, dims_npruned, 1);
Packit Service c5cf8c
                break;  /* best case is all divisors at least
Packit Service c5cf8c
                         * f, so the best balance is at least
Packit Service c5cf8c
                         * f - min */
Packit Service c5cf8c
            }
Packit Service c5cf8c
            q = n / f;
Packit Service c5cf8c
            /* check whether f is a divisor of what's left.  If so,
Packit Service c5cf8c
             * remember it as the smallest new divisor */
Packit Service c5cf8c
            /* sf is the smallest remaining factor; we use this in an
Packit Service c5cf8c
             * optimization for possible divisors */
Packit Service c5cf8c
            nndivs = 0;
Packit Service c5cf8c
            if (q % f == 0) {
Packit Service c5cf8c
                newdivs[nndivs++] = f;
Packit Service c5cf8c
                sf = f;
Packit Service c5cf8c
            } else {
Packit Service c5cf8c
                sf = divs[k + 1];
Packit Service c5cf8c
            }
Packit Service c5cf8c
            if (idx < nd - 1 && sf - min > curbal) {
Packit Service c5cf8c
                MPIR_T_PVAR_COUNTER_INC(DIMS, dims_npruned, 1);
Packit Service c5cf8c
                break;
Packit Service c5cf8c
            }
Packit Service c5cf8c
            if (MPIR_CVAR_DIMS_VERBOSE) {
Packit Service c5cf8c
                MPL_msg_printf("Noptb: sf = %d\n", sf);
Packit Service c5cf8c
            }
Packit Service c5cf8c
            ff = sf * sf;       /* At least 2 more divisors */
Packit Service c5cf8c
            for (ii = idx - 1; ii > 0 && ff <= q; ii--)
Packit Service c5cf8c
                ff *= sf;
Packit Service c5cf8c
            if (ii > 0) {
Packit Service c5cf8c
                if (MPIR_CVAR_DIMS_VERBOSE) {
Packit Service c5cf8c
                    MPL_msg_printf("break for ii = %d, ff = %d and q = %d\n", ii, ff, q);
Packit Service c5cf8c
                }
Packit Service c5cf8c
                MPIR_T_PVAR_COUNTER_INC(DIMS, dims_npruned, 1);
Packit Service c5cf8c
                break;  /* remaining divisors >= sf, and
Packit Service c5cf8c
                         * product must be <= q */
Packit Service c5cf8c
            }
Packit Service c5cf8c
Packit Service c5cf8c
            /* Try f as the divisor at the idx location */
Packit Service c5cf8c
            trydims[idx] = f;
Packit Service c5cf8c
            MPIR_T_PVAR_TIMER_START(DIMS, dims_div);
Packit Service c5cf8c
            /* find the subset of divisors of n that are divisors of q
Packit Service c5cf8c
             * and larger than f but such that f*f <= q */
Packit Service c5cf8c
            for (kk = k + 1; kk < ndivs; kk++) {
Packit Service c5cf8c
                f = divs[kk];
Packit Service c5cf8c
                ff = f * f;
Packit Service c5cf8c
                if (ff > q) {
Packit Service c5cf8c
                    MPIR_T_PVAR_COUNTER_INC(DIMS, dims_npruned, 1);
Packit Service c5cf8c
                    break;
Packit Service c5cf8c
                }
Packit Service c5cf8c
                MPIR_T_PVAR_COUNTER_INC(DIMS, dims_ndivmade, 1);
Packit Service c5cf8c
                if ((q % f) == 0) {
Packit Service c5cf8c
                    newdivs[nndivs++] = f;
Packit Service c5cf8c
                }
Packit Service c5cf8c
                /* sf * f <= q, break otherwise */
Packit Service c5cf8c
            }
Packit Service c5cf8c
            MPIR_T_PVAR_TIMER_END(DIMS, dims_div);
Packit Service c5cf8c
            /* recursively try to find the best subset */
Packit Service c5cf8c
            if (nndivs > 0)
Packit Service c5cf8c
                optbalance(q, idx - 1, nd, nndivs, newdivs, trydims, curbal_p, optdims);
Packit Service c5cf8c
        }
Packit Service c5cf8c
        MPIR_CHKLMEM_FREEALL();
Packit Service c5cf8c
    } else if (idx == 1) {
Packit Service c5cf8c
        /* Only two.  Find the pair such that q,f has min value for q-min, i.e.,
Packit Service c5cf8c
         * the smallest q such that q > f.  Note that f*f < n if q > f and
Packit Service c5cf8c
         * q*f = n */
Packit Service c5cf8c
        /* Valid choices for f, q >= divs[0] */
Packit Service c5cf8c
        int qprev = -1;
Packit Service c5cf8c
        for (k = 1; k < ndivs; k++) {
Packit Service c5cf8c
            f = divs[k];
Packit Service c5cf8c
            q = n / f;
Packit Service c5cf8c
            if (q < f)
Packit Service c5cf8c
                break;
Packit Service c5cf8c
            qprev = q;
Packit Service c5cf8c
        }
Packit Service c5cf8c
        f = divs[k - 1];
Packit Service c5cf8c
        if (qprev > 0)
Packit Service c5cf8c
            q = qprev;
Packit Service c5cf8c
        else
Packit Service c5cf8c
            q = n / f;
Packit Service c5cf8c
Packit Service c5cf8c
        if (q < f) {
Packit Service c5cf8c
            if (MPIR_CVAR_DIMS_VERBOSE) {
Packit Service c5cf8c
                MPL_msg_printf("Skipping because %d < %d\n", q, f);
Packit Service c5cf8c
            }
Packit Service c5cf8c
            /* No valid solution.  Exit without changing current optdims */
Packit Service c5cf8c
            MPIR_T_PVAR_COUNTER_INC(DIMS, dims_npruned, 1);
Packit Service c5cf8c
            return 0;
Packit Service c5cf8c
        }
Packit Service c5cf8c
        if (MPIR_CVAR_DIMS_VERBOSE) {
Packit Service c5cf8c
            MPL_msg_printf("Found best factors %d,%d, from divs[%d]\n", q, f, k - 1);
Packit Service c5cf8c
        }
Packit Service c5cf8c
        /* If nd is 2 (and not greater), we will be replacing all values
Packit Service c5cf8c
         * in dims.  In that case, testbal is q-f;
Packit Service c5cf8c
         */
Packit Service c5cf8c
        if (nd == 2)
Packit Service c5cf8c
            testbal = q - f;
Packit Service c5cf8c
        else
Packit Service c5cf8c
            testbal = q - min;
Packit Service c5cf8c
Packit Service c5cf8c
        /* Take the new value if it is at least as good as the
Packit Service c5cf8c
         * current choice.  This is what Jesper Traeff's version does
Packit Service c5cf8c
         * (see the code he provided for his EuroMPI'15 paper) */
Packit Service c5cf8c
        if (testbal <= curbal) {
Packit Service c5cf8c
            for (i = 2; i < nd; i++)
Packit Service c5cf8c
                optdims[i] = trydims[i];
Packit Service c5cf8c
            optdims[0] = q;
Packit Service c5cf8c
            optdims[1] = f;
Packit Service c5cf8c
            /* Use the balance for the range of dims, not the total
Packit Service c5cf8c
             * balance */
Packit Service c5cf8c
            *curbal_p = q - min;
Packit Service c5cf8c
        }
Packit Service c5cf8c
    } else {
Packit Service c5cf8c
        /* idx == 0.  Only one choice for divisor */
Packit Service c5cf8c
        if (n - min <= curbal) {
Packit Service c5cf8c
            for (i = 1; i < nd; i++)
Packit Service c5cf8c
                optdims[i] = trydims[i];
Packit Service c5cf8c
            optdims[0] = n;
Packit Service c5cf8c
            *curbal_p = n - min;
Packit Service c5cf8c
        }
Packit Service c5cf8c
    }
Packit Service c5cf8c
    return 0;
Packit Service c5cf8c
  fn_fail:
Packit Service c5cf8c
    return mpi_errno;
Packit Service c5cf8c
}
Packit Service c5cf8c
Packit Service c5cf8c
Packit Service c5cf8c
/* FIXME: The error checking should really be part of MPI_Dims_create,
Packit Service c5cf8c
   not part of MPIR_Dims_create_impl.  That slightly changes the
Packit Service c5cf8c
   semantics of Dims_create provided by the device, but only by
Packit Service c5cf8c
   removing the need to check for errors */
Packit Service c5cf8c
Packit Service c5cf8c
Packit Service c5cf8c
PMPI_LOCAL int MPIR_Dims_create_impl(int nnodes, int ndims, int dims[])
Packit Service c5cf8c
{
Packit Service c5cf8c
    Factors f[MAX_FACTORS];
Packit Service c5cf8c
    int nf, nprimes = 0, i, j, k, val, nextidx;
Packit Service c5cf8c
    int ndivs, curbal;
Packit Service c5cf8c
    int trydims[MAX_DIMS];
Packit Service c5cf8c
    int dims_needed, dims_product, mpi_errno;
Packit Service c5cf8c
    int chosen[MAX_DIMS], foundDecomp;
Packit Service c5cf8c
    int *divs;
Packit Service c5cf8c
    MPIR_CHKLMEM_DECL(1);
Packit Service c5cf8c
Packit Service c5cf8c
    /* Find the number of unspecified dimensions in dims and the product
Packit Service c5cf8c
     * of the positive values in dims */
Packit Service c5cf8c
    dims_needed = 0;
Packit Service c5cf8c
    dims_product = 1;
Packit Service c5cf8c
    for (i = 0; i < ndims; i++) {
Packit Service c5cf8c
        if (dims[i] < 0) {
Packit Service c5cf8c
            mpi_errno = MPIR_Err_create_code(MPI_SUCCESS,
Packit Service c5cf8c
                                             MPIR_ERR_RECOVERABLE,
Packit Service c5cf8c
                                             "MPIR_Dims_create", __LINE__,
Packit Service c5cf8c
                                             MPI_ERR_DIMS,
Packit Service c5cf8c
                                             "**argarrayneg",
Packit Service c5cf8c
                                             "**argarrayneg %s %d %d", "dims", i, dims[i]);
Packit Service c5cf8c
            return mpi_errno;
Packit Service c5cf8c
        }
Packit Service c5cf8c
        if (dims[i] == 0)
Packit Service c5cf8c
            dims_needed++;
Packit Service c5cf8c
        else
Packit Service c5cf8c
            dims_product *= dims[i];
Packit Service c5cf8c
    }
Packit Service c5cf8c
Packit Service c5cf8c
    /* Can we factor nnodes by dims_product? */
Packit Service c5cf8c
    if ((nnodes / dims_product) * dims_product != nnodes) {
Packit Service c5cf8c
        mpi_errno = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE,
Packit Service c5cf8c
                                         "MPIR_Dims_create", __LINE__,
Packit Service c5cf8c
                                         MPI_ERR_DIMS, "**dimspartition", 0);
Packit Service c5cf8c
        return mpi_errno;
Packit Service c5cf8c
    }
Packit Service c5cf8c
Packit Service c5cf8c
    if (!dims_needed) {
Packit Service c5cf8c
        /* Special case - all dimensions provided */
Packit Service c5cf8c
        return MPI_SUCCESS;
Packit Service c5cf8c
    }
Packit Service c5cf8c
Packit Service c5cf8c
    if (dims_needed > MAX_DIMS) {
Packit Service c5cf8c
/* --BEGIN ERROR HANDLING-- */
Packit Service c5cf8c
        mpi_errno = MPIR_Err_create_code(MPI_SUCCESS,
Packit Service c5cf8c
                                         MPIR_ERR_RECOVERABLE,
Packit Service c5cf8c
                                         "MPIR_Dims_create", __LINE__, MPI_ERR_DIMS,
Packit Service c5cf8c
                                         "**dimsmany", "**dimsmany %d %d", dims_needed, MAX_DIMS);
Packit Service c5cf8c
        return mpi_errno;
Packit Service c5cf8c
/* --END ERROR HANDLING-- */
Packit Service c5cf8c
    }
Packit Service c5cf8c
Packit Service c5cf8c
    nnodes /= dims_product;
Packit Service c5cf8c
Packit Service c5cf8c
    /* Factor the remaining nodes into dims_needed components.  The
Packit Service c5cf8c
     * MPI standard says that these should be as balanced as possible;
Packit Service c5cf8c
     * in particular, unfortunately, not necessarily arranged in a way
Packit Service c5cf8c
     * that makes sense for the underlying machine topology.  The approach
Packit Service c5cf8c
     * used here was inspired by "Specification Guideline Violations
Packit Service c5cf8c
     * by MPI_Dims_create," by Jesper Traeff and Felix Luebbe, EuroMPI'15.
Packit Service c5cf8c
     * Among the changes are the use of a table of primes to speed up the
Packit Service c5cf8c
     * process of finding factors, and some effort to reduce the number
Packit Service c5cf8c
     * of integer division operations.
Packit Service c5cf8c
     *
Packit Service c5cf8c
     * Values are put into chosen[0..dims_needed].  These values are then
Packit Service c5cf8c
     * copied back into dims into the "empty" slots (values == 0).
Packit Service c5cf8c
     */
Packit Service c5cf8c
Packit Service c5cf8c
    if (dims_needed == 1) {
Packit Service c5cf8c
        /* Simply put the correct value in place */
Packit Service c5cf8c
        for (j = 0; j < ndims; j++) {
Packit Service c5cf8c
            if (dims[j] == 0) {
Packit Service c5cf8c
                dims[j] = nnodes;
Packit Service c5cf8c
                break;
Packit Service c5cf8c
            }
Packit Service c5cf8c
        }
Packit Service c5cf8c
        return 0;
Packit Service c5cf8c
    }
Packit Service c5cf8c
Packit Service c5cf8c
    /* Factor nnodes */
Packit Service c5cf8c
    MPIR_T_PVAR_TIMER_START(DIMS, dims_fact);
Packit Service c5cf8c
    nf = factor_num(nnodes, f, &nprimes);
Packit Service c5cf8c
    MPIR_T_PVAR_TIMER_END(DIMS, dims_fact);
Packit Service c5cf8c
Packit Service c5cf8c
    if (MPIR_CVAR_DIMS_VERBOSE) {
Packit Service c5cf8c
        MPL_msg_printf("found %d factors\n", nf);
Packit Service c5cf8c
    }
Packit Service c5cf8c
Packit Service c5cf8c
    /* Remove primes > sqrt(n) */
Packit Service c5cf8c
    nextidx = 0;
Packit Service c5cf8c
    for (j = nf - 1; j > 0 && nextidx < dims_needed - 1; j--) {
Packit Service c5cf8c
        val = f[j].val;
Packit Service c5cf8c
        if (f[j].cnt == 1 && val * val > nnodes) {
Packit Service c5cf8c
            if (MPIR_CVAR_DIMS_VERBOSE) {
Packit Service c5cf8c
                MPL_msg_printf("prime %d required in idx %d\n", val, nextidx);
Packit Service c5cf8c
            }
Packit Service c5cf8c
            chosen[nextidx++] = val;
Packit Service c5cf8c
            nf--;
Packit Service c5cf8c
            nnodes /= val;
Packit Service c5cf8c
            nprimes--;
Packit Service c5cf8c
        } else
Packit Service c5cf8c
            break;
Packit Service c5cf8c
    }
Packit Service c5cf8c
Packit Service c5cf8c
    /* Now, handle some special cases.  If we find *any* of these,
Packit Service c5cf8c
     * we're done and can use the values in chosen to return values in dims */
Packit Service c5cf8c
    foundDecomp = 0;
Packit Service c5cf8c
    if (nprimes + nextidx <= dims_needed) {
Packit Service c5cf8c
        /* Fill with the primes */
Packit Service c5cf8c
        int m, cnt;
Packit Service c5cf8c
        if (MPIR_CVAR_DIMS_VERBOSE) {
Packit Service c5cf8c
            MPL_msg_printf("Nprimes = %d, number dims left = %d\n", nprimes, dims_needed - nextidx);
Packit Service c5cf8c
        }
Packit Service c5cf8c
        i = nextidx + nprimes;
Packit Service c5cf8c
        for (k = 0; k < nf; k++) {
Packit Service c5cf8c
            cnt = f[k].cnt;
Packit Service c5cf8c
            val = f[k].val;
Packit Service c5cf8c
            for (m = 0; m < cnt; m++)
Packit Service c5cf8c
                chosen[--i] = val;
Packit Service c5cf8c
        }
Packit Service c5cf8c
        i = nextidx + nprimes;
Packit Service c5cf8c
        while (i < ndims)
Packit Service c5cf8c
            chosen[i++] = 1;
Packit Service c5cf8c
        foundDecomp = 1;
Packit Service c5cf8c
    } else if (dims_needed - nextidx == 1) {
Packit Service c5cf8c
        chosen[nextidx] = nnodes;
Packit Service c5cf8c
        foundDecomp = 1;
Packit Service c5cf8c
    } else if (nf == 1) {
Packit Service c5cf8c
        /* What's left is a product of a single prime, so there is only one
Packit Service c5cf8c
         * possible factorization */
Packit Service c5cf8c
        int cnt = f[0].cnt;
Packit Service c5cf8c
        val = f[0].val;
Packit Service c5cf8c
        if (MPIR_CVAR_DIMS_VERBOSE) {
Packit Service c5cf8c
            MPL_msg_printf("only 1 prime = %d left\n", val);
Packit Service c5cf8c
        }
Packit Service c5cf8c
        for (k = nextidx; k < dims_needed; k++)
Packit Service c5cf8c
            chosen[k] = 1;
Packit Service c5cf8c
        k = nextidx;
Packit Service c5cf8c
        while (cnt-- > 0) {
Packit Service c5cf8c
            if (k >= dims_needed)
Packit Service c5cf8c
                k = nextidx;
Packit Service c5cf8c
            chosen[k++] *= val;
Packit Service c5cf8c
        }
Packit Service c5cf8c
        foundDecomp = 1;
Packit Service c5cf8c
    }
Packit Service c5cf8c
    /* There's another case we consider, particularly since we've
Packit Service c5cf8c
     * factored out the "large" primes.  If (cnt % (ndims-nextidx)) == 0
Packit Service c5cf8c
     * for every remaining factor, then
Packit Service c5cf8c
     * f = product of (val^ (cnt/(ndims-nextidx)))
Packit Service c5cf8c
     */
Packit Service c5cf8c
    if (!foundDecomp) {
Packit Service c5cf8c
        int ndleft = dims_needed - nextidx;
Packit Service c5cf8c
        for (j = 0; j < nf; j++)
Packit Service c5cf8c
            if (f[j].cnt % ndleft != 0)
Packit Service c5cf8c
                break;
Packit Service c5cf8c
        if (j == nf) {
Packit Service c5cf8c
            int fval;
Packit Service c5cf8c
            val = 1;
Packit Service c5cf8c
            for (j = 0; j < nf; j++) {
Packit Service c5cf8c
                int pow = f[j].cnt / ndleft;
Packit Service c5cf8c
                fval = f[j].val;
Packit Service c5cf8c
                while (pow--)
Packit Service c5cf8c
                    val *= fval;
Packit Service c5cf8c
            }
Packit Service c5cf8c
            for (j = nextidx; j < dims_needed; j++)
Packit Service c5cf8c
                chosen[j] = val;
Packit Service c5cf8c
            if (MPIR_CVAR_DIMS_VERBOSE) {
Packit Service c5cf8c
                MPL_msg_printf("Used power of factors for dims\n");
Packit Service c5cf8c
            }
Packit Service c5cf8c
            foundDecomp = 1;
Packit Service c5cf8c
        }
Packit Service c5cf8c
    }
Packit Service c5cf8c
Packit Service c5cf8c
    if (foundDecomp) {
Packit Service c5cf8c
        j = 0;
Packit Service c5cf8c
        for (i = 0; i < ndims; i++) {
Packit Service c5cf8c
            if (dims[i] == 0) {
Packit Service c5cf8c
                dims[i] = chosen[j++];
Packit Service c5cf8c
            }
Packit Service c5cf8c
        }
Packit Service c5cf8c
        return 0;
Packit Service c5cf8c
    }
Packit Service c5cf8c
Packit Service c5cf8c
    /* ndims >= 2 */
Packit Service c5cf8c
Packit Service c5cf8c
    /* No trivial solution.  Balance the remaining values (note that we may
Packit Service c5cf8c
     * have already trimmed off some large factors */
Packit Service c5cf8c
    /* First, find all of the divisors given by the remaining factors */
Packit Service c5cf8c
    ndivs = ndivisors_from_factor(nf, (const Factors *) f);
Packit Service c5cf8c
    MPIR_CHKLMEM_MALLOC(divs, int *, ((unsigned int) ndivs) * sizeof(int), mpi_errno, "divs",
Packit Service c5cf8c
                        MPL_MEM_COMM);
Packit Service c5cf8c
    ndivs = factor_to_divisors(nf, f, ndivs, divs);
Packit Service c5cf8c
    if (MPIR_CVAR_DIMS_VERBOSE) {
Packit Service c5cf8c
        for (i = 0; i < ndivs; i++) {
Packit Service c5cf8c
            if (divs[i] <= 0) {
Packit Service c5cf8c
                MPL_msg_printf("divs[%d]=%d!\n", i, divs[i]);
Packit Service c5cf8c
            }
Packit Service c5cf8c
        }
Packit Service c5cf8c
    }
Packit Service c5cf8c
Packit Service c5cf8c
    /* Create a simple first guess.  We do this by using a round robin
Packit Service c5cf8c
     * distribution of the primes in the remaining values (from nextidx
Packit Service c5cf8c
     * to ndims) */
Packit Service c5cf8c
    MPIR_T_PVAR_TIMER_START(DIMS, dims_basefact);
Packit Service c5cf8c
    factor_to_dims_by_rr(nf, f, dims_needed - nextidx, chosen + nextidx);
Packit Service c5cf8c
    MPIR_T_PVAR_TIMER_END(DIMS, dims_basefact);
Packit Service c5cf8c
Packit Service c5cf8c
    /* This isn't quite right, as the balance may depend on the other (larger)
Packit Service c5cf8c
     * divisors already chosen */
Packit Service c5cf8c
    /* Need to add nextidx */
Packit Service c5cf8c
    curbal = chosen[0] - chosen[dims_needed - 1];
Packit Service c5cf8c
    trydims[dims_needed - nextidx - 1] = divs[0];
Packit Service c5cf8c
    if (MPIR_CVAR_DIMS_VERBOSE) {
Packit Service c5cf8c
        MPL_msg_printf("N: initial decomp is: ");
Packit Service c5cf8c
        for (i = 0; i < dims_needed; i++)
Packit Service c5cf8c
            MPL_msg_printf("%d%c", chosen[i], (i + 1 < dims_needed) ? 'x' : '\n');
Packit Service c5cf8c
    }
Packit Service c5cf8c
    MPIR_T_PVAR_TIMER_START(DIMS, dims_bal);
Packit Service c5cf8c
    optbalance(nnodes, dims_needed - nextidx - 1, dims_needed - nextidx,
Packit Service c5cf8c
               ndivs, divs, trydims, &curbal, chosen + nextidx);
Packit Service c5cf8c
    MPIR_T_PVAR_TIMER_END(DIMS, dims_bal);
Packit Service c5cf8c
    MPIR_CHKLMEM_FREEALL();
Packit Service c5cf8c
Packit Service c5cf8c
    if (MPIR_CVAR_DIMS_VERBOSE) {
Packit Service c5cf8c
        MPL_msg_printf("N: final decomp is: ");
Packit Service c5cf8c
        for (i = 0; i < dims_needed; i++)
Packit Service c5cf8c
            MPL_msg_printf("%d%c", chosen[i], (i + 1 < dims_needed) ? 'x' : '\n');
Packit Service c5cf8c
    }
Packit Service c5cf8c
Packit Service c5cf8c
    j = 0;
Packit Service c5cf8c
    for (i = 0; i < ndims; i++) {
Packit Service c5cf8c
        if (dims[i] == 0) {
Packit Service c5cf8c
            dims[i] = chosen[j++];
Packit Service c5cf8c
        }
Packit Service c5cf8c
    }
Packit Service c5cf8c
Packit Service c5cf8c
    return MPI_SUCCESS;
Packit Service c5cf8c
  fn_fail:
Packit Service c5cf8c
    return mpi_errno;
Packit Service c5cf8c
}
Packit Service c5cf8c
Packit Service c5cf8c
#else
Packit Service c5cf8c
/* MPI_Dims_create and PMPI_Dims_create must see the same variable for this
Packit Service c5cf8c
   one-time initialization flag */
Packit Service c5cf8c
extern volatile int MPIR_DIMS_initPCVars;
Packit Service c5cf8c
Packit Service c5cf8c
#endif /* PMPI Local */
Packit Service c5cf8c
Packit Service c5cf8c
#undef FUNCNAME
Packit Service c5cf8c
#define FUNCNAME MPI_Dims_create
Packit Service c5cf8c
#undef FCNAME
Packit Service c5cf8c
#define FCNAME "MPI_Dims_create"
Packit Service c5cf8c
Packit Service c5cf8c
/*@
Packit Service c5cf8c
    MPI_Dims_create - Creates a division of processors in a cartesian grid
Packit Service c5cf8c
Packit Service c5cf8c
Input Parameters:
Packit Service c5cf8c
+ nnodes - number of nodes in a grid (integer)
Packit Service c5cf8c
- ndims - number of cartesian dimensions (integer)
Packit Service c5cf8c
Packit Service c5cf8c
Input/Output Parameters:
Packit Service c5cf8c
. dims - integer array of size  'ndims' specifying the number of nodes in each
Packit Service c5cf8c
 dimension.  A value of 0 indicates that 'MPI_Dims_create' should fill in a
Packit Service c5cf8c
 suitable value.
Packit Service c5cf8c
Packit Service c5cf8c
.N ThreadSafe
Packit Service c5cf8c
Packit Service c5cf8c
.N Fortran
Packit Service c5cf8c
Packit Service c5cf8c
.N Errors
Packit Service c5cf8c
.N MPI_SUCCESS
Packit Service c5cf8c
@*/
Packit Service c5cf8c
int MPI_Dims_create(int nnodes, int ndims, int dims[])
Packit Service c5cf8c
{
Packit Service c5cf8c
    int mpi_errno = MPI_SUCCESS;
Packit Service c5cf8c
    MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_DIMS_CREATE);
Packit Service c5cf8c
Packit Service c5cf8c
    MPIR_ERRTEST_INITIALIZED_ORDIE();
Packit Service c5cf8c
    MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_DIMS_CREATE);
Packit Service c5cf8c
Packit Service c5cf8c
    if (ndims == 0)
Packit Service c5cf8c
        goto fn_exit;
Packit Service c5cf8c
Packit Service c5cf8c
    /* Validate parameters and objects (post conversion) */
Packit Service c5cf8c
#ifdef HAVE_ERROR_CHECKING
Packit Service c5cf8c
    {
Packit Service c5cf8c
        MPID_BEGIN_ERROR_CHECKS;
Packit Service c5cf8c
        {
Packit Service c5cf8c
            MPIR_ERRTEST_ARGNEG(nnodes, "nnodes", mpi_errno);
Packit Service c5cf8c
            MPIR_ERRTEST_ARGNEG(ndims, "ndims", mpi_errno);
Packit Service c5cf8c
            MPIR_ERRTEST_ARGNULL(dims, "dims", mpi_errno);
Packit Service c5cf8c
        }
Packit Service c5cf8c
        MPID_END_ERROR_CHECKS;
Packit Service c5cf8c
    }
Packit Service c5cf8c
#endif /* HAVE_ERROR_CHECKING */
Packit Service c5cf8c
Packit Service c5cf8c
    /* Initialize pvars and cvars if this is the first call */
Packit Service c5cf8c
    if (MPIR_DIMS_initPCVars) {
Packit Service c5cf8c
        MPIR_Dims_create_init();
Packit Service c5cf8c
        MPIR_DIMS_initPCVars = 0;
Packit Service c5cf8c
    }
Packit Service c5cf8c
Packit Service c5cf8c
    /* ... body of routine ...  */
Packit Service c5cf8c
    if (MPIR_Process.dimsCreate != NULL) {
Packit Service c5cf8c
        mpi_errno = MPIR_Process.dimsCreate(nnodes, ndims, dims);
Packit Service c5cf8c
    } else {
Packit Service c5cf8c
        mpi_errno = MPIR_Dims_create_impl(nnodes, ndims, dims);
Packit Service c5cf8c
    }
Packit Service c5cf8c
    if (mpi_errno)
Packit Service c5cf8c
        MPIR_ERR_POP(mpi_errno);
Packit Service c5cf8c
    /* ... end of body of routine ... */
Packit Service c5cf8c
Packit Service c5cf8c
  fn_exit:
Packit Service c5cf8c
    MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_DIMS_CREATE);
Packit Service c5cf8c
    return mpi_errno;
Packit Service c5cf8c
Packit Service c5cf8c
    /* --BEGIN ERROR HANDLING-- */
Packit Service c5cf8c
  fn_fail:
Packit Service c5cf8c
#ifdef HAVE_ERROR_CHECKING
Packit Service c5cf8c
    {
Packit Service c5cf8c
        mpi_errno =
Packit Service c5cf8c
            MPIR_Err_create_code(mpi_errno, MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPI_ERR_OTHER,
Packit Service c5cf8c
                                 "**mpi_dims_create", "**mpi_dims_create %d %d %p", nnodes, ndims,
Packit Service c5cf8c
                                 dims);
Packit Service c5cf8c
    }
Packit Service c5cf8c
#endif
Packit Service c5cf8c
    mpi_errno = MPIR_Err_return_comm(NULL, FCNAME, mpi_errno);
Packit Service c5cf8c
    goto fn_exit;
Packit Service c5cf8c
    /* --END ERROR HANDLING-- */
Packit Service c5cf8c
}