Tree - source-git/mpich - CentOS Git server

source-git / mpich

Blame src/mpi/topo/dims_create.c

Blob History Raw

Packit Service	c5cf8c	`/* -- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -- */`
Packit Service	c5cf8c	`/*`
Packit Service	c5cf8c	`* (C) 2001 by Argonne National Laboratory.`
Packit Service	c5cf8c	`* See COPYRIGHT in top-level directory.`
Packit Service	c5cf8c	`*/`
Packit Service	c5cf8c
Packit Service	c5cf8c	`#include "mpiimpl.h"`
Packit Service	c5cf8c
Packit Service	c5cf8c	`/*`
Packit Service	c5cf8c	`* This CVAR is used for debugging support. An alternative would be`
Packit Service	c5cf8c	`* to use the MPIU_DBG interface, which predates the CVAR interface,`
Packit Service	c5cf8c	`* and also provides different levels of debugging support. In the`
Packit Service	c5cf8c	`* long run, the MPIU_DBG interface should be updated to make use of`
Packit Service	c5cf8c	`* CVARs.`
Packit Service	c5cf8c	`*/`
Packit Service	c5cf8c	`/*`
Packit Service	c5cf8c	`=== BEGIN_MPI_T_CVAR_INFO_BLOCK ===`
Packit Service	c5cf8c
Packit Service	c5cf8c	`categories:`
Packit Service	c5cf8c	`- name : DIMS`
Packit Service	c5cf8c	`description : Dims_create cvars`
Packit Service	c5cf8c
Packit Service	c5cf8c	`cvars:`
Packit Service	c5cf8c	`- name : MPIR_CVAR_DIMS_VERBOSE`
Packit Service	c5cf8c	`category : DIMS`
Packit Service	c5cf8c	`type : boolean`
Packit Service	c5cf8c	`default : false`
Packit Service	c5cf8c	`class : none`
Packit Service	c5cf8c	`verbosity : MPI_T_VERBOSITY_MPIDEV_DETAIL`
Packit Service	c5cf8c	`scope : MPI_T_SCOPE_ALL_EQ`
Packit Service	c5cf8c	`description : >-`
Packit Service	c5cf8c	`If true, enable verbose output about the actions of the`
Packit Service	c5cf8c	`implementation of MPI_Dims_create.`
Packit Service	c5cf8c
Packit Service	c5cf8c	`=== END_MPI_T_CVAR_INFO_BLOCK ===`
Packit Service	c5cf8c	`*/`
Packit Service	c5cf8c
Packit Service	c5cf8c	`/* -- Begin Profiling Symbol Block for routine MPI_Dims_create */`
Packit Service	c5cf8c	`#if defined(HAVE_PRAGMA_WEAK)`
Packit Service	c5cf8c	`#pragma weak MPI_Dims_create = PMPI_Dims_create`
Packit Service	c5cf8c	`#elif defined(HAVE_PRAGMA_HP_SEC_DEF)`
Packit Service	c5cf8c	`#pragma _HP_SECONDARY_DEF PMPI_Dims_create MPI_Dims_create`
Packit Service	c5cf8c	`#elif defined(HAVE_PRAGMA_CRI_DUP)`
Packit Service	c5cf8c	`#pragma _CRI duplicate MPI_Dims_create as PMPI_Dims_create`
Packit Service	c5cf8c	`#elif defined(HAVE_WEAK_ATTRIBUTE)`
Packit Service	c5cf8c	`int MPI_Dims_create(int nnodes, int ndims, int dims[])`
Packit Service	c5cf8c	`__attribute__ ((weak, alias("PMPI_Dims_create")));`
Packit Service	c5cf8c	`#endif`
Packit Service	c5cf8c	`/* -- End Profiling Symbol Block */`
Packit Service	c5cf8c
Packit Service	c5cf8c
Packit Service	c5cf8c
Packit Service	c5cf8c	`/* Because we store factors with their multiplicities, a small array can`
Packit Service	c5cf8c	`store all of the factors for a large number (grows faster than n`
Packit Service	c5cf8c	`factorial). */`
Packit Service	c5cf8c	`#define MAX_FACTORS 10`
Packit Service	c5cf8c	`/* 2^20 is a millon */`
Packit Service	c5cf8c	`#define MAX_DIMS 20`
Packit Service	c5cf8c
Packit Service	c5cf8c	`typedef struct Factors {`
Packit Service	c5cf8c	`int val, cnt;`
Packit Service	c5cf8c	`} Factors;`
Packit Service	c5cf8c
Packit Service	c5cf8c	`/* These routines may be global if we are not using weak symbols */`
Packit Service	c5cf8c	`PMPI_LOCAL int MPIR_Dims_create_init(void);`
Packit Service	c5cf8c	`PMPI_LOCAL int MPIR_Dims_create_impl(int nnodes, int ndims, int dims[]);`
Packit Service	c5cf8c
Packit Service	c5cf8c	`/* Define MPICH_MPI_FROM_PMPI if weak symbols are not supported to build`
Packit Service	c5cf8c	`the MPI routines. You can use USE_WEAK_SYMBOLS to see if MPICH is`
Packit Service	c5cf8c	`using weak symbols to implement the MPI routines. */`
Packit Service	c5cf8c
Packit Service	c5cf8c	`#ifndef MPICH_MPI_FROM_PMPI`
Packit Service	c5cf8c	`#undef MPI_Dims_create`
Packit Service	c5cf8c	`#define MPI_Dims_create PMPI_Dims_create`
Packit Service	c5cf8c
Packit Service	c5cf8c	`PMPI_LOCAL MPIR_T_pvar_timer_t PVAR_TIMER_dims_getdivs;`
Packit Service	c5cf8c	`PMPI_LOCAL MPIR_T_pvar_timer_t PVAR_TIMER_dims_sort;`
Packit Service	c5cf8c	`PMPI_LOCAL MPIR_T_pvar_timer_t PVAR_TIMER_dims_fact;`
Packit Service	c5cf8c	`PMPI_LOCAL MPIR_T_pvar_timer_t PVAR_TIMER_dims_basefact;`
Packit Service	c5cf8c	`PMPI_LOCAL MPIR_T_pvar_timer_t PVAR_TIMER_dims_div;`
Packit Service	c5cf8c	`PMPI_LOCAL MPIR_T_pvar_timer_t PVAR_TIMER_dims_bal;`
Packit Service	c5cf8c
Packit Service	c5cf8c	`PMPI_LOCAL unsigned long long PVAR_COUNTER_dims_npruned;`
Packit Service	c5cf8c	`PMPI_LOCAL unsigned long long PVAR_COUNTER_dims_ndivmade;`
Packit Service	c5cf8c	`PMPI_LOCAL unsigned long long PVAR_COUNTER_dims_optbalcalls;`
Packit Service	c5cf8c
Packit Service	c5cf8c	`/* MPI_Dims_create and PMPI_Dims_create must see the same variable for this`
Packit Service	c5cf8c	`one-time initialization flag. If this file must be compiled twice,`
Packit Service	c5cf8c	`this variable is defined here and as external for the other build. */`
Packit Service	c5cf8c	`volatile int MPIR_DIMS_initPCVars = 1;`
Packit Service	c5cf8c
Packit Service	c5cf8c	`/* This routine is called once to define any PVARS and CVARS */`
Packit Service	c5cf8c	`PMPI_LOCAL int MPIR_Dims_create_init(void)`
Packit Service	c5cf8c	`{`
Packit Service	c5cf8c
Packit Service	c5cf8c	`MPIR_T_PVAR_TIMER_REGISTER_STATIC(DIMS,`
Packit Service	c5cf8c	`MPI_DOUBLE,`
Packit Service	c5cf8c	`dims_getdivs,`
Packit Service	c5cf8c	`MPI_T_VERBOSITY_MPIDEV_DETAIL,`
Packit Service	c5cf8c	`MPI_T_BIND_NO_OBJECT,`
Packit Service	c5cf8c	`MPIR_T_PVAR_FLAG_READONLY,`
Packit Service	c5cf8c	`"DIMS", "Time to find divisors (seconds)");`
Packit Service	c5cf8c	`MPIR_T_PVAR_TIMER_REGISTER_STATIC(DIMS,`
Packit Service	c5cf8c	`MPI_DOUBLE,`
Packit Service	c5cf8c	`dims_sort,`
Packit Service	c5cf8c	`MPI_T_VERBOSITY_MPIDEV_DETAIL,`
Packit Service	c5cf8c	`MPI_T_BIND_NO_OBJECT,`
Packit Service	c5cf8c	`MPIR_T_PVAR_FLAG_READONLY,`
Packit Service	c5cf8c	`"DIMS", "Time to sort divisors (seconds)");`
Packit Service	c5cf8c	`MPIR_T_PVAR_TIMER_REGISTER_STATIC(DIMS,`
Packit Service	c5cf8c	`MPI_DOUBLE,`
Packit Service	c5cf8c	`dims_fact,`
Packit Service	c5cf8c	`MPI_T_VERBOSITY_MPIDEV_DETAIL,`
Packit Service	c5cf8c	`MPI_T_BIND_NO_OBJECT,`
Packit Service	c5cf8c	`MPIR_T_PVAR_FLAG_READONLY,`
Packit Service	c5cf8c	`"DIMS", "Time to find factors (seconds)");`
Packit Service	c5cf8c	`MPIR_T_PVAR_TIMER_REGISTER_STATIC(DIMS,`
Packit Service	c5cf8c	`MPI_DOUBLE,`
Packit Service	c5cf8c	`dims_basefact,`
Packit Service	c5cf8c	`MPI_T_VERBOSITY_MPIDEV_DETAIL,`
Packit Service	c5cf8c	`MPI_T_BIND_NO_OBJECT,`
Packit Service	c5cf8c	`MPIR_T_PVAR_FLAG_READONLY, "DIMS", "Time to ? (seconds)");`
Packit Service	c5cf8c	`MPIR_T_PVAR_TIMER_REGISTER_STATIC(DIMS,`
Packit Service	c5cf8c	`MPI_DOUBLE,`
Packit Service	c5cf8c	`dims_div,`
Packit Service	c5cf8c	`MPI_T_VERBOSITY_MPIDEV_DETAIL,`
Packit Service	c5cf8c	`MPI_T_BIND_NO_OBJECT,`
Packit Service	c5cf8c	`MPIR_T_PVAR_FLAG_READONLY,`
Packit Service	c5cf8c	`"DIMS", "Time spent in integer division (seconds)");`
Packit Service	c5cf8c	`MPIR_T_PVAR_TIMER_REGISTER_STATIC(DIMS,`
Packit Service	c5cf8c	`MPI_DOUBLE,`
Packit Service	c5cf8c	`dims_bal,`
Packit Service	c5cf8c	`MPI_T_VERBOSITY_MPIDEV_DETAIL,`
Packit Service	c5cf8c	`MPI_T_BIND_NO_OBJECT,`
Packit Service	c5cf8c	`MPIR_T_PVAR_FLAG_READONLY,`
Packit Service	c5cf8c	`"DIMS", "Time finding balanced decomposition (seconds)");`
Packit Service	c5cf8c
Packit Service	c5cf8c	`MPIR_T_PVAR_COUNTER_REGISTER_STATIC(DIMS, MPI_UNSIGNED_LONG_LONG, dims_npruned, MPI_T_VERBOSITY_MPIDEV_DETAIL, MPI_T_BIND_NO_OBJECT, (MPIR_T_PVAR_FLAG_READONLY \| MPIR_T_PVAR_FLAG_CONTINUOUS), "DIMS", /* category name */`
Packit Service	c5cf8c	`"Number of divisors pruned from the search for a balanced decomposition");`
Packit Service	c5cf8c	`MPIR_T_PVAR_COUNTER_REGISTER_STATIC(DIMS, MPI_UNSIGNED_LONG_LONG, dims_ndivmade, MPI_T_VERBOSITY_MPIDEV_DETAIL, MPI_T_BIND_NO_OBJECT, (MPIR_T_PVAR_FLAG_READONLY \| MPIR_T_PVAR_FLAG_CONTINUOUS), "DIMS", /* category name */`
Packit Service	c5cf8c	`"Number of integer divisions performed");`
Packit Service	c5cf8c	`MPIR_T_PVAR_COUNTER_REGISTER_STATIC(DIMS, MPI_UNSIGNED_LONG_LONG, dims_optbalcalls, MPI_T_VERBOSITY_MPIDEV_DETAIL, MPI_T_BIND_NO_OBJECT, (MPIR_T_PVAR_FLAG_READONLY \| MPIR_T_PVAR_FLAG_CONTINUOUS), "DIMS", /* category name */`
Packit Service	c5cf8c	`"Number of call to optbalance");`
Packit Service	c5cf8c
Packit Service	c5cf8c	`return 0;`
Packit Service	c5cf8c	`}`
Packit Service	c5cf8c
Packit Service	c5cf8c	`/* This includes all primes up to 46337 (sufficient for 2 billion ranks).`
Packit Service	c5cf8c	`If storage is a concern (MPICH was originally organized so that only`
Packit Service	c5cf8c	`code and data needed by applications would be loaded in order to keep`
Packit Service	c5cf8c	`both the size of the executable and any shared library loads small),`
Packit Service	c5cf8c	`this include could be organized include only primes up to 1000 (for`
Packit Service	c5cf8c	`a maximum of a million ranks) or 3400 (for a maximum of 10 million ranks),`
Packit Service	c5cf8c	`which will significantly reduce the number of elements included here.`
Packit Service	c5cf8c	`*/`
Packit Service	c5cf8c	`#include "primes.h"`
Packit Service	c5cf8c
Packit Service	c5cf8c	`/* Local only routines. These should not have standard prefix */`
Packit Service	c5cf8c	`static int factor_num(int, Factors[], int *);`
Packit Service	c5cf8c	`static int ndivisors_from_factor(int nf, const Factors * factors);`
Packit Service	c5cf8c	`static int factor_to_divisors(int nf, Factors * factors, int ndivs, int divs[]);`
Packit Service	c5cf8c	`static void factor_to_dims_by_rr(int nf, Factors f[], int nd, int dims[]);`
Packit Service	c5cf8c	`static int optbalance(int n, int idx, int nd, int ndivs,`
Packit Service	c5cf8c	`const int divs[], int trydims[], int *curbal_p, int optdims[]);`
Packit Service	c5cf8c
Packit Service	c5cf8c	`/*`
Packit Service	c5cf8c	`* Factor "n", returning the prime factors and their counts in factors, in`
Packit Service	c5cf8c	`* increasing order. Return the sum of the number of primes, including their`
Packit Service	c5cf8c	`* powers (e.g., 2^3 * 7^5 * 19 gives 9 divisors, the maximum that can be created`
Packit Service	c5cf8c	`* from this factorization)`
Packit Service	c5cf8c	`*/`
Packit Service	c5cf8c	`static int factor_num(int nn, Factors factors[], int *nprimes)`
Packit Service	c5cf8c	`{`
Packit Service	c5cf8c	`int n = nn, val;`
Packit Service	c5cf8c	`int i, nfactors = 0, nall = 0;`
Packit Service	c5cf8c	`int cnt;`
Packit Service	c5cf8c
Packit Service	c5cf8c	`/* Find the prime number that less than that value and try dividing`
Packit Service	c5cf8c	`* out the primes. */`
Packit Service	c5cf8c
Packit Service	c5cf8c	`/* First, get factors of 2 without division */`
Packit Service	c5cf8c	`if ((n & 0x1) == 0) {`
Packit Service	c5cf8c	`cnt = 1;`
Packit Service	c5cf8c	`n >>= 1;`
Packit Service	c5cf8c	`while ((n & 0x1) == 0) {`
Packit Service	c5cf8c	`cnt++;`
Packit Service	c5cf8c	`n >>= 1;`
Packit Service	c5cf8c	`}`
Packit Service	c5cf8c	`factors[0].cnt = cnt;`
Packit Service	c5cf8c	`factors[0].val = 2;`
Packit Service	c5cf8c	`nfactors = 1;`
Packit Service	c5cf8c	`nall = cnt;`
Packit Service	c5cf8c	`}`
Packit Service	c5cf8c
Packit Service	c5cf8c	`/* An earlier version computed an approximation to the sqrt(n) to serve`
Packit Service	c5cf8c	`* as a stopping criteria. However, tests showed that checking that the`
Packit Service	c5cf8c	`* square of the current prime is less than the remaining value (after`
Packit Service	c5cf8c	`* dividing out the primes found so far) is faster.`
Packit Service	c5cf8c	`*/`
Packit Service	c5cf8c	`i = 1;`
Packit Service	c5cf8c	`do {`
Packit Service	c5cf8c	`int n2;`
Packit Service	c5cf8c	`val = primes[i];`
Packit Service	c5cf8c	`/* Note that we keep removing factors from n, so this test becomes`
Packit Service	c5cf8c	`* easier and easier to satisfy as we remove factors from n`
Packit Service	c5cf8c	`*/`
Packit Service	c5cf8c	`if (val * val > n)`
Packit Service	c5cf8c	`break;`
Packit Service	c5cf8c	`n2 = n / val;`
Packit Service	c5cf8c	`if (n2 * val == n) {`
Packit Service	c5cf8c	`cnt = 1;`
Packit Service	c5cf8c	`n = n2;`
Packit Service	c5cf8c	`n2 = n / val;`
Packit Service	c5cf8c	`while (n2 * val == n) {`
Packit Service	c5cf8c	`cnt++;`
Packit Service	c5cf8c	`n = n2;`
Packit Service	c5cf8c	`n2 = n / val;`
Packit Service	c5cf8c	`}`
Packit Service	c5cf8c	`/* Test on nfactors >= MAX_FACTORS? */`
Packit Service	c5cf8c	`/* --BEGIN ERROR HANDLING-- */`
Packit Service	c5cf8c	`if (nfactors + 1 == MAX_FACTORS) {`
Packit Service	c5cf8c	`/* Time to panic. This should not happen, since the`
Packit Service	c5cf8c	`* smallest number that could exceed this would`
Packit Service	c5cf8c	`* be the product of the first 10 primes that are`
Packit Service	c5cf8c	`* greater than one, which is 6,469,693,230 */`
Packit Service	c5cf8c	`return nfactors;`
Packit Service	c5cf8c	`}`
Packit Service	c5cf8c	`/* --END ERROR HANDLING-- */`
Packit Service	c5cf8c	`factors[nfactors].val = val;`
Packit Service	c5cf8c	`factors[nfactors++].cnt = cnt;`
Packit Service	c5cf8c	`nall += cnt;`
Packit Service	c5cf8c	`if (n == 1)`
Packit Service	c5cf8c	`break;`
Packit Service	c5cf8c	`}`
Packit Service	c5cf8c	`i++;`
Packit Service	c5cf8c	`} while (1);`
Packit Service	c5cf8c	`if (n != 1) {`
Packit Service	c5cf8c	`/* There was one factor > sqrt(n). This factor must be prime.`
Packit Service	c5cf8c	`* Note if nfactors was 0, n was prime */`
Packit Service	c5cf8c	`factors[nfactors].val = n;`
Packit Service	c5cf8c	`factors[nfactors++].cnt = 1;`
Packit Service	c5cf8c	`nall++;`
Packit Service	c5cf8c	`}`
Packit Service	c5cf8c	`*nprimes = nall;`
Packit Service	c5cf8c	`return nfactors;`
Packit Service	c5cf8c	`}`
Packit Service	c5cf8c
Packit Service	c5cf8c	`static int ndivisors_from_factor(int nf, const Factors * factors)`
Packit Service	c5cf8c	`{`
Packit Service	c5cf8c	`int i, ndiv;`
Packit Service	c5cf8c	`ndiv = 1;`
Packit Service	c5cf8c	`for (i = 0; i < nf; i++) {`
Packit Service	c5cf8c	`ndiv *= (factors[i].cnt + 1);`
Packit Service	c5cf8c	`}`
Packit Service	c5cf8c	`ndiv -= 2;`
Packit Service	c5cf8c
Packit Service	c5cf8c	`return ndiv;`
Packit Service	c5cf8c	`}`
Packit Service	c5cf8c
Packit Service	c5cf8c	`#undef FCNAME`
Packit Service	c5cf8c	`#define FCNAME "factor_to_divisors"`
Packit Service	c5cf8c	`static int factor_to_divisors(int nf, Factors * factors, int ndiv, int divs[])`
Packit Service	c5cf8c	`{`
Packit Service	c5cf8c	`int i, powers[MAX_FACTORS], curbase[MAX_FACTORS], nd, idx, val, mpi_errno;`
Packit Service	c5cf8c
Packit Service	c5cf8c	`MPIR_T_PVAR_TIMER_START(DIMS, dims_getdivs);`
Packit Service	c5cf8c	`for (i = 0; i < nf; i++) {`
Packit Service	c5cf8c	`powers[i] = 0;`
Packit Service	c5cf8c	`curbase[i] = 1;`
Packit Service	c5cf8c	`}`
Packit Service	c5cf8c
Packit Service	c5cf8c	`for (nd = 0; nd < ndiv; nd++) {`
Packit Service	c5cf8c	`/* Get the next power */`
Packit Service	c5cf8c	`for (idx = 0; idx < nf; idx++) {`
Packit Service	c5cf8c	`powers[idx]++;`
Packit Service	c5cf8c	`if (powers[idx] > factors[idx].cnt) {`
Packit Service	c5cf8c	`powers[idx] = 0;`
Packit Service	c5cf8c	`curbase[idx] = 1;`
Packit Service	c5cf8c	`} else {`
Packit Service	c5cf8c	`curbase[idx] *= factors[idx].val;`
Packit Service	c5cf8c	`break;`
Packit Service	c5cf8c	`}`
Packit Service	c5cf8c	`}`
Packit Service	c5cf8c	`/* Compute the divisor. Note that we could keep the scan of values`
Packit Service	c5cf8c	`* from k to nf-1, then curscan[0] would always be the next value */`
Packit Service	c5cf8c	`val = 1;`
Packit Service	c5cf8c	`for (idx = 0; idx < nf; idx++)`
Packit Service	c5cf8c	`val *= curbase[idx];`
Packit Service	c5cf8c	`divs[nd] = val;`
Packit Service	c5cf8c	`}`
Packit Service	c5cf8c	`MPIR_T_PVAR_TIMER_END(DIMS, dims_getdivs);`
Packit Service	c5cf8c
Packit Service	c5cf8c	`/* Values are not sorted - for example, 2(2),3 will give 2,4,3 as the`
Packit Service	c5cf8c	`* divisors */`
Packit Service	c5cf8c	`/* This code is used because it is significantly faster than using`
Packit Service	c5cf8c	`* the qsort routine. In tests of several million dims_create`
Packit Service	c5cf8c	`* calls, this code took 1.02 seconds (with -O3) and 1.79 seconds`
Packit Service	c5cf8c	`* (without optimization) while qsort took 2.5 seconds.`
Packit Service	c5cf8c	`*/`
Packit Service	c5cf8c	`if (nf > 1) {`
Packit Service	c5cf8c	`int gap, j, j1, j2, k, j1max, j2max;`
Packit Service	c5cf8c	`MPIR_CHKLMEM_DECL(1);`
Packit Service	c5cf8c	`int *divs2;`
Packit Service	c5cf8c	`int restrict d1, restrict d2;`
Packit Service	c5cf8c
Packit Service	c5cf8c	`MPIR_CHKLMEM_MALLOC(divs2, int , nd sizeof(int), mpi_errno, "divs2", MPL_MEM_COMM);`
Packit Service	c5cf8c
Packit Service	c5cf8c	`MPIR_T_PVAR_TIMER_START(DIMS, dims_sort);`
Packit Service	c5cf8c	`/* handling the first set of pairs separately saved about 20%;`
Packit Service	c5cf8c	`* done inplace */`
Packit Service	c5cf8c	`for (j = 0; j < nd - 1; j += 2) {`
Packit Service	c5cf8c	`if (divs[j] > divs[j + 1]) {`
Packit Service	c5cf8c	`int tmp = divs[j];`
Packit Service	c5cf8c	`divs[j] = divs[j + 1];`
Packit Service	c5cf8c	`divs[j + 1] = tmp;`
Packit Service	c5cf8c	`}`
Packit Service	c5cf8c	`}`
Packit Service	c5cf8c	`/* Using pointers d1 and d2 about 2x copying divs2 back into divs`
Packit Service	c5cf8c	`* at the end of each phase */`
Packit Service	c5cf8c	`d1 = divs;`
Packit Service	c5cf8c	`d2 = divs2;`
Packit Service	c5cf8c	`for (gap = 2; gap < nd; gap *= 2) {`
Packit Service	c5cf8c	`k = 0;`
Packit Service	c5cf8c	`for (j = 0; j + gap < nd; j += gap * 2) {`
Packit Service	c5cf8c	`j1 = j;`
Packit Service	c5cf8c	`j2 = j + gap;`
Packit Service	c5cf8c	`j1max = j2;`
Packit Service	c5cf8c	`j2max = j2 + gap;`
Packit Service	c5cf8c	`if (j2max > nd)`
Packit Service	c5cf8c	`j2max = nd;`
Packit Service	c5cf8c	`while (j1 < j1max && j2 < j2max) {`
Packit Service	c5cf8c	`if (d1[j1] < d1[j2]) {`
Packit Service	c5cf8c	`d2[k++] = d1[j1++];`
Packit Service	c5cf8c	`} else {`
Packit Service	c5cf8c	`d2[k++] = d1[j2++];`
Packit Service	c5cf8c	`}`
Packit Service	c5cf8c	`}`
Packit Service	c5cf8c	`/* Copy remainder */`
Packit Service	c5cf8c	`while (j1 < j1max)`
Packit Service	c5cf8c	`d2[k++] = d1[j1++];`
Packit Service	c5cf8c	`while (j2 < j2max)`
Packit Service	c5cf8c	`d2[k++] = d1[j2++];`
Packit Service	c5cf8c	`}`
Packit Service	c5cf8c	`/* May be some left over */`
Packit Service	c5cf8c	`while (j < nd)`
Packit Service	c5cf8c	`d2[k++] = d1[j++];`
Packit Service	c5cf8c	`/* swap pointers and start again */`
Packit Service	c5cf8c	`{`
Packit Service	c5cf8c	`int *dt = d1;`
Packit Service	c5cf8c	`d1 = d2;`
Packit Service	c5cf8c	`d2 = dt;`
Packit Service	c5cf8c	`}`
Packit Service	c5cf8c	`}`
Packit Service	c5cf8c	`MPIR_T_PVAR_TIMER_END(DIMS, dims_sort);`
Packit Service	c5cf8c	`/* Result must end up in divs */`
Packit Service	c5cf8c	`if (d1 != divs) {`
Packit Service	c5cf8c	`for (j1 = 0; j1 < nd; j1++)`
Packit Service	c5cf8c	`divs[j1] = d1[j1];`
Packit Service	c5cf8c	`}`
Packit Service	c5cf8c	`MPIR_CHKLMEM_FREEALL();`
Packit Service	c5cf8c	`}`
Packit Service	c5cf8c	`return nd;`
Packit Service	c5cf8c	`fn_fail:`
Packit Service	c5cf8c	`return mpi_errno;`
Packit Service	c5cf8c	`}`
Packit Service	c5cf8c
Packit Service	c5cf8c
Packit Service	c5cf8c	`/*`
Packit Service	c5cf8c	`* This is a modified round robin assignment. The goal is to`
Packit Service	c5cf8c	`* get a good first guess at a good distribution.`
Packit Service	c5cf8c	`*`
Packit Service	c5cf8c	`* First, distribute factors to dims[0..nd-1]. The purpose is to get the`
Packit Service	c5cf8c	`* initial factors set and to ensure that the smallest dimension is > 1.`
Packit Service	c5cf8c	`* Second, distibute the remaining factors, starting with the largest, to`
Packit Service	c5cf8c	`* the elements of dims with the smallest index i such that`
Packit Service	c5cf8c	`* dims[i-1] > dims[i]*val`
Packit Service	c5cf8c	`* or to dims[0] if no i satisfies.`
Packit Service	c5cf8c	`* For example, this will successfully distribute the factors of 100 in 3-d`
Packit Service	c5cf8c	`* as 5,5,4. A pure round robin would give 10,5,2.`
Packit Service	c5cf8c	`*/`
Packit Service	c5cf8c	`static void factor_to_dims_by_rr(int nf, Factors f[], int nd, int dims[])`
Packit Service	c5cf8c	`{`
Packit Service	c5cf8c	`int i, j, k, cnt, val;`
Packit Service	c5cf8c
Packit Service	c5cf8c	`/* Initialize dims */`
Packit Service	c5cf8c	`for (i = 0; i < nd; i++)`
Packit Service	c5cf8c	`dims[i] = 1;`
Packit Service	c5cf8c
Packit Service	c5cf8c	`k = 0;`
Packit Service	c5cf8c	`/* Start with the largest factors, move back to the smallest factor */`
Packit Service	c5cf8c	`for (i = nf - 1; i >= 0; i--) {`
Packit Service	c5cf8c	`val = f[i].val;`
Packit Service	c5cf8c	`cnt = f[i].cnt;`
Packit Service	c5cf8c	`for (j = 0; j < cnt; j++) {`
Packit Service	c5cf8c	`if (k < nd) {`
Packit Service	c5cf8c	`dims[k++] = val;`
Packit Service	c5cf8c	`} else {`
Packit Service	c5cf8c	`int kk;`
Packit Service	c5cf8c	`/* find first location where apply val is valid.`
Packit Service	c5cf8c	`* Can always apply at dims[0] */`
Packit Service	c5cf8c	`for (kk = nd - 1; kk > 0; kk--) {`
Packit Service	c5cf8c	`if (dims[kk] * val <= dims[kk - 1])`
Packit Service	c5cf8c	`break;`
Packit Service	c5cf8c	`}`
Packit Service	c5cf8c	`dims[kk] *= val;`
Packit Service	c5cf8c	`}`
Packit Service	c5cf8c	`}`
Packit Service	c5cf8c	`}`
Packit Service	c5cf8c	`}`
Packit Service	c5cf8c
Packit Service	c5cf8c
Packit Service	c5cf8c	`/* need to set a minidx where it stops because the remaining`
Packit Service	c5cf8c	`values are known. Then pass in the entire array. This is needed`
Packit Service	c5cf8c	`to get the correct values for "ties" between the first and last values.`
Packit Service	c5cf8c	`*/`
Packit Service	c5cf8c	`#undef FC_NAME`
Packit Service	c5cf8c	`#define FC_NAME "optbalance"`
Packit Service	c5cf8c
Packit Service	c5cf8c	`static int optbalance(int n, int idx, int nd, int ndivs, const int divs[],`
Packit Service	c5cf8c	`int trydims[], int *curbal_p, int optdims[])`
Packit Service	c5cf8c	`{`
Packit Service	c5cf8c	`int min = trydims[nd - 1], curbal = *curbal_p, testbal;`
Packit Service	c5cf8c	`int k, f, q, ff, i, ii, kk, nndivs, sf, mpi_errno = MPI_SUCCESS;`
Packit Service	c5cf8c
Packit Service	c5cf8c	`MPIR_T_PVAR_COUNTER_INC(DIMS, dims_optbalcalls, 1);`
Packit Service	c5cf8c	`if (MPIR_CVAR_DIMS_VERBOSE) {`
Packit Service	c5cf8c	`MPL_msg_printf("Noptb: idx=%d, nd=%d, ndivs=%d, balance=%d\n", idx, nd, ndivs, curbal);`
Packit Service	c5cf8c	`MPL_msg_printf("Noptb:optdims: ");`
Packit Service	c5cf8c	`for (i = 0; i < nd; i++)`
Packit Service	c5cf8c	`MPL_msg_printf("%d%c", optdims[i], (i + 1 < nd) ? 'x' : '\n');`
Packit Service	c5cf8c	`MPL_msg_printf("Noptb:trydims: ");`
Packit Service	c5cf8c	`for (i = idx + 1; i < nd; i++)`
Packit Service	c5cf8c	`MPL_msg_printf("%d%c", trydims[i], (i + 1 < nd) ? 'x' : '\n');`
Packit Service	c5cf8c	`}`
Packit Service	c5cf8c	`if (idx > 1) {`
Packit Service	c5cf8c	`MPIR_CHKLMEM_DECL(1);`
Packit Service	c5cf8c	`int *newdivs;`
Packit Service	c5cf8c	`MPIR_CHKLMEM_MALLOC(newdivs, int , ndivs sizeof(int), mpi_errno, "divs", MPL_MEM_COMM);`
Packit Service	c5cf8c	`if (mpi_errno)`
Packit Service	c5cf8c	`return mpi_errno;`
Packit Service	c5cf8c
Packit Service	c5cf8c	`/* At least 3 divisors to set (0...idx). We try all choices`
Packit Service	c5cf8c	`* recursively, but stop looking when we can easily tell that`
Packit Service	c5cf8c	`* no additional cases can improve the current solution. */`
Packit Service	c5cf8c	`for (k = 0; k < ndivs; k++) {`
Packit Service	c5cf8c	`f = divs[k];`
Packit Service	c5cf8c	`if (MPIR_CVAR_DIMS_VERBOSE) {`
Packit Service	c5cf8c	`MPL_msg_printf("Noptb: try f=%d at dims[%d]\n", f, idx);`
Packit Service	c5cf8c	`}`
Packit Service	c5cf8c	`if (idx < nd - 1 && f - min > curbal) {`
Packit Service	c5cf8c	`if (MPIR_CVAR_DIMS_VERBOSE) {`
Packit Service	c5cf8c	`MPL_msg_printf("f-min = %d, curbal = %d, skipping other divisors\n",`
Packit Service	c5cf8c	`f - min, curbal);`
Packit Service	c5cf8c	`}`
Packit Service	c5cf8c	`MPIR_T_PVAR_COUNTER_INC(DIMS, dims_npruned, 1);`
Packit Service	c5cf8c	`break; /* best case is all divisors at least`
Packit Service	c5cf8c	`* f, so the best balance is at least`
Packit Service	c5cf8c	`* f - min */`
Packit Service	c5cf8c	`}`
Packit Service	c5cf8c	`q = n / f;`
Packit Service	c5cf8c	`/* check whether f is a divisor of what's left. If so,`
Packit Service	c5cf8c	`* remember it as the smallest new divisor */`
Packit Service	c5cf8c	`/* sf is the smallest remaining factor; we use this in an`
Packit Service	c5cf8c	`* optimization for possible divisors */`
Packit Service	c5cf8c	`nndivs = 0;`
Packit Service	c5cf8c	`if (q % f == 0) {`
Packit Service	c5cf8c	`newdivs[nndivs++] = f;`
Packit Service	c5cf8c	`sf = f;`
Packit Service	c5cf8c	`} else {`
Packit Service	c5cf8c	`sf = divs[k + 1];`
Packit Service	c5cf8c	`}`
Packit Service	c5cf8c	`if (idx < nd - 1 && sf - min > curbal) {`
Packit Service	c5cf8c	`MPIR_T_PVAR_COUNTER_INC(DIMS, dims_npruned, 1);`
Packit Service	c5cf8c	`break;`
Packit Service	c5cf8c	`}`
Packit Service	c5cf8c	`if (MPIR_CVAR_DIMS_VERBOSE) {`
Packit Service	c5cf8c	`MPL_msg_printf("Noptb: sf = %d\n", sf);`
Packit Service	c5cf8c	`}`
Packit Service	c5cf8c	`ff = sf * sf; /* At least 2 more divisors */`
Packit Service	c5cf8c	`for (ii = idx - 1; ii > 0 && ff <= q; ii--)`
Packit Service	c5cf8c	`ff *= sf;`
Packit Service	c5cf8c	`if (ii > 0) {`
Packit Service	c5cf8c	`if (MPIR_CVAR_DIMS_VERBOSE) {`
Packit Service	c5cf8c	`MPL_msg_printf("break for ii = %d, ff = %d and q = %d\n", ii, ff, q);`
Packit Service	c5cf8c	`}`
Packit Service	c5cf8c	`MPIR_T_PVAR_COUNTER_INC(DIMS, dims_npruned, 1);`
Packit Service	c5cf8c	`break; /* remaining divisors >= sf, and`
Packit Service	c5cf8c	`* product must be <= q */`
Packit Service	c5cf8c	`}`
Packit Service	c5cf8c
Packit Service	c5cf8c	`/* Try f as the divisor at the idx location */`
Packit Service	c5cf8c	`trydims[idx] = f;`
Packit Service	c5cf8c	`MPIR_T_PVAR_TIMER_START(DIMS, dims_div);`
Packit Service	c5cf8c	`/* find the subset of divisors of n that are divisors of q`
Packit Service	c5cf8c	`* and larger than f but such that ff <= q /`
Packit Service	c5cf8c	`for (kk = k + 1; kk < ndivs; kk++) {`
Packit Service	c5cf8c	`f = divs[kk];`
Packit Service	c5cf8c	`ff = f * f;`
Packit Service	c5cf8c	`if (ff > q) {`
Packit Service	c5cf8c	`MPIR_T_PVAR_COUNTER_INC(DIMS, dims_npruned, 1);`
Packit Service	c5cf8c	`break;`
Packit Service	c5cf8c	`}`
Packit Service	c5cf8c	`MPIR_T_PVAR_COUNTER_INC(DIMS, dims_ndivmade, 1);`
Packit Service	c5cf8c	`if ((q % f) == 0) {`
Packit Service	c5cf8c	`newdivs[nndivs++] = f;`
Packit Service	c5cf8c	`}`
Packit Service	c5cf8c	`/* sf * f <= q, break otherwise */`
Packit Service	c5cf8c	`}`
Packit Service	c5cf8c	`MPIR_T_PVAR_TIMER_END(DIMS, dims_div);`
Packit Service	c5cf8c	`/* recursively try to find the best subset */`
Packit Service	c5cf8c	`if (nndivs > 0)`
Packit Service	c5cf8c	`optbalance(q, idx - 1, nd, nndivs, newdivs, trydims, curbal_p, optdims);`
Packit Service	c5cf8c	`}`
Packit Service	c5cf8c	`MPIR_CHKLMEM_FREEALL();`
Packit Service	c5cf8c	`} else if (idx == 1) {`
Packit Service	c5cf8c	`/* Only two. Find the pair such that q,f has min value for q-min, i.e.,`
Packit Service	c5cf8c	`* the smallest q such that q > f. Note that f*f < n if q > f and`
Packit Service	c5cf8c	`* qf = n /`
Packit Service	c5cf8c	`/* Valid choices for f, q >= divs[0] */`
Packit Service	c5cf8c	`int qprev = -1;`
Packit Service	c5cf8c	`for (k = 1; k < ndivs; k++) {`
Packit Service	c5cf8c	`f = divs[k];`
Packit Service	c5cf8c	`q = n / f;`
Packit Service	c5cf8c	`if (q < f)`
Packit Service	c5cf8c	`break;`
Packit Service	c5cf8c	`qprev = q;`
Packit Service	c5cf8c	`}`
Packit Service	c5cf8c	`f = divs[k - 1];`
Packit Service	c5cf8c	`if (qprev > 0)`
Packit Service	c5cf8c	`q = qprev;`
Packit Service	c5cf8c	`else`
Packit Service	c5cf8c	`q = n / f;`
Packit Service	c5cf8c
Packit Service	c5cf8c	`if (q < f) {`
Packit Service	c5cf8c	`if (MPIR_CVAR_DIMS_VERBOSE) {`
Packit Service	c5cf8c	`MPL_msg_printf("Skipping because %d < %d\n", q, f);`
Packit Service	c5cf8c	`}`
Packit Service	c5cf8c	`/* No valid solution. Exit without changing current optdims */`
Packit Service	c5cf8c	`MPIR_T_PVAR_COUNTER_INC(DIMS, dims_npruned, 1);`
Packit Service	c5cf8c	`return 0;`
Packit Service	c5cf8c	`}`
Packit Service	c5cf8c	`if (MPIR_CVAR_DIMS_VERBOSE) {`
Packit Service	c5cf8c	`MPL_msg_printf("Found best factors %d,%d, from divs[%d]\n", q, f, k - 1);`
Packit Service	c5cf8c	`}`
Packit Service	c5cf8c	`/* If nd is 2 (and not greater), we will be replacing all values`
Packit Service	c5cf8c	`* in dims. In that case, testbal is q-f;`
Packit Service	c5cf8c	`*/`
Packit Service	c5cf8c	`if (nd == 2)`
Packit Service	c5cf8c	`testbal = q - f;`
Packit Service	c5cf8c	`else`
Packit Service	c5cf8c	`testbal = q - min;`
Packit Service	c5cf8c
Packit Service	c5cf8c	`/* Take the new value if it is at least as good as the`
Packit Service	c5cf8c	`* current choice. This is what Jesper Traeff's version does`
Packit Service	c5cf8c	`* (see the code he provided for his EuroMPI'15 paper) */`
Packit Service	c5cf8c	`if (testbal <= curbal) {`
Packit Service	c5cf8c	`for (i = 2; i < nd; i++)`
Packit Service	c5cf8c	`optdims[i] = trydims[i];`
Packit Service	c5cf8c	`optdims[0] = q;`
Packit Service	c5cf8c	`optdims[1] = f;`
Packit Service	c5cf8c	`/* Use the balance for the range of dims, not the total`
Packit Service	c5cf8c	`* balance */`
Packit Service	c5cf8c	`*curbal_p = q - min;`
Packit Service	c5cf8c	`}`
Packit Service	c5cf8c	`} else {`
Packit Service	c5cf8c	`/* idx == 0. Only one choice for divisor */`
Packit Service	c5cf8c	`if (n - min <= curbal) {`
Packit Service	c5cf8c	`for (i = 1; i < nd; i++)`
Packit Service	c5cf8c	`optdims[i] = trydims[i];`
Packit Service	c5cf8c	`optdims[0] = n;`
Packit Service	c5cf8c	`*curbal_p = n - min;`
Packit Service	c5cf8c	`}`
Packit Service	c5cf8c	`}`
Packit Service	c5cf8c	`return 0;`
Packit Service	c5cf8c	`fn_fail:`
Packit Service	c5cf8c	`return mpi_errno;`
Packit Service	c5cf8c	`}`
Packit Service	c5cf8c
Packit Service	c5cf8c
Packit Service	c5cf8c	`/* FIXME: The error checking should really be part of MPI_Dims_create,`
Packit Service	c5cf8c	`not part of MPIR_Dims_create_impl. That slightly changes the`
Packit Service	c5cf8c	`semantics of Dims_create provided by the device, but only by`
Packit Service	c5cf8c	`removing the need to check for errors */`
Packit Service	c5cf8c
Packit Service	c5cf8c
Packit Service	c5cf8c	`PMPI_LOCAL int MPIR_Dims_create_impl(int nnodes, int ndims, int dims[])`
Packit Service	c5cf8c	`{`
Packit Service	c5cf8c	`Factors f[MAX_FACTORS];`
Packit Service	c5cf8c	`int nf, nprimes = 0, i, j, k, val, nextidx;`
Packit Service	c5cf8c	`int ndivs, curbal;`
Packit Service	c5cf8c	`int trydims[MAX_DIMS];`
Packit Service	c5cf8c	`int dims_needed, dims_product, mpi_errno;`
Packit Service	c5cf8c	`int chosen[MAX_DIMS], foundDecomp;`
Packit Service	c5cf8c	`int *divs;`
Packit Service	c5cf8c	`MPIR_CHKLMEM_DECL(1);`
Packit Service	c5cf8c
Packit Service	c5cf8c	`/* Find the number of unspecified dimensions in dims and the product`
Packit Service	c5cf8c	`* of the positive values in dims */`
Packit Service	c5cf8c	`dims_needed = 0;`
Packit Service	c5cf8c	`dims_product = 1;`
Packit Service	c5cf8c	`for (i = 0; i < ndims; i++) {`
Packit Service	c5cf8c	`if (dims[i] < 0) {`
Packit Service	c5cf8c	`mpi_errno = MPIR_Err_create_code(MPI_SUCCESS,`
Packit Service	c5cf8c	`MPIR_ERR_RECOVERABLE,`
Packit Service	c5cf8c	`"MPIR_Dims_create", __LINE__,`
Packit Service	c5cf8c	`MPI_ERR_DIMS,`
Packit Service	c5cf8c	`"**argarrayneg",`
Packit Service	c5cf8c	`"**argarrayneg %s %d %d", "dims", i, dims[i]);`
Packit Service	c5cf8c	`return mpi_errno;`
Packit Service	c5cf8c	`}`
Packit Service	c5cf8c	`if (dims[i] == 0)`
Packit Service	c5cf8c	`dims_needed++;`
Packit Service	c5cf8c	`else`
Packit Service	c5cf8c	`dims_product *= dims[i];`
Packit Service	c5cf8c	`}`
Packit Service	c5cf8c
Packit Service	c5cf8c	`/* Can we factor nnodes by dims_product? */`
Packit Service	c5cf8c	`if ((nnodes / dims_product) * dims_product != nnodes) {`
Packit Service	c5cf8c	`mpi_errno = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE,`
Packit Service	c5cf8c	`"MPIR_Dims_create", __LINE__,`
Packit Service	c5cf8c	`MPI_ERR_DIMS, "**dimspartition", 0);`
Packit Service	c5cf8c	`return mpi_errno;`
Packit Service	c5cf8c	`}`
Packit Service	c5cf8c
Packit Service	c5cf8c	`if (!dims_needed) {`
Packit Service	c5cf8c	`/* Special case - all dimensions provided */`
Packit Service	c5cf8c	`return MPI_SUCCESS;`
Packit Service	c5cf8c	`}`
Packit Service	c5cf8c
Packit Service	c5cf8c	`if (dims_needed > MAX_DIMS) {`
Packit Service	c5cf8c	`/* --BEGIN ERROR HANDLING-- */`
Packit Service	c5cf8c	`mpi_errno = MPIR_Err_create_code(MPI_SUCCESS,`
Packit Service	c5cf8c	`MPIR_ERR_RECOVERABLE,`
Packit Service	c5cf8c	`"MPIR_Dims_create", __LINE__, MPI_ERR_DIMS,`
Packit Service	c5cf8c	`"dimsmany", "dimsmany %d %d", dims_needed, MAX_DIMS);`
Packit Service	c5cf8c	`return mpi_errno;`
Packit Service	c5cf8c	`/* --END ERROR HANDLING-- */`
Packit Service	c5cf8c	`}`
Packit Service	c5cf8c
Packit Service	c5cf8c	`nnodes /= dims_product;`
Packit Service	c5cf8c
Packit Service	c5cf8c	`/* Factor the remaining nodes into dims_needed components. The`
Packit Service	c5cf8c	`* MPI standard says that these should be as balanced as possible;`
Packit Service	c5cf8c	`* in particular, unfortunately, not necessarily arranged in a way`
Packit Service	c5cf8c	`* that makes sense for the underlying machine topology. The approach`
Packit Service	c5cf8c	`* used here was inspired by "Specification Guideline Violations`
Packit Service	c5cf8c	`* by MPI_Dims_create," by Jesper Traeff and Felix Luebbe, EuroMPI'15.`
Packit Service	c5cf8c	`* Among the changes are the use of a table of primes to speed up the`
Packit Service	c5cf8c	`* process of finding factors, and some effort to reduce the number`
Packit Service	c5cf8c	`* of integer division operations.`
Packit Service	c5cf8c	`*`
Packit Service	c5cf8c	`* Values are put into chosen[0..dims_needed]. These values are then`
Packit Service	c5cf8c	`* copied back into dims into the "empty" slots (values == 0).`
Packit Service	c5cf8c	`*/`
Packit Service	c5cf8c
Packit Service	c5cf8c	`if (dims_needed == 1) {`
Packit Service	c5cf8c	`/* Simply put the correct value in place */`
Packit Service	c5cf8c	`for (j = 0; j < ndims; j++) {`
Packit Service	c5cf8c	`if (dims[j] == 0) {`
Packit Service	c5cf8c	`dims[j] = nnodes;`
Packit Service	c5cf8c	`break;`
Packit Service	c5cf8c	`}`
Packit Service	c5cf8c	`}`
Packit Service	c5cf8c	`return 0;`
Packit Service	c5cf8c	`}`
Packit Service	c5cf8c
Packit Service	c5cf8c	`/* Factor nnodes */`
Packit Service	c5cf8c	`MPIR_T_PVAR_TIMER_START(DIMS, dims_fact);`
Packit Service	c5cf8c	`nf = factor_num(nnodes, f, &nprimes);`
Packit Service	c5cf8c	`MPIR_T_PVAR_TIMER_END(DIMS, dims_fact);`
Packit Service	c5cf8c
Packit Service	c5cf8c	`if (MPIR_CVAR_DIMS_VERBOSE) {`
Packit Service	c5cf8c	`MPL_msg_printf("found %d factors\n", nf);`
Packit Service	c5cf8c	`}`
Packit Service	c5cf8c
Packit Service	c5cf8c	`/* Remove primes > sqrt(n) */`
Packit Service	c5cf8c	`nextidx = 0;`
Packit Service	c5cf8c	`for (j = nf - 1; j > 0 && nextidx < dims_needed - 1; j--) {`
Packit Service	c5cf8c	`val = f[j].val;`
Packit Service	c5cf8c	`if (f[j].cnt == 1 && val * val > nnodes) {`
Packit Service	c5cf8c	`if (MPIR_CVAR_DIMS_VERBOSE) {`
Packit Service	c5cf8c	`MPL_msg_printf("prime %d required in idx %d\n", val, nextidx);`
Packit Service	c5cf8c	`}`
Packit Service	c5cf8c	`chosen[nextidx++] = val;`
Packit Service	c5cf8c	`nf--;`
Packit Service	c5cf8c	`nnodes /= val;`
Packit Service	c5cf8c	`nprimes--;`
Packit Service	c5cf8c	`} else`
Packit Service	c5cf8c	`break;`
Packit Service	c5cf8c	`}`
Packit Service	c5cf8c
Packit Service	c5cf8c	`/* Now, handle some special cases. If we find any of these,`
Packit Service	c5cf8c	`* we're done and can use the values in chosen to return values in dims */`
Packit Service	c5cf8c	`foundDecomp = 0;`
Packit Service	c5cf8c	`if (nprimes + nextidx <= dims_needed) {`
Packit Service	c5cf8c	`/* Fill with the primes */`
Packit Service	c5cf8c	`int m, cnt;`
Packit Service	c5cf8c	`if (MPIR_CVAR_DIMS_VERBOSE) {`
Packit Service	c5cf8c	`MPL_msg_printf("Nprimes = %d, number dims left = %d\n", nprimes, dims_needed - nextidx);`
Packit Service	c5cf8c	`}`
Packit Service	c5cf8c	`i = nextidx + nprimes;`
Packit Service	c5cf8c	`for (k = 0; k < nf; k++) {`
Packit Service	c5cf8c	`cnt = f[k].cnt;`
Packit Service	c5cf8c	`val = f[k].val;`
Packit Service	c5cf8c	`for (m = 0; m < cnt; m++)`
Packit Service	c5cf8c	`chosen[--i] = val;`
Packit Service	c5cf8c	`}`
Packit Service	c5cf8c	`i = nextidx + nprimes;`
Packit Service	c5cf8c	`while (i < ndims)`
Packit Service	c5cf8c	`chosen[i++] = 1;`
Packit Service	c5cf8c	`foundDecomp = 1;`
Packit Service	c5cf8c	`} else if (dims_needed - nextidx == 1) {`
Packit Service	c5cf8c	`chosen[nextidx] = nnodes;`
Packit Service	c5cf8c	`foundDecomp = 1;`
Packit Service	c5cf8c	`} else if (nf == 1) {`
Packit Service	c5cf8c	`/* What's left is a product of a single prime, so there is only one`
Packit Service	c5cf8c	`* possible factorization */`
Packit Service	c5cf8c	`int cnt = f[0].cnt;`
Packit Service	c5cf8c	`val = f[0].val;`
Packit Service	c5cf8c	`if (MPIR_CVAR_DIMS_VERBOSE) {`
Packit Service	c5cf8c	`MPL_msg_printf("only 1 prime = %d left\n", val);`
Packit Service	c5cf8c	`}`
Packit Service	c5cf8c	`for (k = nextidx; k < dims_needed; k++)`
Packit Service	c5cf8c	`chosen[k] = 1;`
Packit Service	c5cf8c	`k = nextidx;`
Packit Service	c5cf8c	`while (cnt-- > 0) {`
Packit Service	c5cf8c	`if (k >= dims_needed)`
Packit Service	c5cf8c	`k = nextidx;`
Packit Service	c5cf8c	`chosen[k++] *= val;`
Packit Service	c5cf8c	`}`
Packit Service	c5cf8c	`foundDecomp = 1;`
Packit Service	c5cf8c	`}`
Packit Service	c5cf8c	`/* There's another case we consider, particularly since we've`
Packit Service	c5cf8c	`* factored out the "large" primes. If (cnt % (ndims-nextidx)) == 0`
Packit Service	c5cf8c	`* for every remaining factor, then`
Packit Service	c5cf8c	`* f = product of (val^ (cnt/(ndims-nextidx)))`
Packit Service	c5cf8c	`*/`
Packit Service	c5cf8c	`if (!foundDecomp) {`
Packit Service	c5cf8c	`int ndleft = dims_needed - nextidx;`
Packit Service	c5cf8c	`for (j = 0; j < nf; j++)`
Packit Service	c5cf8c	`if (f[j].cnt % ndleft != 0)`
Packit Service	c5cf8c	`break;`
Packit Service	c5cf8c	`if (j == nf) {`
Packit Service	c5cf8c	`int fval;`
Packit Service	c5cf8c	`val = 1;`
Packit Service	c5cf8c	`for (j = 0; j < nf; j++) {`
Packit Service	c5cf8c	`int pow = f[j].cnt / ndleft;`
Packit Service	c5cf8c	`fval = f[j].val;`
Packit Service	c5cf8c	`while (pow--)`
Packit Service	c5cf8c	`val *= fval;`
Packit Service	c5cf8c	`}`
Packit Service	c5cf8c	`for (j = nextidx; j < dims_needed; j++)`
Packit Service	c5cf8c	`chosen[j] = val;`
Packit Service	c5cf8c	`if (MPIR_CVAR_DIMS_VERBOSE) {`
Packit Service	c5cf8c	`MPL_msg_printf("Used power of factors for dims\n");`
Packit Service	c5cf8c	`}`
Packit Service	c5cf8c	`foundDecomp = 1;`
Packit Service	c5cf8c	`}`
Packit Service	c5cf8c	`}`
Packit Service	c5cf8c
Packit Service	c5cf8c	`if (foundDecomp) {`
Packit Service	c5cf8c	`j = 0;`
Packit Service	c5cf8c	`for (i = 0; i < ndims; i++) {`
Packit Service	c5cf8c	`if (dims[i] == 0) {`
Packit Service	c5cf8c	`dims[i] = chosen[j++];`
Packit Service	c5cf8c	`}`
Packit Service	c5cf8c	`}`
Packit Service	c5cf8c	`return 0;`
Packit Service	c5cf8c	`}`
Packit Service	c5cf8c
Packit Service	c5cf8c	`/* ndims >= 2 */`
Packit Service	c5cf8c
Packit Service	c5cf8c	`/* No trivial solution. Balance the remaining values (note that we may`
Packit Service	c5cf8c	`* have already trimmed off some large factors */`
Packit Service	c5cf8c	`/* First, find all of the divisors given by the remaining factors */`
Packit Service	c5cf8c	`ndivs = ndivisors_from_factor(nf, (const Factors *) f);`
Packit Service	c5cf8c	`MPIR_CHKLMEM_MALLOC(divs, int , ((unsigned int) ndivs) sizeof(int), mpi_errno, "divs",`
Packit Service	c5cf8c	`MPL_MEM_COMM);`
Packit Service	c5cf8c	`ndivs = factor_to_divisors(nf, f, ndivs, divs);`
Packit Service	c5cf8c	`if (MPIR_CVAR_DIMS_VERBOSE) {`
Packit Service	c5cf8c	`for (i = 0; i < ndivs; i++) {`
Packit Service	c5cf8c	`if (divs[i] <= 0) {`
Packit Service	c5cf8c	`MPL_msg_printf("divs[%d]=%d!\n", i, divs[i]);`
Packit Service	c5cf8c	`}`
Packit Service	c5cf8c	`}`
Packit Service	c5cf8c	`}`
Packit Service	c5cf8c
Packit Service	c5cf8c	`/* Create a simple first guess. We do this by using a round robin`
Packit Service	c5cf8c	`* distribution of the primes in the remaining values (from nextidx`
Packit Service	c5cf8c	`* to ndims) */`
Packit Service	c5cf8c	`MPIR_T_PVAR_TIMER_START(DIMS, dims_basefact);`
Packit Service	c5cf8c	`factor_to_dims_by_rr(nf, f, dims_needed - nextidx, chosen + nextidx);`
Packit Service	c5cf8c	`MPIR_T_PVAR_TIMER_END(DIMS, dims_basefact);`
Packit Service	c5cf8c
Packit Service	c5cf8c	`/* This isn't quite right, as the balance may depend on the other (larger)`
Packit Service	c5cf8c	`* divisors already chosen */`
Packit Service	c5cf8c	`/* Need to add nextidx */`
Packit Service	c5cf8c	`curbal = chosen[0] - chosen[dims_needed - 1];`
Packit Service	c5cf8c	`trydims[dims_needed - nextidx - 1] = divs[0];`
Packit Service	c5cf8c	`if (MPIR_CVAR_DIMS_VERBOSE) {`
Packit Service	c5cf8c	`MPL_msg_printf("N: initial decomp is: ");`
Packit Service	c5cf8c	`for (i = 0; i < dims_needed; i++)`
Packit Service	c5cf8c	`MPL_msg_printf("%d%c", chosen[i], (i + 1 < dims_needed) ? 'x' : '\n');`
Packit Service	c5cf8c	`}`
Packit Service	c5cf8c	`MPIR_T_PVAR_TIMER_START(DIMS, dims_bal);`
Packit Service	c5cf8c	`optbalance(nnodes, dims_needed - nextidx - 1, dims_needed - nextidx,`
Packit Service	c5cf8c	`ndivs, divs, trydims, &curbal, chosen + nextidx);`
Packit Service	c5cf8c	`MPIR_T_PVAR_TIMER_END(DIMS, dims_bal);`
Packit Service	c5cf8c	`MPIR_CHKLMEM_FREEALL();`
Packit Service	c5cf8c
Packit Service	c5cf8c	`if (MPIR_CVAR_DIMS_VERBOSE) {`
Packit Service	c5cf8c	`MPL_msg_printf("N: final decomp is: ");`
Packit Service	c5cf8c	`for (i = 0; i < dims_needed; i++)`
Packit Service	c5cf8c	`MPL_msg_printf("%d%c", chosen[i], (i + 1 < dims_needed) ? 'x' : '\n');`
Packit Service	c5cf8c	`}`
Packit Service	c5cf8c
Packit Service	c5cf8c	`j = 0;`
Packit Service	c5cf8c	`for (i = 0; i < ndims; i++) {`
Packit Service	c5cf8c	`if (dims[i] == 0) {`
Packit Service	c5cf8c	`dims[i] = chosen[j++];`
Packit Service	c5cf8c	`}`
Packit Service	c5cf8c	`}`
Packit Service	c5cf8c
Packit Service	c5cf8c	`return MPI_SUCCESS;`
Packit Service	c5cf8c	`fn_fail:`
Packit Service	c5cf8c	`return mpi_errno;`
Packit Service	c5cf8c	`}`
Packit Service	c5cf8c
Packit Service	c5cf8c	`#else`
Packit Service	c5cf8c	`/* MPI_Dims_create and PMPI_Dims_create must see the same variable for this`
Packit Service	c5cf8c	`one-time initialization flag */`
Packit Service	c5cf8c	`extern volatile int MPIR_DIMS_initPCVars;`
Packit Service	c5cf8c
Packit Service	c5cf8c	`#endif /* PMPI Local */`
Packit Service	c5cf8c
Packit Service	c5cf8c	`#undef FUNCNAME`
Packit Service	c5cf8c	`#define FUNCNAME MPI_Dims_create`
Packit Service	c5cf8c	`#undef FCNAME`
Packit Service	c5cf8c	`#define FCNAME "MPI_Dims_create"`
Packit Service	c5cf8c
Packit Service	c5cf8c	`/*@`
Packit Service	c5cf8c	`MPI_Dims_create - Creates a division of processors in a cartesian grid`
Packit Service	c5cf8c
Packit Service	c5cf8c	`Input Parameters:`
Packit Service	c5cf8c	`+ nnodes - number of nodes in a grid (integer)`
Packit Service	c5cf8c	`- ndims - number of cartesian dimensions (integer)`
Packit Service	c5cf8c
Packit Service	c5cf8c	`Input/Output Parameters:`
Packit Service	c5cf8c	`. dims - integer array of size 'ndims' specifying the number of nodes in each`
Packit Service	c5cf8c	`dimension. A value of 0 indicates that 'MPI_Dims_create' should fill in a`
Packit Service	c5cf8c	`suitable value.`
Packit Service	c5cf8c
Packit Service	c5cf8c	`.N ThreadSafe`
Packit Service	c5cf8c
Packit Service	c5cf8c	`.N Fortran`
Packit Service	c5cf8c
Packit Service	c5cf8c	`.N Errors`
Packit Service	c5cf8c	`.N MPI_SUCCESS`
Packit Service	c5cf8c	`@*/`
Packit Service	c5cf8c	`int MPI_Dims_create(int nnodes, int ndims, int dims[])`
Packit Service	c5cf8c	`{`
Packit Service	c5cf8c	`int mpi_errno = MPI_SUCCESS;`
Packit Service	c5cf8c	`MPIR_FUNC_TERSE_STATE_DECL(MPID_STATE_MPI_DIMS_CREATE);`
Packit Service	c5cf8c
Packit Service	c5cf8c	`MPIR_ERRTEST_INITIALIZED_ORDIE();`
Packit Service	c5cf8c	`MPIR_FUNC_TERSE_ENTER(MPID_STATE_MPI_DIMS_CREATE);`
Packit Service	c5cf8c
Packit Service	c5cf8c	`if (ndims == 0)`
Packit Service	c5cf8c	`goto fn_exit;`
Packit Service	c5cf8c
Packit Service	c5cf8c	`/* Validate parameters and objects (post conversion) */`
Packit Service	c5cf8c	`#ifdef HAVE_ERROR_CHECKING`
Packit Service	c5cf8c	`{`
Packit Service	c5cf8c	`MPID_BEGIN_ERROR_CHECKS;`
Packit Service	c5cf8c	`{`
Packit Service	c5cf8c	`MPIR_ERRTEST_ARGNEG(nnodes, "nnodes", mpi_errno);`
Packit Service	c5cf8c	`MPIR_ERRTEST_ARGNEG(ndims, "ndims", mpi_errno);`
Packit Service	c5cf8c	`MPIR_ERRTEST_ARGNULL(dims, "dims", mpi_errno);`
Packit Service	c5cf8c	`}`
Packit Service	c5cf8c	`MPID_END_ERROR_CHECKS;`
Packit Service	c5cf8c	`}`
Packit Service	c5cf8c	`#endif /* HAVE_ERROR_CHECKING */`
Packit Service	c5cf8c
Packit Service	c5cf8c	`/* Initialize pvars and cvars if this is the first call */`
Packit Service	c5cf8c	`if (MPIR_DIMS_initPCVars) {`
Packit Service	c5cf8c	`MPIR_Dims_create_init();`
Packit Service	c5cf8c	`MPIR_DIMS_initPCVars = 0;`
Packit Service	c5cf8c	`}`
Packit Service	c5cf8c
Packit Service	c5cf8c	`/* ... body of routine ... */`
Packit Service	c5cf8c	`if (MPIR_Process.dimsCreate != NULL) {`
Packit Service	c5cf8c	`mpi_errno = MPIR_Process.dimsCreate(nnodes, ndims, dims);`
Packit Service	c5cf8c	`} else {`
Packit Service	c5cf8c	`mpi_errno = MPIR_Dims_create_impl(nnodes, ndims, dims);`
Packit Service	c5cf8c	`}`
Packit Service	c5cf8c	`if (mpi_errno)`
Packit Service	c5cf8c	`MPIR_ERR_POP(mpi_errno);`
Packit Service	c5cf8c	`/* ... end of body of routine ... */`
Packit Service	c5cf8c
Packit Service	c5cf8c	`fn_exit:`
Packit Service	c5cf8c	`MPIR_FUNC_TERSE_EXIT(MPID_STATE_MPI_DIMS_CREATE);`
Packit Service	c5cf8c	`return mpi_errno;`
Packit Service	c5cf8c
Packit Service	c5cf8c	`/* --BEGIN ERROR HANDLING-- */`
Packit Service	c5cf8c	`fn_fail:`
Packit Service	c5cf8c	`#ifdef HAVE_ERROR_CHECKING`
Packit Service	c5cf8c	`{`
Packit Service	c5cf8c	`mpi_errno =`
Packit Service	c5cf8c	`MPIR_Err_create_code(mpi_errno, MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPI_ERR_OTHER,`
Packit Service	c5cf8c	`"mpi_dims_create", "mpi_dims_create %d %d %p", nnodes, ndims,`
Packit Service	c5cf8c	`dims);`
Packit Service	c5cf8c	`}`
Packit Service	c5cf8c	`#endif`
Packit Service	c5cf8c	`mpi_errno = MPIR_Err_return_comm(NULL, FCNAME, mpi_errno);`
Packit Service	c5cf8c	`goto fn_exit;`
Packit Service	c5cf8c	`/* --END ERROR HANDLING-- */`
Packit Service	c5cf8c	`}`

source-git / mpich

Source Code

Blame src/mpi/topo/dims_create.c