Blame randist/discrete.c

Packit 67cb25
/* randist/discrete.c
Packit 67cb25
 * 
Packit 67cb25
 * Copyright (C) 1996, 1997, 1998, 1999, 2000, 2007, 2009 James Theiler, Brian Gough
Packit 67cb25
 * 
Packit 67cb25
 * This program is free software; you can redistribute it and/or modify
Packit 67cb25
 * it under the terms of the GNU General Public License as published by
Packit 67cb25
 * the Free Software Foundation; either version 3 of the License, or (at
Packit 67cb25
 * your option) any later version.
Packit 67cb25
 * 
Packit 67cb25
 * This program is distributed in the hope that it will be useful, but
Packit 67cb25
 * WITHOUT ANY WARRANTY; without even the implied warranty of
Packit 67cb25
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
Packit 67cb25
 * General Public License for more details.
Packit 67cb25
 * 
Packit 67cb25
 * You should have received a copy of the GNU General Public License along
Packit 67cb25
 * with this library; if not, write to the Free Software Foundation, Inc.,
Packit 67cb25
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
Packit 67cb25
 */
Packit 67cb25
Packit 67cb25
/*
Packit 67cb25
   Random Discrete Events
Packit 67cb25
   
Packit 67cb25
   Given K discrete events with different probabilities P[k]
Packit 67cb25
   produce a value k consistent with its probability.
Packit 67cb25
Packit 67cb25
   This program is free software; you can redistribute it and/or
Packit 67cb25
   modify it under the terms of the GNU General Public License as
Packit 67cb25
   published by the Free Software Foundation; either version 3 of the
Packit 67cb25
   License, or (at your option) any later version.
Packit 67cb25
Packit 67cb25
   This program is distributed in the hope that it will be useful,
Packit 67cb25
   but WITHOUT ANY WARRANTY; without even the implied warranty of
Packit 67cb25
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
Packit 67cb25
   General Public License for more details.  You should have received
Packit 67cb25
   a copy of the GNU General Public License along with this library; if
Packit 67cb25
   not, write to the Free Software Foundation, Inc., 51 Franklin Street,
Packit 67cb25
   Fifth Floor, Boston, MA 02110-1301, USA.
Packit 67cb25
*/
Packit 67cb25
Packit 67cb25
/*
Packit 67cb25
 * Based on: Alastair J Walker, An efficient method for generating
Packit 67cb25
 * discrete random variables with general distributions, ACM Trans
Packit 67cb25
 * Math Soft 3, 253-256 (1977).  See also: D. E. Knuth, The Art of
Packit 67cb25
 * Computer Programming, Volume 2 (Seminumerical algorithms), 3rd
Packit 67cb25
 * edition, Addison-Wesley (1997), p120.
Packit 67cb25
Packit 67cb25
 * Walker's algorithm does some preprocessing, and provides two
Packit 67cb25
 * arrays: floating point F[k] and integer A[k].  A value k is chosen
Packit 67cb25
 * from 0..K-1 with equal likelihood, and then a uniform random number
Packit 67cb25
 * u is compared to F[k].  If it is less than F[k], then k is
Packit 67cb25
 * returned.  Otherwise, A[k] is returned.
Packit 67cb25
   
Packit 67cb25
 * Walker's original paper describes an O(K^2) algorithm for setting
Packit 67cb25
 * up the F and A arrays.  I found this disturbing since I wanted to
Packit 67cb25
 * use very large values of K.  I'm sure I'm not the first to realize
Packit 67cb25
 * this, but in fact the preprocessing can be done in O(K) steps.
Packit 67cb25
Packit 67cb25
 * A figure of merit for the preprocessing is the average value for
Packit 67cb25
 * the F[k]'s (that is, SUM_k F[k]/K); this corresponds to the
Packit 67cb25
 * probability that k is returned, instead of A[k], thereby saving a
Packit 67cb25
 * redirection.  Walker's O(K^2) preprocessing will generally improve
Packit 67cb25
 * that figure of merit, compared to my cheaper O(K) method; from some
Packit 67cb25
 * experiments with a perl script, I get values of around 0.6 for my
Packit 67cb25
 * method and just under 0.75 for Walker's.  Knuth has pointed out
Packit 67cb25
 * that finding _the_ optimum lookup tables, which maximize the
Packit 67cb25
 * average F[k], is a combinatorially difficult problem.  But any
Packit 67cb25
 * valid preprocessing will still provide O(1) time for the call to
Packit 67cb25
 * gsl_ran_discrete().  I find that if I artificially set F[k]=1 --
Packit 67cb25
 * ie, better than optimum! -- I get a speedup of maybe 20%, so that's
Packit 67cb25
 * the maximum I could expect from the most expensive preprocessing.
Packit 67cb25
 * Folding in the difference of 0.6 vs 0.75, I'd estimate that the
Packit 67cb25
 * speedup would be less than 10%.
Packit 67cb25
Packit 67cb25
 * I've not implemented it here, but one compromise is to sort the
Packit 67cb25
 * probabilities once, and then work from the two ends inward.  This
Packit 67cb25
 * requires O(K log K), still lots cheaper than O(K^2), and from my
Packit 67cb25
 * experiments with the perl script, the figure of merit is within
Packit 67cb25
 * about 0.01 for K up to 1000, and no sign of diverging (in fact,
Packit 67cb25
 * they seemed to be converging, but it's hard to say with just a
Packit 67cb25
 * handful of runs).
Packit 67cb25
Packit 67cb25
 * The O(K) algorithm goes through all the p_k's and decides if they
Packit 67cb25
 * are "smalls" or "bigs" according to whether they are less than or
Packit 67cb25
 * greater than the mean value 1/K.  The indices to the smalls and the
Packit 67cb25
 * bigs are put in separate stacks, and then we work through the
Packit 67cb25
 * stacks together.  For each small, we pair it up with the next big
Packit 67cb25
 * in the stack (Walker always wanted to pair up the smallest small
Packit 67cb25
 * with the biggest big).  The small "borrows" from the big just
Packit 67cb25
 * enough to bring the small up to mean.  This reduces the size of the
Packit 67cb25
 * big, so the (smaller) big is compared again to the mean, and if it
Packit 67cb25
 * is smaller, it gets "popped" from the big stack and "pushed" to the
Packit 67cb25
 * small stack.  Otherwise, it stays put.  Since every time we pop a
Packit 67cb25
 * small, we are able to deal with it right then and there, and we
Packit 67cb25
 * never have to pop more than K smalls, then the algorithm is O(K).
Packit 67cb25
Packit 67cb25
 * This implementation sets up two separate stacks, and allocates K
Packit 67cb25
 * elements between them.  Since neither stack ever grows, we do an
Packit 67cb25
 * extra O(K) pass through the data to determine how many smalls and
Packit 67cb25
 * bigs there are to begin with and allocate appropriately.  In all
Packit 67cb25
 * there are 2*K*sizeof(double) transient bytes of memory that are
Packit 67cb25
 * used than returned, and K*(sizeof(int)+sizeof(double)) bytes used
Packit 67cb25
 * in the lookup table.
Packit 67cb25
   
Packit 67cb25
 * Walker spoke of using two random numbers (an integer 0..K-1, and a
Packit 67cb25
 * floating point u in [0,1]), but Knuth points out that one can just
Packit 67cb25
 * use the integer and fractional parts of K*u where u is in [0,1].
Packit 67cb25
 * In fact, Knuth further notes that taking F'[k]=(k+F[k])/K, one can
Packit 67cb25
 * directly compare u to F'[k] without having to explicitly set
Packit 67cb25
 * u=K*u-int(K*u).
Packit 67cb25
Packit 67cb25
 * Usage:
Packit 67cb25
Packit 67cb25
 * Starting with an array of probabilities P, initialize and do
Packit 67cb25
 * preprocessing with a call to:
Packit 67cb25
Packit 67cb25
 *    gsl_rng *r;
Packit 67cb25
 *    gsl_ran_discrete_t *f;
Packit 67cb25
 *    f = gsl_ran_discrete_preproc(K,P);
Packit 67cb25
   
Packit 67cb25
 * Then, whenever a random index 0..K-1 is desired, use
Packit 67cb25
Packit 67cb25
 *    k = gsl_ran_discrete(r,f);
Packit 67cb25
Packit 67cb25
 * Note that several different randevent struct's can be
Packit 67cb25
 * simultaneously active.
Packit 67cb25
Packit 67cb25
 * Aside: A very clever alternative approach is described in
Packit 67cb25
 * Abramowitz and Stegun, p 950, citing: Marsaglia, Random variables
Packit 67cb25
 * and computers, Proc Third Prague Conference in Probability Theory,
Packit 67cb25
 * 1962.  A more accesible reference is: G. Marsaglia, Generating
Packit 67cb25
 * discrete random numbers in a computer, Comm ACM 6, 37-38 (1963).
Packit 67cb25
 * If anybody is interested, I (jt) have also coded up this version as
Packit 67cb25
 * part of another software package.  However, I've done some
Packit 67cb25
 * comparisons, and the Walker method is both faster and more stingy
Packit 67cb25
 * with memory.  So, in the end I decided not to include it with the
Packit 67cb25
 * GSL package.
Packit 67cb25
   
Packit 67cb25
 * Written 26 Jan 1999, James Theiler, jt@lanl.gov
Packit 67cb25
 * Adapted to GSL, 30 Jan 1999, jt
Packit 67cb25
Packit 67cb25
 */
Packit 67cb25
Packit 67cb25
#include <config.h>
Packit 67cb25
#include <stdio.h>              /* used for NULL, also fprintf(stderr,...) */
Packit 67cb25
#include <stdlib.h>             /* used for malloc's */
Packit 67cb25
#include <math.h>
Packit 67cb25
#include <gsl/gsl_errno.h>
Packit 67cb25
#include <gsl/gsl_rng.h>
Packit 67cb25
#include <gsl/gsl_randist.h>
Packit 67cb25
#define DEBUG 0
Packit 67cb25
#define KNUTH_CONVENTION 1      /* Saves a few steps of arithmetic
Packit 67cb25
                                 * in the call to gsl_ran_discrete()
Packit 67cb25
                                 */
Packit 67cb25
Packit 67cb25
/*** Begin Stack (this code is used just in this file) ***/
Packit 67cb25
Packit 67cb25
/* Stack code converted to use unsigned indices (i.e. s->i == 0 now
Packit 67cb25
   means an empty stack, instead of -1), for consistency and to give a
Packit 67cb25
   bigger allowable range. BJG */
Packit 67cb25
Packit 67cb25
typedef struct {
Packit 67cb25
    size_t N;                      /* max number of elts on stack */
Packit 67cb25
    size_t *v;                     /* array of values on the stack */
Packit 67cb25
    size_t i;                      /* index of top of stack */
Packit 67cb25
} gsl_stack_t;
Packit 67cb25
Packit 67cb25
static gsl_stack_t *
Packit 67cb25
new_stack(size_t N) {
Packit 67cb25
    gsl_stack_t *s;
Packit 67cb25
    s = (gsl_stack_t *)malloc(sizeof(gsl_stack_t));
Packit 67cb25
    s->N = N;
Packit 67cb25
    s->i = 0;                  /* indicates stack is empty */
Packit 67cb25
    s->v = (size_t *)malloc(sizeof(size_t)*N);
Packit 67cb25
    return s;
Packit 67cb25
}
Packit 67cb25
Packit 67cb25
static int
Packit 67cb25
push_stack(gsl_stack_t *s, size_t v)
Packit 67cb25
{
Packit 67cb25
    if ((s->i) >= (s->N)) {
Packit 67cb25
      return -1; /* stack overflow (shouldn't happen) */
Packit 67cb25
    }
Packit 67cb25
    (s->v)[s->i] = v;
Packit 67cb25
    s->i += 1;
Packit 67cb25
    return 0;
Packit 67cb25
}
Packit 67cb25
Packit 67cb25
static size_t pop_stack(gsl_stack_t *s)
Packit 67cb25
{
Packit 67cb25
    if ((s->i) == 0) {
Packit 67cb25
      GSL_ERROR ("internal error - stack exhausted", GSL_ESANITY);
Packit 67cb25
    }
Packit 67cb25
    s->i -= 1;
Packit 67cb25
    return ((s->v)[s->i]);
Packit 67cb25
}
Packit 67cb25
Packit 67cb25
static inline size_t size_stack(const gsl_stack_t *s)
Packit 67cb25
{
Packit 67cb25
    return s->i;
Packit 67cb25
}
Packit 67cb25
Packit 67cb25
static void free_stack(gsl_stack_t *s)
Packit 67cb25
{
Packit 67cb25
    free((char *)(s->v));
Packit 67cb25
    free((char *)s);
Packit 67cb25
}
Packit 67cb25
Packit 67cb25
/*** End Stack ***/
Packit 67cb25
Packit 67cb25
Packit 67cb25
/*** Begin Walker's Algorithm ***/
Packit 67cb25
Packit 67cb25
gsl_ran_discrete_t *
Packit 67cb25
gsl_ran_discrete_preproc(size_t Kevents, const double *ProbArray)
Packit 67cb25
{
Packit 67cb25
    size_t k,b,s;
Packit 67cb25
    gsl_ran_discrete_t *g;
Packit 67cb25
    size_t nBigs, nSmalls;
Packit 67cb25
    gsl_stack_t *Bigs;
Packit 67cb25
    gsl_stack_t *Smalls;
Packit 67cb25
    double *E;
Packit 67cb25
    double pTotal = 0.0, mean, d;
Packit 67cb25
    
Packit 67cb25
    if (Kevents < 1) {
Packit 67cb25
      /* Could probably treat Kevents=1 as a special case */
Packit 67cb25
Packit 67cb25
      GSL_ERROR_VAL ("number of events must be a positive integer", 
Packit 67cb25
                        GSL_EINVAL, 0);
Packit 67cb25
    }
Packit 67cb25
Packit 67cb25
    /* Make sure elements of ProbArray[] are positive.
Packit 67cb25
     * Won't enforce that sum is unity; instead will just normalize
Packit 67cb25
     */
Packit 67cb25
Packit 67cb25
    for (k=0; k
Packit 67cb25
        if (ProbArray[k] < 0) {
Packit 67cb25
          GSL_ERROR_VAL ("probabilities must be non-negative",
Packit 67cb25
                            GSL_EINVAL, 0) ;
Packit 67cb25
        }
Packit 67cb25
        pTotal += ProbArray[k];
Packit 67cb25
    }
Packit 67cb25
Packit 67cb25
    /* Begin setting up the main "object" (just a struct, no steroids) */
Packit 67cb25
    g = (gsl_ran_discrete_t *)malloc(sizeof(gsl_ran_discrete_t));
Packit 67cb25
    g->K = Kevents;
Packit 67cb25
    g->F = (double *)malloc(sizeof(double)*Kevents);
Packit 67cb25
    g->A = (size_t *)malloc(sizeof(size_t)*Kevents);
Packit 67cb25
Packit 67cb25
    E = (double *)malloc(sizeof(double)*Kevents);
Packit 67cb25
Packit 67cb25
    if (E==NULL) {
Packit 67cb25
      GSL_ERROR_VAL ("Cannot allocate memory for randevent", GSL_ENOMEM, 0);
Packit 67cb25
    }
Packit 67cb25
Packit 67cb25
    for (k=0; k
Packit 67cb25
        E[k] = ProbArray[k]/pTotal;
Packit 67cb25
    }
Packit 67cb25
Packit 67cb25
    /* Now create the Bigs and the Smalls */
Packit 67cb25
    mean = 1.0/Kevents;
Packit 67cb25
    nSmalls=nBigs=0;
Packit 67cb25
    {
Packit 67cb25
      /* Temporarily use which[k] = g->A[k] to indicate small or large */
Packit 67cb25
      size_t * const which = g->A;
Packit 67cb25
Packit 67cb25
      for (k=0; k
Packit 67cb25
        if (E[k] < mean) { 
Packit 67cb25
          ++nSmalls; which[k] = 0;
Packit 67cb25
        } else { 
Packit 67cb25
          ++nBigs; which[k] = 1; 
Packit 67cb25
        }
Packit 67cb25
      }
Packit 67cb25
Packit 67cb25
      Bigs   = new_stack(nBigs);
Packit 67cb25
      Smalls = new_stack(nSmalls);
Packit 67cb25
      for (k=0; k
Packit 67cb25
        gsl_stack_t * Dest = which[k] ? Bigs : Smalls;
Packit 67cb25
        int status = push_stack(Dest,k);
Packit 67cb25
        if (status)
Packit 67cb25
          GSL_ERROR_VAL ("failed to build stacks", GSL_EFAILED, 0);
Packit 67cb25
      }
Packit 67cb25
    }
Packit 67cb25
Packit 67cb25
    /* Now work through the smalls */
Packit 67cb25
    while (size_stack(Smalls) > 0) {
Packit 67cb25
        s = pop_stack(Smalls);
Packit 67cb25
        if (size_stack(Bigs) == 0) {
Packit 67cb25
            (g->A)[s]=s;
Packit 67cb25
            (g->F)[s]=1.0;
Packit 67cb25
            continue;
Packit 67cb25
        }
Packit 67cb25
        b = pop_stack(Bigs);
Packit 67cb25
        (g->A)[s]=b;
Packit 67cb25
        (g->F)[s]=Kevents*E[s];
Packit 67cb25
#if DEBUG
Packit 67cb25
        fprintf(stderr,"s=%2d, A=%2d, F=%.4f\n",s,(g->A)[s],(g->F)[s]);
Packit 67cb25
#endif        
Packit 67cb25
        d = mean - E[s];
Packit 67cb25
        E[s] += d;              /* now E[s] == mean */
Packit 67cb25
        E[b] -= d;
Packit 67cb25
        if (E[b] < mean) {
Packit 67cb25
            push_stack(Smalls,b); /* no longer big, join ranks of the small */
Packit 67cb25
        }
Packit 67cb25
        else if (E[b] > mean) {
Packit 67cb25
            push_stack(Bigs,b); /* still big, put it back where you found it */
Packit 67cb25
        }
Packit 67cb25
        else {
Packit 67cb25
            /* E[b]==mean implies it is finished too */
Packit 67cb25
            (g->A)[b]=b;
Packit 67cb25
            (g->F)[b]=1.0;
Packit 67cb25
        }
Packit 67cb25
    }
Packit 67cb25
    while (size_stack(Bigs) > 0) {
Packit 67cb25
        b = pop_stack(Bigs);
Packit 67cb25
        (g->A)[b]=b;
Packit 67cb25
        (g->F)[b]=1.0;
Packit 67cb25
    }
Packit 67cb25
    /* Stacks have been emptied, and A and F have been filled */
Packit 67cb25
Packit 67cb25
    if ( size_stack(Smalls) != 0) {
Packit 67cb25
      GSL_ERROR_VAL ("Smalls stack has not been emptied",
Packit 67cb25
                     GSL_ESANITY, 0 );
Packit 67cb25
    }
Packit 67cb25
    
Packit 67cb25
#if 0
Packit 67cb25
    /* if 1, then artificially set all F[k]'s to unity.  This will
Packit 67cb25
     * give wrong answers, but you'll get them faster.  But, not
Packit 67cb25
     * that much faster (I get maybe 20%); that's an upper bound
Packit 67cb25
     * on what the optimal preprocessing would give.
Packit 67cb25
     */
Packit 67cb25
    for (k=0; k
Packit 67cb25
        (g->F)[k] = 1.0;
Packit 67cb25
    }
Packit 67cb25
#endif
Packit 67cb25
Packit 67cb25
#if KNUTH_CONVENTION
Packit 67cb25
    /* For convenience, set F'[k]=(k+F[k])/K */
Packit 67cb25
    /* This saves some arithmetic in gsl_ran_discrete(); I find that
Packit 67cb25
     * it doesn't actually make much difference.
Packit 67cb25
     */
Packit 67cb25
    for (k=0; k
Packit 67cb25
        (g->F)[k] += k;
Packit 67cb25
        (g->F)[k] /= Kevents;
Packit 67cb25
    }
Packit 67cb25
#endif    
Packit 67cb25
Packit 67cb25
    free_stack(Bigs);
Packit 67cb25
    free_stack(Smalls);
Packit 67cb25
    free((char *)E);
Packit 67cb25
Packit 67cb25
    return g;
Packit 67cb25
}
Packit 67cb25
Packit 67cb25
size_t
Packit 67cb25
gsl_ran_discrete(const gsl_rng *r, const gsl_ran_discrete_t *g)
Packit 67cb25
{
Packit 67cb25
    size_t c=0;
Packit 67cb25
    double u,f;
Packit 67cb25
    u = gsl_rng_uniform(r);
Packit 67cb25
#if KNUTH_CONVENTION
Packit 67cb25
    c = (u*(g->K));
Packit 67cb25
#else
Packit 67cb25
    u *= g->K;
Packit 67cb25
    c = u;
Packit 67cb25
    u -= c;
Packit 67cb25
#endif
Packit 67cb25
    f = (g->F)[c];
Packit 67cb25
    /* fprintf(stderr,"c,f,u: %d %.4f %f\n",c,f,u); */
Packit 67cb25
    if (f == 1.0) return c;
Packit 67cb25
Packit 67cb25
    if (u < f) {
Packit 67cb25
        return c;
Packit 67cb25
    }
Packit 67cb25
    else {
Packit 67cb25
        return (g->A)[c];
Packit 67cb25
    }
Packit 67cb25
}
Packit 67cb25
Packit 67cb25
void gsl_ran_discrete_free(gsl_ran_discrete_t *g)
Packit 67cb25
{
Packit 67cb25
    RETURN_IF_NULL (g);
Packit 67cb25
    free((char *)(g->A));
Packit 67cb25
    free((char *)(g->F));
Packit 67cb25
    free((char *)g);
Packit 67cb25
}
Packit 67cb25
Packit 67cb25
double
Packit 67cb25
gsl_ran_discrete_pdf(size_t k, const gsl_ran_discrete_t *g)
Packit 67cb25
{
Packit 67cb25
    size_t i,K;
Packit 67cb25
    double f,p=0;
Packit 67cb25
    K= g->K;
Packit 67cb25
    if (k>K) return 0;
Packit 67cb25
    for (i=0; i
Packit 67cb25
        f = (g->F)[i];
Packit 67cb25
#if KNUTH_CONVENTION
Packit 67cb25
        f = K*f-i;
Packit 67cb25
#endif        
Packit 67cb25
        if (i==k) {
Packit 67cb25
            p += f;
Packit 67cb25
        } else if (k == (g->A)[i]) {
Packit 67cb25
            p += 1.0 - f;
Packit 67cb25
        }
Packit 67cb25
    }
Packit 67cb25
    return p/K;
Packit 67cb25
}