Blame cdf/hypergeometric.c

Packit 67cb25
/* cdf/hypergeometric.c
Packit 67cb25
 *
Packit 67cb25
 * Copyright (C) 2004 Jason H. Stover.
Packit 67cb25
 *
Packit 67cb25
 * This program is free software; you can redistribute it and/or modify
Packit 67cb25
 * it under the terms of the GNU General Public License as published by
Packit 67cb25
 * the Free Software Foundation; either version 3 of the License, or (at
Packit 67cb25
 * your option) any later version.
Packit 67cb25
 *
Packit 67cb25
 * This program is distributed in the hope that it will be useful, but
Packit 67cb25
 * WITHOUT ANY WARRANTY; without even the implied warranty of
Packit 67cb25
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
Packit 67cb25
 * General Public License for more details.
Packit 67cb25
 *
Packit 67cb25
 * You should have received a copy of the GNU General Public License
Packit 67cb25
 * along with this program; if not, write to the Free Software Foundation,
Packit 67cb25
 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
Packit 67cb25
 */
Packit 67cb25
Packit 67cb25
/*
Packit 67cb25
 * Computes the cumulative distribution function for a hypergeometric
Packit 67cb25
 * random variable. A hypergeometric random variable X is the number
Packit 67cb25
 * of elements of type 1 in a sample of size t, drawn from a population
Packit 67cb25
 * of size n1 + n2, in which n1 are of type 1 and n2 are of type 2.
Packit 67cb25
 *
Packit 67cb25
 * This algorithm computes Pr( X <= k ) by summing the terms from
Packit 67cb25
 * the mass function, Pr( X = k ).
Packit 67cb25
 *
Packit 67cb25
 * References:
Packit 67cb25
 *
Packit 67cb25
 * T. Wu. An accurate computation of the hypergeometric distribution 
Packit 67cb25
 * function. ACM Transactions on Mathematical Software. Volume 19, number 1,
Packit 67cb25
 * March 1993.
Packit 67cb25
 *  This algorithm is not used, since it requires factoring the
Packit 67cb25
 *  numerator and denominator, then cancelling. It is more accurate
Packit 67cb25
 *  than the algorithm used here, but the cancellation requires more
Packit 67cb25
 *  time than the algorithm used here.
Packit 67cb25
 *
Packit 67cb25
 * W. Feller. An Introduction to Probability Theory and Its Applications,
Packit 67cb25
 * third edition. 1968. Chapter 2, section 6. 
Packit 67cb25
 */
Packit 67cb25
Packit 67cb25
#include <config.h>
Packit 67cb25
#include <math.h>
Packit 67cb25
#include <gsl/gsl_math.h>
Packit 67cb25
#include <gsl/gsl_errno.h>
Packit 67cb25
#include <gsl/gsl_cdf.h>
Packit 67cb25
#include <gsl/gsl_randist.h>
Packit 67cb25
Packit 67cb25
#include "error.h"
Packit 67cb25
Packit 67cb25
static double
Packit 67cb25
lower_tail (const unsigned int k, const unsigned int n1,
Packit 67cb25
            const unsigned int n2, const unsigned int t)
Packit 67cb25
{
Packit 67cb25
  double relerr;
Packit 67cb25
  int i = k;
Packit 67cb25
  double s, P;
Packit 67cb25
Packit 67cb25
  s = gsl_ran_hypergeometric_pdf (i, n1, n2, t);
Packit 67cb25
  P = s;
Packit 67cb25
  
Packit 67cb25
  while (i > 0)
Packit 67cb25
    {
Packit 67cb25
      double factor =
Packit 67cb25
        (i / (n1 - i + 1.0)) * ((n2 + i - t) / (t - i + 1.0));
Packit 67cb25
      s *= factor;
Packit 67cb25
      P += s;
Packit 67cb25
      relerr = s / P;
Packit 67cb25
      if (relerr < GSL_DBL_EPSILON)
Packit 67cb25
        break;
Packit 67cb25
      i--;
Packit 67cb25
    }
Packit 67cb25
Packit 67cb25
  return P;
Packit 67cb25
}
Packit 67cb25
  
Packit 67cb25
static double 
Packit 67cb25
upper_tail (const unsigned int k, const unsigned int n1,
Packit 67cb25
            const unsigned int n2, const unsigned int t)
Packit 67cb25
{
Packit 67cb25
  double relerr;
Packit 67cb25
  unsigned int i = k + 1;
Packit 67cb25
  double s, Q;
Packit 67cb25
  
Packit 67cb25
  s = gsl_ran_hypergeometric_pdf (i, n1, n2, t);
Packit 67cb25
  Q = s;
Packit 67cb25
  
Packit 67cb25
  while (i < t)
Packit 67cb25
    {
Packit 67cb25
      double factor =
Packit 67cb25
        ((n1 - i) / (i + 1.0)) * ((t - i) / (n2 + i + 1.0 - t));
Packit 67cb25
      s *= factor;
Packit 67cb25
      Q += s;
Packit 67cb25
      relerr = s / Q;
Packit 67cb25
      if (relerr < GSL_DBL_EPSILON)
Packit 67cb25
        break;
Packit 67cb25
      i++;
Packit 67cb25
    }
Packit 67cb25
Packit 67cb25
  return Q;
Packit 67cb25
}
Packit 67cb25
Packit 67cb25
Packit 67cb25
Packit 67cb25
Packit 67cb25
/*
Packit 67cb25
 * Pr (X <= k)
Packit 67cb25
 */
Packit 67cb25
double
Packit 67cb25
gsl_cdf_hypergeometric_P (const unsigned int k,
Packit 67cb25
                          const unsigned int n1,
Packit 67cb25
                          const unsigned int n2, const unsigned int t)
Packit 67cb25
{
Packit 67cb25
  double P;
Packit 67cb25
Packit 67cb25
  if (t > (n1 + n2))
Packit 67cb25
    {
Packit 67cb25
      CDF_ERROR ("t larger than population size", GSL_EDOM);
Packit 67cb25
    }
Packit 67cb25
  else if (k >= n1 || k >= t)
Packit 67cb25
    {
Packit 67cb25
      P = 1.0;
Packit 67cb25
    }
Packit 67cb25
  else if (k < 0.0)
Packit 67cb25
    {
Packit 67cb25
      P = 0.0;
Packit 67cb25
    }
Packit 67cb25
  else
Packit 67cb25
    {
Packit 67cb25
      double midpoint = ((double)t * n1) / ((double)n1 + (double)n2);
Packit 67cb25
Packit 67cb25
      if (k >= midpoint)
Packit 67cb25
        {
Packit 67cb25
          P = 1 - upper_tail (k, n1, n2, t);
Packit 67cb25
        }
Packit 67cb25
      else
Packit 67cb25
        {
Packit 67cb25
          P = lower_tail (k, n1, n2, t);
Packit 67cb25
        }
Packit 67cb25
    }
Packit 67cb25
Packit 67cb25
  return P;
Packit 67cb25
}
Packit 67cb25
Packit 67cb25
/*
Packit 67cb25
 * Pr (X > k)
Packit 67cb25
 */
Packit 67cb25
double
Packit 67cb25
gsl_cdf_hypergeometric_Q (const unsigned int k,
Packit 67cb25
                          const unsigned int n1,
Packit 67cb25
                          const unsigned int n2, const unsigned int t)
Packit 67cb25
{
Packit 67cb25
  double Q;
Packit 67cb25
Packit 67cb25
  if (t > (n1 + n2))
Packit 67cb25
    {
Packit 67cb25
      CDF_ERROR ("t larger than population size", GSL_EDOM);
Packit 67cb25
    }
Packit 67cb25
  else if (k >= n1 || k >= t)
Packit 67cb25
    {
Packit 67cb25
      Q = 0.0;
Packit 67cb25
    }
Packit 67cb25
  else if (k < 0.0)
Packit 67cb25
    {
Packit 67cb25
      Q = 1.0;
Packit 67cb25
    }
Packit 67cb25
  else
Packit 67cb25
    {
Packit 67cb25
      double midpoint = ((double)t * n1) / ((double)n1 + (double)n2);
Packit 67cb25
Packit 67cb25
      if (k < midpoint)
Packit 67cb25
        {
Packit 67cb25
          Q = 1 - lower_tail (k, n1, n2, t);
Packit 67cb25
        }
Packit 67cb25
      else
Packit 67cb25
        {
Packit 67cb25
          Q = upper_tail (k, n1, n2, t);
Packit 67cb25
        }
Packit 67cb25
    }
Packit 67cb25
Packit 67cb25
  return Q;
Packit 67cb25
}