|
Packit |
67cb25 |
/* cdf/hypergeometric.c
|
|
Packit |
67cb25 |
*
|
|
Packit |
67cb25 |
* Copyright (C) 2004 Jason H. Stover.
|
|
Packit |
67cb25 |
*
|
|
Packit |
67cb25 |
* This program is free software; you can redistribute it and/or modify
|
|
Packit |
67cb25 |
* it under the terms of the GNU General Public License as published by
|
|
Packit |
67cb25 |
* the Free Software Foundation; either version 3 of the License, or (at
|
|
Packit |
67cb25 |
* your option) any later version.
|
|
Packit |
67cb25 |
*
|
|
Packit |
67cb25 |
* This program is distributed in the hope that it will be useful, but
|
|
Packit |
67cb25 |
* WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
Packit |
67cb25 |
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
Packit |
67cb25 |
* General Public License for more details.
|
|
Packit |
67cb25 |
*
|
|
Packit |
67cb25 |
* You should have received a copy of the GNU General Public License
|
|
Packit |
67cb25 |
* along with this program; if not, write to the Free Software Foundation,
|
|
Packit |
67cb25 |
* Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
|
Packit |
67cb25 |
*/
|
|
Packit |
67cb25 |
|
|
Packit |
67cb25 |
/*
|
|
Packit |
67cb25 |
* Computes the cumulative distribution function for a hypergeometric
|
|
Packit |
67cb25 |
* random variable. A hypergeometric random variable X is the number
|
|
Packit |
67cb25 |
* of elements of type 1 in a sample of size t, drawn from a population
|
|
Packit |
67cb25 |
* of size n1 + n2, in which n1 are of type 1 and n2 are of type 2.
|
|
Packit |
67cb25 |
*
|
|
Packit |
67cb25 |
* This algorithm computes Pr( X <= k ) by summing the terms from
|
|
Packit |
67cb25 |
* the mass function, Pr( X = k ).
|
|
Packit |
67cb25 |
*
|
|
Packit |
67cb25 |
* References:
|
|
Packit |
67cb25 |
*
|
|
Packit |
67cb25 |
* T. Wu. An accurate computation of the hypergeometric distribution
|
|
Packit |
67cb25 |
* function. ACM Transactions on Mathematical Software. Volume 19, number 1,
|
|
Packit |
67cb25 |
* March 1993.
|
|
Packit |
67cb25 |
* This algorithm is not used, since it requires factoring the
|
|
Packit |
67cb25 |
* numerator and denominator, then cancelling. It is more accurate
|
|
Packit |
67cb25 |
* than the algorithm used here, but the cancellation requires more
|
|
Packit |
67cb25 |
* time than the algorithm used here.
|
|
Packit |
67cb25 |
*
|
|
Packit |
67cb25 |
* W. Feller. An Introduction to Probability Theory and Its Applications,
|
|
Packit |
67cb25 |
* third edition. 1968. Chapter 2, section 6.
|
|
Packit |
67cb25 |
*/
|
|
Packit |
67cb25 |
|
|
Packit |
67cb25 |
#include <config.h>
|
|
Packit |
67cb25 |
#include <math.h>
|
|
Packit |
67cb25 |
#include <gsl/gsl_math.h>
|
|
Packit |
67cb25 |
#include <gsl/gsl_errno.h>
|
|
Packit |
67cb25 |
#include <gsl/gsl_cdf.h>
|
|
Packit |
67cb25 |
#include <gsl/gsl_randist.h>
|
|
Packit |
67cb25 |
|
|
Packit |
67cb25 |
#include "error.h"
|
|
Packit |
67cb25 |
|
|
Packit |
67cb25 |
static double
|
|
Packit |
67cb25 |
lower_tail (const unsigned int k, const unsigned int n1,
|
|
Packit |
67cb25 |
const unsigned int n2, const unsigned int t)
|
|
Packit |
67cb25 |
{
|
|
Packit |
67cb25 |
double relerr;
|
|
Packit |
67cb25 |
int i = k;
|
|
Packit |
67cb25 |
double s, P;
|
|
Packit |
67cb25 |
|
|
Packit |
67cb25 |
s = gsl_ran_hypergeometric_pdf (i, n1, n2, t);
|
|
Packit |
67cb25 |
P = s;
|
|
Packit |
67cb25 |
|
|
Packit |
67cb25 |
while (i > 0)
|
|
Packit |
67cb25 |
{
|
|
Packit |
67cb25 |
double factor =
|
|
Packit |
67cb25 |
(i / (n1 - i + 1.0)) * ((n2 + i - t) / (t - i + 1.0));
|
|
Packit |
67cb25 |
s *= factor;
|
|
Packit |
67cb25 |
P += s;
|
|
Packit |
67cb25 |
relerr = s / P;
|
|
Packit |
67cb25 |
if (relerr < GSL_DBL_EPSILON)
|
|
Packit |
67cb25 |
break;
|
|
Packit |
67cb25 |
i--;
|
|
Packit |
67cb25 |
}
|
|
Packit |
67cb25 |
|
|
Packit |
67cb25 |
return P;
|
|
Packit |
67cb25 |
}
|
|
Packit |
67cb25 |
|
|
Packit |
67cb25 |
static double
|
|
Packit |
67cb25 |
upper_tail (const unsigned int k, const unsigned int n1,
|
|
Packit |
67cb25 |
const unsigned int n2, const unsigned int t)
|
|
Packit |
67cb25 |
{
|
|
Packit |
67cb25 |
double relerr;
|
|
Packit |
67cb25 |
unsigned int i = k + 1;
|
|
Packit |
67cb25 |
double s, Q;
|
|
Packit |
67cb25 |
|
|
Packit |
67cb25 |
s = gsl_ran_hypergeometric_pdf (i, n1, n2, t);
|
|
Packit |
67cb25 |
Q = s;
|
|
Packit |
67cb25 |
|
|
Packit |
67cb25 |
while (i < t)
|
|
Packit |
67cb25 |
{
|
|
Packit |
67cb25 |
double factor =
|
|
Packit |
67cb25 |
((n1 - i) / (i + 1.0)) * ((t - i) / (n2 + i + 1.0 - t));
|
|
Packit |
67cb25 |
s *= factor;
|
|
Packit |
67cb25 |
Q += s;
|
|
Packit |
67cb25 |
relerr = s / Q;
|
|
Packit |
67cb25 |
if (relerr < GSL_DBL_EPSILON)
|
|
Packit |
67cb25 |
break;
|
|
Packit |
67cb25 |
i++;
|
|
Packit |
67cb25 |
}
|
|
Packit |
67cb25 |
|
|
Packit |
67cb25 |
return Q;
|
|
Packit |
67cb25 |
}
|
|
Packit |
67cb25 |
|
|
Packit |
67cb25 |
|
|
Packit |
67cb25 |
|
|
Packit |
67cb25 |
|
|
Packit |
67cb25 |
/*
|
|
Packit |
67cb25 |
* Pr (X <= k)
|
|
Packit |
67cb25 |
*/
|
|
Packit |
67cb25 |
double
|
|
Packit |
67cb25 |
gsl_cdf_hypergeometric_P (const unsigned int k,
|
|
Packit |
67cb25 |
const unsigned int n1,
|
|
Packit |
67cb25 |
const unsigned int n2, const unsigned int t)
|
|
Packit |
67cb25 |
{
|
|
Packit |
67cb25 |
double P;
|
|
Packit |
67cb25 |
|
|
Packit |
67cb25 |
if (t > (n1 + n2))
|
|
Packit |
67cb25 |
{
|
|
Packit |
67cb25 |
CDF_ERROR ("t larger than population size", GSL_EDOM);
|
|
Packit |
67cb25 |
}
|
|
Packit |
67cb25 |
else if (k >= n1 || k >= t)
|
|
Packit |
67cb25 |
{
|
|
Packit |
67cb25 |
P = 1.0;
|
|
Packit |
67cb25 |
}
|
|
Packit |
67cb25 |
else if (k < 0.0)
|
|
Packit |
67cb25 |
{
|
|
Packit |
67cb25 |
P = 0.0;
|
|
Packit |
67cb25 |
}
|
|
Packit |
67cb25 |
else
|
|
Packit |
67cb25 |
{
|
|
Packit |
67cb25 |
double midpoint = ((double)t * n1) / ((double)n1 + (double)n2);
|
|
Packit |
67cb25 |
|
|
Packit |
67cb25 |
if (k >= midpoint)
|
|
Packit |
67cb25 |
{
|
|
Packit |
67cb25 |
P = 1 - upper_tail (k, n1, n2, t);
|
|
Packit |
67cb25 |
}
|
|
Packit |
67cb25 |
else
|
|
Packit |
67cb25 |
{
|
|
Packit |
67cb25 |
P = lower_tail (k, n1, n2, t);
|
|
Packit |
67cb25 |
}
|
|
Packit |
67cb25 |
}
|
|
Packit |
67cb25 |
|
|
Packit |
67cb25 |
return P;
|
|
Packit |
67cb25 |
}
|
|
Packit |
67cb25 |
|
|
Packit |
67cb25 |
/*
|
|
Packit |
67cb25 |
* Pr (X > k)
|
|
Packit |
67cb25 |
*/
|
|
Packit |
67cb25 |
double
|
|
Packit |
67cb25 |
gsl_cdf_hypergeometric_Q (const unsigned int k,
|
|
Packit |
67cb25 |
const unsigned int n1,
|
|
Packit |
67cb25 |
const unsigned int n2, const unsigned int t)
|
|
Packit |
67cb25 |
{
|
|
Packit |
67cb25 |
double Q;
|
|
Packit |
67cb25 |
|
|
Packit |
67cb25 |
if (t > (n1 + n2))
|
|
Packit |
67cb25 |
{
|
|
Packit |
67cb25 |
CDF_ERROR ("t larger than population size", GSL_EDOM);
|
|
Packit |
67cb25 |
}
|
|
Packit |
67cb25 |
else if (k >= n1 || k >= t)
|
|
Packit |
67cb25 |
{
|
|
Packit |
67cb25 |
Q = 0.0;
|
|
Packit |
67cb25 |
}
|
|
Packit |
67cb25 |
else if (k < 0.0)
|
|
Packit |
67cb25 |
{
|
|
Packit |
67cb25 |
Q = 1.0;
|
|
Packit |
67cb25 |
}
|
|
Packit |
67cb25 |
else
|
|
Packit |
67cb25 |
{
|
|
Packit |
67cb25 |
double midpoint = ((double)t * n1) / ((double)n1 + (double)n2);
|
|
Packit |
67cb25 |
|
|
Packit |
67cb25 |
if (k < midpoint)
|
|
Packit |
67cb25 |
{
|
|
Packit |
67cb25 |
Q = 1 - lower_tail (k, n1, n2, t);
|
|
Packit |
67cb25 |
}
|
|
Packit |
67cb25 |
else
|
|
Packit |
67cb25 |
{
|
|
Packit |
67cb25 |
Q = upper_tail (k, n1, n2, t);
|
|
Packit |
67cb25 |
}
|
|
Packit |
67cb25 |
}
|
|
Packit |
67cb25 |
|
|
Packit |
67cb25 |
return Q;
|
|
Packit |
67cb25 |
}
|