summaryrefslogtreecommitdiffstats
path: root/comm/mailnews/extensions/bayesian-spam-filter/nsIncompleteGamma.h
blob: 9b96459c7c0019ebe5f4e6385512802c6af687b7 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

#ifndef nsIncompleteGamma_h__
#define nsIncompleteGamma_h__

/* An implementation of the incomplete gamma functions for real
   arguments. P is defined as

                          x
                         /
                  1      [     a - 1  - t
    P(a, x) =  --------  I    t      e    dt
               Gamma(a)  ]
                         /
                          0

   and

                          infinity
                         /
                  1      [     a - 1  - t
    Q(a, x) =  --------  I    t      e    dt
               Gamma(a)  ]
                         /
                          x

   so that P(a,x) + Q(a,x) = 1.

   Both a series expansion and a continued fraction exist.  This
   implementation uses the more efficient method based on the arguments.

   Either case involves calculating a multiplicative term:
      e^(-x)*x^a/Gamma(a).
   Here we calculate the log of this term. Most math libraries have a
   "lgamma" function but it is not re-entrant. Some libraries have a
   "lgamma_r" which is re-entrant. Use it if possible. I have included a
   simple replacement but it is certainly not as accurate.

   Relative errors are almost always < 1e-10 and usually < 1e-14. Very
   small and very large arguments cause trouble.

   The region where a < 0.5 and x < 0.5 has poor error properties and is
   not too stable. Get a better routine if you need results in this
   region.

   The error argument will be set negative if there is a domain error or
   positive for an internal calculation error, currently lack of
   convergence. A value is always returned, though.

 */

#include <math.h>
#include <float.h>

// the main routine
static double nsIncompleteGammaP(double a, double x, int* error);

// nsLnGamma(z): either a wrapper around lgamma_r or the internal function.
// C_m = B[2*m]/(2*m*(2*m-1)) where B is a Bernoulli number
static const double C_1 = 1.0 / 12.0;
static const double C_2 = -1.0 / 360.0;
static const double C_3 = 1.0 / 1260.0;
static const double C_4 = -1.0 / 1680.0;
static const double C_5 = 1.0 / 1188.0;
static const double C_6 = -691.0 / 360360.0;
static const double C_7 = 1.0 / 156.0;
static const double C_8 = -3617.0 / 122400.0;
static const double C_9 = 43867.0 / 244188.0;
static const double C_10 = -174611.0 / 125400.0;
static const double C_11 = 77683.0 / 5796.0;

// truncated asymptotic series in 1/z
static inline double lngamma_asymp(double z) {
  double w, w2, sum;
  w = 1.0 / z;
  w2 = w * w;
  sum =
      w *
      (w2 * (w2 * (w2 * (w2 * (w2 * (w2 * (w2 * (w2 * (w2 * (C_11 * w2 + C_10) +
                                                       C_9) +
                                                 C_8) +
                                           C_7) +
                                     C_6) +
                               C_5) +
                         C_4) +
                   C_3) +
             C_2) +
       C_1);

  return sum;
}

struct fact_table_s {
  double fact;
  double lnfact;
};

// for speed and accuracy
static const struct fact_table_s FactTable[] = {
    {1.000000000000000, 0.0000000000000000000000e+00},
    {1.000000000000000, 0.0000000000000000000000e+00},
    {2.000000000000000, 6.9314718055994530942869e-01},
    {6.000000000000000, 1.7917594692280550007892e+00},
    {24.00000000000000, 3.1780538303479456197550e+00},
    {120.0000000000000, 4.7874917427820459941458e+00},
    {720.0000000000000, 6.5792512120101009952602e+00},
    {5040.000000000000, 8.5251613610654142999881e+00},
    {40320.00000000000, 1.0604602902745250228925e+01},
    {362880.0000000000, 1.2801827480081469610995e+01},
    {3628800.000000000, 1.5104412573075515295248e+01},
    {39916800.00000000, 1.7502307845873885839769e+01},
    {479001600.0000000, 1.9987214495661886149228e+01},
    {6227020800.000000, 2.2552163853123422886104e+01},
    {87178291200.00000, 2.5191221182738681499610e+01},
    {1307674368000.000, 2.7899271383840891566988e+01},
    {20922789888000.00, 3.0671860106080672803835e+01},
    {355687428096000.0, 3.3505073450136888885825e+01},
    {6402373705728000., 3.6395445208033053576674e+01}};
#define FactTableLength (int)(sizeof(FactTable) / sizeof(FactTable[0]))

// for speed
static const double ln_2pi_2 = 0.918938533204672741803;  // log(2*PI)/2

/* A simple lgamma function, not very robust.

   Valid for z_in > 0 ONLY.

   For z_in > 8 precision is quite good, relative errors < 1e-14 and
   usually better. For z_in < 8 relative errors increase but are usually
   < 1e-10. In two small regions, 1 +/- .001 and 2 +/- .001 errors
   increase quickly.
*/
static double nsLnGamma(double z_in, int* gsign) {
  double scale, z, sum, result;
  *gsign = 1;

  int zi = (int)z_in;
  if (z_in == (double)zi) {
    if (0 < zi && zi <= FactTableLength)
      return FactTable[zi - 1].lnfact;  // gamma(z) = (z-1)!
  }

  for (scale = 1.0, z = z_in; z < 8.0; ++z) scale *= z;

  sum = lngamma_asymp(z);
  result = (z - 0.5) * log(z) - z + ln_2pi_2 - log(scale);
  result += sum;
  return result;
}

// log( e^(-x)*x^a/Gamma(a) )
static inline double lnPQfactor(double a, double x) {
  int gsign;  // ignored because a > 0
  return a * log(x) - x - nsLnGamma(a, &gsign);
}

static double Pseries(double a, double x, int* error) {
  double sum, term;
  const double eps = 2.0 * DBL_EPSILON;
  const int imax = 5000;
  int i;

  sum = term = 1.0 / a;
  for (i = 1; i < imax; ++i) {
    term *= x / (a + i);
    sum += term;
    if (fabs(term) < eps * fabs(sum)) break;
  }

  if (i >= imax) *error = 1;

  return sum;
}

static double Qcontfrac(double a, double x, int* error) {
  double result, D, C, e, f, term;
  const double eps = 2.0 * DBL_EPSILON;
  const double small = DBL_EPSILON * DBL_EPSILON * DBL_EPSILON * DBL_EPSILON;
  const int imax = 5000;
  int i;

  // modified Lentz method
  f = x - a + 1.0;
  if (fabs(f) < small) f = small;
  C = f + 1.0 / small;
  D = 1.0 / f;
  result = D;
  for (i = 1; i < imax; ++i) {
    e = i * (a - i);
    f += 2.0;
    D = f + e * D;
    if (fabs(D) < small) D = small;
    D = 1.0 / D;
    C = f + e / C;
    if (fabs(C) < small) C = small;
    term = C * D;
    result *= term;
    if (fabs(term - 1.0) < eps) break;
  }

  if (i >= imax) *error = 1;
  return result;
}

static double nsIncompleteGammaP(double a, double x, int* error) {
  double result, dom, ldom;
  //  domain errors. the return values are meaningless but have
  //  to return something.
  *error = -1;
  if (a <= 0.0) return 1.0;
  if (x < 0.0) return 0.0;
  *error = 0;
  if (x == 0.0) return 0.0;

  ldom = lnPQfactor(a, x);
  dom = exp(ldom);
  // might need to adjust the crossover point
  if (a <= 0.5) {
    if (x < a + 1.0)
      result = dom * Pseries(a, x, error);
    else
      result = 1.0 - dom * Qcontfrac(a, x, error);
  } else {
    if (x < a)
      result = dom * Pseries(a, x, error);
    else
      result = 1.0 - dom * Qcontfrac(a, x, error);
  }

  // not clear if this can ever happen
  if (result > 1.0) result = 1.0;
  if (result < 0.0) result = 0.0;
  return result;
}

#endif