diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-07-24 09:54:23 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-07-24 09:54:44 +0000 |
commit | 836b47cb7e99a977c5a23b059ca1d0b5065d310e (patch) | |
tree | 1604da8f482d02effa033c94a84be42bc0c848c3 /database/KolmogorovSmirnovDist.c | |
parent | Releasing debian version 1.44.3-2. (diff) | |
download | netdata-836b47cb7e99a977c5a23b059ca1d0b5065d310e.tar.xz netdata-836b47cb7e99a977c5a23b059ca1d0b5065d310e.zip |
Merging upstream version 1.46.3.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'database/KolmogorovSmirnovDist.c')
-rw-r--r-- | database/KolmogorovSmirnovDist.c | 788 |
1 files changed, 0 insertions, 788 deletions
diff --git a/database/KolmogorovSmirnovDist.c b/database/KolmogorovSmirnovDist.c deleted file mode 100644 index 1486abc7b..000000000 --- a/database/KolmogorovSmirnovDist.c +++ /dev/null @@ -1,788 +0,0 @@ -// SPDX-License-Identifier: GPL-3.0 - -/******************************************************************** - * - * File: KolmogorovSmirnovDist.c - * Environment: ISO C99 or ANSI C89 - * Author: Richard Simard - * Organization: DIRO, Université de Montréal - * Date: 1 February 2012 - * Version 1.1 - - * Copyright 1 march 2010 by Université de Montréal, - Richard Simard and Pierre L'Ecuyer - ===================================================================== - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, version 3 of the License. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see <http://www.gnu.org/licenses/>. - - =====================================================================*/ - -#include "KolmogorovSmirnovDist.h" -#include <math.h> -#include <stdlib.h> - -#define num_Pi 3.14159265358979323846 /* PI */ -#define num_Ln2 0.69314718055994530941 /* log(2) */ - -/* For x close to 0 or 1, we use the exact formulae of Ruben-Gambino in all - cases. For n <= NEXACT, we use exact algorithms: the Durbin matrix and - the Pomeranz algorithms. For n > NEXACT, we use asymptotic methods - except for x close to 0 where we still use the method of Durbin - for n <= NKOLMO. For n > NKOLMO, we use asymptotic methods only and - so the precision is less for x close to 0. - We could increase the limit NKOLMO to 10^6 to get better precision - for x close to 0, but at the price of a slower speed. */ -#define NEXACT 500 -#define NKOLMO 100000 - -/* The Durbin matrix algorithm for the Kolmogorov-Smirnov distribution */ -static double DurbinMatrix (int n, double d); - - -/*========================================================================*/ -#if 0 - -/* For ANSI C89 only, not for ISO C99 */ -#define MAXI 50 -#define EPSILON 1.0e-15 - -double log1p (double x) -{ - /* returns a value equivalent to log(1 + x) accurate also for small x. */ - if (fabs (x) > 0.1) { - return log (1.0 + x); - } else { - double term = x; - double sum = x; - int s = 2; - while ((fabs (term) > EPSILON * fabs (sum)) && (s < MAXI)) { - term *= -x; - sum += term / s; - s++; - } - return sum; - } -} - -#undef MAXI -#undef EPSILON - -#endif - -/*========================================================================*/ -#define MFACT 30 - -/* The natural logarithm of factorial n! for 0 <= n <= MFACT */ -static double LnFactorial[MFACT + 1] = { - 0., - 0., - 0.6931471805599453, - 1.791759469228055, - 3.178053830347946, - 4.787491742782046, - 6.579251212010101, - 8.525161361065415, - 10.60460290274525, - 12.80182748008147, - 15.10441257307552, - 17.50230784587389, - 19.98721449566188, - 22.55216385312342, - 25.19122118273868, - 27.89927138384088, - 30.67186010608066, - 33.50507345013688, - 36.39544520803305, - 39.33988418719949, - 42.33561646075348, - 45.3801388984769, - 48.47118135183522, - 51.60667556776437, - 54.7847293981123, - 58.00360522298051, - 61.26170176100199, - 64.55753862700632, - 67.88974313718154, - 71.257038967168, - 74.65823634883016 -}; - -/*------------------------------------------------------------------------*/ - -static double getLogFactorial (int n) -{ - /* Returns the natural logarithm of factorial n! */ - if (n <= MFACT) { - return LnFactorial[n]; - - } else { - double x = (double) (n + 1); - double y = 1.0 / (x * x); - double z = ((-(5.95238095238E-4 * y) + 7.936500793651E-4) * y - - 2.7777777777778E-3) * y + 8.3333333333333E-2; - z = ((x - 0.5) * log (x) - x) + 9.1893853320467E-1 + z / x; - return z; - } -} - -/*------------------------------------------------------------------------*/ - -static double rapfac (int n) -{ - /* Computes n! / n^n */ - int i; - double res = 1.0 / n; - for (i = 2; i <= n; i++) { - res *= (double) i / n; - } - return res; -} - - -/*========================================================================*/ - -static double **CreateMatrixD (int N, int M) -{ - int i; - double **T2; - - T2 = (double **) malloc (N * sizeof (double *)); - T2[0] = (double *) malloc ((size_t) N * M * sizeof (double)); - for (i = 1; i < N; i++) - T2[i] = T2[0] + i * M; - return T2; -} - - -static void DeleteMatrixD (double **T) -{ - free (T[0]); - free (T); -} - - -/*========================================================================*/ - -static double KSPlusbarAsymp (int n, double x) -{ - /* Compute the probability of the KS+ distribution using an asymptotic - formula */ - double t = (6.0 * n * x + 1); - double z = t * t / (18.0 * n); - double v = 1.0 - (2.0 * z * z - 4.0 * z - 1.0) / (18.0 * n); - if (v <= 0.0) - return 0.0; - v = v * exp (-z); - if (v >= 1.0) - return 1.0; - return v; -} - - -/*-------------------------------------------------------------------------*/ - -static double KSPlusbarUpper (int n, double x) -{ - /* Compute the probability of the KS+ distribution in the upper tail using - Smirnov's stable formula */ - const double EPSILON = 1.0E-12; - double q; - double Sum = 0.0; - double term; - double t; - double LogCom; - double LOGJMAX; - int j; - int jdiv; - int jmax = (int) (n * (1.0 - x)); - - if (n > 200000) - return KSPlusbarAsymp (n, x); - - /* Avoid log(0) for j = jmax and q ~ 1.0 */ - if ((1.0 - x - (double) jmax / n) <= 0.0) - jmax--; - - if (n > 3000) - jdiv = 2; - else - jdiv = 3; - - j = jmax / jdiv + 1; - LogCom = getLogFactorial (n) - getLogFactorial (j) - - getLogFactorial (n - j); - LOGJMAX = LogCom; - - while (j <= jmax) { - q = (double) j / n + x; - term = LogCom + (j - 1) * log (q) + (n - j) * log1p (-q); - t = exp (term); - Sum += t; - LogCom += log ((double) (n - j) / (j + 1)); - if (t <= Sum * EPSILON) - break; - j++; - } - - j = jmax / jdiv; - LogCom = LOGJMAX + log ((double) (j + 1) / (n - j)); - - while (j > 0) { - q = (double) j / n + x; - term = LogCom + (j - 1) * log (q) + (n - j) * log1p (-q); - t = exp (term); - Sum += t; - LogCom += log ((double) j / (n - j + 1)); - if (t <= Sum * EPSILON) - break; - j--; - } - - Sum *= x; - /* add the term j = 0 */ - Sum += exp (n * log1p (-x)); - return Sum; -} - - -/*========================================================================*/ - -static double Pelz (int n, double x) -{ - /* Approximating the Lower Tail-Areas of the Kolmogorov-Smirnov One-Sample - Statistic, - Wolfgang Pelz and I. J. Good, - Journal of the Royal Statistical Society, Series B. - Vol. 38, No. 2 (1976), pp. 152-156 - */ - - const int JMAX = 20; - const double EPS = 1.0e-10; - const double C = 2.506628274631001; /* sqrt(2*Pi) */ - const double C2 = 1.2533141373155001; /* sqrt(Pi/2) */ - const double PI2 = num_Pi * num_Pi; - const double PI4 = PI2 * PI2; - const double RACN = sqrt ((double) n); - const double z = RACN * x; - const double z2 = z * z; - const double z4 = z2 * z2; - const double z6 = z4 * z2; - const double w = PI2 / (2.0 * z * z); - double ti, term, tom; - double sum; - int j; - - term = 1; - j = 0; - sum = 0; - while (j <= JMAX && term > EPS * sum) { - ti = j + 0.5; - term = exp (-ti * ti * w); - sum += term; - j++; - } - sum *= C / z; - - term = 1; - tom = 0; - j = 0; - while (j <= JMAX && fabs (term) > EPS * fabs (tom)) { - ti = j + 0.5; - term = (PI2 * ti * ti - z2) * exp (-ti * ti * w); - tom += term; - j++; - } - sum += tom * C2 / (RACN * 3.0 * z4); - - term = 1; - tom = 0; - j = 0; - while (j <= JMAX && fabs (term) > EPS * fabs (tom)) { - ti = j + 0.5; - term = 6 * z6 + 2 * z4 + PI2 * (2 * z4 - 5 * z2) * ti * ti + - PI4 * (1 - 2 * z2) * ti * ti * ti * ti; - term *= exp (-ti * ti * w); - tom += term; - j++; - } - sum += tom * C2 / (n * 36.0 * z * z6); - - term = 1; - tom = 0; - j = 1; - while (j <= JMAX && term > EPS * tom) { - ti = j; - term = PI2 * ti * ti * exp (-ti * ti * w); - tom += term; - j++; - } - sum -= tom * C2 / (n * 18.0 * z * z2); - - term = 1; - tom = 0; - j = 0; - while (j <= JMAX && fabs (term) > EPS * fabs (tom)) { - ti = j + 0.5; - ti = ti * ti; - term = -30 * z6 - 90 * z6 * z2 + PI2 * (135 * z4 - 96 * z6) * ti + - PI4 * (212 * z4 - 60 * z2) * ti * ti + PI2 * PI4 * ti * ti * ti * (5 - - 30 * z2); - term *= exp (-ti * w); - tom += term; - j++; - } - sum += tom * C2 / (RACN * n * 3240.0 * z4 * z6); - - term = 1; - tom = 0; - j = 1; - while (j <= JMAX && fabs (term) > EPS * fabs (tom)) { - ti = j * j; - term = (3 * PI2 * ti * z2 - PI4 * ti * ti) * exp (-ti * w); - tom += term; - j++; - } - sum += tom * C2 / (RACN * n * 108.0 * z6); - - return sum; -} - - -/*=========================================================================*/ - -static void CalcFloorCeil ( - int n, /* sample size */ - double t, /* = nx */ - double *A, /* A_i */ - double *Atflo, /* floor (A_i - t) */ - double *Atcei /* ceiling (A_i + t) */ - ) -{ - /* Precompute A_i, floors, and ceilings for limits of sums in the Pomeranz - algorithm */ - int i; - int ell = (int) t; /* floor (t) */ - double z = t - ell; /* t - floor (t) */ - double w = ceil (t) - t; - - if (z > 0.5) { - for (i = 2; i <= 2 * n + 2; i += 2) - Atflo[i] = i / 2 - 2 - ell; - for (i = 1; i <= 2 * n + 2; i += 2) - Atflo[i] = i / 2 - 1 - ell; - - for (i = 2; i <= 2 * n + 2; i += 2) - Atcei[i] = i / 2 + ell; - for (i = 1; i <= 2 * n + 2; i += 2) - Atcei[i] = i / 2 + 1 + ell; - - } else if (z > 0.0) { - for (i = 1; i <= 2 * n + 2; i++) - Atflo[i] = i / 2 - 1 - ell; - - for (i = 2; i <= 2 * n + 2; i++) - Atcei[i] = i / 2 + ell; - Atcei[1] = 1 + ell; - - } else { /* z == 0 */ - for (i = 2; i <= 2 * n + 2; i += 2) - Atflo[i] = i / 2 - 1 - ell; - for (i = 1; i <= 2 * n + 2; i += 2) - Atflo[i] = i / 2 - ell; - - for (i = 2; i <= 2 * n + 2; i += 2) - Atcei[i] = i / 2 - 1 + ell; - for (i = 1; i <= 2 * n + 2; i += 2) - Atcei[i] = i / 2 + ell; - } - - if (w < z) - z = w; - A[0] = A[1] = 0; - A[2] = z; - A[3] = 1 - A[2]; - for (i = 4; i <= 2 * n + 1; i++) - A[i] = A[i - 2] + 1; - A[2 * n + 2] = n; -} - - -/*========================================================================*/ - -static double Pomeranz (int n, double x) -{ - /* The Pomeranz algorithm to compute the KS distribution */ - const double EPS = 1.0e-15; - const int ENO = 350; - const double RENO = ldexp (1.0, ENO); /* for renormalization of V */ - int coreno; /* counter: how many renormalizations */ - const double t = n * x; - double w, sum, minsum; - int i, j, k, s; - int r1, r2; /* Indices i and i-1 for V[i][] */ - int jlow, jup, klow, kup, kup0; - double *A; - double *Atflo; - double *Atcei; - double **V; - double **H; /* = pow(w, j) / Factorial(j) */ - - A = (double *) calloc ((size_t) (2 * n + 3), sizeof (double)); - Atflo = (double *) calloc ((size_t) (2 * n + 3), sizeof (double)); - Atcei = (double *) calloc ((size_t) (2 * n + 3), sizeof (double)); - V = (double **) CreateMatrixD (2, n + 2); - H = (double **) CreateMatrixD (4, n + 2); - - CalcFloorCeil (n, t, A, Atflo, Atcei); - - for (j = 1; j <= n + 1; j++) - V[0][j] = 0; - for (j = 2; j <= n + 1; j++) - V[1][j] = 0; - V[1][1] = RENO; - coreno = 1; - - /* Precompute H[][] = (A[j] - A[j-1]^k / k! for speed */ - H[0][0] = 1; - w = 2.0 * A[2] / n; - for (j = 1; j <= n + 1; j++) - H[0][j] = w * H[0][j - 1] / j; - - H[1][0] = 1; - w = (1.0 - 2.0 * A[2]) / n; - for (j = 1; j <= n + 1; j++) - H[1][j] = w * H[1][j - 1] / j; - - H[2][0] = 1; - w = A[2] / n; - for (j = 1; j <= n + 1; j++) - H[2][j] = w * H[2][j - 1] / j; - - H[3][0] = 1; - for (j = 1; j <= n + 1; j++) - H[3][j] = 0; - - r1 = 0; - r2 = 1; - for (i = 2; i <= 2 * n + 2; i++) { - jlow = 2 + (int) Atflo[i]; - if (jlow < 1) - jlow = 1; - jup = (int) Atcei[i]; - if (jup > n + 1) - jup = n + 1; - - klow = 2 + (int) Atflo[i - 1]; - if (klow < 1) - klow = 1; - kup0 = (int) Atcei[i - 1]; - - /* Find to which case it corresponds */ - w = (A[i] - A[i - 1]) / n; - s = -1; - for (j = 0; j < 4; j++) { - if (fabs (w - H[j][1]) <= EPS) { - s = j; - break; - } - } - /* assert (s >= 0, "Pomeranz: s < 0"); */ - - minsum = RENO; - r1 = (r1 + 1) & 1; /* i - 1 */ - r2 = (r2 + 1) & 1; /* i */ - - for (j = jlow; j <= jup; j++) { - kup = kup0; - if (kup > j) - kup = j; - sum = 0; - for (k = kup; k >= klow; k--) - sum += V[r1][k] * H[s][j - k]; - V[r2][j] = sum; - if (sum < minsum) - minsum = sum; - } - - if (minsum < 1.0e-280) { - /* V is too small: renormalize to avoid underflow of probabilities */ - for (j = jlow; j <= jup; j++) - V[r2][j] *= RENO; - coreno++; /* keep track of log of RENO */ - } - } - - sum = V[r2][n + 1]; - free (A); - free (Atflo); - free (Atcei); - DeleteMatrixD (H); - DeleteMatrixD (V); - w = getLogFactorial (n) - coreno * ENO * num_Ln2 + log (sum); - if (w >= 0.) - return 1.; - return exp (w); -} - - -/*========================================================================*/ - -static double cdfSpecial (int n, double x) -{ - /* The KS distribution is known exactly for these cases */ - - /* For nx^2 > 18, KSfbar(n, x) is smaller than 5e-16 */ - if ((n * x * x >= 18.0) || (x >= 1.0)) - return 1.0; - - if (x <= 0.5 / n) - return 0.0; - - if (n == 1) - return 2.0 * x - 1.0; - - if (x <= 1.0 / n) { - double t = 2.0 * x * n - 1.0; - double w; - if (n <= NEXACT) { - w = rapfac (n); - return w * pow (t, (double) n); - } - w = getLogFactorial (n) + n * log (t / n); - return exp (w); - } - - if (x >= 1.0 - 1.0 / n) { - return 1.0 - 2.0 * pow (1.0 - x, (double) n); - } - - return -1.0; -} - - -/*========================================================================*/ - -double KScdf (int n, double x) -{ - const double w = n * x * x; - double u = cdfSpecial (n, x); - if (u >= 0.0) - return u; - - if (n <= NEXACT) { - if (w < 0.754693) - return DurbinMatrix (n, x); - if (w < 4.0) - return Pomeranz (n, x); - return 1.0 - KSfbar (n, x); - } - - if ((w * x * n <= 7.0) && (n <= NKOLMO)) - return DurbinMatrix (n, x); - - return Pelz (n, x); -} - - -/*=========================================================================*/ - -static double fbarSpecial (int n, double x) -{ - const double w = n * x * x; - - if ((w >= 370.0) || (x >= 1.0)) - return 0.0; - if ((w <= 0.0274) || (x <= 0.5 / n)) - return 1.0; - if (n == 1) - return 2.0 - 2.0 * x; - - if (x <= 1.0 / n) { - double z; - double t = 2.0 * x * n - 1.0; - if (n <= NEXACT) { - z = rapfac (n); - return 1.0 - z * pow (t, (double) n); - } - z = getLogFactorial (n) + n * log (t / n); - return 1.0 - exp (z); - } - - if (x >= 1.0 - 1.0 / n) { - return 2.0 * pow (1.0 - x, (double) n); - } - return -1.0; -} - - -/*========================================================================*/ - -double KSfbar (int n, double x) -{ - const double w = n * x * x; - double v = fbarSpecial (n, x); - if (v >= 0.0) - return v; - - if (n <= NEXACT) { - if (w < 4.0) - return 1.0 - KScdf (n, x); - else - return 2.0 * KSPlusbarUpper (n, x); - } - - if (w >= 2.65) - return 2.0 * KSPlusbarUpper (n, x); - - return 1.0 - KScdf (n, x); -} - - -/*========================================================================= - -The following implements the Durbin matrix algorithm and was programmed by -G. Marsaglia, Wai Wan Tsang and Jingbo Wong. - -I have made small modifications in their program. (Richard Simard) - - - -=========================================================================*/ - -/* - The C program to compute Kolmogorov's distribution - - K(n,d) = Prob(D_n < d), where - - D_n = max(x_1-0/n,x_2-1/n...,x_n-(n-1)/n,1/n-x_1,2/n-x_2,...,n/n-x_n) - - with x_1<x_2,...<x_n a purported set of n independent uniform [0,1) - random variables sorted into increasing order. - See G. Marsaglia, Wai Wan Tsang and Jingbo Wong, - J.Stat.Software, 8, 18, pp 1--4, (2003). -*/ - -#define NORM 1.0e140 -#define INORM 1.0e-140 -#define LOGNORM 140 - - -/* Matrix product */ -static void mMultiply (double *A, double *B, double *C, int m); - -/* Matrix power */ -static void mPower (double *A, int eA, double *V, int *eV, int m, int n); - - -static double DurbinMatrix (int n, double d) -{ - int k, m, i, j, g, eH, eQ; - double h, s, *H, *Q; - /* OMIT NEXT TWO LINES IF YOU REQUIRE >7 DIGIT ACCURACY IN THE RIGHT TAIL */ -#if 0 - s = d * d * n; - if (s > 7.24 || (s > 3.76 && n > 99)) - return 1 - 2 * exp (-(2.000071 + .331 / sqrt (n) + 1.409 / n) * s); -#endif - k = (int) (n * d) + 1; - m = 2 * k - 1; - h = k - n * d; - H = (double *) malloc ((m * m) * sizeof (double)); - Q = (double *) malloc ((m * m) * sizeof (double)); - for (i = 0; i < m; i++) - for (j = 0; j < m; j++) - if (i - j + 1 < 0) - H[i * m + j] = 0; - else - H[i * m + j] = 1; - for (i = 0; i < m; i++) { - H[i * m] -= pow (h, (double) (i + 1)); - H[(m - 1) * m + i] -= pow (h, (double) (m - i)); - } - H[(m - 1) * m] += (2 * h - 1 > 0 ? pow (2 * h - 1, (double) m) : 0); - for (i = 0; i < m; i++) - for (j = 0; j < m; j++) - if (i - j + 1 > 0) - for (g = 1; g <= i - j + 1; g++) - H[i * m + j] /= g; - eH = 0; - mPower (H, eH, Q, &eQ, m, n); - s = Q[(k - 1) * m + k - 1]; - - for (i = 1; i <= n; i++) { - s = s * (double) i / n; - if (s < INORM) { - s *= NORM; - eQ -= LOGNORM; - } - } - s *= pow (10., (double) eQ); - free (H); - free (Q); - return s; -} - - -static void mMultiply (double *A, double *B, double *C, int m) -{ - int i, j, k; - double s; - for (i = 0; i < m; i++) - for (j = 0; j < m; j++) { - s = 0.; - for (k = 0; k < m; k++) - s += A[i * m + k] * B[k * m + j]; - C[i * m + j] = s; - } -} - - -static void renormalize (double *V, int m, int *p) -{ - int i; - for (i = 0; i < m * m; i++) - V[i] *= INORM; - *p += LOGNORM; -} - - -static void mPower (double *A, int eA, double *V, int *eV, int m, int n) -{ - double *B; - int eB, i; - if (n == 1) { - for (i = 0; i < m * m; i++) - V[i] = A[i]; - *eV = eA; - return; - } - mPower (A, eA, V, eV, m, n / 2); - B = (double *) malloc ((m * m) * sizeof (double)); - mMultiply (V, V, B, m); - eB = 2 * (*eV); - if (B[(m / 2) * m + (m / 2)] > NORM) - renormalize (B, m, &eB); - - if (n % 2 == 0) { - for (i = 0; i < m * m; i++) - V[i] = B[i]; - *eV = eB; - } else { - mMultiply (A, B, V, m); - *eV = eA + eB; - } - - if (V[(m / 2) * m + (m / 2)] > NORM) - renormalize (V, m, eV); - free (B); -} |