summaryrefslogtreecommitdiffstats
path: root/database
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2022-06-09 04:52:39 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2022-06-09 04:52:39 +0000
commit89f3604407aff8f4cb2ed958252c61e23c767e24 (patch)
tree7fbf408102cab051557d38193524d8c6e991d070 /database
parentAdding upstream version 1.34.1. (diff)
downloadnetdata-upstream/1.35.0.tar.xz
netdata-upstream/1.35.0.zip
Adding upstream version 1.35.0.upstream/1.35.0
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'database')
-rw-r--r--database/KolmogorovSmirnovDist.c788
-rw-r--r--database/KolmogorovSmirnovDist.h91
-rw-r--r--database/engine/journalfile.c1
-rw-r--r--database/engine/metadata_log/logfile.c20
-rw-r--r--database/engine/pagecache.c16
-rw-r--r--database/engine/pagecache.h1
-rw-r--r--database/engine/rrdengine.c53
-rw-r--r--database/engine/rrdengine.h25
-rwxr-xr-xdatabase/engine/rrdengineapi.c202
-rw-r--r--database/metric_correlations.c300
-rw-r--r--database/metric_correlations.h11
-rw-r--r--database/ram/rrddim_mem.c67
-rw-r--r--database/ram/rrddim_mem.h29
-rw-r--r--database/rrd.c27
-rw-r--r--database/rrd.h162
-rw-r--r--database/rrdcalc.c157
-rw-r--r--database/rrdcalc.h1
-rw-r--r--database/rrddim.c224
-rw-r--r--database/rrdhost.c32
-rw-r--r--database/rrdset.c118
-rw-r--r--database/sqlite/sqlite_aclk.c118
-rw-r--r--database/sqlite/sqlite_aclk.h15
-rw-r--r--database/sqlite/sqlite_aclk_alert.c172
-rw-r--r--database/sqlite/sqlite_aclk_alert.h1
-rw-r--r--database/sqlite/sqlite_aclk_chart.c315
-rw-r--r--database/sqlite/sqlite_aclk_chart.h15
-rw-r--r--database/sqlite/sqlite_aclk_node.c21
-rw-r--r--database/sqlite/sqlite_aclk_node.h1
-rw-r--r--database/sqlite/sqlite_functions.c123
-rw-r--r--database/sqlite/sqlite_functions.h6
-rw-r--r--database/sqlite/sqlite_health.c164
-rw-r--r--database/storage_engine.c120
-rw-r--r--database/storage_engine.h30
33 files changed, 2727 insertions, 699 deletions
diff --git a/database/KolmogorovSmirnovDist.c b/database/KolmogorovSmirnovDist.c
new file mode 100644
index 000000000..1486abc7b
--- /dev/null
+++ b/database/KolmogorovSmirnovDist.c
@@ -0,0 +1,788 @@
+// SPDX-License-Identifier: GPL-3.0
+
+/********************************************************************
+ *
+ * File: KolmogorovSmirnovDist.c
+ * Environment: ISO C99 or ANSI C89
+ * Author: Richard Simard
+ * Organization: DIRO, Université de Montréal
+ * Date: 1 February 2012
+ * Version 1.1
+
+ * Copyright 1 march 2010 by Université de Montréal,
+ Richard Simard and Pierre L'Ecuyer
+ =====================================================================
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, version 3 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+ =====================================================================*/
+
+#include "KolmogorovSmirnovDist.h"
+#include <math.h>
+#include <stdlib.h>
+
+#define num_Pi 3.14159265358979323846 /* PI */
+#define num_Ln2 0.69314718055994530941 /* log(2) */
+
+/* For x close to 0 or 1, we use the exact formulae of Ruben-Gambino in all
+ cases. For n <= NEXACT, we use exact algorithms: the Durbin matrix and
+ the Pomeranz algorithms. For n > NEXACT, we use asymptotic methods
+ except for x close to 0 where we still use the method of Durbin
+ for n <= NKOLMO. For n > NKOLMO, we use asymptotic methods only and
+ so the precision is less for x close to 0.
+ We could increase the limit NKOLMO to 10^6 to get better precision
+ for x close to 0, but at the price of a slower speed. */
+#define NEXACT 500
+#define NKOLMO 100000
+
+/* The Durbin matrix algorithm for the Kolmogorov-Smirnov distribution */
+static double DurbinMatrix (int n, double d);
+
+
+/*========================================================================*/
+#if 0
+
+/* For ANSI C89 only, not for ISO C99 */
+#define MAXI 50
+#define EPSILON 1.0e-15
+
+/*
+ * Fallback implementation of log1p for ANSI C89 builds. It sits inside the
+ * surrounding "#if 0" and is therefore not compiled unless that condition
+ * is changed.
+ *
+ * x: argument of log(1 + x).
+ * Returns log(1 + x). For |x| > 0.1 it simply calls log (1.0 + x); for
+ * smaller |x| it sums the alternating series x - x^2/2 + x^3/3 - ...
+ */
+double log1p (double x)
+{
+ /* returns a value equivalent to log(1 + x) accurate also for small x. */
+ if (fabs (x) > 0.1) {
+ return log (1.0 + x);
+ } else {
+ double term = x;
+ double sum = x;
+ int s = 2;
+ /* Stop once the last power of x is below EPSILON relative to the sum,
+    or when the divisor s reaches MAXI. */
+ while ((fabs (term) > EPSILON * fabs (sum)) && (s < MAXI)) {
+ term *= -x;
+ sum += term / s;
+ s++;
+ }
+ return sum;
+ }
+}
+
+#undef MAXI
+#undef EPSILON
+
+#endif
+
+/*========================================================================*/
+#define MFACT 30
+
+/* The natural logarithm of factorial n! for 0 <= n <= MFACT, indexed by n.
+   The table is only ever read, hence const. */
+static const double LnFactorial[MFACT + 1] = {
+    0.,
+    0.,
+    0.6931471805599453,
+    1.791759469228055,
+    3.178053830347946,
+    4.787491742782046,
+    6.579251212010101,
+    8.525161361065415,
+    10.60460290274525,
+    12.80182748008147,
+    15.10441257307552,
+    17.50230784587389,
+    19.98721449566188,
+    22.55216385312342,
+    25.19122118273868,
+    27.89927138384088,
+    30.67186010608066,
+    33.50507345013688,
+    36.39544520803305,
+    39.33988418719949,
+    42.33561646075348,
+    45.3801388984769,
+    48.47118135183522,
+    51.60667556776437,
+    54.7847293981123,
+    58.00360522298051,
+    61.26170176100199,
+    64.55753862700632,
+    67.88974313718154,
+    71.257038967168,
+    74.65823634883016
+};
+
+/*------------------------------------------------------------------------*/
+
+/*
+ * Returns the natural logarithm of factorial n!.
+ *
+ * n: argument of the factorial.
+ *
+ * For 0 <= n <= MFACT the value comes from the LnFactorial table; for larger
+ * n it is computed from a Stirling-type series evaluated at x = n + 1.
+ * A negative n has no factorial: NaN is returned instead of indexing the
+ * table out of bounds.
+ */
+static double getLogFactorial (int n)
+{
+    if (n < 0)
+        return NAN;
+
+    if (n <= MFACT) {
+        return LnFactorial[n];
+
+    } else {
+        double x = (double) (n + 1);
+        double y = 1.0 / (x * x);
+        /* Correction polynomial in y = 1/x^2, evaluated by Horner's rule */
+        double z = ((-(5.95238095238E-4 * y) + 7.936500793651E-4) * y -
+                    2.7777777777778E-3) * y + 8.3333333333333E-2;
+        /* 9.1893853320467E-1 is log(2*Pi)/2 */
+        z = ((x - 0.5) * log (x) - x) + 9.1893853320467E-1 + z / x;
+        return z;
+    }
+}
+
+/*------------------------------------------------------------------------*/
+
+/*
+ * Computes n! / n^n.
+ *
+ * The quotient is built up as the product of the factors i / n, so neither
+ * n! nor n^n is ever formed on its own.
+ */
+static double rapfac (int n)
+{
+    double ratio = 1.0 / n;
+    int k = 2;
+
+    while (k <= n) {
+        ratio *= (double) k / n;
+        k++;
+    }
+    return ratio;
+}
+
+
+/*========================================================================*/
+
+/*
+ * Allocates an N x M matrix of doubles.
+ *
+ * The cells live in one contiguous block; the returned array holds N row
+ * pointers into that block, so T[i][j] addresses element (i, j). The cells
+ * are not initialized. Release the matrix with DeleteMatrixD.
+ *
+ * Returns NULL when N or M is not positive, when the requested size does not
+ * fit in size_t, or when an allocation fails.
+ */
+static double **CreateMatrixD (int N, int M)
+{
+    double **rows;
+    size_t nrows, ncols;
+    int i;
+
+    if (N <= 0 || M <= 0)
+        return NULL;
+    nrows = (size_t) N;
+    ncols = (size_t) M;
+    /* (size_t) -1 is the largest size_t value; reject nrows * ncols *
+       sizeof (double) if it would wrap around */
+    if (ncols > ((size_t) -1) / sizeof (double) / nrows)
+        return NULL;
+
+    rows = (double **) malloc (nrows * sizeof (double *));
+    if (rows == NULL)
+        return NULL;
+
+    rows[0] = (double *) malloc (nrows * ncols * sizeof (double));
+    if (rows[0] == NULL) {
+        free (rows);
+        return NULL;
+    }
+
+    for (i = 1; i < N; i++)
+        rows[i] = rows[0] + (size_t) i * ncols;
+    return rows;
+}
+
+
+/*
+ * Releases a matrix laid out as one data block (T[0]) plus an array of row
+ * pointers (T), which is the layout CreateMatrixD produces.
+ * A NULL matrix is accepted and ignored, like free (NULL).
+ */
+static void DeleteMatrixD (double **T)
+{
+    if (T == NULL)
+        return;
+    free (T[0]);
+    free (T);
+}
+
+
+/*========================================================================*/
+
+/*
+ * Compute the probability of the KS+ distribution using an asymptotic
+ * formula.
+ *
+ * n: sample size, x: value of the statistic.
+ * The result is clamped to the interval [0, 1].
+ */
+static double KSPlusbarAsymp (int n, double x)
+{
+    const double scale = 18.0 * n;
+    const double lin = (6.0 * n * x + 1);
+    const double expo = lin * lin / scale;
+    double prob = 1.0 - (2.0 * expo * expo - 4.0 * expo - 1.0) / scale;
+
+    if (prob <= 0.0)
+        return 0.0;
+
+    prob = prob * exp (-expo);
+    return (prob >= 1.0) ? 1.0 : prob;
+}
+
+
+/*-------------------------------------------------------------------------*/
+
+/*
+ * Upper-tail probability of the KS+ distribution for sample size n at x.
+ *
+ * For n > 200000 the work is delegated to KSPlusbarAsymp. Otherwise the
+ * terms  C(n,j) * q^(j-1) * (1-q)^(n-j),  with q = j/n + x, are summed for
+ * j = 1..jmax; the sum is multiplied by x and the j = 0 term (1-x)^n is
+ * added. Each term is evaluated through logarithms before exponentiation.
+ */
+static double KSPlusbarUpper (int n, double x)
+{
+ /* Compute the probability of the KS+ distribution in the upper tail using
+ Smirnov's stable formula */
+ const double EPSILON = 1.0E-12;
+ double q;
+ double Sum = 0.0;
+ double term;
+ double t;
+ double LogCom;
+ double LOGJMAX;
+ int j;
+ int jdiv;
+ int jmax = (int) (n * (1.0 - x));
+
+ if (n > 200000)
+ return KSPlusbarAsymp (n, x);
+
+ /* Avoid log(0) for j = jmax and q ~ 1.0 */
+ if ((1.0 - x - (double) jmax / n) <= 0.0)
+ jmax--;
+
+ /* jdiv selects the index jmax / jdiv at which the summation starts */
+ if (n > 3000)
+ jdiv = 2;
+ else
+ jdiv = 3;
+
+ /* LogCom holds log C(n, j); LOGJMAX remembers its value at the start
+    index so that the downward pass can be seeded from it */
+ j = jmax / jdiv + 1;
+ LogCom = getLogFactorial (n) - getLogFactorial (j) -
+ getLogFactorial (n - j);
+ LOGJMAX = LogCom;
+
+ /* Upward pass: j = jmax / jdiv + 1 .. jmax, stopping early once a term
+    no longer changes the sum at relative precision EPSILON */
+ while (j <= jmax) {
+ q = (double) j / n + x;
+ term = LogCom + (j - 1) * log (q) + (n - j) * log1p (-q);
+ t = exp (term);
+ Sum += t;
+ /* log C(n, j + 1) = log C(n, j) + log ((n - j) / (j + 1)) */
+ LogCom += log ((double) (n - j) / (j + 1));
+ if (t <= Sum * EPSILON)
+ break;
+ j++;
+ }
+
+ /* Downward pass: j = jmax / jdiv .. 1, with the same stopping rule */
+ j = jmax / jdiv;
+ LogCom = LOGJMAX + log ((double) (j + 1) / (n - j));
+
+ while (j > 0) {
+ q = (double) j / n + x;
+ term = LogCom + (j - 1) * log (q) + (n - j) * log1p (-q);
+ t = exp (term);
+ Sum += t;
+ /* log C(n, j - 1) = log C(n, j) + log (j / (n - j + 1)) */
+ LogCom += log ((double) j / (n - j + 1));
+ if (t <= Sum * EPSILON)
+ break;
+ j--;
+ }
+
+ Sum *= x;
+ /* add the term j = 0 */
+ Sum += exp (n * log1p (-x));
+ return Sum;
+}
+
+
+/*========================================================================*/
+
+/*
+ * Approximation of the two-sided KS distribution for sample size n at x,
+ * following the Pelz-Good reference cited in the function body.
+ *
+ * With z = sqrt(n) * x and w = Pi^2 / (2 z^2), the result is assembled from
+ * six series whose terms all carry a factor exp(-k * w). Each series is
+ * truncated after index JMAX, or earlier once a term falls below EPS
+ * relative to the running sum.
+ * NOTE(review): the individual polynomial coefficients were not checked
+ * against the paper here - confirm before changing any of them.
+ */
+static double Pelz (int n, double x)
+{
+ /* Approximating the Lower Tail-Areas of the Kolmogorov-Smirnov One-Sample
+ Statistic,
+ Wolfgang Pelz and I. J. Good,
+ Journal of the Royal Statistical Society, Series B.
+ Vol. 38, No. 2 (1976), pp. 152-156
+ */
+
+ const int JMAX = 20;
+ const double EPS = 1.0e-10;
+ const double C = 2.506628274631001; /* sqrt(2*Pi) */
+ const double C2 = 1.2533141373155001; /* sqrt(Pi/2) */
+ const double PI2 = num_Pi * num_Pi;
+ const double PI4 = PI2 * PI2;
+ const double RACN = sqrt ((double) n);
+ const double z = RACN * x;
+ const double z2 = z * z;
+ const double z4 = z2 * z2;
+ const double z6 = z4 * z2;
+ const double w = PI2 / (2.0 * z * z);
+ double ti, term, tom;
+ double sum;
+ int j;
+
+ /* Series 1, scaled by C / z. term is preset to 1 before every series so
+    that the loop condition holds on entry. */
+ term = 1;
+ j = 0;
+ sum = 0;
+ while (j <= JMAX && term > EPS * sum) {
+ ti = j + 0.5;
+ term = exp (-ti * ti * w);
+ sum += term;
+ j++;
+ }
+ sum *= C / z;
+
+ /* Series 2, added with weight C2 / (sqrt(n) * 3 z^4) */
+ term = 1;
+ tom = 0;
+ j = 0;
+ while (j <= JMAX && fabs (term) > EPS * fabs (tom)) {
+ ti = j + 0.5;
+ term = (PI2 * ti * ti - z2) * exp (-ti * ti * w);
+ tom += term;
+ j++;
+ }
+ sum += tom * C2 / (RACN * 3.0 * z4);
+
+ /* Series 3, added with weight C2 / (n * 36 z^7) */
+ term = 1;
+ tom = 0;
+ j = 0;
+ while (j <= JMAX && fabs (term) > EPS * fabs (tom)) {
+ ti = j + 0.5;
+ term = 6 * z6 + 2 * z4 + PI2 * (2 * z4 - 5 * z2) * ti * ti +
+ PI4 * (1 - 2 * z2) * ti * ti * ti * ti;
+ term *= exp (-ti * ti * w);
+ tom += term;
+ j++;
+ }
+ sum += tom * C2 / (n * 36.0 * z * z6);
+
+ /* Series 4 runs over integer indices from j = 1 and is subtracted with
+    weight C2 / (n * 18 z^3) */
+ term = 1;
+ tom = 0;
+ j = 1;
+ while (j <= JMAX && term > EPS * tom) {
+ ti = j;
+ term = PI2 * ti * ti * exp (-ti * ti * w);
+ tom += term;
+ j++;
+ }
+ sum -= tom * C2 / (n * 18.0 * z * z2);
+
+ /* Series 5, added with weight C2 / (n^(3/2) * 3240 z^10); from here on
+    ti holds the squared index */
+ term = 1;
+ tom = 0;
+ j = 0;
+ while (j <= JMAX && fabs (term) > EPS * fabs (tom)) {
+ ti = j + 0.5;
+ ti = ti * ti;
+ term = -30 * z6 - 90 * z6 * z2 + PI2 * (135 * z4 - 96 * z6) * ti +
+ PI4 * (212 * z4 - 60 * z2) * ti * ti + PI2 * PI4 * ti * ti * ti * (5 -
+ 30 * z2);
+ term *= exp (-ti * w);
+ tom += term;
+ j++;
+ }
+ sum += tom * C2 / (RACN * n * 3240.0 * z4 * z6);
+
+ /* Series 6 runs over integer indices from j = 1 and is added with weight
+    C2 / (n^(3/2) * 108 z^6) */
+ term = 1;
+ tom = 0;
+ j = 1;
+ while (j <= JMAX && fabs (term) > EPS * fabs (tom)) {
+ ti = j * j;
+ term = (3 * PI2 * ti * z2 - PI4 * ti * ti) * exp (-ti * w);
+ tom += term;
+ j++;
+ }
+ sum += tom * C2 / (RACN * n * 108.0 * z6);
+
+ return sum;
+}
+
+
+/*=========================================================================*/
+
+/*
+ * Fills the three work arrays used by the Pomeranz algorithm.
+ *
+ * All three arrays are written up to index 2 * n + 2, so each needs at least
+ * 2 * n + 3 elements. Atflo[0] and Atcei[0] are not written here.
+ * The table values depend on where the fractional part of t lies:
+ * above 0.5, in (0, 0.5], or exactly 0.
+ */
+static void CalcFloorCeil (
+ int n, /* sample size */
+ double t, /* = nx */
+ double *A, /* A_i */
+ double *Atflo, /* floor (A_i - t) */
+ double *Atcei /* ceiling (A_i + t) */
+ )
+{
+ /* Precompute A_i, floors, and ceilings for limits of sums in the Pomeranz
+ algorithm */
+ int i;
+ int ell = (int) t; /* floor (t) */
+ double z = t - ell; /* t - floor (t) */
+ double w = ceil (t) - t;
+
+ if (z > 0.5) {
+ /* even and odd indices use different offsets */
+ for (i = 2; i <= 2 * n + 2; i += 2)
+ Atflo[i] = i / 2 - 2 - ell;
+ for (i = 1; i <= 2 * n + 2; i += 2)
+ Atflo[i] = i / 2 - 1 - ell;
+
+ for (i = 2; i <= 2 * n + 2; i += 2)
+ Atcei[i] = i / 2 + ell;
+ for (i = 1; i <= 2 * n + 2; i += 2)
+ Atcei[i] = i / 2 + 1 + ell;
+
+ } else if (z > 0.0) {
+ /* one formula for every index, except Atcei[1] which is set apart */
+ for (i = 1; i <= 2 * n + 2; i++)
+ Atflo[i] = i / 2 - 1 - ell;
+
+ for (i = 2; i <= 2 * n + 2; i++)
+ Atcei[i] = i / 2 + ell;
+ Atcei[1] = 1 + ell;
+
+ } else { /* z == 0 */
+ for (i = 2; i <= 2 * n + 2; i += 2)
+ Atflo[i] = i / 2 - 1 - ell;
+ for (i = 1; i <= 2 * n + 2; i += 2)
+ Atflo[i] = i / 2 - ell;
+
+ for (i = 2; i <= 2 * n + 2; i += 2)
+ Atcei[i] = i / 2 - 1 + ell;
+ for (i = 1; i <= 2 * n + 2; i += 2)
+ Atcei[i] = i / 2 + ell;
+ }
+
+ /* From here on z is min (t - floor (t), ceil (t) - t) */
+ if (w < z)
+ z = w;
+ A[0] = A[1] = 0;
+ A[2] = z;
+ A[3] = 1 - A[2];
+ /* every further A_i lies exactly 1 above A_(i-2) */
+ for (i = 4; i <= 2 * n + 1; i++)
+ A[i] = A[i - 2] + 1;
+ A[2 * n + 2] = n;
+}
+
+
+/*========================================================================*/
+
+/*
+ * The Pomeranz algorithm to compute the KS distribution for sample size n
+ * at x.
+ *
+ * Returns the probability, capped at 1. NaN is returned when a work buffer
+ * cannot be allocated or when a step length A[i] - A[i-1] matches none of
+ * the four precomputed cases (the original code would then have indexed H
+ * with -1).
+ */
+static double Pomeranz (int n, double x)
+{
+    const double EPS = 1.0e-15;
+    const int ENO = 350;
+    const double RENO = ldexp (1.0, ENO);   /* for renormalization of V */
+    int coreno;                 /* counter: how many renormalizations */
+    const double t = n * x;
+    double w, sum, minsum;
+    double result = NAN;        /* returned unchanged on any failure */
+    int i, j, k, s;
+    int r1, r2;                 /* Indices i and i-1 for V[i][] */
+    int jlow, jup, klow, kup, kup0;
+    double *A;
+    double *Atflo;
+    double *Atcei;
+    double **V;
+    double **H;                 /* = pow(w, j) / Factorial(j) */
+
+    A = (double *) calloc ((size_t) (2 * n + 3), sizeof (double));
+    Atflo = (double *) calloc ((size_t) (2 * n + 3), sizeof (double));
+    Atcei = (double *) calloc ((size_t) (2 * n + 3), sizeof (double));
+    V = (double **) CreateMatrixD (2, n + 2);
+    H = (double **) CreateMatrixD (4, n + 2);
+
+    /* Both the row-pointer array and the data block must exist */
+    if (A == NULL || Atflo == NULL || Atcei == NULL ||
+        V == NULL || V[0] == NULL || H == NULL || H[0] == NULL)
+        goto cleanup;
+
+    CalcFloorCeil (n, t, A, Atflo, Atcei);
+
+    for (j = 1; j <= n + 1; j++)
+        V[0][j] = 0;
+    for (j = 2; j <= n + 1; j++)
+        V[1][j] = 0;
+    V[1][1] = RENO;
+    coreno = 1;
+
+    /* Precompute H[][] = (A[j] - A[j-1])^k / k! for speed */
+    H[0][0] = 1;
+    w = 2.0 * A[2] / n;
+    for (j = 1; j <= n + 1; j++)
+        H[0][j] = w * H[0][j - 1] / j;
+
+    H[1][0] = 1;
+    w = (1.0 - 2.0 * A[2]) / n;
+    for (j = 1; j <= n + 1; j++)
+        H[1][j] = w * H[1][j - 1] / j;
+
+    H[2][0] = 1;
+    w = A[2] / n;
+    for (j = 1; j <= n + 1; j++)
+        H[2][j] = w * H[2][j - 1] / j;
+
+    H[3][0] = 1;
+    for (j = 1; j <= n + 1; j++)
+        H[3][j] = 0;
+
+    r1 = 0;
+    r2 = 1;
+    for (i = 2; i <= 2 * n + 2; i++) {
+        jlow = 2 + (int) Atflo[i];
+        if (jlow < 1)
+            jlow = 1;
+        jup = (int) Atcei[i];
+        if (jup > n + 1)
+            jup = n + 1;
+
+        klow = 2 + (int) Atflo[i - 1];
+        if (klow < 1)
+            klow = 1;
+        kup0 = (int) Atcei[i - 1];
+
+        /* Find to which case it corresponds */
+        w = (A[i] - A[i - 1]) / n;
+        s = -1;
+        for (j = 0; j < 4; j++) {
+            if (fabs (w - H[j][1]) <= EPS) {
+                s = j;
+                break;
+            }
+        }
+        /* No case matched: give up rather than read H[-1] */
+        if (s < 0)
+            goto cleanup;
+
+        minsum = RENO;
+        r1 = (r1 + 1) & 1;      /* i - 1 */
+        r2 = (r2 + 1) & 1;      /* i */
+
+        for (j = jlow; j <= jup; j++) {
+            kup = kup0;
+            if (kup > j)
+                kup = j;
+            sum = 0;
+            for (k = kup; k >= klow; k--)
+                sum += V[r1][k] * H[s][j - k];
+            V[r2][j] = sum;
+            if (sum < minsum)
+                minsum = sum;
+        }
+
+        if (minsum < 1.0e-280) {
+            /* V is too small: renormalize to avoid underflow of probabilities */
+            for (j = jlow; j <= jup; j++)
+                V[r2][j] *= RENO;
+            coreno++;           /* keep track of log of RENO */
+        }
+    }
+
+    sum = V[r2][n + 1];
+    /* Undo the coreno renormalizations (each a factor 2^ENO) in log space */
+    w = getLogFactorial (n) - coreno * ENO * num_Ln2 + log (sum);
+    if (w >= 0.)
+        result = 1.;
+    else
+        result = exp (w);
+
+cleanup:
+    free (A);
+    free (Atflo);
+    free (Atcei);
+    if (H != NULL)
+        DeleteMatrixD (H);
+    if (V != NULL)
+        DeleteMatrixD (V);
+    return result;
+}
+
+
+/*========================================================================*/
+
+/*
+ * Closed-form values of the KS distribution function.
+ *
+ * Returns the probability for the arguments where it is known exactly, and
+ * -1.0 when none of the special cases applies.
+ */
+static double cdfSpecial (int n, double x)
+{
+    /* For nx^2 > 18, KSfbar(n, x) is smaller than 5e-16 */
+    if ((x >= 1.0) || (n * x * x >= 18.0))
+        return 1.0;
+
+    if (x <= 0.5 / n)
+        return 0.0;
+
+    if (n == 1)
+        return 2.0 * x - 1.0;
+
+    if (x <= 1.0 / n) {
+        const double base = 2.0 * x * n - 1.0;
+
+        /* Above NEXACT the product n!/n^n * base^n is formed in log space */
+        if (n > NEXACT)
+            return exp (getLogFactorial (n) + n * log (base / n));
+        return rapfac (n) * pow (base, (double) n);
+    }
+
+    if (x >= 1.0 - 1.0 / n)
+        return 1.0 - 2.0 * pow (1.0 - x, (double) n);
+
+    return -1.0;
+}
+
+
+/*========================================================================*/
+
+/*
+ * Cumulative probability of the two-sided Kolmogorov-Smirnov distribution
+ * with sample size n at x.
+ *
+ * Dispatches to the exact special cases, the Durbin matrix algorithm, the
+ * Pomeranz algorithm, the complement of KSfbar or the Pelz approximation,
+ * depending on n and n * x^2.
+ *
+ * A NaN x is returned unchanged. A sample size n < 1 that is not already
+ * settled by cdfSpecial yields 0.0: that is the value cdfSpecial returns
+ * for n == 0, and it keeps a negative n away from the matrix code, where it
+ * would produce a negative matrix order and unbounded recursion.
+ */
+double KScdf (int n, double x)
+{
+    const double w = n * x * x;
+    double u;
+
+    if (isnan (x))
+        return x;
+
+    u = cdfSpecial (n, x);
+    if (u >= 0.0)
+        return u;
+
+    if (n < 1)
+        return 0.0;
+
+    if (n <= NEXACT) {
+        if (w < 0.754693)
+            return DurbinMatrix (n, x);
+        if (w < 4.0)
+            return Pomeranz (n, x);
+        return 1.0 - KSfbar (n, x);
+    }
+
+    /* w * x * n equals n^2 * x^3 */
+    if ((w * x * n <= 7.0) && (n <= NKOLMO))
+        return DurbinMatrix (n, x);
+
+    return Pelz (n, x);
+}
+
+
+/*=========================================================================*/
+
+/*
+ * Closed-form values of the complementary KS distribution function.
+ *
+ * Returns the probability for the arguments where it is known exactly, and
+ * -1.0 when none of the special cases applies.
+ */
+static double fbarSpecial (int n, double x)
+{
+    const double nx2 = n * x * x;
+
+    if ((x >= 1.0) || (nx2 >= 370.0))
+        return 0.0;
+    if ((x <= 0.5 / n) || (nx2 <= 0.0274))
+        return 1.0;
+    if (n == 1)
+        return 2.0 - 2.0 * x;
+
+    if (x <= 1.0 / n) {
+        const double base = 2.0 * x * n - 1.0;
+
+        /* Above NEXACT the product n!/n^n * base^n is formed in log space */
+        if (n > NEXACT)
+            return 1.0 - exp (getLogFactorial (n) + n * log (base / n));
+        return 1.0 - rapfac (n) * pow (base, (double) n);
+    }
+
+    if (x >= 1.0 - 1.0 / n)
+        return 2.0 * pow (1.0 - x, (double) n);
+
+    return -1.0;
+}
+
+
+/*========================================================================*/
+
+/*
+ * Complementary cumulative probability of the two-sided Kolmogorov-Smirnov
+ * distribution with sample size n at x.
+ *
+ * Exact special cases come from fbarSpecial. Otherwise the result is either
+ * twice the one-sided upper tail (KSPlusbarUpper) or 1 - KScdf; the switch
+ * between the two is at n * x^2 = 4 for n <= NEXACT and at 2.65 above it.
+ */
+double KSfbar (int n, double x)
+{
+    const double nx2 = n * x * x;
+    const double special = fbarSpecial (n, x);
+    int upper_tail;
+
+    if (special >= 0.0)
+        return special;
+
+    if (n <= NEXACT)
+        upper_tail = !(nx2 < 4.0);
+    else
+        upper_tail = (nx2 >= 2.65);
+
+    if (upper_tail)
+        return 2.0 * KSPlusbarUpper (n, x);
+    return 1.0 - KScdf (n, x);
+}
+
+
+/*=========================================================================
+
+The following implements the Durbin matrix algorithm and was programmed by
+G. Marsaglia, Wai Wan Tsang and Jingbo Wong.
+
+I have made small modifications in their program. (Richard Simard)
+
+
+
+=========================================================================*/
+
+/*
+ The C program to compute Kolmogorov's distribution
+
+ K(n,d) = Prob(D_n < d), where
+
+ D_n = max(x_1-0/n,x_2-1/n...,x_n-(n-1)/n,1/n-x_1,2/n-x_2,...,n/n-x_n)
+
+ with x_1<x_2,...<x_n a purported set of n independent uniform [0,1)
+ random variables sorted into increasing order.
+ See G. Marsaglia, Wai Wan Tsang and Jingbo Wong,
+ J.Stat.Software, 8, 18, pp 1--4, (2003).
+*/
+
+#define NORM 1.0e140
+#define INORM 1.0e-140
+#define LOGNORM 140
+
+
+/* Matrix product */
+static void mMultiply (double *A, double *B, double *C, int m);
+
+/* Matrix power */
+static void mPower (double *A, int eA, double *V, int *eV, int m, int n);
+
+
+/*
+ * Durbin matrix algorithm: K(n,d) = Prob(D_n < d).
+ *
+ * Builds the m x m matrix H with m = 2k - 1 and k = (int) (n * d) + 1,
+ * raises it to the n-th power with mPower, and scales element (k-1, k-1) by
+ * n! / n^n. mPower reports the result with a separate decimal exponent eQ,
+ * which is applied at the very end.
+ *
+ * Returns NaN if the matrix order would not be positive or if a work matrix
+ * cannot be allocated.
+ */
+static double DurbinMatrix (int n, double d)
+{
+    int k, m, i, j, g, eH, eQ;
+    double h, s, *H, *Q;
+    size_t cells;
+    /* The shortcut below is disabled. According to the original authors it
+       must be left out when more than 7 digits of accuracy are required in
+       the right tail. */
+#if 0
+    s = d * d * n;
+    if (s > 7.24 || (s > 3.76 && n > 99))
+        return 1 - 2 * exp (-(2.000071 + .331 / sqrt (n) + 1.409 / n) * s);
+#endif
+    k = (int) (n * d) + 1;
+    m = 2 * k - 1;
+    h = k - n * d;
+    if (m < 1)
+        return NAN;
+
+    /* Size the buffers in size_t so that m * m cannot overflow an int */
+    cells = (size_t) m * (size_t) m;
+    H = (double *) malloc (cells * sizeof (double));
+    Q = (double *) malloc (cells * sizeof (double));
+    if (H == NULL || Q == NULL) {
+        free (H);
+        free (Q);
+        return NAN;
+    }
+
+    /* H[i][j] = 1 where j <= i + 1, and 0 above that */
+    for (i = 0; i < m; i++)
+        for (j = 0; j < m; j++)
+            if (i - j + 1 < 0)
+                H[i * m + j] = 0;
+            else
+                H[i * m + j] = 1;
+    /* Subtract powers of h from the first column and from the last row */
+    for (i = 0; i < m; i++) {
+        H[i * m] -= pow (h, (double) (i + 1));
+        H[(m - 1) * m + i] -= pow (h, (double) (m - i));
+    }
+    H[(m - 1) * m] += (2 * h - 1 > 0 ? pow (2 * h - 1, (double) m) : 0);
+    /* Divide H[i][j] by (i - j + 1)! */
+    for (i = 0; i < m; i++)
+        for (j = 0; j < m; j++)
+            if (i - j + 1 > 0)
+                for (g = 1; g <= i - j + 1; g++)
+                    H[i * m + j] /= g;
+    eH = 0;
+    mPower (H, eH, Q, &eQ, m, n);
+    s = Q[(k - 1) * m + k - 1];
+
+    /* Multiply by n! / n^n one factor at a time, rescaling by NORM whenever
+       s drops below INORM and recording that in the exponent eQ */
+    for (i = 1; i <= n; i++) {
+        s = s * (double) i / n;
+        if (s < INORM) {
+            s *= NORM;
+            eQ -= LOGNORM;
+        }
+    }
+    s *= pow (10., (double) eQ);
+    free (H);
+    free (Q);
+    return s;
+}
+
+
+/*
+ * Matrix product C = A * B for m x m matrices stored row by row.
+ * C must not be the same buffer as A or B; A and B may coincide.
+ */
+static void mMultiply (double *A, double *B, double *C, int m)
+{
+    int row, col, idx;
+
+    for (row = 0; row < m; row++) {
+        const double *lhs = A + row * m;
+        double *out = C + row * m;
+
+        for (col = 0; col < m; col++) {
+            double acc = 0.;
+
+            for (idx = 0; idx < m; idx++)
+                acc += lhs[idx] * B[idx * m + col];
+            out[col] = acc;
+        }
+    }
+}
+
+
+/*
+ * Scales every element of the m x m matrix V by INORM and adds LOGNORM to
+ * the decimal exponent *p that accompanies the matrix.
+ */
+static void renormalize (double *V, int m, int *p)
+{
+    const int cells = m * m;
+    int idx = 0;
+
+    while (idx < cells) {
+        V[idx] *= INORM;
+        idx++;
+    }
+    *p += LOGNORM;
+}
+
+
+/*
+ * Matrix power by repeated squaring: V * 10^(*eV) = (A * 10^eA)^n for
+ * m x m matrices stored row by row.
+ *
+ * A, eA: base matrix and its decimal exponent.
+ * V, eV: receive the result and its decimal exponent; V must not be A.
+ * n:     the power, expected to be >= 1. Smaller values are treated like 1
+ *        so that the recursion always terminates (n / 2 never reaches 1
+ *        when n starts at 0 or below).
+ *
+ * Whenever the middle element of an intermediate matrix exceeds NORM the
+ * matrix is rescaled by renormalize. If the scratch matrix cannot be
+ * allocated, V is filled with NaN so that the failure shows in the result.
+ */
+static void mPower (double *A, int eA, double *V, int *eV, int m, int n)
+{
+    const size_t cells = (size_t) m * (size_t) m;
+    double *B;
+    int eB;
+    size_t i;
+
+    if (n <= 1) {
+        for (i = 0; i < cells; i++)
+            V[i] = A[i];
+        *eV = eA;
+        return;
+    }
+    mPower (A, eA, V, eV, m, n / 2);
+    B = (double *) malloc (cells * sizeof (double));
+    if (B == NULL) {
+        for (i = 0; i < cells; i++)
+            V[i] = NAN;
+        *eV = 0;
+        return;
+    }
+    /* B = V^2, so its exponent is twice that of V */
+    mMultiply (V, V, B, m);
+    eB = 2 * (*eV);
+    if (B[(m / 2) * m + (m / 2)] > NORM)
+        renormalize (B, m, &eB);
+
+    if (n % 2 == 0) {
+        for (i = 0; i < cells; i++)
+            V[i] = B[i];
+        *eV = eB;
+    } else {
+        /* odd power: one more factor of A */
+        mMultiply (A, B, V, m);
+        *eV = eA + eB;
+    }
+
+    if (V[(m / 2) * m + (m / 2)] > NORM)
+        renormalize (V, m, eV);
+    free (B);
+}
diff --git a/database/KolmogorovSmirnovDist.h b/database/KolmogorovSmirnovDist.h
new file mode 100644
index 000000000..cf455042a
--- /dev/null
+++ b/database/KolmogorovSmirnovDist.h
@@ -0,0 +1,91 @@
+// SPDX-License-Identifier: GPL-3.0
+
+#ifndef KOLMOGOROVSMIRNOVDIST_H
+#define KOLMOGOROVSMIRNOVDIST_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
+/********************************************************************
+ *
+ * File: KolmogorovSmirnovDist.h
+ * Environment: ISO C99 or ANSI C89
+ * Author: Richard Simard
+ * Organization: DIRO, Université de Montréal
+ * Date: 1 February 2012
+ * Version 1.1
+ *
+ * Copyright March 2010 by Université de Montréal,
+ Richard Simard and Pierre L'Ecuyer
+ =====================================================================
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, version 3 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+ =====================================================================*/
+/*
+ *
+ * The Kolmogorov-Smirnov test statistic D_n is defined by
+ *
+ * D_n = sup_x |F(x) - S_n(x)|
+ *
+ * where n is the sample size, F(x) is a completely specified theoretical
+ * distribution, and S_n(x) is an empirical distribution function.
+ *
+ *
+ * The function
+ *
+ * double KScdf (int n, double x);
+ *
+ * computes the cumulative probability P[D_n <= x] of the 2-sided 1-sample
+ * Kolmogorov-Smirnov distribution with sample size n at x.
+ * It returns at least 13 decimal digits of precision for n <= 500,
+ * at least 7 decimal digits of precision for 500 < n <= 100000,
+ * and a few correct decimal digits for n > 100000.
+ *
+ */
+
+double KScdf (int n, double x);
+
+
+/*
+ * The function
+ *
+ * double KSfbar (int n, double x);
+ *
+ * computes the complementary cumulative probability P[D_n >= x] of the
+ * 2-sided 1-sample Kolmogorov-Smirnov distribution with sample size n at x.
+ * It returns at least 10 decimal digits of precision for n <= 500,
+ * at least 6 decimal digits of precision for 500 < n <= 200000,
+ * and a few correct decimal digits for n > 200000.
+ *
+ */
+
+double KSfbar (int n, double x);
+
+
+/*
+ * NOTE:
+ * The ISO C99 function log1p of the standard math library does not exist in
+ * ANSI C89. A replacement is programmed explicitly in KolmogorovSmirnovDist.c,
+ * but it is disabled there by an "#if 0" preprocessor condition.
+
+ * For ANSI C89 compilers, change that preprocessor condition to make the
+ * replacement available.
+ */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/database/engine/journalfile.c b/database/engine/journalfile.c
index 1541eb10a..0b3d3eeb8 100644
--- a/database/engine/journalfile.c
+++ b/database/engine/journalfile.c
@@ -84,6 +84,7 @@ void * wal_get_transaction_buffer(struct rrdengine_worker_config* wc, unsigned s
if (unlikely(ret)) {
fatal("posix_memalign:%s", strerror(ret));
}
+ memset(ctx->commit_log.buf, 0, buf_size);
buf_pos = ctx->commit_log.buf_pos = 0;
ctx->commit_log.buf_size = buf_size;
}
diff --git a/database/engine/metadata_log/logfile.c b/database/engine/metadata_log/logfile.c
index f5bd9b2d2..07eb9b6fe 100644
--- a/database/engine/metadata_log/logfile.c
+++ b/database/engine/metadata_log/logfile.c
@@ -375,19 +375,15 @@ static int scan_metalog_files(struct metalog_instance *ctx)
struct metalog_pluginsd_state metalog_parser_state;
metalog_pluginsd_state_init(&metalog_parser_state, ctx);
- PARSER_USER_OBJECT metalog_parser_object;
- metalog_parser_object.enabled = cd.enabled;
- metalog_parser_object.host = ctx->rrdeng_ctx->host;
- metalog_parser_object.cd = &cd;
- metalog_parser_object.trust_durations = 0;
- metalog_parser_object.private = &metalog_parser_state;
+ PARSER_USER_OBJECT metalog_parser_object = {
+ .enabled = cd.enabled,
+ .host = ctx->rrdeng_ctx->host,
+ .cd = &cd,
+ .trust_durations = 0,
+ .private = &metalog_parser_state
+ };
PARSER *parser = parser_init(metalog_parser_object.host, &metalog_parser_object, NULL, PARSER_INPUT_SPLIT);
- if (unlikely(!parser)) {
- error("Failed to initialize metadata log parser.");
- failed_to_load = matched_files;
- goto after_failed_to_parse;
- }
parser_add_keyword(parser, PLUGINSD_KEYWORD_HOST, metalog_pluginsd_host);
parser_add_keyword(parser, PLUGINSD_KEYWORD_GUID, pluginsd_guid);
parser_add_keyword(parser, PLUGINSD_KEYWORD_CONTEXT, pluginsd_context);
@@ -428,10 +424,8 @@ static int scan_metalog_files(struct metalog_instance *ctx)
size_t count __maybe_unused = metalog_parser_object.count;
debug(D_METADATALOG, "Parsing count=%u", (unsigned)count);
-after_failed_to_parse:
freez(metalogfiles);
-
return matched_files;
}
diff --git a/database/engine/pagecache.c b/database/engine/pagecache.c
index 40e24b321..cddbf9e1f 100644
--- a/database/engine/pagecache.c
+++ b/database/engine/pagecache.c
@@ -356,7 +356,7 @@ static void pg_cache_evict_unsafe(struct rrdengine_instance *ctx, struct rrdeng_
{
struct page_cache_descr *pg_cache_descr = descr->pg_cache_descr;
- freez(pg_cache_descr->page);
+ dbengine_page_free(pg_cache_descr->page);
pg_cache_descr->page = NULL;
pg_cache_descr->flags &= ~RRD_PAGE_POPULATED;
pg_cache_release_pages_unsafe(ctx, 1);
@@ -437,7 +437,6 @@ uint8_t pg_cache_punch_hole(struct rrdengine_instance *ctx, struct rrdeng_page_d
ret = JudyLDel(&page_index->JudyL_array, (Word_t)(descr->start_time / USEC_PER_SEC), PJE0);
if (unlikely(0 == ret)) {
uv_rwlock_wrunlock(&page_index->lock);
- error("Page under deletion was not in index.");
if (unlikely(debug_flags & D_RRDENGINE)) {
print_page_descr(descr);
}
@@ -1067,10 +1066,13 @@ pg_cache_lookup_next(struct rrdengine_instance *ctx, struct pg_cache_page_index
page_not_in_cache = 0;
uv_rwlock_rdlock(&page_index->lock);
+ int retry_count = 0;
while (1) {
descr = find_first_page_in_time_range(page_index, start_time, end_time);
- if (NULL == descr || 0 == descr->page_length) {
+ if (NULL == descr || 0 == descr->page_length || retry_count == MAX_PAGE_CACHE_RETRY_WAIT) {
/* non-empty page not found */
+ if (retry_count == MAX_PAGE_CACHE_RETRY_WAIT)
+ error_report("Page cache timeout while waiting for page %p : returning FAIL", descr);
uv_rwlock_rdunlock(&page_index->lock);
pg_cache_release_pages(ctx, 1);
@@ -1114,7 +1116,11 @@ pg_cache_lookup_next(struct rrdengine_instance *ctx, struct pg_cache_page_index
print_page_cache_descr(descr);
if (!(flags & RRD_PAGE_POPULATED))
page_not_in_cache = 1;
- pg_cache_wait_event_unsafe(descr);
+
+ if (pg_cache_timedwait_event_unsafe(descr, 1) == UV_ETIMEDOUT) {
+ error_report("Page cache timeout while waiting for page %p : retry count = %d", descr, retry_count);
+ ++retry_count;
+ }
rrdeng_page_descr_mutex_unlock(ctx, descr);
/* reset scan to find again */
@@ -1222,7 +1228,7 @@ void free_page_cache(struct rrdengine_instance *ctx)
/* Check rrdenglocking.c */
pg_cache_descr = descr->pg_cache_descr;
if (pg_cache_descr->flags & RRD_PAGE_POPULATED) {
- freez(pg_cache_descr->page);
+ dbengine_page_free(pg_cache_descr->page);
bytes_freed += RRDENG_BLOCK_SIZE;
}
rrdeng_destroy_pg_cache_descr(ctx, pg_cache_descr);
diff --git a/database/engine/pagecache.h b/database/engine/pagecache.h
index d5350ef56..0ba4639ce 100644
--- a/database/engine/pagecache.h
+++ b/database/engine/pagecache.h
@@ -11,6 +11,7 @@ struct extent_info;
struct rrdeng_page_descr;
#define INVALID_TIME (0)
+#define MAX_PAGE_CACHE_RETRY_WAIT (3)
/* Page flags */
#define RRD_PAGE_DIRTY (1LU << 0)
diff --git a/database/engine/rrdengine.c b/database/engine/rrdengine.c
index a975cfa6e..9f43f4456 100644
--- a/database/engine/rrdengine.c
+++ b/database/engine/rrdengine.c
@@ -11,8 +11,24 @@ rrdeng_stats_t global_flushing_pressure_page_deletions = 0;
static unsigned pages_per_extent = MAX_PAGES_PER_EXTENT;
+#if WORKER_UTILIZATION_MAX_JOB_TYPES < (RRDENG_MAX_OPCODE + 2)
+#error Please increase WORKER_UTILIZATION_MAX_JOB_TYPES to at least (RRDENG_MAX_OPCODE + 2)
+#endif
+
+/*
+ * Obtains one RRDENG_BLOCK_SIZE page for the dbengine page cache through
+ * netdata_mmap(). A failed mapping is reported with fatal().
+ */
+void *dbengine_page_alloc() {
+    void *mapping = netdata_mmap(NULL, RRDENG_BLOCK_SIZE, MAP_PRIVATE, enable_ksm);
+
+    if (mapping == NULL)
+        fatal("Cannot allocate dbengine page cache page, with mmap()");
+
+    return mapping;
+}
+
+/*
+ * Releases a page obtained from dbengine_page_alloc().
+ *
+ * This replaces freez() at the call sites changed in this commit, so a NULL
+ * page is accepted and ignored, as free() would. A failing munmap() is
+ * logged instead of being dropped silently.
+ */
+void dbengine_page_free(void *page) {
+    if (!page)
+        return;
+
+    if (munmap(page, RRDENG_BLOCK_SIZE) != 0)
+        error("Cannot release dbengine page cache page, with munmap()");
+}
+
static void sanity_check(void)
{
+ BUILD_BUG_ON(WORKER_UTILIZATION_MAX_JOB_TYPES < (RRDENG_MAX_OPCODE + 2));
+
/* Magic numbers must fit in the super-blocks */
BUILD_BUG_ON(strlen(RRDENG_DF_MAGIC) > RRDENG_MAGIC_SZ);
BUILD_BUG_ON(strlen(RRDENG_JF_MAGIC) > RRDENG_MAGIC_SZ);
@@ -176,7 +192,7 @@ void read_cached_extent_cb(struct rrdengine_worker_config* wc, unsigned idx, str
struct extent_info *extent = xt_io_descr->descr_array[0]->extent;
for (i = 0 ; i < xt_io_descr->descr_count; ++i) {
- page = mallocz(RRDENG_BLOCK_SIZE);
+ page = dbengine_page_alloc();
descr = xt_io_descr->descr_array[i];
for (j = 0, page_offset = 0 ; j < extent->number_of_pages ; ++j) {
/* care, we don't hold the descriptor mutex */
@@ -331,7 +347,7 @@ after_crc_check:
continue; /* Failed to reserve a suitable page */
is_prefetched_page = 1;
}
- page = mallocz(RRDENG_BLOCK_SIZE);
+ page = dbengine_page_alloc();
/* care, we don't hold the descriptor mutex */
if (have_read_error) {
@@ -735,6 +751,7 @@ static int do_flush_pages(struct rrdengine_worker_config* wc, int force, struct
fatal("posix_memalign:%s", strerror(ret));
/* freez(xt_io_descr);*/
}
+ memset(xt_io_descr->buf, 0, ALIGN_BYTES_CEILING(size_bytes));
(void) memcpy(xt_io_descr->descr_array, eligible_pages, sizeof(struct rrdeng_page_descr *) * count);
xt_io_descr->descr_count = count;
@@ -1074,13 +1091,17 @@ void async_cb(uv_async_t *handle)
void timer_cb(uv_timer_t* handle)
{
+ worker_is_busy(RRDENG_MAX_OPCODE + 1);
+
struct rrdengine_worker_config* wc = handle->data;
struct rrdengine_instance *ctx = wc->ctx;
uv_stop(handle->loop);
uv_update_time(handle->loop);
- if (unlikely(!ctx->metalog_ctx->initialized))
+ if (unlikely(!ctx->metalog_ctx->initialized)) {
+ worker_is_idle();
return; /* Wait for the metadata log to initialize */
+ }
rrdeng_test_quota(wc);
debug(D_RRDENGINE, "%s: timeout reached.", __func__);
if (likely(!wc->now_deleting_files && !wc->now_invalidating_dirty_pages)) {
@@ -1122,12 +1143,26 @@ void timer_cb(uv_timer_t* handle)
debug(D_RRDENGINE, "%s", get_rrdeng_statistics(wc->ctx, buf, sizeof(buf)));
}
#endif
+
+ worker_is_idle();
}
#define MAX_CMD_BATCH_SIZE (256)
void rrdeng_worker(void* arg)
{
+ worker_register("DBENGINE");
+ worker_register_job_name(RRDENG_NOOP, "noop");
+ worker_register_job_name(RRDENG_READ_PAGE, "page read");
+ worker_register_job_name(RRDENG_READ_EXTENT, "extent read");
+ worker_register_job_name(RRDENG_COMMIT_PAGE, "commit");
+ worker_register_job_name(RRDENG_FLUSH_PAGES, "flush");
+ worker_register_job_name(RRDENG_SHUTDOWN, "shutdown");
+ worker_register_job_name(RRDENG_INVALIDATE_OLDEST_MEMORY_PAGE, "page lru");
+ worker_register_job_name(RRDENG_QUIESCE, "quiesce");
+ worker_register_job_name(RRDENG_MAX_OPCODE, "cleanup");
+ worker_register_job_name(RRDENG_MAX_OPCODE + 1, "timer");
+
struct rrdengine_worker_config* wc = arg;
struct rrdengine_instance *ctx = wc->ctx;
uv_loop_t* loop;
@@ -1175,8 +1210,11 @@ void rrdeng_worker(void* arg)
fatal_assert(0 == uv_timer_start(&timer_req, timer_cb, TIMER_PERIOD_MS, TIMER_PERIOD_MS));
shutdown = 0;
+ int set_name = 0;
while (likely(shutdown == 0 || rrdeng_threads_alive(wc))) {
+ worker_is_idle();
uv_run(loop, UV_RUN_DEFAULT);
+ worker_is_busy(RRDENG_MAX_OPCODE);
rrdeng_cleanup_finished_threads(wc);
/* wait for commands */
@@ -1193,6 +1231,9 @@ void rrdeng_worker(void* arg)
opcode = cmd.opcode;
++cmd_batch_size;
+ if(likely(opcode != RRDENG_NOOP))
+ worker_is_busy(opcode);
+
switch (opcode) {
case RRDENG_NOOP:
/* the command queue was empty, do nothing */
@@ -1219,6 +1260,10 @@ void rrdeng_worker(void* arg)
break;
case RRDENG_READ_EXTENT:
do_read_extent(wc, cmd.read_extent.page_cache_descr, cmd.read_extent.page_count, 1);
+ if (unlikely(!set_name)) {
+ set_name = 1;
+ uv_thread_set_name_np(ctx->worker_config.thread, "DBENGINE");
+ }
break;
case RRDENG_COMMIT_PAGE:
do_commit_transaction(wc, STORE_DATA, NULL);
@@ -1265,6 +1310,7 @@ void rrdeng_worker(void* arg)
fatal_assert(0 == uv_loop_close(loop));
freez(loop);
+ worker_unregister();
return;
error_after_timer_init:
@@ -1277,6 +1323,7 @@ error_after_loop_init:
wc->error = UV_EAGAIN;
/* wake up initialization thread */
completion_mark_complete(&ctx->rrdengine_completion);
+ worker_unregister();
}
/* C entry point for development purposes
diff --git a/database/engine/rrdengine.h b/database/engine/rrdengine.h
index b0c8e4d02..c6f89a37a 100644
--- a/database/engine/rrdengine.h
+++ b/database/engine/rrdengine.h
@@ -34,6 +34,28 @@ struct rrdengine_instance;
#define RRDENG_FILE_NUMBER_SCAN_TMPL "%1u-%10u"
#define RRDENG_FILE_NUMBER_PRINT_TMPL "%1.1u-%10.10u"
+struct rrdeng_collect_handle { // dbengine collection state; allocated in rrdeng_store_metric_init(), freed in rrdeng_store_metric_finalize()
+ struct rrdeng_page_descr *descr, *prev_descr; // both are reset to NULL by rrdeng_store_metric_init()
+ unsigned long page_correlation_id; // NOTE(review): not assigned in the visible hunks - confirm where it is set
+ struct rrdengine_instance *ctx; // engine instance obtained from get_rrdeng_ctx_from_host()
+ // set to 1 when this dimension is not page aligned with the other dimensions in the chart
+ uint8_t unaligned_page;
+};
+
+struct rrdeng_query_handle { // dbengine query state; allocated in rrdeng_load_metric_init(), freed in rrdeng_load_metric_finalize()
+ struct rrdeng_page_descr *descr; // page currently referenced by the query, released with pg_cache_put(); NULL before the first page is loaded
+ struct rrdengine_instance *ctx; // engine instance obtained from get_rrdeng_ctx_from_host()
+ struct pg_cache_page_index *page_index; // filled in by pg_cache_preload()
+ time_t next_page_time; // seconds; where the next page lookup starts, INVALID_TIME once the query is finished
+ time_t now; // seconds; timestamp of the last point returned
+ unsigned position; // index inside page[] of the last point returned
+ unsigned entries; // page_length / sizeof(storage_number)
+ storage_number *page; // descr->pg_cache_descr->page of the current page
+ usec_t page_end_time; // end time of the current page as reported by pg_cache_atomic_get_pg_info()
+ uint32_t page_length; // length of the current page as reported by pg_cache_atomic_get_pg_info()
+ usec_t dt; // (page_end_time - start_time) / (entries - 1), or 0 when the page has a single entry
+ time_t dt_sec; // dt / USEC_PER_SEC
+};
typedef enum {
RRDENGINE_STATUS_UNINITIALIZED = 0,
@@ -227,6 +249,9 @@ struct rrdengine_instance {
struct rrdengine_statistics stats;
};
+extern void *dbengine_page_alloc(void);
+extern void dbengine_page_free(void *page);
+
extern int init_rrd_files(struct rrdengine_instance *ctx);
extern void finalize_rrd_files(struct rrdengine_instance *ctx);
extern void rrdeng_test_quota(struct rrdengine_worker_config* wc);
diff --git a/database/engine/rrdengineapi.c b/database/engine/rrdengineapi.c
index 6ebee1459..76010a7c2 100755
--- a/database/engine/rrdengineapi.c
+++ b/database/engine/rrdengineapi.c
@@ -126,12 +126,13 @@ void rrdeng_store_metric_init(RRDDIM *rd)
struct pg_cache_page_index *page_index;
ctx = get_rrdeng_ctx_from_host(rd->rrdset->rrdhost);
- handle = &rd->state->handle.rrdeng;
- handle->ctx = ctx;
+ handle = callocz(1, sizeof(struct rrdeng_collect_handle));
+ handle->ctx = ctx;
handle->descr = NULL;
handle->prev_descr = NULL;
handle->unaligned_page = 0;
+ rd->state->handle = (STORAGE_COLLECT_HANDLE *)handle;
page_index = rd->state->page_index;
uv_rwlock_wrlock(&page_index->lock);
@@ -162,7 +163,7 @@ void rrdeng_store_metric_flush_current_page(RRDDIM *rd)
struct rrdengine_instance *ctx;
struct rrdeng_page_descr *descr;
- handle = &rd->state->handle.rrdeng;
+ handle = (struct rrdeng_collect_handle *)rd->state->handle;
ctx = handle->ctx;
if (unlikely(!ctx))
return;
@@ -202,7 +203,7 @@ void rrdeng_store_metric_flush_current_page(RRDDIM *rd)
/* handle->prev_descr = descr;*/
}
} else {
- freez(descr->pg_cache_descr->page);
+ dbengine_page_free(descr->pg_cache_descr->page);
rrdeng_destroy_pg_cache_descr(ctx, descr->pg_cache_descr);
freez(descr);
}
@@ -211,14 +212,13 @@ void rrdeng_store_metric_flush_current_page(RRDDIM *rd)
void rrdeng_store_metric_next(RRDDIM *rd, usec_t point_in_time, storage_number number)
{
- struct rrdeng_collect_handle *handle;
+ struct rrdeng_collect_handle *handle = (struct rrdeng_collect_handle *)rd->state->handle;
struct rrdengine_instance *ctx;
struct page_cache *pg_cache;
struct rrdeng_page_descr *descr;
storage_number *page;
uint8_t must_flush_unaligned_page = 0, perfect_page_alignment = 0;
- handle = &rd->state->handle.rrdeng;
ctx = handle->ctx;
pg_cache = &ctx->pg_cache;
descr = handle->descr;
@@ -301,7 +301,7 @@ int rrdeng_store_metric_finalize(RRDDIM *rd)
struct pg_cache_page_index *page_index;
uint8_t can_delete_metric = 0;
- handle = &rd->state->handle.rrdeng;
+ handle = (struct rrdeng_collect_handle *)rd->state->handle;
ctx = handle->ctx;
page_index = rd->state->page_index;
rrdeng_store_metric_flush_current_page(rd);
@@ -314,6 +314,7 @@ int rrdeng_store_metric_finalize(RRDDIM *rd)
can_delete_metric = 1;
}
uv_rwlock_wrunlock(&page_index->lock);
+ freez(handle);
return can_delete_metric;
}
@@ -406,6 +407,7 @@ unsigned rrdeng_variable_step_boundaries(RRDSET *st, time_t start_time, time_t e
is_first_region_initialized = 0;
region_points = 0;
+ int is_out_of_order_reported = 0;
/* pages loop */
for (i = 0, curr = NULL, prev = NULL ; i < pages_nr ; ++i) {
old_prev = prev;
@@ -446,7 +448,7 @@ unsigned rrdeng_variable_step_boundaries(RRDSET *st, time_t start_time, time_t e
is_metric_out_of_order = 1;
if (is_metric_earlier_than_range || unlikely(is_metric_out_of_order)) {
if (unlikely(is_metric_out_of_order))
- info("Ignoring metric with out of order timestamp.");
+ is_out_of_order_reported++;
continue; /* next entry */
}
/* here is a valid metric */
@@ -519,6 +521,8 @@ unsigned rrdeng_variable_step_boundaries(RRDSET *st, time_t start_time, time_t e
freez(region_info_array);
}
}
+ if (is_out_of_order_reported)
+ info("Ignored %d metrics with out of order timestamp in %u regions.", is_out_of_order_reported, regions);
return regions;
}
@@ -535,12 +539,14 @@ void rrdeng_load_metric_init(RRDDIM *rd, struct rrddim_query_handle *rrdimm_hand
ctx = get_rrdeng_ctx_from_host(rd->rrdset->rrdhost);
rrdimm_handle->start_time = start_time;
rrdimm_handle->end_time = end_time;
- handle = &rrdimm_handle->rrdeng;
+
+ handle = callocz(1, sizeof(struct rrdeng_query_handle));
handle->next_page_time = start_time;
handle->now = start_time;
handle->position = 0;
handle->ctx = ctx;
handle->descr = NULL;
+ rrdimm_handle->handle = (STORAGE_QUERY_HANDLE *)handle;
pages_nr = pg_cache_preload(ctx, rd->state->rrdeng_uuid, start_time * USEC_PER_SEC, end_time * USEC_PER_SEC,
NULL, &handle->page_index);
if (unlikely(NULL == handle->page_index || 0 == pages_nr))
@@ -548,102 +554,109 @@ void rrdeng_load_metric_init(RRDDIM *rd, struct rrddim_query_handle *rrdimm_hand
handle->next_page_time = INVALID_TIME;
}
-/* Returns the metric and sets its timestamp into current_time */
-storage_number rrdeng_load_metric_next(struct rrddim_query_handle *rrdimm_handle, time_t *current_time)
-{
- struct rrdeng_query_handle *handle;
- struct rrdengine_instance *ctx;
- struct rrdeng_page_descr *descr;
- storage_number *page, ret;
- unsigned position, entries;
- usec_t next_page_time = 0, current_position_time, page_end_time = 0;
+static int rrdeng_load_page_next(struct rrddim_query_handle *rrdimm_handle) {
+ struct rrdeng_query_handle *handle = (struct rrdeng_query_handle *)rrdimm_handle->handle;
+
+ struct rrdengine_instance *ctx = handle->ctx;
+ struct rrdeng_page_descr *descr = handle->descr;
+
uint32_t page_length;
+ usec_t page_end_time;
+ unsigned position;
- handle = &rrdimm_handle->rrdeng;
- if (unlikely(INVALID_TIME == handle->next_page_time)) {
- return SN_EMPTY_SLOT;
- }
- ctx = handle->ctx;
- if (unlikely(NULL == (descr = handle->descr))) {
- /* it's the first call */
- next_page_time = handle->next_page_time * USEC_PER_SEC;
- } else {
- pg_cache_atomic_get_pg_info(descr, &page_end_time, &page_length);
- }
- position = handle->position + 1;
+ if (likely(descr)) {
+ // Drop old page's reference
- if (unlikely(NULL == descr ||
- position >= (page_length / sizeof(storage_number)))) {
- /* We need to get a new page */
- if (descr) {
- /* Drop old page's reference */
#ifdef NETDATA_INTERNAL_CHECKS
- rrd_stat_atomic_add(&ctx->stats.metric_API_consumers, -1);
+ rrd_stat_atomic_add(&ctx->stats.metric_API_consumers, -1);
#endif
- pg_cache_put(ctx, descr);
- handle->descr = NULL;
- handle->next_page_time = (page_end_time / USEC_PER_SEC) + 1;
- if (unlikely(handle->next_page_time > rrdimm_handle->end_time)) {
- goto no_more_metrics;
- }
- next_page_time = handle->next_page_time * USEC_PER_SEC;
- }
- descr = pg_cache_lookup_next(ctx, handle->page_index, &handle->page_index->id,
- next_page_time, rrdimm_handle->end_time * USEC_PER_SEC);
- if (NULL == descr) {
- goto no_more_metrics;
- }
+ pg_cache_put(ctx, descr);
+ handle->descr = NULL;
+ handle->next_page_time = (handle->page_end_time / USEC_PER_SEC) + 1;
+
+ if (unlikely(handle->next_page_time > rrdimm_handle->end_time))
+ return 1;
+ }
+
+ usec_t next_page_time = handle->next_page_time * USEC_PER_SEC;
+ descr = pg_cache_lookup_next(ctx, handle->page_index, &handle->page_index->id, next_page_time, rrdimm_handle->end_time * USEC_PER_SEC);
+ if (NULL == descr)
+ return 1;
+
#ifdef NETDATA_INTERNAL_CHECKS
- rrd_stat_atomic_add(&ctx->stats.metric_API_consumers, 1);
+ rrd_stat_atomic_add(&ctx->stats.metric_API_consumers, 1);
#endif
- handle->descr = descr;
- pg_cache_atomic_get_pg_info(descr, &page_end_time, &page_length);
- if (unlikely(INVALID_TIME == descr->start_time ||
- INVALID_TIME == page_end_time)) {
- goto no_more_metrics;
- }
- if (unlikely(descr->start_time != page_end_time && next_page_time > descr->start_time)) {
- /* we're in the middle of the page somewhere */
- entries = page_length / sizeof(storage_number);
- position = ((uint64_t)(next_page_time - descr->start_time)) * (entries - 1) /
- (page_end_time - descr->start_time);
- } else {
- position = 0;
- }
+
+ handle->descr = descr;
+ pg_cache_atomic_get_pg_info(descr, &page_end_time, &page_length);
+ if (unlikely(INVALID_TIME == descr->start_time || INVALID_TIME == page_end_time))
+ return 1;
+
+ if (unlikely(descr->start_time != page_end_time && next_page_time > descr->start_time)) {
+ // we're in the middle of the page somewhere
+ unsigned entries = page_length / sizeof(storage_number);
+ position = ((uint64_t)(next_page_time - descr->start_time)) * (entries - 1) /
+ (page_end_time - descr->start_time);
}
- page = descr->pg_cache_descr->page;
- ret = page[position];
- entries = page_length / sizeof(storage_number);
- if (entries > 1) {
- usec_t dt;
+ else
+ position = 0;
+
+ handle->page_end_time = page_end_time;
+ handle->page_length = page_length;
+ handle->page = descr->pg_cache_descr->page;
+ usec_t entries = handle->entries = page_length / sizeof(storage_number);
+ if (likely(entries > 1))
+ handle->dt = (page_end_time - descr->start_time) / (entries - 1);
+ else
+ handle->dt = 0;
- dt = (page_end_time - descr->start_time) / (entries - 1);
- current_position_time = descr->start_time + position * dt;
- } else {
- current_position_time = descr->start_time;
+ handle->dt_sec = handle->dt / USEC_PER_SEC;
+ handle->position = position;
+
+ return 0;
+}
+
+/* Returns the metric and sets its timestamp into current_time */
+storage_number rrdeng_load_metric_next(struct rrddim_query_handle *rrdimm_handle, time_t *current_time) {
+ struct rrdeng_query_handle *handle = (struct rrdeng_query_handle *)rrdimm_handle->handle;
+
+ if (unlikely(INVALID_TIME == handle->next_page_time))
+ return SN_EMPTY_SLOT;
+
+ struct rrdeng_page_descr *descr = handle->descr;
+ unsigned position = handle->position + 1;
+ time_t now = handle->now + handle->dt_sec;
+
+ if (unlikely(!descr || position >= handle->entries)) {
+ // We need to get a new page
+ if(rrdeng_load_page_next(rrdimm_handle)) {
+ // next calls will not load any more metrics
+ handle->next_page_time = INVALID_TIME;
+ return SN_EMPTY_SLOT;
+ }
+
+ descr = handle->descr;
+ position = handle->position;
+ now = (descr->start_time + position * handle->dt) / USEC_PER_SEC;
}
+
+ storage_number ret = handle->page[position];
handle->position = position;
- handle->now = current_position_time / USEC_PER_SEC;
-/* fatal_assert(handle->now >= rrdimm_handle->start_time && handle->now <= rrdimm_handle->end_time);
- The above assertion is an approximation and needs to take update_every into account */
- if (unlikely(handle->now >= rrdimm_handle->end_time)) {
- /* next calls will not load any more metrics */
+ handle->now = now;
+
+ if (unlikely(now >= rrdimm_handle->end_time)) {
+ // next calls will not load any more metrics
handle->next_page_time = INVALID_TIME;
}
- *current_time = handle->now;
- return ret;
-no_more_metrics:
- handle->next_page_time = INVALID_TIME;
- return SN_EMPTY_SLOT;
+ *current_time = now;
+ return ret;
}
int rrdeng_load_metric_is_finished(struct rrddim_query_handle *rrdimm_handle)
{
- struct rrdeng_query_handle *handle;
-
- handle = &rrdimm_handle->rrdeng;
+ struct rrdeng_query_handle *handle = (struct rrdeng_query_handle *)rrdimm_handle->handle;
return (INVALID_TIME == handle->next_page_time);
}
@@ -652,19 +665,20 @@ int rrdeng_load_metric_is_finished(struct rrddim_query_handle *rrdimm_handle)
*/
void rrdeng_load_metric_finalize(struct rrddim_query_handle *rrdimm_handle)
{
- struct rrdeng_query_handle *handle;
- struct rrdengine_instance *ctx;
- struct rrdeng_page_descr *descr;
+ struct rrdeng_query_handle *handle = (struct rrdeng_query_handle *)rrdimm_handle->handle;
+ struct rrdengine_instance *ctx = handle->ctx;
+ struct rrdeng_page_descr *descr = handle->descr;
- handle = &rrdimm_handle->rrdeng;
- ctx = handle->ctx;
- descr = handle->descr;
if (descr) {
#ifdef NETDATA_INTERNAL_CHECKS
rrd_stat_atomic_add(&ctx->stats.metric_API_consumers, -1);
#endif
pg_cache_put(ctx, descr);
}
+
+ // whatever is allocated at rrdeng_load_metric_init() should be freed here
+ freez(handle);
+ rrdimm_handle->handle = NULL;
}
time_t rrdeng_metric_latest_time(RRDDIM *rd)
@@ -724,7 +738,7 @@ void *rrdeng_create_page(struct rrdengine_instance *ctx, uuid_t *id, struct rrde
descr = pg_cache_create_descr();
descr->id = id; /* TODO: add page type: metric, log, something? */
- page = mallocz(RRDENG_BLOCK_SIZE); /*TODO: add page size */
+ page = dbengine_page_alloc(); /*TODO: add page size */
rrdeng_page_descr_mutex_lock(ctx, descr);
pg_cache_descr = descr->pg_cache_descr;
pg_cache_descr->page = page;
@@ -949,7 +963,7 @@ int rrdeng_init(RRDHOST *host, struct rrdengine_instance **ctxp, char *dbfiles_p
/* wait for worker thread to initialize */
completion_wait_for(&ctx->rrdengine_completion);
completion_destroy(&ctx->rrdengine_completion);
- uv_thread_set_name_np(ctx->worker_config.thread, "DBENGINE");
+ uv_thread_set_name_np(ctx->worker_config.thread, "LIBUV_WORKER");
if (ctx->worker_config.error) {
goto error_after_rrdeng_worker;
}
diff --git a/database/metric_correlations.c b/database/metric_correlations.c
new file mode 100644
index 000000000..3b8968c99
--- /dev/null
+++ b/database/metric_correlations.c
@@ -0,0 +1,300 @@
+// SPDX-License-Identifier: GPL-3.0-or-later
+
+#include "daemon/common.h"
+#include "KolmogorovSmirnovDist.h"
+
+#define MAX_POINTS 10000
+int enable_metric_correlations = CONFIG_BOOLEAN_NO;
+int metric_correlations_version = 1;
+
+struct charts { // node of the singly linked list of charts built by metric_correlations()
+ RRDSET *st; // chart this node refers to
+ struct charts *next; // next node, NULL at the end of the list
+};
+
+struct per_dim { // working buffers for one dimension of one chart
+ char *dimension; // dimension name, duplicated with strdupz() in run_metric_correlations()
+ calculated_number baseline[MAX_POINTS]; // values returned by the baseline window query
+ calculated_number highlight[MAX_POINTS]; // values returned by the highlight window query
+
+ double baseline_diffs[MAX_POINTS]; // differences of baseline[], filled by run_diffs_and_rev()
+ double highlight_diffs[MAX_POINTS]; // differences of highlight[], filled by run_diffs_and_rev()
+};
+
+int find_index(double arr[], long int n, double K, long int start) // index of the first element of arr[start..n) that is greater than K, or n when there is none
+{
+ for (long int i = start; i < n; i++) {
+ if (K<arr[i]){ // strict comparison: elements equal to K are skipped
+ return i; // the long int index is converted to the int return type
+ }
+ }
+ return n; // no element greater than K was found
+}
+
+int compare(const void *left, const void *right) { // comparator passed to qsort() in kstwo(); orders doubles ascending
+ double lt = *(double *)left;
+ double rt = *(double *)right;
+
+ if(unlikely(lt < rt)) return -1;
+ if(unlikely(lt > rt)) return 1;
+ return 0; // equal, or a NaN is involved (both comparisons above are then false)
+}
+
+/*
+ * Two-sample Kolmogorov-Smirnov statistic.
+ *
+ * data1[0..n1) and data2[0..n2) are sorted in place. The difference between the
+ * two empirical CDFs is evaluated at every sample of both sets; *d receives the
+ * larger of the biggest difference and the absolute value of the smallest one,
+ * and *prob receives KSfbar(round(n1*n2/(n1+n2)), *d).
+ *
+ * The differences are evaluated on the fly, without scratch arrays, so n1 + n2
+ * is not bounded by any MAX_POINTS sized buffer.
+ */
+void kstwo(double data1[], long int n1, double data2[], long int n2, double *d, double *prob)
+{
+    const long int total = n1 + n2;
+    const double en1 = (double)n1;
+    const double en2 = (double)n2;
+    double min = 0.0, max = 0.0;
+
+    qsort(data1, n1, sizeof(double), compare);
+    qsort(data2, n2, sizeof(double), compare);
+
+    for (long int i = 0; i < total; i++) {
+        // i-th element of the concatenation of the two sorted sets
+        double x = (i < n1) ? data1[i] : data2[i - n1];
+
+        //TODO, use the start to reduce loops
+        double diff = find_index(data1, n1, x, 0) / en1 - find_index(data2, n2, x, 0) / en2;
+
+        if (i == 0) {
+            // the first difference seeds both extremes
+            min = diff;
+            max = fabs(diff);
+        }
+        if (diff < min)
+            min = diff;
+        if (diff >= max)
+            max = diff;
+    }
+
+    // clip min (fabs() is never negative, so only the upper bound can apply)
+    if (fabs(min) > 1)
+        min = 1;
+
+    if (fabs(min) < max)
+        *d = max;
+    else
+        *d = fabs(min);
+
+    double en = (en1 * en2 / (en1 + en2));
+    *prob = KSfbar(round(en), *d);
+}
+
+void fill_nan (struct per_dim *d, long int hp, long int bp) // replace NaN samples with 0.0 in the first bp baseline and first hp highlight values
+{
+ int k;
+
+ for (k = 0; k < bp; k++) { // baseline window
+ if (isnan(d->baseline[k])) {
+ d->baseline[k] = 0.0;
+ }
+ }
+
+ for (k = 0; k < hp; k++) { // highlight window
+ if (isnan(d->highlight[k])) {
+ d->highlight[k] = 0.0;
+ }
+ }
+}
+
+//TODO check counters
+void run_diffs_and_rev (struct per_dim *d, long int hp, long int bp) // fill the *_diffs arrays with differences of adjacent samples, walking each window from its end to its start
+{
+ int k, j;
+
+ for (k = 0, j = bp; k < bp - 1; k++, j--) // k counts up while j counts down from bp: bp - 1 differences are produced
+ d->baseline_diffs[k] = (double)d->baseline[j - 2] - (double)d->baseline[j - 1]; // element j-2 minus element j-1
+ for (k = 0, j = hp; k < hp - 1; k++, j--) { // same for the highlight window: hp - 1 differences
+ d->highlight_diffs[k] = (double)d->highlight[j - 2] - (double)d->highlight[j - 1];
+ }
+}
+
+// Release a per_dim array created by run_metric_correlations(), together with
+// the dimension names that were duplicated into it. Entries whose name was
+// never set hold NULL.
+static void free_per_dim_array(struct per_dim *pd, int dims)
+{
+    if (!pd)
+        return;
+
+    for (int n = 0; n < dims; n++)
+        freez(pd[n].dimension);
+
+    freez(pd);
+}
+
+/*
+ * Correlate the dimensions of one chart between two time windows.
+ *
+ * The chart is queried twice (baseline window, then highlight window) with
+ * average grouping. For every dimension the two series are cleaned of NaNs,
+ * turned into differences and passed to kstwo(); one `"name": probability`
+ * JSON member per dimension is appended to wb.
+ *
+ * Returns 0 when either query fails or when the two queries report a different
+ * number of dimensions; otherwise returns the number of dimensions walked by
+ * the last query loop.
+ */
+int run_metric_correlations (BUFFER *wb, RRDSET *st, long long baseline_after, long long baseline_before, long long highlight_after, long long highlight_before, long long max_points)
+{
+    uint32_t options = 0x00000000;
+    int group_method = RRDR_GROUPING_AVERAGE;
+    long group_time = 0;
+    struct context_param *context_param_list = NULL;
+    long c;
+    int i = 0, j = 0;
+    int b_dims = 0;
+    long int baseline_points = 0, highlight_points = 0;
+
+    struct per_dim *pd = NULL;
+
+    //TODO get everything in one go, when baseline is right before highlight
+    //get baseline
+    ONEWAYALLOC *owa = onewayalloc_create(0);
+    RRDR *rb = rrd2rrdr(owa, st, max_points, baseline_after, baseline_before, group_method, group_time, options, NULL, context_param_list, 0);
+    if(!rb) {
+        info("Cannot generate metric correlations output with these parameters on this chart.");
+        onewayalloc_destroy(owa);
+        return 0;
+    }
+
+    baseline_points = rrdr_rows(rb);
+    b_dims = rb->d;
+    pd = mallocz(sizeof(struct per_dim) * rb->d);
+
+    // names are only set for dimensions that are actually walked below;
+    // start from NULL so the cleanup helper can free every entry safely
+    for (i = 0; i < b_dims; i++)
+        pd[i].dimension = NULL;
+
+    for (c = 0; c != rrdr_rows(rb) ; ++c) {
+        RRDDIM *d;
+        calculated_number *cn = &rb->v[ c * rb->d ];
+        for (j = 0, d = rb->st->dimensions ; d && j < rb->d ; ++j, d = d->next) {
+            //TODO use points from query
+            if (!c)
+                pd[j].dimension = strdupz (d->name); // remember the name once, on the first row
+            pd[j].baseline[c] = cn[j];
+        }
+    }
+    rrdr_free(owa, rb);
+    onewayalloc_destroy(owa);
+
+    //get highlight
+    owa = onewayalloc_create(0);
+    RRDR *rh = rrd2rrdr(owa, st, max_points, highlight_after, highlight_before, group_method, group_time, options, NULL, context_param_list, 0);
+    if(!rh) {
+        info("Cannot generate metric correlations output with these parameters on this chart.");
+        free_per_dim_array(pd, b_dims);
+        onewayalloc_destroy(owa);
+        return 0;
+    }
+
+    if (rh->d != b_dims) {
+        //TODO handle different dims
+        rrdr_free(owa, rh);
+        onewayalloc_destroy(owa);
+        free_per_dim_array(pd, b_dims);
+        return 0;
+    }
+
+    highlight_points = rrdr_rows(rh);
+    for (c = 0; c != rrdr_rows(rh) ; ++c) {
+        RRDDIM *d;
+        calculated_number *cn = &rh->v[ c * rh->d ];
+        for (j = 0, d = rh->st->dimensions ; d && j < rh->d ; ++j, d = d->next)
+            pd[j].highlight[c] = cn[j];
+    }
+    rrdr_free(owa, rh);
+    onewayalloc_destroy(owa);
+
+    for (i = 0; i < b_dims; i++) {
+        fill_nan(&pd[i], highlight_points, baseline_points);
+    }
+
+    for (i = 0; i < b_dims; i++) {
+        run_diffs_and_rev(&pd[i], highlight_points, baseline_points);
+    }
+
+    // j still holds the number of dimensions walked by the last query loop that ran
+    double d=0, prob=0;
+    for (i=0;i < j ;i++) {
+        if (baseline_points && highlight_points) {
+            kstwo(pd[i].baseline_diffs, baseline_points-1, pd[i].highlight_diffs, highlight_points-1, &d, &prob);
+            buffer_sprintf(wb, "\t\t\t\t\"%s\": %f", pd[i].dimension, prob);
+            if (i != j-1)
+                buffer_sprintf(wb, ",\n");
+            else
+                buffer_sprintf(wb, "\n");
+        }
+    }
+
+    free_per_dim_array(pd, b_dims);
+    return j;
+}
+
+/*
+ * Run metric correlations for every chart of a host and write the result to wb.
+ *
+ * On success wb holds a JSON object with a "correlated_charts" member (one entry
+ * per chart that produced results) and "total_dimensions_count". When the
+ * feature is disabled, a window is empty or inverted, or no chart produced a
+ * result, wb holds a JSON object with a single "error" member instead.
+ *
+ * max_points of 0, or above MAX_POINTS, is replaced by MAX_POINTS.
+ */
+void metric_correlations (RRDHOST *host, BUFFER *wb, long long baseline_after, long long baseline_before, long long highlight_after, long long highlight_before, long long max_points)
+{
+    info ("Running metric correlations, highlight_after: %lld, highlight_before: %lld, baseline_after: %lld, baseline_before: %lld, max_points: %lld", highlight_after, highlight_before, baseline_after, baseline_before, max_points);
+
+    if (!enable_metric_correlations) {
+        error("Metric correlations functionality is not enabled.");
+        buffer_strcat(wb, "{\"error\": \"Metric correlations functionality is not enabled.\" }");
+        return;
+    }
+
+    if (highlight_before <= highlight_after || baseline_before <= baseline_after) {
+        error("Invalid baseline or highlight ranges.");
+        buffer_strcat(wb, "{\"error\": \"Invalid baseline or highlight ranges.\" }");
+        return;
+    }
+
+    long long dims = 0, total_dims = 0;
+    RRDSET *st;
+    size_t c = 0;
+    BUFFER *wdims = buffer_create(1000);
+
+    if (!max_points || max_points > MAX_POINTS)
+        max_points = MAX_POINTS;
+
+    // Do not keep the host locked while the correlations run: collect the
+    // charts first (each one read-locked) and process them after the host
+    // lock has been released.
+    struct charts *charts = NULL;
+    rrdhost_rdlock(host);
+    rrdset_foreach_read(st, host) {
+        if (rrdset_is_available_for_viewers(st)) {
+            rrdset_rdlock(st);
+            struct charts *chart = callocz(1, sizeof(struct charts));
+            chart->st = st;
+            chart->next = NULL;
+            if (charts) {
+                chart->next = charts;
+            }
+            charts = chart; // prepend: the list ends up in reverse iteration order
+        }
+    }
+    rrdhost_unlock(host);
+
+    buffer_strcat(wb, "{\n\t\"correlated_charts\": {");
+
+    for (struct charts *ch = charts; ch; ch = ch->next) {
+        buffer_flush(wdims);
+        dims = run_metric_correlations(wdims, ch->st, baseline_after, baseline_before, highlight_after, highlight_before, max_points);
+        if (dims) {
+            if (c)
+                buffer_strcat(wb, "\t\t},"); // close the previous chart object
+            buffer_strcat(wb, "\n\t\t\"");
+            buffer_strcat(wb, ch->st->id);
+            buffer_strcat(wb, "\": {\n");
+            buffer_strcat(wb, "\t\t\t\"context\": \"");
+            buffer_strcat(wb, ch->st->context);
+            buffer_strcat(wb, "\",\n\t\t\t\"dimensions\": {\n");
+            buffer_sprintf(wb, "%s", buffer_tostring(wdims));
+            buffer_strcat(wb, "\t\t\t}\n");
+            total_dims += dims;
+            c++;
+        }
+    }
+    buffer_strcat(wb, "\t\t}\n");
+    buffer_sprintf(wb, "\t},\n\t\"total_dimensions_count\": %lld\n}", total_dims);
+
+    if (!total_dims) {
+        // nothing was produced: replace the partial document with an error
+        buffer_flush(wb);
+        buffer_strcat(wb, "{\"error\": \"No results from metric correlations.\" }");
+    }
+
+    // release the chart locks taken above and the list nodes
+    struct charts* ch;
+    while(charts){
+        ch = charts;
+        charts = charts->next;
+        rrdset_unlock(ch->st);
+        freez(ch); // nodes come from callocz(), so release them with the matching wrapper
+    }
+
+    buffer_free(wdims);
+    info ("Done running metric correlations");
+}
diff --git a/database/metric_correlations.h b/database/metric_correlations.h
new file mode 100644
index 000000000..83ea9b74d
--- /dev/null
+++ b/database/metric_correlations.h
@@ -0,0 +1,11 @@
+// SPDX-License-Identifier: GPL-3.0-or-later
+
+#ifndef NETDATA_METRIC_CORRELATIONS_H
+#define NETDATA_METRIC_CORRELATIONS_H 1
+
+extern int enable_metric_correlations;
+extern int metric_correlations_version;
+
+// Run metric correlations for the charts of `host` and write the JSON result, or a
+// JSON error object, into `wb`. The baseline_* pair and the highlight_* pair delimit
+// the two time windows that are compared; the parameter names match the definition
+// in metric_correlations.c.
+void metric_correlations (RRDHOST *host, BUFFER *wb, long long baseline_after, long long baseline_before, long long highlight_after, long long highlight_before, long long max_points);
+
+#endif //NETDATA_METRIC_CORRELATIONS_H
diff --git a/database/ram/rrddim_mem.c b/database/ram/rrddim_mem.c
new file mode 100644
index 000000000..b17f03ca5
--- /dev/null
+++ b/database/ram/rrddim_mem.c
@@ -0,0 +1,67 @@
+// SPDX-License-Identifier: GPL-3.0-or-later
+
+#include "rrddim_mem.h"
+
+// ----------------------------------------------------------------------------
+// RRDDIM legacy data collection functions
+
+// Prepare a dimension for collection: mark the chart's current slot as empty
+// and attach a zeroed collection handle to the dimension state.
+void rrddim_collect_init(RRDDIM *rd) {
+    rd->values[rd->rrdset->current_entry] = SN_EMPTY_SLOT;
+    // callocz() instead of calloc(): the result was stored without a NULL check
+    rd->state->handle = callocz(1, sizeof(struct mem_collect_handle));
+}
+void rrddim_collect_store_metric(RRDDIM *rd, usec_t point_in_time, storage_number number) { // store one collected value in the chart's current slot
+ (void)point_in_time; // unused: the slot is taken from rd->rrdset->current_entry, not from the timestamp
+ rd->values[rd->rrdset->current_entry] = number;
+}
+// Release the collection handle attached by rrddim_collect_init().
+// Always returns 0.
+int rrddim_collect_finalize(RRDDIM *rd) {
+    freez(rd->state->handle);
+    rd->state->handle = NULL; // do not leave a dangling pointer in the dimension state
+    return 0;
+}
+
+// ----------------------------------------------------------------------------
+// RRDDIM legacy database query functions
+
+// Start a query over [start_time, end_time] on a dimension: record the window in
+// `handle` and attach an iterator positioned on the slot that corresponds to
+// start_time. The iterator is released by rrddim_query_finalize().
+void rrddim_query_init(RRDDIM *rd, struct rrddim_query_handle *handle, time_t start_time, time_t end_time) {
+    handle->rd = rd;
+    handle->start_time = start_time;
+    handle->end_time = end_time;
+
+    // callocz() instead of calloc(): the result is dereferenced right away and
+    // is later released with freez()
+    struct mem_query_handle *h = callocz(1, sizeof(struct mem_query_handle));
+    h->slot = rrdset_time2slot(rd->rrdset, start_time);
+    h->last_slot = rrdset_time2slot(rd->rrdset, end_time);
+    h->finished = 0;
+    handle->handle = (STORAGE_QUERY_HANDLE *)h;
+}
+
+storage_number rrddim_query_next_metric(struct rrddim_query_handle *handle, time_t *current_time) { // return the value at the iterator's slot and advance the iterator
+ RRDDIM *rd = handle->rd;
+ struct mem_query_handle* h = (struct mem_query_handle*)handle->handle;
+ long entries = rd->rrdset->entries;
+ long slot = h->slot;
+
+ (void)current_time; // not written by this implementation
+ if (unlikely(h->slot == h->last_slot)) // the value returned by this call is the last one of the query
+ h->finished = 1;
+ storage_number n = rd->values[slot++];
+
+ if(unlikely(slot >= entries)) slot = 0; // wrap around to the first slot
+ h->slot = slot;
+
+ return n;
+}
+
+int rrddim_query_is_finished(struct rrddim_query_handle *handle) { // non-zero once rrddim_query_next_metric() has reached last_slot
+ struct mem_query_handle* h = (struct mem_query_handle*)handle->handle;
+ return h->finished;
+}
+
+void rrddim_query_finalize(struct rrddim_query_handle *handle) { // release the iterator attached by rrddim_query_init()
+ freez(handle->handle); // handle->handle is left pointing at the freed memory
+}
+
+time_t rrddim_query_latest_time(RRDDIM *rd) { // forwards to rrdset_last_entry_t_nolock() for the dimension's chart
+ return rrdset_last_entry_t_nolock(rd->rrdset);
+}
+
+time_t rrddim_query_oldest_time(RRDDIM *rd) { // forwards to rrdset_first_entry_t_nolock() for the dimension's chart
+ return rrdset_first_entry_t_nolock(rd->rrdset);
+}
diff --git a/database/ram/rrddim_mem.h b/database/ram/rrddim_mem.h
new file mode 100644
index 000000000..9a215387a
--- /dev/null
+++ b/database/ram/rrddim_mem.h
@@ -0,0 +1,29 @@
+// SPDX-License-Identifier: GPL-3.0-or-later
+
+#ifndef NETDATA_RRDDIMMEM_H
+#define NETDATA_RRDDIMMEM_H
+
+#include "database/rrd.h"
+
+struct mem_collect_handle { // collection handle allocated by rrddim_collect_init() and released by rrddim_collect_finalize()
+ long slot; // NOTE(review): neither read nor written by the functions in rrddim_mem.c - confirm it is still needed
+ long entries; // NOTE(review): neither read nor written by the functions in rrddim_mem.c - confirm it is still needed
+};
+struct mem_query_handle { // per-query iterator state created by rrddim_query_init()
+ long slot; // next slot of rd->values[] to be read
+ long last_slot; // slot that corresponds to the end_time of the query
+ uint8_t finished; // set to 1 when the value at last_slot is being returned
+};
+
+extern void rrddim_collect_init(RRDDIM *rd);
+extern void rrddim_collect_store_metric(RRDDIM *rd, usec_t point_in_time, storage_number number);
+extern int rrddim_collect_finalize(RRDDIM *rd);
+
+extern void rrddim_query_init(RRDDIM *rd, struct rrddim_query_handle *handle, time_t start_time, time_t end_time);
+extern storage_number rrddim_query_next_metric(struct rrddim_query_handle *handle, time_t *current_time);
+extern int rrddim_query_is_finished(struct rrddim_query_handle *handle);
+extern void rrddim_query_finalize(struct rrddim_query_handle *handle);
+extern time_t rrddim_query_latest_time(RRDDIM *rd);
+extern time_t rrddim_query_oldest_time(RRDDIM *rd);
+
+#endif
diff --git a/database/rrd.c b/database/rrd.c
index 321d35615..f91039ea5 100644
--- a/database/rrd.c
+++ b/database/rrd.c
@@ -2,6 +2,7 @@
#define NETDATA_RRD_INTERNALS 1
#include "rrd.h"
+#include "storage_engine.h"
// ----------------------------------------------------------------------------
// globals
@@ -47,24 +48,19 @@ inline const char *rrd_memory_mode_name(RRD_MEMORY_MODE id) {
return RRD_MEMORY_MODE_DBENGINE_NAME;
}
+ STORAGE_ENGINE* eng = storage_engine_get(id);
+ if (eng) {
+ return eng->name;
+ }
+
return RRD_MEMORY_MODE_SAVE_NAME;
}
RRD_MEMORY_MODE rrd_memory_mode_id(const char *name) {
- if(unlikely(!strcmp(name, RRD_MEMORY_MODE_RAM_NAME)))
- return RRD_MEMORY_MODE_RAM;
-
- else if(unlikely(!strcmp(name, RRD_MEMORY_MODE_MAP_NAME)))
- return RRD_MEMORY_MODE_MAP;
-
- else if(unlikely(!strcmp(name, RRD_MEMORY_MODE_NONE_NAME)))
- return RRD_MEMORY_MODE_NONE;
-
- else if(unlikely(!strcmp(name, RRD_MEMORY_MODE_ALLOC_NAME)))
- return RRD_MEMORY_MODE_ALLOC;
-
- else if(unlikely(!strcmp(name, RRD_MEMORY_MODE_DBENGINE_NAME)))
- return RRD_MEMORY_MODE_DBENGINE;
+ STORAGE_ENGINE* eng = storage_engine_find(name);
+ if (eng) {
+ return eng->id;
+ }
return RRD_MEMORY_MODE_SAVE;
}
@@ -139,8 +135,7 @@ const char *rrdset_type_name(RRDSET_TYPE chart_type) {
// ----------------------------------------------------------------------------
// RRD - cache directory
-char *rrdset_cache_dir(RRDHOST *host, const char *id, const char *config_section) {
- UNUSED(config_section);
+char *rrdset_cache_dir(RRDHOST *host, const char *id) {
char *ret = NULL;
char b[FILENAME_MAX + 1];
diff --git a/database/rrd.h b/database/rrd.h
index 071e1d038..dc32b2a2d 100644
--- a/database/rrd.h
+++ b/database/rrd.h
@@ -170,17 +170,12 @@ typedef enum rrddim_flags {
RRDDIM_FLAG_ACLK = (1 << 4),
RRDDIM_FLAG_PENDING_FOREACH_ALARM = (1 << 5), // set when foreach alarm has not been initialized yet
+ RRDDIM_FLAG_META_HIDDEN = (1 << 6), // Status of hidden option in the metadata database
} RRDDIM_FLAGS;
-#ifdef HAVE_C___ATOMIC
#define rrddim_flag_check(rd, flag) (__atomic_load_n(&((rd)->flags), __ATOMIC_SEQ_CST) & (flag))
#define rrddim_flag_set(rd, flag) __atomic_or_fetch(&((rd)->flags), (flag), __ATOMIC_SEQ_CST)
#define rrddim_flag_clear(rd, flag) __atomic_and_fetch(&((rd)->flags), ~(flag), __ATOMIC_SEQ_CST)
-#else
-#define rrddim_flag_check(rd, flag) ((rd)->flags & (flag))
-#define rrddim_flag_set(rd, flag) (rd)->flags |= (flag)
-#define rrddim_flag_clear(rd, flag) (rd)->flags &= ~(flag)
-#endif
typedef enum label_source {
LABEL_SOURCE_AUTO = 0,
@@ -332,53 +327,56 @@ struct rrddim {
};
// ----------------------------------------------------------------------------
-// iterator state for RRD dimension data collection
-union rrddim_collect_handle {
- struct {
- long slot;
- long entries;
- } slotted; // state the legacy code uses
-#ifdef ENABLE_DBENGINE
- struct rrdeng_collect_handle {
- struct rrdeng_page_descr *descr, *prev_descr;
- unsigned long page_correlation_id;
- struct rrdengine_instance *ctx;
- // set to 1 when this dimension is not page aligned with the other dimensions in the chart
- uint8_t unaligned_page;
- } rrdeng; // state the database engine uses
-#endif
-};
+// engine-specific iterator state for dimension data collection
+typedef struct storage_collect_handle STORAGE_COLLECT_HANDLE;
// ----------------------------------------------------------------------------
-// iterator state for RRD dimension data queries
-
-#ifdef ENABLE_DBENGINE
-struct rrdeng_query_handle {
- struct rrdeng_page_descr *descr;
- struct rrdengine_instance *ctx;
- struct pg_cache_page_index *page_index;
- time_t next_page_time;
- time_t now;
- unsigned position;
-};
-#endif
+// engine-specific iterator state for dimension data queries
+typedef struct storage_query_handle STORAGE_QUERY_HANDLE;
+// ----------------------------------------------------------------------------
+// iterator state for RRD dimension data queries
struct rrddim_query_handle {
RRDDIM *rd;
time_t start_time;
time_t end_time;
- union {
- struct {
- long slot;
- long last_slot;
- uint8_t finished;
- } slotted; // state the legacy code uses
-#ifdef ENABLE_DBENGINE
- struct rrdeng_query_handle rrdeng; // state the database engine uses
-#endif
- };
+ STORAGE_QUERY_HANDLE* handle;
};
+// ------------------------------------------------------------------------
+// function pointers that handle data collection
+struct rrddim_collect_ops {
+ // an initialization function to run before starting collection
+ void (*init)(RRDDIM *rd);
+
+ // run this to store each metric into the database
+ void (*store_metric)(RRDDIM *rd, usec_t point_in_time, storage_number number);
+
+ // a finalization function to run after collection is over
+ // returns 1 if it's safe to delete the dimension
+ int (*finalize)(RRDDIM *rd);
+};
+
+// function pointers that handle database queries
+struct rrddim_query_ops {
+ // run this before starting a series of next_metric() database queries
+ void (*init)(RRDDIM *rd, struct rrddim_query_handle *handle, time_t start_time, time_t end_time);
+
+ // run this to load each metric number from the database
+ storage_number (*next_metric)(struct rrddim_query_handle *handle, time_t *current_time);
+
+ // run this to test if the series of next_metric() database queries is finished
+ int (*is_finished)(struct rrddim_query_handle *handle);
+
+ // run this after finishing a series of next_metric() database queries
+ void (*finalize)(struct rrddim_query_handle *handle);
+
+ // get the timestamp of the last entry of this metric
+ time_t (*latest_time)(RRDDIM *rd);
+
+ // get the timestamp of the first entry of this metric
+ time_t (*oldest_time)(RRDDIM *rd);
+};
// ----------------------------------------------------------------------------
// volatile state per RRD dimension
@@ -391,42 +389,9 @@ struct rrddim_volatile {
int aclk_live_status;
#endif
uuid_t metric_uuid; // global UUID for this metric (unique_across hosts)
- union rrddim_collect_handle handle;
- // ------------------------------------------------------------------------
- // function pointers that handle data collection
- struct rrddim_collect_ops {
- // an initialization function to run before starting collection
- void (*init)(RRDDIM *rd);
-
- // run this to store each metric into the database
- void (*store_metric)(RRDDIM *rd, usec_t point_in_time, storage_number number);
-
- // an finalization function to run after collection is over
- // returns 1 if it's safe to delete the dimension
- int (*finalize)(RRDDIM *rd);
- } collect_ops;
-
- // function pointers that handle database queries
- struct rrddim_query_ops {
- // run this before starting a series of next_metric() database queries
- void (*init)(RRDDIM *rd, struct rrddim_query_handle *handle, time_t start_time, time_t end_time);
-
- // run this to load each metric number from the database
- storage_number (*next_metric)(struct rrddim_query_handle *handle, time_t *current_time);
-
- // run this to test if the series of next_metric() database queries is finished
- int (*is_finished)(struct rrddim_query_handle *handle);
-
- // run this after finishing a series of load_metric() database queries
- void (*finalize)(struct rrddim_query_handle *handle);
-
- // get the timestamp of the last entry of this metric
- time_t (*latest_time)(RRDDIM *rd);
-
- // get the timestamp of the first entry of this metric
- time_t (*oldest_time)(RRDDIM *rd);
- } query_ops;
-
+ STORAGE_COLLECT_HANDLE* handle;
+ struct rrddim_collect_ops collect_ops;
+ struct rrddim_query_ops query_ops;
ml_dimension_t ml_dimension;
};
@@ -434,6 +399,7 @@ struct rrddim_volatile {
// volatile state per chart
struct rrdset_volatile {
char *old_title;
+ char *old_units;
char *old_context;
uuid_t hash_id;
struct label *new_labels;
@@ -459,7 +425,6 @@ struct rrdset_volatile {
// and may lead to missing information.
typedef enum rrdset_flags {
- RRDSET_FLAG_ENABLED = 1 << 0, // enables or disables a chart
RRDSET_FLAG_DETAIL = 1 << 1, // if set, the data set should be considered as a detail of another
// (the master data set should be the one that has the same family and is not detail)
RRDSET_FLAG_DEBUG = 1 << 2, // enables or disables debugging for a chart
@@ -483,16 +448,9 @@ typedef enum rrdset_flags {
RRDSET_FLAG_ANOMALY_DETECTION = 1 << 18 // flag to identify anomaly detection charts.
} RRDSET_FLAGS;
-#ifdef HAVE_C___ATOMIC
#define rrdset_flag_check(st, flag) (__atomic_load_n(&((st)->flags), __ATOMIC_SEQ_CST) & (flag))
#define rrdset_flag_set(st, flag) __atomic_or_fetch(&((st)->flags), flag, __ATOMIC_SEQ_CST)
#define rrdset_flag_clear(st, flag) __atomic_and_fetch(&((st)->flags), ~flag, __ATOMIC_SEQ_CST)
-#else
-#define rrdset_flag_check(st, flag) ((st)->flags & (flag))
-#define rrdset_flag_set(st, flag) (st)->flags |= (flag)
-#define rrdset_flag_clear(st, flag) (st)->flags &= ~(flag)
-#endif
-#define rrdset_flag_check_noatomic(st, flag) ((st)->flags & (flag))
struct rrdset {
// ------------------------------------------------------------------------
@@ -511,7 +469,7 @@ struct rrdset {
// since the config always has a higher priority
// (the user overwrites the name of the charts)
- char *config_section; // the config section for the chart
+ void *unused_ptr; // Unused field (previously it held the config section of the chart)
char *type; // the type of graph RRD_TYPE_* (a category, for determining graphing options)
char *family; // grouping sets under the same family
@@ -642,15 +600,9 @@ typedef enum rrdhost_flags {
RRDHOST_FLAG_PENDING_FOREACH_ALARMS = 1 << 7, // contains dims with uninitialized foreach alarms
} RRDHOST_FLAGS;
-#ifdef HAVE_C___ATOMIC
#define rrdhost_flag_check(host, flag) (__atomic_load_n(&((host)->flags), __ATOMIC_SEQ_CST) & (flag))
#define rrdhost_flag_set(host, flag) __atomic_or_fetch(&((host)->flags), flag, __ATOMIC_SEQ_CST)
#define rrdhost_flag_clear(host, flag) __atomic_and_fetch(&((host)->flags), ~flag, __ATOMIC_SEQ_CST)
-#else
-#define rrdhost_flag_check(host, flag) ((host)->flags & (flag))
-#define rrdhost_flag_set(host, flag) (host)->flags |= (flag)
-#define rrdhost_flag_clear(host, flag) (host)->flags &= ~(flag)
-#endif
#ifdef NETDATA_INTERNAL_CHECKS
#define rrdset_debug(st, fmt, args...) do { if(unlikely(debug_flags & D_RRD_STATS && rrdset_flag_check(st, RRDSET_FLAG_DEBUG))) \
@@ -767,6 +719,7 @@ struct rrdhost_system_info {
char *install_type;
char *prebuilt_arch;
char *prebuilt_dist;
+ int mc_version;
};
struct rrdhost {
@@ -815,6 +768,8 @@ struct rrdhost {
unsigned int rrdpush_send_enabled; // 1 when this host sends metrics to another netdata
char *rrdpush_send_destination; // where to send metrics to
char *rrdpush_send_api_key; // the api key at the receiving netdata
+ struct rrdpush_destinations *destinations; // a linked list of possible destinations
+ struct rrdpush_destinations *destination; // the current destination from the above list
// the following are state information for the threading
// streaming metrics from this netdata to an upstream netdata
@@ -842,10 +797,14 @@ struct rrdhost {
volatile size_t connected_senders; // when remote hosts are streaming to this
// host, this is the counter of connected clients
+ time_t senders_connect_time; // the time the last sender was connected
+ time_t senders_last_chart_command; // the time of the last CHART streaming command
time_t senders_disconnected_time; // the time the last sender was disconnected
struct receiver_state *receiver;
netdata_mutex_t receiver_lock;
+ int trigger_chart_obsoletion_check; // set when child connects, will instruct parent to
+ // trigger a check for obsoleted charts since previous connect
// ------------------------------------------------------------------------
// health monitoring options
@@ -1120,8 +1079,8 @@ extern void rrdset_is_obsolete(RRDSET *st);
extern void rrdset_isnot_obsolete(RRDSET *st);
// checks if the RRDSET should be offered to viewers
-#define rrdset_is_available_for_viewers(st) (rrdset_flag_check(st, RRDSET_FLAG_ENABLED) && !rrdset_flag_check(st, RRDSET_FLAG_HIDDEN) && !rrdset_flag_check(st, RRDSET_FLAG_OBSOLETE) && !rrdset_flag_check(st, RRDSET_FLAG_ARCHIVED) && (st)->dimensions && (st)->rrd_memory_mode != RRD_MEMORY_MODE_NONE)
-#define rrdset_is_available_for_exporting_and_alarms(st) (rrdset_flag_check(st, RRDSET_FLAG_ENABLED) && !rrdset_flag_check(st, RRDSET_FLAG_OBSOLETE) && !rrdset_flag_check(st, RRDSET_FLAG_ARCHIVED) && (st)->dimensions)
+#define rrdset_is_available_for_viewers(st) (!rrdset_flag_check(st, RRDSET_FLAG_HIDDEN) && !rrdset_flag_check(st, RRDSET_FLAG_OBSOLETE) && !rrdset_flag_check(st, RRDSET_FLAG_ARCHIVED) && (st)->dimensions && (st)->rrd_memory_mode != RRD_MEMORY_MODE_NONE)
+#define rrdset_is_available_for_exporting_and_alarms(st) (!rrdset_flag_check(st, RRDSET_FLAG_OBSOLETE) && !rrdset_flag_check(st, RRDSET_FLAG_ARCHIVED) && (st)->dimensions)
#define rrdset_is_archived(st) (rrdset_flag_check(st, RRDSET_FLAG_ARCHIVED) && (st)->dimensions)
// get the total duration in seconds of the round robin database
@@ -1322,7 +1281,9 @@ extern void rrddim_isnot_obsolete(RRDSET *st, RRDDIM *rd);
extern collected_number rrddim_set_by_pointer(RRDSET *st, RRDDIM *rd, collected_number value);
extern collected_number rrddim_set(RRDSET *st, const char *id, collected_number value);
-
+#if defined(ENABLE_ACLK) && defined(ENABLE_NEW_CLOUD_PROTOCOL)
+extern time_t calc_dimension_liveness(RRDDIM *rd, time_t now);
+#endif
extern long align_entries_to_pagesize(RRD_MEMORY_MODE mode, long entries);
// ----------------------------------------------------------------------------
@@ -1339,10 +1300,9 @@ extern int alarm_compare_name(void *a, void *b);
extern avl_tree_lock rrdhost_root_index;
extern char *rrdset_strncpyz_name(char *to, const char *from, size_t length);
-extern char *rrdset_cache_dir(RRDHOST *host, const char *id, const char *config_section);
+extern char *rrdset_cache_dir(RRDHOST *host, const char *id);
-#define rrddim_free(st, rd) rrddim_free_custom(st, rd, 0)
-extern void rrddim_free_custom(RRDSET *st, RRDDIM *rd, int db_rotated);
+extern void rrddim_free(RRDSET *st, RRDDIM *rd);
extern int rrddim_compare(void* a, void* b);
extern int rrdset_compare(void* a, void* b);
diff --git a/database/rrdcalc.c b/database/rrdcalc.c
index 1b1a14960..b29a0ffc0 100644
--- a/database/rrdcalc.c
+++ b/database/rrdcalc.c
@@ -81,35 +81,32 @@ static void rrdsetcalc_link(RRDSET *st, RRDCALC *rc) {
if(!rc->units) rc->units = strdupz(st->units);
- if(!rrdcalc_isrepeating(rc)) {
- time_t now = now_realtime_sec();
- ALARM_ENTRY *ae = health_create_alarm_entry(
- host,
- rc->id,
- rc->next_event_id++,
- rc->config_hash_id,
- now,
- rc->name,
- rc->rrdset->id,
- rc->rrdset->family,
- rc->classification,
- rc->component,
- rc->type,
- rc->exec,
- rc->recipient,
- now - rc->last_status_change,
- rc->old_value,
- rc->value,
- rc->status,
- RRDCALC_STATUS_UNINITIALIZED,
- rc->source,
- rc->units,
- rc->info,
- 0,
- 0
- );
- health_alarm_log(host, ae);
- }
+ time_t now = now_realtime_sec();
+ ALARM_ENTRY *ae = health_create_alarm_entry(
+ host,
+ rc->id,
+ rc->next_event_id++,
+ rc->config_hash_id,
+ now,
+ rc->name,
+ rc->rrdset->id,
+ rc->rrdset->family,
+ rc->classification,
+ rc->component,
+ rc->type,
+ rc->exec,
+ rc->recipient,
+ now - rc->last_status_change,
+ rc->old_value,
+ rc->value,
+ rc->status,
+ RRDCALC_STATUS_UNINITIALIZED,
+ rc->source,
+ rc->units,
+ rc->info,
+ 0,
+ 0);
+ health_alarm_log(host, ae);
}
static inline int rrdcalc_test_additional_restriction(RRDCALC *rc, RRDSET *st){
@@ -119,6 +116,31 @@ static inline int rrdcalc_test_additional_restriction(RRDCALC *rc, RRDSET *st){
if (rc->plugin_match && !simple_pattern_matches(rc->plugin_pattern, st->plugin_name))
return 0;
+ if (rc->labels) {
+ int labels_count=1;
+ int labels_match=0;
+ char *s = rc->labels;
+ while (*s) {
+ if (*s==' ')
+ labels_count++;
+ s++;
+ }
+ RRDHOST *host = st->rrdhost;
+ char cmp[CONFIG_FILE_LINE_MAX+1];
+ struct label *move = host->labels.head;
+ while(move) {
+ snprintf(cmp, CONFIG_FILE_LINE_MAX, "%s=%s", move->key, move->value);
+ if (simple_pattern_matches(rc->splabels, move->key) ||
+ simple_pattern_matches(rc->splabels, cmp)) {
+ labels_match++;
+ }
+ move = move->next;
+ }
+
+ if (labels_match != labels_count)
+ return 0;
+ }
+
return 1;
}
@@ -159,35 +181,32 @@ inline void rrdsetcalc_unlink(RRDCALC *rc) {
RRDHOST *host = st->rrdhost;
- if(!rrdcalc_isrepeating(rc)) {
- time_t now = now_realtime_sec();
- ALARM_ENTRY *ae = health_create_alarm_entry(
- host,
- rc->id,
- rc->next_event_id++,
- rc->config_hash_id,
- now,
- rc->name,
- rc->rrdset->id,
- rc->rrdset->family,
- rc->classification,
- rc->component,
- rc->type,
- rc->exec,
- rc->recipient,
- now - rc->last_status_change,
- rc->old_value,
- rc->value,
- rc->status,
- RRDCALC_STATUS_REMOVED,
- rc->source,
- rc->units,
- rc->info,
- 0,
- 0
- );
- health_alarm_log(host, ae);
- }
+ time_t now = now_realtime_sec();
+ ALARM_ENTRY *ae = health_create_alarm_entry(
+ host,
+ rc->id,
+ rc->next_event_id++,
+ rc->config_hash_id,
+ now,
+ rc->name,
+ rc->rrdset->id,
+ rc->rrdset->family,
+ rc->classification,
+ rc->component,
+ rc->type,
+ rc->exec,
+ rc->recipient,
+ now - rc->last_status_change,
+ rc->old_value,
+ rc->value,
+ rc->status,
+ RRDCALC_STATUS_REMOVED,
+ rc->source,
+ rc->units,
+ rc->info,
+ 0,
+ 0);
+ health_alarm_log(host, ae);
debug(D_HEALTH, "Health unlinking alarm '%s.%s' from chart '%s' of host '%s'", rc->chart?rc->chart:"NOCHART", rc->name, st->id, host->hostname);
@@ -293,19 +312,15 @@ inline uint32_t rrdcalc_get_unique_id(RRDHOST *host, const char *chart, const ch
char *alarm_name_with_dim(char *name, size_t namelen, const char *dim, size_t dimlen) {
char *newname,*move;
- newname = malloc(namelen + dimlen + 2);
- if(newname) {
- move = newname;
- memcpy(move, name, namelen);
- move += namelen;
+ newname = mallocz(namelen + dimlen + 2);
+ move = newname;
+ memcpy(move, name, namelen);
+ move += namelen;
- *move++ = '_';
- memcpy(move, dim, dimlen);
- move += dimlen;
- *move = '\0';
- } else {
- newname = name;
- }
+ *move++ = '_';
+ memcpy(move, dim, dimlen);
+ move += dimlen;
+ *move = '\0';
return newname;
}
@@ -422,6 +437,7 @@ inline RRDCALC *rrdcalc_create_from_template(RRDHOST *host, RRDCALCTEMPLATE *rt,
rc->delay_multiplier = rt->delay_multiplier;
rc->last_repeat = 0;
+ rc->times_repeat = 0;
rc->warn_repeat_every = rt->warn_repeat_every;
rc->crit_repeat_every = rt->crit_repeat_every;
@@ -534,6 +550,7 @@ inline RRDCALC *rrdcalc_create_from_rrdcalc(RRDCALC *rc, RRDHOST *host, const ch
newrc->delay_multiplier = rc->delay_multiplier;
newrc->last_repeat = 0;
+ newrc->times_repeat = 0;
newrc->warn_repeat_every = rc->warn_repeat_every;
newrc->crit_repeat_every = rc->crit_repeat_every;
diff --git a/database/rrdcalc.h b/database/rrdcalc.h
index d7446f63a..2ae47788e 100644
--- a/database/rrdcalc.h
+++ b/database/rrdcalc.h
@@ -121,6 +121,7 @@ struct rrdcalc {
time_t next_update; // the next update timestamp of the alarm
time_t last_status_change; // the timestamp of the last time this alarm changed status
time_t last_repeat; // the last time the alarm got repeated
+ uint32_t times_repeat; // number of times the alarm got repeated
time_t db_after; // the first timestamp evaluated by the db lookup
time_t db_before; // the last timestamp evaluated by the db lookup
diff --git a/database/rrddim.c b/database/rrddim.c
index df45363bc..e488d8b0b 100644
--- a/database/rrddim.c
+++ b/database/rrddim.c
@@ -2,6 +2,10 @@
#define NETDATA_RRD_INTERNALS
#include "rrd.h"
+#ifdef ENABLE_DBENGINE
+#include "database/engine/rrdengineapi.h"
+#endif
+#include "storage_engine.h"
// ----------------------------------------------------------------------------
// RRDDIM index
@@ -38,14 +42,15 @@ inline RRDDIM *rrddim_find(RRDSET *st, const char *id) {
// RRDDIM rename a dimension
inline int rrddim_set_name(RRDSET *st, RRDDIM *rd, const char *name) {
- if(unlikely(!name || !*name || !strcmp(rd->name, name)))
+ if(unlikely(!name || !*name || (rd->name && !strcmp(rd->name, name))))
return 0;
debug(D_RRD_CALLS, "rrddim_set_name() from %s.%s to %s.%s", st->name, rd->name, st->name, name);
- char varname[CONFIG_MAX_NAME + 1];
- snprintfz(varname, CONFIG_MAX_NAME, "dim %s name", rd->id);
- rd->name = config_set_default(st->config_section, varname, name);
+ if (rd->name)
+ freez((void *) rd->name);
+
+ rd->name = strdupz(name);
rd->hash_name = simple_hash(rd->name);
if (!st->state->is_ar_chart)
@@ -96,113 +101,65 @@ inline int rrddim_set_divisor(RRDSET *st, RRDDIM *rd, collected_number divisor)
}
// ----------------------------------------------------------------------------
-// RRDDIM legacy data collection functions
-
-static void rrddim_collect_init(RRDDIM *rd) {
- rd->values[rd->rrdset->current_entry] = SN_EMPTY_SLOT;
-}
-static void rrddim_collect_store_metric(RRDDIM *rd, usec_t point_in_time, storage_number number) {
- (void)point_in_time;
-
- rd->values[rd->rrdset->current_entry] = number;
-}
-static int rrddim_collect_finalize(RRDDIM *rd) {
- (void)rd;
-
- return 0;
-}
-
-// ----------------------------------------------------------------------------
-// RRDDIM legacy database query functions
-
-static void rrddim_query_init(RRDDIM *rd, struct rrddim_query_handle *handle, time_t start_time, time_t end_time) {
- handle->rd = rd;
- handle->start_time = start_time;
- handle->end_time = end_time;
- handle->slotted.slot = rrdset_time2slot(rd->rrdset, start_time);
- handle->slotted.last_slot = rrdset_time2slot(rd->rrdset, end_time);
- handle->slotted.finished = 0;
-}
-
-static storage_number rrddim_query_next_metric(struct rrddim_query_handle *handle, time_t *current_time) {
- RRDDIM *rd = handle->rd;
- long entries = rd->rrdset->entries;
- long slot = handle->slotted.slot;
-
- (void)current_time;
- if (unlikely(handle->slotted.slot == handle->slotted.last_slot))
- handle->slotted.finished = 1;
- storage_number n = rd->values[slot++];
-
- if(unlikely(slot >= entries)) slot = 0;
- handle->slotted.slot = slot;
-
- return n;
-}
-
-static int rrddim_query_is_finished(struct rrddim_query_handle *handle) {
- return handle->slotted.finished;
-}
-
-static void rrddim_query_finalize(struct rrddim_query_handle *handle) {
- (void)handle;
-
- return;
-}
-
-static time_t rrddim_query_latest_time(RRDDIM *rd) {
- return rrdset_last_entry_t_nolock(rd->rrdset);
-}
-
-static time_t rrddim_query_oldest_time(RRDDIM *rd) {
- return rrdset_first_entry_t_nolock(rd->rrdset);
-}
-
-
-// ----------------------------------------------------------------------------
// RRDDIM create a dimension
void rrdcalc_link_to_rrddim(RRDDIM *rd, RRDSET *st, RRDHOST *host) {
RRDCALC *rrdc;
+
for (rrdc = host->alarms_with_foreach; rrdc ; rrdc = rrdc->next) {
if (simple_pattern_matches(rrdc->spdim, rd->id) || simple_pattern_matches(rrdc->spdim, rd->name)) {
if (rrdc->hash_chart == st->hash_name || !strcmp(rrdc->chart, st->name) || !strcmp(rrdc->chart, st->id)) {
char *name = alarm_name_with_dim(rrdc->name, strlen(rrdc->name), rd->name, strlen(rd->name));
- if (name) {
- if(rrdcalc_exists(host, st->name, name, 0, 0)){
- freez(name);
- continue;
- }
+ if(rrdcalc_exists(host, st->name, name, 0, 0)) {
+ freez(name);
+ continue;
+ }
+
+ netdata_rwlock_wrlock(&host->health_log.alarm_log_rwlock);
+ RRDCALC *child = rrdcalc_create_from_rrdcalc(rrdc, host, name, rd->name);
+ netdata_rwlock_unlock(&host->health_log.alarm_log_rwlock);
- RRDCALC *child = rrdcalc_create_from_rrdcalc(rrdc, host, name, rd->name);
- if (child) {
- rrdcalc_add_to_host(host, child);
- RRDCALC *rdcmp = (RRDCALC *) avl_insert_lock(&(host)->alarms_idx_health_log,(avl_t *)child);
- if (rdcmp != child) {
- error("Cannot insert the alarm index ID %s",child->name);
- }
- } else {
- error("Cannot allocate a new alarm.");
- rrdc->foreachcounter--;
+ if (child) {
+ rrdcalc_add_to_host(host, child);
+ RRDCALC *rdcmp = (RRDCALC *) avl_insert_lock(&(host)->alarms_idx_health_log,(avl_t *)child);
+ if (rdcmp != child) {
+ error("Cannot insert the alarm index ID %s",child->name);
}
}
+ else {
+ error("Cannot allocate a new alarm.");
+ rrdc->foreachcounter--;
+ }
}
}
}
-#ifdef ENABLE_ACLK
- rrdset_flag_clear(st, RRDSET_FLAG_ACLK);
-#endif
}
+// Return either
+// 0 : Dimension is live
+// last collected time : Dimension is not live
+
+#if defined(ENABLE_ACLK) && defined(ENABLE_NEW_CLOUD_PROTOCOL)
+time_t calc_dimension_liveness(RRDDIM *rd, time_t now)
+{
+ time_t last_updated = rd->last_collected_time.tv_sec;
+ int live;
+ if (rd->state->aclk_live_status == 1)
+ live =
+ ((now - last_updated) <
+ MIN(rrdset_free_obsolete_time, RRDSET_MINIMUM_DIM_OFFLINE_MULTIPLIER * rd->update_every));
+ else
+ live = ((now - last_updated) < RRDSET_MINIMUM_DIM_LIVE_MULTIPLIER * rd->update_every);
+ return live ? 0 : last_updated;
+}
+#endif
+
RRDDIM *rrddim_add_custom(RRDSET *st, const char *id, const char *name, collected_number multiplier,
collected_number divisor, RRD_ALGORITHM algorithm, RRD_MEMORY_MODE memory_mode)
{
RRDHOST *host = st->rrdhost;
rrdset_wrlock(st);
- rrdset_flag_set(st, RRDSET_FLAG_SYNC_CLOCK);
- rrdset_flag_clear(st, RRDSET_FLAG_UPSTREAM_EXPOSED);
-
RRDDIM *rd = rrddim_find(st, id);
if(unlikely(rd)) {
debug(D_RRD_CALLS, "Cannot create rrd dimension '%s/%s', it already exists.", st->id, name?name:"<NONAME>");
@@ -227,11 +184,19 @@ RRDDIM *rrddim_add_custom(RRDSET *st, const char *id, const char *name, collecte
debug(D_METADATALOG, "DIMENSION [%s] metadata updated", rd->id);
(void)sql_store_dimension(&rd->state->metric_uuid, rd->rrdset->chart_uuid, rd->id, rd->name, rd->multiplier, rd->divisor,
rd->algorithm);
+#if defined(ENABLE_ACLK) && defined(ENABLE_NEW_CLOUD_PROTOCOL)
+ queue_dimension_to_aclk(rd, calc_dimension_liveness(rd, now_realtime_sec()));
+#endif
+ rrdset_flag_set(st, RRDSET_FLAG_SYNC_CLOCK);
+ rrdset_flag_clear(st, RRDSET_FLAG_UPSTREAM_EXPOSED);
}
rrdset_unlock(st);
return rd;
}
+ rrdset_flag_set(st, RRDSET_FLAG_SYNC_CLOCK);
+ rrdset_flag_clear(st, RRDSET_FLAG_UPSTREAM_EXPOSED);
+
char filename[FILENAME_MAX + 1];
char fullfilename[FILENAME_MAX + 1];
@@ -244,12 +209,11 @@ RRDDIM *rrddim_add_custom(RRDSET *st, const char *id, const char *name, collecte
if(memory_mode == RRD_MEMORY_MODE_SAVE || memory_mode == RRD_MEMORY_MODE_MAP ||
memory_mode == RRD_MEMORY_MODE_RAM) {
- rd = (RRDDIM *)mymmap(
- (memory_mode == RRD_MEMORY_MODE_RAM) ? NULL : fullfilename
- , size
- , ((memory_mode == RRD_MEMORY_MODE_MAP) ? MAP_SHARED : MAP_PRIVATE)
- , 1
- );
+ rd = (RRDDIM *)netdata_mmap(
+ (memory_mode == RRD_MEMORY_MODE_RAM) ? NULL : fullfilename,
+ size,
+ ((memory_mode == RRD_MEMORY_MODE_MAP) ? MAP_SHARED : MAP_PRIVATE),
+ 1);
if(likely(rd)) {
// we have a file mapped for rd
@@ -369,30 +333,16 @@ RRDDIM *rrddim_add_custom(RRDSET *st, const char *id, const char *name, collecte
rd->state->aclk_live_status = -1;
#endif
(void) find_dimension_uuid(st, rd, &(rd->state->metric_uuid));
- if(memory_mode == RRD_MEMORY_MODE_DBENGINE) {
+
+ STORAGE_ENGINE* eng = storage_engine_get(memory_mode);
+ rd->state->collect_ops = eng->api.collect_ops;
+ rd->state->query_ops = eng->api.query_ops;
+
#ifdef ENABLE_DBENGINE
+ if(memory_mode == RRD_MEMORY_MODE_DBENGINE) {
rrdeng_metric_init(rd);
- rd->state->collect_ops.init = rrdeng_store_metric_init;
- rd->state->collect_ops.store_metric = rrdeng_store_metric_next;
- rd->state->collect_ops.finalize = rrdeng_store_metric_finalize;
- rd->state->query_ops.init = rrdeng_load_metric_init;
- rd->state->query_ops.next_metric = rrdeng_load_metric_next;
- rd->state->query_ops.is_finished = rrdeng_load_metric_is_finished;
- rd->state->query_ops.finalize = rrdeng_load_metric_finalize;
- rd->state->query_ops.latest_time = rrdeng_metric_latest_time;
- rd->state->query_ops.oldest_time = rrdeng_metric_oldest_time;
-#endif
- } else {
- rd->state->collect_ops.init = rrddim_collect_init;
- rd->state->collect_ops.store_metric = rrddim_collect_store_metric;
- rd->state->collect_ops.finalize = rrddim_collect_finalize;
- rd->state->query_ops.init = rrddim_query_init;
- rd->state->query_ops.next_metric = rrddim_query_next_metric;
- rd->state->query_ops.is_finished = rrddim_query_is_finished;
- rd->state->query_ops.finalize = rrddim_query_finalize;
- rd->state->query_ops.latest_time = rrddim_query_latest_time;
- rd->state->query_ops.oldest_time = rrddim_query_oldest_time;
}
+#endif
store_active_dimension(&rd->state->metric_uuid);
rd->state->collect_ops.init(rd);
// append this dimension
@@ -437,22 +387,16 @@ RRDDIM *rrddim_add_custom(RRDSET *st, const char *id, const char *name, collecte
ml_new_dimension(rd);
rrdset_unlock(st);
-#ifdef ENABLE_ACLK
- rrdset_flag_clear(st, RRDSET_FLAG_ACLK);
-#endif
return(rd);
}
// ----------------------------------------------------------------------------
// RRDDIM remove / free a dimension
-void rrddim_free_custom(RRDSET *st, RRDDIM *rd, int db_rotated)
+void rrddim_free(RRDSET *st, RRDDIM *rd)
{
ml_delete_dimension(rd);
-
-#ifndef ENABLE_ACLK
- UNUSED(db_rotated);
-#endif
+
debug(D_RRD_CALLS, "rrddim_free() %s.%s", st->name, rd->name);
if (!rrddim_flag_check(rd, RRDDIM_FLAG_ARCHIVED)) {
@@ -483,10 +427,10 @@ void rrddim_free_custom(RRDSET *st, RRDDIM *rd, int db_rotated)
error("RRDDIM: INTERNAL ERROR: attempt to remove from index dimension '%s' on chart '%s', removed a different dimension.", rd->id, st->id);
// free(rd->annotations);
-#if defined(ENABLE_ACLK) && defined(ENABLE_NEW_CLOUD_PROTOCOL)
- if (!netdata_exit)
- aclk_send_dimension_update(rd);
-#endif
+//#if defined(ENABLE_ACLK) && defined(ENABLE_NEW_CLOUD_PROTOCOL)
+// if (!netdata_exit)
+// aclk_send_dimension_update(rd);
+//#endif
RRD_MEMORY_MODE rrd_memory_mode = rd->rrd_memory_mode;
switch(rrd_memory_mode) {
@@ -512,10 +456,6 @@ void rrddim_free_custom(RRDSET *st, RRDDIM *rd, int db_rotated)
freez(rd);
break;
}
-#ifdef ENABLE_ACLK
- if (db_rotated || RRD_MEMORY_MODE_DBENGINE != rrd_memory_mode)
- rrdset_flag_clear(st, RRDSET_FLAG_ACLK);
-#endif
}
@@ -532,12 +472,11 @@ int rrddim_hide(RRDSET *st, const char *id) {
error("Cannot find dimension with id '%s' on stats '%s' (%s) on host '%s'.", id, st->name, st->id, host->hostname);
return 1;
}
- (void) sql_set_dimension_option(&rd->state->metric_uuid, "hidden");
+ if (!rrddim_flag_check(rd, RRDDIM_FLAG_META_HIDDEN))
+ (void)sql_set_dimension_option(&rd->state->metric_uuid, "hidden");
rrddim_flag_set(rd, RRDDIM_FLAG_HIDDEN);
-#ifdef ENABLE_ACLK
- rrdset_flag_clear(st, RRDSET_FLAG_ACLK);
-#endif
+ rrddim_flag_set(rd, RRDDIM_FLAG_META_HIDDEN);
return 0;
}
@@ -550,12 +489,11 @@ int rrddim_unhide(RRDSET *st, const char *id) {
error("Cannot find dimension with id '%s' on stats '%s' (%s) on host '%s'.", id, st->name, st->id, host->hostname);
return 1;
}
- (void) sql_set_dimension_option(&rd->state->metric_uuid, NULL);
+ if (rrddim_flag_check(rd, RRDDIM_FLAG_META_HIDDEN))
+ (void)sql_set_dimension_option(&rd->state->metric_uuid, NULL);
rrddim_flag_clear(rd, RRDDIM_FLAG_HIDDEN);
-#ifdef ENABLE_ACLK
- rrdset_flag_clear(st, RRDSET_FLAG_ACLK);
-#endif
+ rrddim_flag_clear(rd, RRDDIM_FLAG_META_HIDDEN);
return 0;
}
@@ -568,18 +506,12 @@ inline void rrddim_is_obsolete(RRDSET *st, RRDDIM *rd) {
}
rrddim_flag_set(rd, RRDDIM_FLAG_OBSOLETE);
rrdset_flag_set(st, RRDSET_FLAG_OBSOLETE_DIMENSIONS);
-#ifdef ENABLE_ACLK
- rrdset_flag_clear(st, RRDSET_FLAG_ACLK);
-#endif
}
inline void rrddim_isnot_obsolete(RRDSET *st __maybe_unused, RRDDIM *rd) {
debug(D_RRD_CALLS, "rrddim_isnot_obsolete() for chart %s, dimension %s", st->name, rd->name);
rrddim_flag_clear(rd, RRDDIM_FLAG_OBSOLETE);
-#ifdef ENABLE_ACLK
- rrdset_flag_clear(st, RRDSET_FLAG_ACLK);
-#endif
}
// ----------------------------------------------------------------------------
diff --git a/database/rrdhost.c b/database/rrdhost.c
index 649736ca4..cb56bf353 100644
--- a/database/rrdhost.c
+++ b/database/rrdhost.c
@@ -181,6 +181,8 @@ RRDHOST *rrdhost_create(const char *hostname,
host->rrdpush_send_enabled = (rrdpush_enabled && rrdpush_destination && *rrdpush_destination && rrdpush_api_key && *rrdpush_api_key) ? 1 : 0;
host->rrdpush_send_destination = (host->rrdpush_send_enabled)?strdupz(rrdpush_destination):NULL;
+ if (host->rrdpush_send_destination)
+ host->destinations = destinations_init(host->rrdpush_send_destination);
host->rrdpush_send_api_key = (host->rrdpush_send_enabled)?strdupz(rrdpush_api_key):NULL;
host->rrdpush_send_charts_matching = simple_pattern_create(rrdpush_send_charts_matching, NULL, SIMPLE_PATTERN_EXACT);
@@ -390,6 +392,7 @@ RRDHOST *rrdhost_create(const char *hostname,
if (is_localhost && host->system_info) {
host->system_info->ml_capable = ml_capable();
host->system_info->ml_enabled = ml_enabled(host);
+ host->system_info->mc_version = enable_metric_correlations ? metric_correlations_version : 0;
}
ml_new_host(host);
@@ -698,7 +701,7 @@ int rrd_init(char *hostname, struct rrdhost_system_info *system_info) {
if (gap_when_lost_iterations_above < 1)
gap_when_lost_iterations_above = 1;
- if (unlikely(sql_init_database(DB_CHECK_NONE))) {
+ if (unlikely(sql_init_database(DB_CHECK_NONE, 0))) {
if (default_rrd_memory_mode == RRD_MEMORY_MODE_DBENGINE)
fatal("Failed to initialize SQLite");
info("Skipping SQLITE metadata initialization since memory mode is not db engine");
@@ -1488,9 +1491,8 @@ restart_after_removal:
continue;
}
#if defined(ENABLE_ACLK) && defined(ENABLE_NEW_CLOUD_PROTOCOL)
- else {
- aclk_send_dimension_update(rd);
- }
+ else
+ queue_dimension_to_aclk(rd, rd->last_collected_time.tv_sec);
#endif
}
last = rd;
@@ -1528,6 +1530,18 @@ restart_after_removal:
}
}
+void rrdset_check_obsoletion(RRDHOST *host)
+{
+ RRDSET *st;
+ time_t last_entry_t;
+ rrdset_foreach_read(st, host) {
+ last_entry_t = rrdset_last_entry_t(st);
+ if (last_entry_t && last_entry_t < host->senders_connect_time) {
+ rrdset_is_obsolete(st);
+ }
+ }
+}
+
void rrd_cleanup_obsolete_charts()
{
rrd_rdlock();
@@ -1547,6 +1561,16 @@ void rrd_cleanup_obsolete_charts()
#endif
rrdhost_unlock(host);
}
+
+ if (host != localhost &&
+ host->trigger_chart_obsoletion_check &&
+ host->senders_last_chart_command &&
+ host->senders_last_chart_command + 120 < now_realtime_sec()) {
+ rrdhost_rdlock(host);
+ rrdset_check_obsoletion(host);
+ rrdhost_unlock(host);
+ host->trigger_chart_obsoletion_check = 0;
+ }
}
rrd_unlock();
diff --git a/database/rrdset.c b/database/rrdset.c
index f8e471be7..e7cb89df0 100644
--- a/database/rrdset.c
+++ b/database/rrdset.c
@@ -125,7 +125,7 @@ char *rrdset_strncpyz_name(char *to, const char *from, size_t length) {
char c, *p = to;
while (length-- && (c = *from++)) {
- if(c != '.' && !isalnum(c))
+ if(c != '.' && c != '-' && !isalnum(c))
c = '_';
*p++ = c;
@@ -366,11 +366,6 @@ void rrdset_free(RRDSET *st) {
rrdvar_free_remaining_variables(host, &st->rrdvar_root_index);
// ------------------------------------------------------------------------
- // remove it from the configuration
-
- appconfig_section_destroy_non_loaded(&netdata_config, st->config_section);
-
- // ------------------------------------------------------------------------
// unlink it from the host
if(st == host->rrdset_root) {
@@ -402,10 +397,10 @@ void rrdset_free(RRDSET *st) {
freez(st->units);
freez(st->context);
freez(st->cache_dir);
- freez(st->config_section);
freez(st->plugin_name);
freez(st->module_name);
freez(st->state->old_title);
+ freez(st->state->old_units);
freez(st->state->old_context);
free_label_list(st->state->labels.head);
freez(st->state);
@@ -557,6 +552,10 @@ RRDSET *rrdset_create_custom(
return NULL;
}
+ if (host != localhost) {
+ host->senders_last_chart_command = now_realtime_sec();
+ }
+
// ------------------------------------------------------------------------
// check if it already exists
@@ -567,15 +566,13 @@ RRDSET *rrdset_create_custom(
RRDSET *st = rrdset_find_on_create(host, fullid);
if (st) {
int mark_rebuild = 0;
- rrdset_flag_set(st, RRDSET_FLAG_SYNC_CLOCK);
- rrdset_flag_clear(st, RRDSET_FLAG_UPSTREAM_EXPOSED);
if (rrdset_flag_check(st, RRDSET_FLAG_ARCHIVED)) {
rrdset_flag_clear(st, RRDSET_FLAG_ARCHIVED);
changed_from_archived_to_active = 1;
mark_rebuild |= META_CHART_ACTIVATED;
}
char *old_plugin = NULL, *old_module = NULL, *old_title = NULL, *old_context = NULL,
- *old_title_v = NULL, *old_context_v = NULL;
+ *old_title_v = NULL, *old_context_v = NULL, *old_units_v = NULL, *old_units = NULL;
int rc;
if(unlikely(name))
@@ -635,6 +632,17 @@ RRDSET *rrdset_create_custom(
mark_rebuild |= META_CHART_UPDATED;
}
+ if (unlikely(units && st->state->old_units && strcmp(st->state->old_units, units))) {
+ char *new_units = strdupz(units);
+ old_units_v = st->state->old_units;
+ st->state->old_units = strdupz(units);
+ json_fix_string(new_units);
+ old_units= st->units;
+ st->units = new_units;
+ mark_rebuild |= META_CHART_UPDATED;
+ }
+
+
if (st->chart_type != chart_type) {
st->chart_type = chart_type;
mark_rebuild |= META_CHART_UPDATED;
@@ -671,8 +679,10 @@ RRDSET *rrdset_create_custom(
freez(old_plugin);
freez(old_module);
freez(old_title);
+ freez(old_units);
freez(old_context);
freez(old_title_v);
+ freez(old_units_v);
freez(old_context_v);
if (mark_rebuild != META_CHART_ACTIVATED) {
info("Collector updated metadata for chart %s", st->id);
@@ -684,6 +694,11 @@ RRDSET *rrdset_create_custom(
int rc = update_chart_metadata(st->chart_uuid, st, id, name);
if (unlikely(rc))
error_report("Failed to update chart metadata in the database");
+
+ if (!changed_from_archived_to_active) {
+ rrdset_flag_set(st, RRDSET_FLAG_SYNC_CLOCK);
+ rrdset_flag_clear(st, RRDSET_FLAG_UPSTREAM_EXPOSED);
+ }
}
/* Fall-through during switch from archived to active so that the host lock is taken and health is linked */
if (!changed_from_archived_to_active)
@@ -713,26 +728,14 @@ RRDSET *rrdset_create_custom(
char fullfilename[FILENAME_MAX + 1];
// ------------------------------------------------------------------------
- // compose the config_section for this chart
-
- char config_section[RRD_ID_LENGTH_MAX + GUID_LEN + 2];
- if(host == localhost)
- strcpy(config_section, fullid);
- else
- snprintfz(config_section, RRD_ID_LENGTH_MAX + GUID_LEN + 1, "%s/%s", host->machine_guid, fullid);
-
- // ------------------------------------------------------------------------
// get the options from the config, we need to create it
- long entries;
- int enabled = config_get_boolean(config_section, "enabled", 1);
- if(!enabled || memory_mode == RRD_MEMORY_MODE_DBENGINE)
- entries = 5;
- else
+ long entries = 5;
+ if (memory_mode != RRD_MEMORY_MODE_DBENGINE)
entries = align_entries_to_pagesize(memory_mode, history_entries);
unsigned long size = sizeof(RRDSET);
- char *cache_dir = rrdset_cache_dir(host, fullid, config_section);
+ char *cache_dir = rrdset_cache_dir(host, fullid);
time_t now = now_realtime_sec();
@@ -744,12 +747,11 @@ RRDSET *rrdset_create_custom(
snprintfz(fullfilename, FILENAME_MAX, "%s/main.db", cache_dir);
if(memory_mode == RRD_MEMORY_MODE_SAVE || memory_mode == RRD_MEMORY_MODE_MAP ||
memory_mode == RRD_MEMORY_MODE_RAM) {
- st = (RRDSET *) mymmap(
- (memory_mode == RRD_MEMORY_MODE_RAM) ? NULL : fullfilename
- , size
- , ((memory_mode == RRD_MEMORY_MODE_MAP) ? MAP_SHARED : MAP_PRIVATE)
- , 0
- );
+ st = (RRDSET *)netdata_mmap(
+ (memory_mode == RRD_MEMORY_MODE_RAM) ? NULL : fullfilename,
+ size,
+ ((memory_mode == RRD_MEMORY_MODE_MAP) ? MAP_SHARED : MAP_PRIVATE),
+ 0);
if(st) {
memset(&st->avl, 0, sizeof(avl_t));
@@ -759,7 +761,6 @@ RRDSET *rrdset_create_custom(
memset(&st->rrdset_rwlock, 0, sizeof(netdata_rwlock_t));
st->name = NULL;
- st->config_section = NULL;
st->type = NULL;
st->family = NULL;
st->title = NULL;
@@ -832,7 +833,6 @@ RRDSET *rrdset_create_custom(
st->plugin_name = plugin?strdupz(plugin):NULL;
st->module_name = module?strdupz(module):NULL;
- st->config_section = strdupz(config_section);
st->rrdhost = host;
st->memsize = size;
st->entries = entries;
@@ -859,6 +859,7 @@ RRDSET *rrdset_create_custom(
st->state->is_ar_chart = strcmp(st->id, ML_ANOMALY_RATES_CHART_ID) == 0;
st->units = units ? strdupz(units) : strdupz("");
+ st->state->old_units = strdupz(st->units);
json_fix_string(st->units);
st->context = context ? strdupz(context) : strdupz(st->id);
@@ -867,27 +868,9 @@ RRDSET *rrdset_create_custom(
st->hash_context = simple_hash(st->context);
st->priority = priority;
- if(enabled)
- rrdset_flag_set(st, RRDSET_FLAG_ENABLED);
- else
- rrdset_flag_clear(st, RRDSET_FLAG_ENABLED);
- rrdset_flag_clear(st, RRDSET_FLAG_DETAIL);
- rrdset_flag_clear(st, RRDSET_FLAG_DEBUG);
- rrdset_flag_clear(st, RRDSET_FLAG_OBSOLETE);
- rrdset_flag_clear(st, RRDSET_FLAG_EXPORTING_SEND);
- rrdset_flag_clear(st, RRDSET_FLAG_EXPORTING_IGNORE);
- rrdset_flag_clear(st, RRDSET_FLAG_UPSTREAM_SEND);
- rrdset_flag_clear(st, RRDSET_FLAG_UPSTREAM_IGNORE);
- rrdset_flag_clear(st, RRDSET_FLAG_UPSTREAM_EXPOSED);
rrdset_flag_set(st, RRDSET_FLAG_SYNC_CLOCK);
- // if(!strcmp(st->id, "disk_util.dm-0")) {
- // st->debug = 1;
- // error("enabled debugging for '%s'", st->id);
- // }
- // else error("not enabled debugging for '%s'", st->id);
-
st->green = NAN;
st->red = NAN;
@@ -960,7 +943,7 @@ RRDSET *rrdset_create_custom(
// RRDSET - data collection iteration control
inline void rrdset_next_usec_unfiltered(RRDSET *st, usec_t microseconds) {
- if(unlikely(!st->last_collected_time.tv_sec || !microseconds || (rrdset_flag_check_noatomic(st, RRDSET_FLAG_SYNC_CLOCK)))) {
+ if(unlikely(!st->last_collected_time.tv_sec || !microseconds || (rrdset_flag_check(st, RRDSET_FLAG_SYNC_CLOCK)))) {
// call the full next_usec() function
rrdset_next_usec(st, microseconds);
return;
@@ -978,7 +961,7 @@ inline void rrdset_next_usec(RRDSET *st, usec_t microseconds) {
usec_t discarded = microseconds;
#endif
- if(unlikely(rrdset_flag_check_noatomic(st, RRDSET_FLAG_SYNC_CLOCK))) {
+ if(unlikely(rrdset_flag_check(st, RRDSET_FLAG_SYNC_CLOCK))) {
// the chart needs to be re-synced to current time
rrdset_flag_clear(st, RRDSET_FLAG_SYNC_CLOCK);
@@ -1010,7 +993,9 @@ inline void rrdset_next_usec(RRDSET *st, usec_t microseconds) {
if(unlikely(since_last_usec < 0)) {
// oops! the database is in the future
+ #ifdef NETDATA_INTERNAL_CHECKS
info("RRD database for chart '%s' on host '%s' is %0.5" LONG_DOUBLE_MODIFIER " secs in the future (counter #%zu, update #%zu). Adjusting it to current time.", st->id, st->rrdhost->hostname, (LONG_DOUBLE)-since_last_usec / USEC_PER_SEC, st->counter, st->counter_done);
+ #endif
st->last_collected_time.tv_sec = now.tv_sec - st->update_every;
st->last_collected_time.tv_usec = now.tv_usec;
@@ -1027,7 +1012,9 @@ inline void rrdset_next_usec(RRDSET *st, usec_t microseconds) {
}
else if(unlikely((usec_t)since_last_usec > (usec_t)(st->update_every * 5 * USEC_PER_SEC))) {
// oops! the database is too far behind
+ #ifdef NETDATA_INTERNAL_CHECKS
info("RRD database for chart '%s' on host '%s' is %0.5" LONG_DOUBLE_MODIFIER " secs in the past (counter #%zu, update #%zu). Adjusting it to current time.", st->id, st->rrdhost->hostname, (LONG_DOUBLE)since_last_usec / USEC_PER_SEC, st->counter, st->counter_done);
+ #endif
microseconds = (usec_t)since_last_usec;
#ifdef NETDATA_INTERNAL_CHECKS
@@ -1403,8 +1390,9 @@ void rrdset_done(RRDSET *st) {
#ifdef ENABLE_ACLK
if (likely(!st->state->is_ar_chart)) {
if (unlikely(!rrdset_flag_check(st, RRDSET_FLAG_ACLK))) {
- if (likely(st->dimensions && st->counter_done && !queue_chart_to_aclk(st)))
+ if (likely(st->dimensions && st->counter_done && !queue_chart_to_aclk(st))) {
rrdset_flag_set(st, RRDSET_FLAG_ACLK);
+ }
}
}
#endif
@@ -1823,7 +1811,7 @@ after_first_database_work:
after_second_database_work:
st->last_collected_total = st->collected_total;
-#ifdef ENABLE_NEW_CLOUD_PROTOCOL
+#if defined(ENABLE_ACLK) && defined(ENABLE_NEW_CLOUD_PROTOCOL)
time_t mark = now_realtime_sec();
#endif
rrddim_foreach_read(rd, st) {
@@ -1831,20 +1819,10 @@ after_second_database_work:
continue;
#if defined(ENABLE_ACLK) && defined(ENABLE_NEW_CLOUD_PROTOCOL)
- if (likely(!st->state->is_ar_chart)) {
- if (!rrddim_flag_check(rd, RRDDIM_FLAG_HIDDEN)) {
- int live =
- ((mark - rd->last_collected_time.tv_sec) < RRDSET_MINIMUM_DIM_LIVE_MULTIPLIER * rd->update_every);
- if (unlikely(live != rd->state->aclk_live_status)) {
- if (likely(rrdset_flag_check(st, RRDSET_FLAG_ACLK))) {
- if (likely(!queue_dimension_to_aclk(rd))) {
- rd->state->aclk_live_status = live;
- rrddim_flag_set(rd, RRDDIM_FLAG_ACLK);
- }
- }
- }
+ if (likely(!st->state->is_ar_chart)) {
+ if (!rrddim_flag_check(rd, RRDDIM_FLAG_HIDDEN) && likely(rrdset_flag_check(st, RRDSET_FLAG_ACLK)))
+ queue_dimension_to_aclk(rd, calc_dimension_liveness(rd, mark));
}
- }
#endif
if(unlikely(!rd->updated))
continue;
@@ -1946,7 +1924,7 @@ after_second_database_work:
} else {
/* Do not delete this dimension */
#if defined(ENABLE_ACLK) && defined(ENABLE_NEW_CLOUD_PROTOCOL)
- aclk_send_dimension_update(rd);
+ queue_dimension_to_aclk(rd, calc_dimension_liveness(rd, mark));
#endif
last = rd;
rd = rd->next;
@@ -1996,7 +1974,7 @@ void rrdset_finalize_labels(RRDSET *st)
replace_label_list(labels, new_labels);
}
- netdata_rwlock_wrlock(&labels->labels_rwlock);
+ netdata_rwlock_rdlock(&labels->labels_rwlock);
struct label *lbl = labels->head;
while (lbl) {
sql_store_chart_label(st->chart_uuid, (int)lbl->label_source, lbl->key, lbl->value);
diff --git a/database/sqlite/sqlite_aclk.c b/database/sqlite/sqlite_aclk.c
index 989328097..950856d9a 100644
--- a/database/sqlite/sqlite_aclk.c
+++ b/database/sqlite/sqlite_aclk.c
@@ -10,6 +10,11 @@
#include "../../aclk/aclk.h"
#endif
+void sanity_check(void) {
+ // build-time guard: the number of ACLK opcodes must not exceed WORKER_UTILIZATION_MAX_JOB_TYPES (opcodes are used as worker job ids)
+ BUILD_BUG_ON(WORKER_UTILIZATION_MAX_JOB_TYPES < ACLK_MAX_ENUMERATIONS_DEFINED);
+}
+
const char *aclk_sync_config[] = {
"CREATE TABLE IF NOT EXISTS dimension_delete (dimension_id blob, dimension_name text, chart_type_id text, "
"dim_id blob, chart_id blob, host_id blob, date_created);",
@@ -29,6 +34,28 @@ const char *aclk_sync_config[] = {
uv_mutex_t aclk_async_lock;
struct aclk_database_worker_config *aclk_thread_head = NULL;
+int retention_running = 0;
+
+#ifdef ENABLE_NEW_CLOUD_PROTOCOL
+static void stop_retention_run() // clears the global retention_running flag set by request_retention_run()
+{
+ uv_mutex_lock(&aclk_async_lock); // the flag is read and written under aclk_async_lock
+ retention_running = 0;
+ uv_mutex_unlock(&aclk_async_lock);
+}
+
+static int request_retention_run() // returns 0 when the caller obtained the retention slot, 1 when a run is already in progress
+{
+ int rc = 0;
+ uv_mutex_lock(&aclk_async_lock);
+ if (unlikely(retention_running))
+ rc = 1; // flag already set: refuse the request
+ else
+ retention_running = 1; // take the slot; stop_retention_run() releases it
+ uv_mutex_unlock(&aclk_async_lock);
+ return rc;
+}
+#endif
int claimed()
{
@@ -313,9 +340,6 @@ static void timer_cb(uv_timer_t* handle)
if (aclk_use_new_cloud_arch && aclk_connected) {
if (wc->rotation_after && wc->rotation_after < now) {
- cmd.opcode = ACLK_DATABASE_NODE_INFO;
- aclk_database_enq_cmd_noblock(wc, &cmd);
-
cmd.opcode = ACLK_DATABASE_UPD_RETENTION;
if (!aclk_database_enq_cmd_noblock(wc, &cmd))
wc->rotation_after += ACLK_DATABASE_ROTATION_INTERVAL;
@@ -339,7 +363,7 @@ static void timer_cb(uv_timer_t* handle)
}
}
- if (wc->alert_updates) {
+ if (wc->alert_updates && !wc->pause_alert_updates) {
cmd.opcode = ACLK_DATABASE_PUSH_ALERT;
cmd.count = ACLK_MAX_ALERT_UPDATES;
aclk_database_enq_cmd_noblock(wc, &cmd);
@@ -348,10 +372,65 @@ static void timer_cb(uv_timer_t* handle)
#endif
}
+
+#ifdef ENABLE_NEW_CLOUD_PROTOCOL
+void after_send_retention(uv_work_t *req, int status) // completion callback paired with send_retention() in uv_queue_work(); req->data is the worker config
+{
+ struct aclk_database_worker_config *wc = req->data;
+ (void)status; // status is intentionally ignored
+ stop_retention_run(); // release the global retention flag
+ wc->retention_running = 0; // and clear this worker's own in-progress marker
+
+ struct aclk_database_cmd cmd;
+ memset(&cmd, 0, sizeof(cmd));
+ cmd.opcode = ACLK_DATABASE_DIM_DELETION; // follow-up commands: dimension deletion first, then node info
+ if (aclk_database_enq_cmd_noblock(wc, &cmd))
+ info("Failed to queue a dimension deletion message");
+
+ cmd.opcode = ACLK_DATABASE_NODE_INFO; // the same cmd struct is reused with a new opcode
+ if (aclk_database_enq_cmd_noblock(wc, &cmd))
+ info("Failed to queue a node update info message");
+}
+
+
+static void send_retention(uv_work_t *req) // work callback handed to uv_queue_work(); req->data is the worker config
+{
+ struct aclk_database_worker_config *wc = req->data;
+
+ if (unlikely(wc->is_shutting_down)) // skip the retention update once the worker is shutting down
+ return;
+
+ aclk_update_retention(wc);
+}
+#endif
+
#define MAX_CMD_BATCH_SIZE (256)
void aclk_database_worker(void *arg)
{
+ worker_register("ACLKSYNC");
+ worker_register_job_name(ACLK_DATABASE_NOOP, "noop");
+#ifdef ENABLE_NEW_CLOUD_PROTOCOL
+ worker_register_job_name(ACLK_DATABASE_ADD_CHART, "chart add");
+ worker_register_job_name(ACLK_DATABASE_ADD_DIMENSION, "dimension add");
+ worker_register_job_name(ACLK_DATABASE_PUSH_CHART, "chart push");
+ worker_register_job_name(ACLK_DATABASE_PUSH_CHART_CONFIG, "chart conf push");
+ worker_register_job_name(ACLK_DATABASE_RESET_CHART, "chart reset");
+ worker_register_job_name(ACLK_DATABASE_CHART_ACK, "chart ack");
+ worker_register_job_name(ACLK_DATABASE_UPD_RETENTION, "retention check");
+ worker_register_job_name(ACLK_DATABASE_DIM_DELETION, "dimension delete");
+ worker_register_job_name(ACLK_DATABASE_ORPHAN_HOST, "node orphan");
+#endif
+ worker_register_job_name(ACLK_DATABASE_ALARM_HEALTH_LOG, "alert log");
+ worker_register_job_name(ACLK_DATABASE_CLEANUP, "cleanup");
+ worker_register_job_name(ACLK_DATABASE_DELETE_HOST, "node delete");
+ worker_register_job_name(ACLK_DATABASE_NODE_INFO, "node info");
+ worker_register_job_name(ACLK_DATABASE_PUSH_ALERT, "alert push");
+ worker_register_job_name(ACLK_DATABASE_PUSH_ALERT_CONFIG, "alert conf push");
+ worker_register_job_name(ACLK_DATABASE_PUSH_ALERT_SNAPSHOT, "alert snapshot");
+ worker_register_job_name(ACLK_DATABASE_QUEUE_REMOVED_ALERTS, "alerts check");
+ worker_register_job_name(ACLK_DATABASE_TIMER, "timer");
+
struct aclk_database_worker_config *wc = arg;
uv_loop_t *loop;
int ret;
@@ -401,6 +480,7 @@ void aclk_database_worker(void *arg)
memset(&cmd, 0, sizeof(cmd));
#ifdef ENABLE_NEW_CLOUD_PROTOCOL
+ uv_work_t retention_work;
sql_get_last_chart_sequence(wc);
wc->chart_payload_count = sql_get_pending_count(wc);
if (!wc->chart_payload_count)
@@ -412,7 +492,9 @@ void aclk_database_worker(void *arg)
wc->rotation_after = wc->startup_time + ACLK_DATABASE_ROTATION_DELAY;
debug(D_ACLK_SYNC,"Node %s reports pending message count = %u", wc->node_id, wc->chart_payload_count);
+
while (likely(!netdata_exit)) {
+ worker_is_idle();
uv_run(loop, UV_RUN_DEFAULT);
/* wait for commands */
@@ -427,6 +509,10 @@ void aclk_database_worker(void *arg)
opcode = cmd.opcode;
++cmd_batch_size;
+
+ if(likely(opcode != ACLK_DATABASE_NOOP))
+ worker_is_busy(opcode);
+
switch (opcode) {
case ACLK_DATABASE_NOOP:
/* the command queue was empty, do nothing */
@@ -439,6 +525,7 @@ void aclk_database_worker(void *arg)
if (wc->host == localhost)
sql_check_aclk_table_list(wc);
break;
+
case ACLK_DATABASE_DELETE_HOST:
debug(D_ACLK_SYNC,"Cleaning ACLK tables for %s", (char *) cmd.data);
sql_delete_aclk_table_list(wc, cmd);
@@ -504,9 +591,21 @@ void aclk_database_worker(void *arg)
aclk_process_dimension_deletion(wc, cmd);
break;
case ACLK_DATABASE_UPD_RETENTION:
+ if (unlikely(wc->retention_running))
+ break;
+
+ if (unlikely(request_retention_run())) {
+ wc->rotation_after = now_realtime_sec() + ACLK_DATABASE_RETENTION_RETRY;
+ break;
+ }
+
debug(D_ACLK_SYNC,"Sending retention info for %s", wc->uuid_str);
- aclk_update_retention(wc, cmd);
- aclk_process_dimension_deletion(wc, cmd);
+ retention_work.data = wc;
+ wc->retention_running = 1;
+ if (unlikely(uv_queue_work(loop, &retention_work, send_retention, after_send_retention))) {
+ wc->retention_running = 0;
+ stop_retention_run();
+ }
break;
// NODE_INSTANCE DETECTION
@@ -535,6 +634,8 @@ void aclk_database_worker(void *arg)
cmd.completion = NULL;
wc->node_info_send = aclk_database_enq_cmd_noblock(wc, &cmd);
}
+ if (localhost == wc->host)
+ (void) sqlite3_wal_checkpoint(db_meta, NULL);
break;
default:
debug(D_ACLK_SYNC, "%s: default.", __func__);
@@ -577,6 +678,8 @@ void aclk_database_worker(void *arg)
wc->host->dbsync_worker = NULL;
freez(wc);
rrd_unlock();
+
+ worker_unregister();
return;
error_after_timer_init:
@@ -585,6 +688,7 @@ error_after_async_init:
fatal_assert(0 == uv_loop_close(loop));
error_after_loop_init:
freez(loop);
+ worker_unregister();
}
// -------------------------------------------------------------
@@ -628,7 +732,7 @@ void sql_create_aclk_table(RRDHOST *host, uuid_t *host_uuid, uuid_t *node_id)
db_execute(buffer_tostring(sql));
buffer_flush(sql);
- buffer_sprintf(sql, TABLE_ACLK_ALERT, uuid_str, uuid_str, uuid_str);
+ buffer_sprintf(sql, TABLE_ACLK_ALERT, uuid_str);
db_execute(buffer_tostring(sql));
buffer_flush(sql);
diff --git a/database/sqlite/sqlite_aclk.h b/database/sqlite/sqlite_aclk.h
index 894d93489..37e3d4530 100644
--- a/database/sqlite/sqlite_aclk.h
+++ b/database/sqlite/sqlite_aclk.h
@@ -16,7 +16,8 @@
#endif
#define ACLK_MAX_ALERT_UPDATES (5)
#define ACLK_DATABASE_CLEANUP_FIRST (60)
-#define ACLK_DATABASE_ROTATION_DELAY (60)
+#define ACLK_DATABASE_ROTATION_DELAY (180)
+#define ACLK_DATABASE_RETENTION_RETRY (60)
#define ACLK_DATABASE_CLEANUP_INTERVAL (3600)
#define ACLK_DATABASE_ROTATION_INTERVAL (3600)
#define ACLK_DELETE_ACK_INTERNAL (600)
@@ -103,9 +104,7 @@ static inline char *get_str_from_uuid(uuid_t *uuid)
#define TABLE_ACLK_ALERT "CREATE TABLE IF NOT EXISTS aclk_alert_%s (sequence_id INTEGER PRIMARY KEY, " \
"alert_unique_id, date_created, date_submitted, date_cloud_ack, " \
- "unique(alert_unique_id)); " \
- "insert into aclk_alert_%s (alert_unique_id, date_created) " \
- "select unique_id alert_unique_id, strftime('%%s') date_created from health_log_%s where new_status <> 0 and new_status <> -2 order by unique_id asc on conflict (alert_unique_id) do nothing;"
+ "unique(alert_unique_id));"
#define INDEX_ACLK_CHART "CREATE INDEX IF NOT EXISTS aclk_chart_index_%s ON aclk_chart_%s (unique_id);"
@@ -135,7 +134,11 @@ enum aclk_database_opcode {
ACLK_DATABASE_PUSH_ALERT_CONFIG,
ACLK_DATABASE_PUSH_ALERT_SNAPSHOT,
ACLK_DATABASE_QUEUE_REMOVED_ALERTS,
- ACLK_DATABASE_TIMER
+ ACLK_DATABASE_TIMER,
+
+ // leave this last
+ // we need it to check for worker utilization
+ ACLK_MAX_ENUMERATIONS_DEFINED
};
struct aclk_chart_payload_t {
@@ -176,6 +179,7 @@ struct aclk_database_worker_config {
uint64_t alerts_batch_id; // batch id for alerts to use
uint64_t alerts_start_seq_id; // cloud has asked to start streaming from
uint64_t alert_sequence_id; // last alert sequence_id
+ int pause_alert_updates;
uint32_t chart_payload_count;
uint64_t alerts_snapshot_id; //will contain the snapshot_id value if snapshot was requested
uint64_t alerts_ack_sequence_id; //last sequence_id ack'ed from cloud via sendsnapshot message
@@ -194,6 +198,7 @@ struct aclk_database_worker_config {
int node_info_send;
int chart_pending;
int chart_reset_count;
+ int retention_running;
volatile unsigned is_shutting_down;
volatile unsigned is_orphan;
struct aclk_database_worker_config *next;
diff --git a/database/sqlite/sqlite_aclk_alert.c b/database/sqlite/sqlite_aclk_alert.c
index 54e8be4a7..53c6c2a65 100644
--- a/database/sqlite/sqlite_aclk_alert.c
+++ b/database/sqlite/sqlite_aclk_alert.c
@@ -8,9 +8,120 @@
#include "../../aclk/aclk.h"
#endif
+time_t removed_when(uint32_t alarm_id, uint32_t before_unique_id, uint32_t after_unique_id, char *uuid_str) { // when_key of a new_status = -2 row for alarm_id with unique_id strictly between the two ids; 0 if none or if prepare fails
+ sqlite3_stmt *res = NULL;
+ int rc = 0;
+ time_t when = 0; // 0 doubles as "no matching row"
+ char sql[ACLK_SYNC_QUERY_SIZE];
+
+ snprintfz(sql,ACLK_SYNC_QUERY_SIZE-1, "select when_key from health_log_%s where alarm_id = %u " \
+ "and unique_id > %u and unique_id < %u " \
+ "and new_status = -2;", uuid_str, alarm_id, after_unique_id, before_unique_id); // NOTE(review): -2 is presumably RRDCALC_STATUS_REMOVED - confirm
+
+ rc = sqlite3_prepare_v2(db_meta, sql, -1, &res, 0);
+ if (rc != SQLITE_OK) {
+ error_report("Failed to prepare statement when trying to find removed gap.");
+ return 0;
+ }
+
+ rc = sqlite3_step(res); // only the first returned row is read
+ if (likely(rc == SQLITE_ROW)) {
+ when = (time_t) sqlite3_column_int64(res, 0);
+ }
+
+ rc = sqlite3_finalize(res);
+ if (unlikely(rc != SQLITE_OK))
+ error_report("Failed to finalize statement when trying to find removed gap, rc = %d", rc);
+
+ return when;
+}
+
+#define MAX_REMOVED_PERIOD 900
+// Decide whether alert event `ae` of `host` should be queued for the cloud: returns 1 to send, 0 to skip.
+int should_send_to_cloud(RRDHOST *host, ALARM_ENTRY *ae)
+{
+ sqlite3_stmt *res = NULL;
+ char uuid_str[GUID_LEN + 1];
+ uuid_unparse_lower_fix(&host->host_uuid, uuid_str);
+ int send = 1, rc = 0;
+
+ if (ae->new_status == RRDCALC_STATUS_REMOVED || ae->new_status == RRDCALC_STATUS_UNINITIALIZED) {
+ return 0;
+ }
+
+ if (unlikely(uuid_is_null(ae->config_hash_id)))
+ return 0;
+
+ char sql[ACLK_SYNC_QUERY_SIZE];
+ uuid_t config_hash_id = {0}; // null UUID when the stored hash is NULL, so uuid_compare() below never reads indeterminate bytes
+ RRDCALC_STATUS status;
+ uint32_t unique_id;
+
+ //get the previous sent event of this alarm_id
+ snprintfz(sql,ACLK_SYNC_QUERY_SIZE-1, "select hl.new_status, hl.config_hash_id, hl.unique_id from health_log_%s hl, aclk_alert_%s aa \
+ where hl.unique_id = aa.alert_unique_id \
+ and hl.alarm_id = %u and hl.unique_id <> %u \
+ order by alarm_event_id desc LIMIT 1;", uuid_str, uuid_str, ae->alarm_id, ae->unique_id);
+
+ rc = sqlite3_prepare_v2(db_meta, sql, -1, &res, 0);
+ if (rc != SQLITE_OK) {
+ error_report("Failed to prepare statement when trying to filter alert events.");
+ send = 1; // cannot filter: default to sending
+ return send;
+ }
+
+ rc = sqlite3_step(res);
+ if (likely(rc == SQLITE_ROW)) {
+ status = (RRDCALC_STATUS) sqlite3_column_int(res, 0);
+ if (sqlite3_column_type(res, 1) != SQLITE_NULL)
+ uuid_copy(config_hash_id, *((uuid_t *) sqlite3_column_blob(res, 1)));
+ unique_id = (uint32_t) sqlite3_column_int64(res, 2);
+
+ } else {
+ send = 1; // no previously sent event for this alarm_id
+ goto done;
+ }
+
+ if (ae->new_status != (RRDCALC_STATUS)status) {
+ send = 1; // status changed since the last sent event
+ goto done;
+ }
+
+ if (uuid_compare(ae->config_hash_id, config_hash_id)) {
+ send = 1; // alert configuration differs (or the stored hash was NULL)
+ goto done;
+ }
+
+ //same status, same config
+ if (ae->new_status == RRDCALC_STATUS_CLEAR) {
+ send = 0;
+ goto done;
+ }
+
+ //detect a long off period of the agent, TODO make global
+ if (ae->new_status == RRDCALC_STATUS_WARNING || ae->new_status == RRDCALC_STATUS_CRITICAL) {
+ time_t when = removed_when(ae->alarm_id, ae->unique_id, unique_id, uuid_str); // args: alarm, upper bound (this event), lower bound (last sent event)
+
+ if (when && (when + (time_t)MAX_REMOVED_PERIOD) < ae->when) {
+ send = 1; // a removal older than MAX_REMOVED_PERIOD seconds lies in between: resend
+ goto done;
+ } else {
+ send = 0;
+ goto done;
+ }
+ }
+
+done:
+ rc = sqlite3_finalize(res);
+ if (unlikely(rc != SQLITE_OK))
+ error_report("Failed to finalize statement when trying to filter alert events, rc = %d", rc);
+
+ return send;
+}
+
// will replace call to aclk_update_alarm in health/health_log.c
// and handle both cases
-int sql_queue_alarm_to_aclk(RRDHOST *host, ALARM_ENTRY *ae)
+int sql_queue_alarm_to_aclk(RRDHOST *host, ALARM_ENTRY *ae, int skip_filter)
{
//check aclk architecture and handle old json alarm update to cloud
//include also the valid statuses for this case
@@ -30,17 +141,15 @@ int sql_queue_alarm_to_aclk(RRDHOST *host, ALARM_ENTRY *ae)
if (!claimed())
return 0;
- if (ae->flags & HEALTH_ENTRY_FLAG_ACLK_QUEUED)
+ if (ae->flags & HEALTH_ENTRY_FLAG_ACLK_QUEUED) {
return 0;
+ }
- if (ae->new_status == RRDCALC_STATUS_REMOVED || ae->new_status == RRDCALC_STATUS_UNINITIALIZED)
- return 0;
-
- if (unlikely(!host->dbsync_worker))
- return 1;
-
- if (unlikely(uuid_is_null(ae->config_hash_id)))
- return 0;
+ if (!skip_filter) {
+ if (!should_send_to_cloud(host, ae)) {
+ return 0;
+ }
+ }
int rc = 0;
@@ -76,6 +185,10 @@ int sql_queue_alarm_to_aclk(RRDHOST *host, ALARM_ENTRY *ae)
}
ae->flags |= HEALTH_ENTRY_FLAG_ACLK_QUEUED;
+ struct aclk_database_worker_config *wc = (struct aclk_database_worker_config *)host->dbsync_worker;
+ if (wc) {
+ wc->pause_alert_updates = 0;
+ }
bind_fail:
if (unlikely(sqlite3_finalize(res_alert) != SQLITE_OK))
@@ -86,6 +199,7 @@ bind_fail:
#else
UNUSED(host);
UNUSED(ae);
+ UNUSED(skip_filter);
#endif
return 0;
}
@@ -283,6 +397,7 @@ void aclk_push_alert_event(struct aclk_database_worker_config *wc, struct aclk_d
wc->alerts_batch_id);
log_first_sequence_id = 0;
log_last_sequence_id = 0;
+ wc->pause_alert_updates = 1;
}
rc = sqlite3_finalize(res);
@@ -296,6 +411,27 @@ void aclk_push_alert_event(struct aclk_database_worker_config *wc, struct aclk_d
return;
}
+void sql_queue_existing_alerts_to_aclk(RRDHOST *host) // bulk-inserts this host's matching health_log entries into its aclk_alert table
+{
+ char uuid_str[GUID_LEN + 1];
+ uuid_unparse_lower_fix(&host->host_uuid, uuid_str); // uuid_str is the suffix of both per-host table names
+ BUFFER *sql = buffer_create(1024);
+
+ buffer_sprintf(sql,"insert into aclk_alert_%s (alert_unique_id, date_created) " \
+ "select unique_id alert_unique_id, strftime('%%s') date_created from health_log_%s " \
+ "where new_status <> 0 and new_status <> -2 and config_hash_id is not null and updated_by_id = 0 " \
+ "order by unique_id asc on conflict (alert_unique_id) do nothing;", uuid_str, uuid_str); // entries already queued are left untouched
+
+ db_execute(buffer_tostring(sql));
+
+ buffer_free(sql);
+
+ struct aclk_database_worker_config *wc = (struct aclk_database_worker_config *)host->dbsync_worker;
+ if (wc) {
+ wc->pause_alert_updates = 0; // timer_cb only queues ACLK_DATABASE_PUSH_ALERT while this is 0
+ }
+}
+
void aclk_send_alarm_health_log(char *node_id)
{
if (unlikely(!node_id))
@@ -421,6 +557,8 @@ void aclk_push_alarm_health_log(struct aclk_database_worker_config *wc, struct a
freez(claim_id);
buffer_free(sql);
+
+ aclk_alert_reloaded = 1;
#endif
return;
@@ -593,6 +731,9 @@ void aclk_start_alert_streaming(char *node_id, uint64_t batch_id, uint64_t start
log_access("ACLK STA [%s (N/A)]: Ignoring request to stream alert state changes, health is disabled.", node_id);
return;
}
+
+ if (unlikely(batch_id == 1) && unlikely(start_seq_id == 1))
+ sql_queue_existing_alerts_to_aclk(host);
} else
wc = (struct aclk_database_worker_config *)find_inactive_wc_by_node_id(node_id);
@@ -602,6 +743,7 @@ void aclk_start_alert_streaming(char *node_id, uint64_t batch_id, uint64_t start
wc->alerts_batch_id = batch_id;
wc->alerts_start_seq_id = start_seq_id;
wc->alert_updates = 1;
+ wc->pause_alert_updates = 0;
__sync_synchronize();
}
else
@@ -631,9 +773,11 @@ void sql_process_queue_removed_alerts_to_aclk(struct aclk_database_worker_config
db_execute(buffer_tostring(sql));
- log_access("ACLK STA [%s (%s)]: Queued removed alerts.", wc->node_id, wc->host ? wc->host->hostname : "N/A");
+ log_access("ACLK STA [%s (%s)]: QUEUED REMOVED ALERTS", wc->node_id, wc->host ? wc->host->hostname : "N/A");
buffer_free(sql);
+
+ wc->pause_alert_updates = 0;
#endif
return;
}
@@ -644,6 +788,9 @@ void sql_queue_removed_alerts_to_aclk(RRDHOST *host)
if (unlikely(!host->dbsync_worker))
return;
+ if (!claimed())
+ return;
+
struct aclk_database_cmd cmd;
memset(&cmd, 0, sizeof(cmd));
cmd.opcode = ACLK_DATABASE_QUEUE_REMOVED_ALERTS;
@@ -912,9 +1059,6 @@ void sql_aclk_alert_clean_dead_entries(RRDHOST *host)
if (!claimed())
return;
- if (unlikely(!host->dbsync_worker))
- return;
-
char uuid_str[GUID_LEN + 1];
uuid_unparse_lower_fix(&host->host_uuid, uuid_str);
diff --git a/database/sqlite/sqlite_aclk_alert.h b/database/sqlite/sqlite_aclk_alert.h
index 957cb94ac..0181b4842 100644
--- a/database/sqlite/sqlite_aclk_alert.h
+++ b/database/sqlite/sqlite_aclk_alert.h
@@ -26,5 +26,6 @@ void sql_process_queue_removed_alerts_to_aclk(struct aclk_database_worker_config
void aclk_push_alert_snapshot_event(struct aclk_database_worker_config *wc, struct aclk_database_cmd cmd);
void aclk_process_send_alarm_snapshot(char *node_id, char *claim_id, uint64_t snapshot_id, uint64_t sequence_id);
int get_proto_alert_status(RRDHOST *host, struct proto_alert_status *proto_alert_status);
+extern int sql_queue_alarm_to_aclk(RRDHOST *host, ALARM_ENTRY *ae, int skip_filter);
#endif //NETDATA_SQLITE_ACLK_ALERT_H
diff --git a/database/sqlite/sqlite_aclk_chart.c b/database/sqlite/sqlite_aclk_chart.c
index 7afa1d451..a9db5282a 100644
--- a/database/sqlite/sqlite_aclk_chart.c
+++ b/database/sqlite/sqlite_aclk_chart.c
@@ -22,20 +22,20 @@ sql_queue_chart_payload(struct aclk_database_worker_config *wc, void *data, enum
return rc;
}
-static int payload_sent(char *uuid_str, uuid_t *uuid, void *payload, size_t payload_size)
+static time_t payload_sent(char *uuid_str, uuid_t *uuid, void *payload, size_t payload_size)
{
static __thread sqlite3_stmt *res = NULL;
int rc;
- int send_status = 0;
+ time_t send_status = 0;
if (unlikely(!res)) {
char sql[ACLK_SYNC_QUERY_SIZE];
- snprintfz(sql,ACLK_SYNC_QUERY_SIZE-1, "SELECT 1 FROM aclk_chart_latest_%s acl, aclk_chart_payload_%s acp "
- "WHERE acl.unique_id = acp.unique_id AND acl.uuid = @uuid AND acp.payload = @payload;",
- uuid_str, uuid_str);
+ snprintfz(sql,ACLK_SYNC_QUERY_SIZE-1, "SELECT acl.date_submitted FROM aclk_chart_latest_%s acl, aclk_chart_payload_%s acp "
+ "WHERE acl.unique_id = acp.unique_id AND acl.uuid = @uuid AND acp.payload = @payload;",
+ uuid_str, uuid_str);
rc = prepare_statement(db_meta, sql, &res);
if (rc != SQLITE_OK) {
- error_report("Failed to prepare statement to check payload data");
+ error_report("Failed to prepare statement to check payload data on %s", sql);
return 0;
}
}
@@ -49,7 +49,7 @@ static int payload_sent(char *uuid_str, uuid_t *uuid, void *payload, size_t payl
goto bind_fail;
while (sqlite3_step(res) == SQLITE_ROW) {
- send_status = sqlite3_column_int(res, 0);
+ send_status = (time_t) sqlite3_column_int64(res, 0);
}
bind_fail:
@@ -58,23 +58,36 @@ bind_fail:
return send_status;
}
-static int aclk_add_chart_payload(struct aclk_database_worker_config *wc, uuid_t *uuid, char *claim_id,
- ACLK_PAYLOAD_TYPE payload_type, void *payload, size_t payload_size, int *send_status)
+static int aclk_add_chart_payload(
+ struct aclk_database_worker_config *wc,
+ uuid_t *uuid,
+ char *claim_id,
+ ACLK_PAYLOAD_TYPE payload_type,
+ void *payload,
+ size_t payload_size,
+ time_t *send_status,
+ int check_sent)
{
static __thread sqlite3_stmt *res_chart = NULL;
int rc;
+ time_t date_submitted;
- rc = payload_sent(wc->uuid_str, uuid, payload, payload_size);
- if (send_status)
- *send_status = rc;
- if (rc == 1)
+ if (unlikely(!payload))
return 0;
+ if (check_sent) {
+ date_submitted = payload_sent(wc->uuid_str, uuid, payload, payload_size);
+ if (send_status)
+ *send_status = date_submitted;
+ if (date_submitted)
+ return 0;
+ }
+
if (unlikely(!res_chart)) {
char sql[ACLK_SYNC_QUERY_SIZE];
snprintfz(sql,ACLK_SYNC_QUERY_SIZE-1,
- "INSERT INTO aclk_chart_payload_%s (unique_id, uuid, claim_id, date_created, type, payload) " \
- "VALUES (@unique_id, @uuid, @claim_id, strftime('%%s','now'), @type, @payload);", wc->uuid_str);
+ "INSERT INTO aclk_chart_payload_%s (unique_id, uuid, claim_id, date_created, type, payload) " \
+ "VALUES (@unique_id, @uuid, @claim_id, strftime('%%s','now'), @type, @payload);", wc->uuid_str);
rc = prepare_statement(db_meta, sql, &res_chart);
if (rc != SQLITE_OK) {
error_report("Failed to prepare statement to store chart payload data");
@@ -146,7 +159,7 @@ int aclk_add_chart_event(struct aclk_database_worker_config *wc, struct aclk_dat
chart_payload.id = strdupz(st->id);
struct label_index *labels = &st->state->labels;
- netdata_rwlock_wrlock(&labels->labels_rwlock);
+ netdata_rwlock_rdlock(&labels->labels_rwlock);
struct label *label_list = labels->head;
struct label *chart_label = NULL;
while (label_list) {
@@ -159,7 +172,7 @@ int aclk_add_chart_event(struct aclk_database_worker_config *wc, struct aclk_dat
size_t size;
char *payload = generate_chart_instance_updated(&size, &chart_payload);
if (likely(payload))
- rc = aclk_add_chart_payload(wc, st->chart_uuid, claim_id, ACLK_PAYLOAD_CHART, (void *) payload, size, NULL);
+ rc = aclk_add_chart_payload(wc, st->chart_uuid, claim_id, ACLK_PAYLOAD_CHART, (void *) payload, size, NULL, 1);
freez(payload);
chart_instance_updated_destroy(&chart_payload);
}
@@ -168,7 +181,7 @@ int aclk_add_chart_event(struct aclk_database_worker_config *wc, struct aclk_dat
static inline int aclk_upd_dimension_event(struct aclk_database_worker_config *wc, char *claim_id, uuid_t *dim_uuid,
const char *dim_id, const char *dim_name, const char *chart_type_id, time_t first_time, time_t last_time,
- int *send_status)
+ time_t *send_status)
{
int rc = 0;
size_t size;
@@ -197,7 +210,7 @@ static inline int aclk_upd_dimension_event(struct aclk_database_worker_config *w
dim_payload.last_timestamp.tv_sec = last_time;
char *payload = generate_chart_dimension_updated(&size, &dim_payload);
if (likely(payload))
- rc = aclk_add_chart_payload(wc, dim_uuid, claim_id, ACLK_PAYLOAD_DIMENSION, (void *)payload, size, send_status);
+ rc = aclk_add_chart_payload(wc, dim_uuid, claim_id, ACLK_PAYLOAD_DIMENSION, (void *)payload, size, send_status, 1);
freez(payload);
return rc;
}
@@ -271,39 +284,22 @@ bind_fail:
int aclk_add_dimension_event(struct aclk_database_worker_config *wc, struct aclk_database_cmd cmd)
{
- int rc = 0;
+ int rc = 1;
CHECK_SQLITE_CONNECTION(db_meta);
- char *claim_id = is_agent_claimed();
-
- RRDDIM *rd = cmd.data;
-
- if (likely(claim_id)) {
- int send_status = 0;
- time_t now = now_realtime_sec();
-
- time_t first_t = rd->state->query_ops.oldest_time(rd);
- time_t last_t = rd->state->query_ops.latest_time(rd);
-
- int live = ((now - last_t) < (RRDSET_MINIMUM_DIM_LIVE_MULTIPLIER * rd->update_every));
+ struct aclk_chart_dimension_data *aclk_cd_data = cmd.data;
- rc = aclk_upd_dimension_event(
- wc,
- claim_id,
- &rd->state->metric_uuid,
- rd->id,
- rd->name,
- rd->rrdset->id,
- first_t,
- live ? 0 : last_t,
- &send_status);
+ char *claim_id = is_agent_claimed();
+ if (!claim_id)
+ goto cleanup;
- if (!send_status)
- rd->state->aclk_live_status = live;
+ rc = aclk_add_chart_payload(wc, &aclk_cd_data->uuid, claim_id, ACLK_PAYLOAD_DIMENSION,
+ (void *) aclk_cd_data->payload, aclk_cd_data->payload_size, NULL, aclk_cd_data->check_payload);
- freez(claim_id);
- }
- rrddim_flag_clear(rd, RRDDIM_FLAG_ACLK);
+ freez(claim_id);
+cleanup:
+ freez(aclk_cd_data->payload);
+ freez(aclk_cd_data);
return rc;
}
@@ -337,6 +333,12 @@ void aclk_send_chart_event(struct aclk_database_worker_config *wc, struct aclk_d
char sql[ACLK_SYNC_QUERY_SIZE];
static __thread sqlite3_stmt *res = NULL;
+ char *hostname = NULL;
+ if (wc->host)
+ hostname = strdupz(wc->host->hostname);
+ else
+ hostname = get_hostname_by_node_id(wc->node_id);
+
if (unlikely(!res)) {
snprintfz(sql,ACLK_SYNC_QUERY_SIZE-1,"SELECT ac.sequence_id, acp.payload, ac.date_created, ac.type, ac.uuid " \
"FROM aclk_chart_%s ac, aclk_chart_payload_%s acp " \
@@ -346,6 +348,7 @@ void aclk_send_chart_event(struct aclk_database_worker_config *wc, struct aclk_d
if (rc != SQLITE_OK) {
error_report("Failed to prepare statement when trying to send a chart update via ACLK");
freez(claim_id);
+ freez(hostname);
return;
}
}
@@ -419,7 +422,7 @@ void aclk_send_chart_event(struct aclk_database_worker_config *wc, struct aclk_d
log_access(
"ACLK RES [%s (%s)]: CHARTS SENT from %" PRIu64 " to %" PRIu64 " batch=%" PRIu64,
wc->node_id,
- wc->host ? wc->host->hostname : "N/A",
+ hostname ? hostname : "N/A",
first_sequence,
last_sequence,
wc->batch_id);
@@ -440,7 +443,7 @@ void aclk_send_chart_event(struct aclk_database_worker_config *wc, struct aclk_d
log_access(
"ACLK STA [%s (%s)]: Sync of charts and dimensions done in %ld seconds.",
wc->node_id,
- wc->host ? wc->host->hostname : "N/A",
+ hostname ? hostname : "N/A",
now_realtime_sec() - wc->startup_time);
}
@@ -459,6 +462,7 @@ bind_fail:
error_report("Failed to reset statement when pushing chart events, rc = %d", rc);
freez(claim_id);
+ freez(hostname);
return;
}
@@ -562,7 +566,7 @@ void aclk_receive_chart_ack(struct aclk_database_worker_config *wc, struct aclk_
error_report("Failed to ACK sequence id, rc = %d", rc);
else
log_access(
- "ACLK STA [%s (%s)]: CHARTS ACKNOWLEDGED in the database upto %" PRIu64,
+ "ACLK STA [%s (%s)]: CHARTS ACKNOWLEDGED IN THE DATABASE UP TO %" PRIu64,
wc->node_id,
wc->host ? wc->host->hostname : "N/A",
cmd.param1);
@@ -583,8 +587,13 @@ void aclk_receive_chart_reset(struct aclk_database_worker_config *wc, struct acl
cmd.param1);
db_execute(buffer_tostring(sql));
if (cmd.param1 == 1) {
+ char *hostname = NULL;
+ if (wc->host)
+ hostname = strdupz(wc->host->hostname);
+ else
+ hostname = get_hostname_by_node_id(wc->node_id);
buffer_flush(sql);
- log_access("ACLK REQ [%s (%s)]: Received chart full resync.", wc->node_id, wc->host ? wc->host->hostname : "N/A");
+ log_access("ACLK REQ [%s (%s)]: Received chart full resync.", wc->node_id, hostname? hostname : "N/A");
buffer_sprintf(sql, "DELETE FROM aclk_chart_payload_%s; DELETE FROM aclk_chart_%s; " \
"DELETE FROM aclk_chart_latest_%s;", wc->uuid_str, wc->uuid_str, wc->uuid_str);
db_lock();
@@ -609,6 +618,7 @@ void aclk_receive_chart_reset(struct aclk_database_worker_config *wc, struct acl
RRDDIM *rd;
rrddim_foreach_read(rd, st)
{
+ rrddim_flag_clear(rd, RRDDIM_FLAG_ACLK);
rd->state->aclk_live_status = (rd->state->aclk_live_status == 0);
}
rrdset_unlock(st);
@@ -616,9 +626,10 @@ void aclk_receive_chart_reset(struct aclk_database_worker_config *wc, struct acl
rrdhost_unlock(host);
} else
error_report("ACLK synchronization thread for %s is not linked to HOST", wc->host_guid);
+ freez(hostname);
} else {
log_access(
- "ACLK STA [%s (%s)]: Restarting chart sync from sequence %" PRIu64,
+ "ACLK STA [%s (%s)]: RESTARTING CHART SYNC FROM SEQUENCE %" PRIu64,
wc->node_id,
wc->host ? wc->host->hostname : "N/A",
cmd.param1);
@@ -705,25 +716,28 @@ void aclk_start_streaming(char *node_id, uint64_t sequence_id, time_t created_at
if (unlikely(!node_id))
return;
- // log_access("ACLK REQ [%s (N/A)]: CHARTS STREAM from %"PRIu64" t=%ld batch=%"PRIu64, node_id,
- // sequence_id, created_at, batch_id);
-
uuid_t node_uuid;
if (uuid_parse(node_id, node_uuid)) {
log_access("ACLK REQ [%s (N/A)]: CHARTS STREAM ignored, invalid node id", node_id);
return;
}
- struct aclk_database_worker_config *wc = NULL;
+ struct aclk_database_worker_config *wc = find_inactive_wc_by_node_id(node_id);
rrd_rdlock();
RRDHOST *host = localhost;
while(host) {
- if (host->node_id && !(uuid_compare(*host->node_id, node_uuid))) {
+ if (wc || (host->node_id && !(uuid_compare(*host->node_id, node_uuid)))) {
rrd_unlock();
- wc = (struct aclk_database_worker_config *)host->dbsync_worker ?
- (struct aclk_database_worker_config *)host->dbsync_worker :
- (struct aclk_database_worker_config *)find_inactive_wc_by_node_id(node_id);
+ if (!wc)
+ wc = (struct aclk_database_worker_config *)host->dbsync_worker ?
+ (struct aclk_database_worker_config *)host->dbsync_worker :
+ (struct aclk_database_worker_config *)find_inactive_wc_by_node_id(node_id);
+ char *hostname = NULL;
if (likely(wc)) {
+ if (wc->host)
+ hostname = strdupz(wc->host->hostname);
+ else
+ hostname = get_hostname_by_node_id(node_id);
wc->chart_reset_count++;
__sync_synchronize();
wc->chart_updates = 0;
@@ -731,9 +745,10 @@ void aclk_start_streaming(char *node_id, uint64_t sequence_id, time_t created_at
__sync_synchronize();
wc->batch_created = now_realtime_sec();
log_access(
- "ACLK REQ [%s (%s)]: CHARTS STREAM from %" PRIu64 " t=%ld resets=%d",
+ "ACLK REQ [%s (%s)]: CHARTS STREAM from %"PRIu64" (LOCAL %"PRIu64") t=%ld resets=%d" ,
wc->node_id,
- wc->host ? wc->host->hostname : "N/A",
+ hostname ? hostname : "N/A",
+ sequence_id + 1,
wc->chart_sequence_id,
wc->chart_timestamp,
wc->chart_reset_count);
@@ -742,7 +757,7 @@ void aclk_start_streaming(char *node_id, uint64_t sequence_id, time_t created_at
"ACLK RES [%s (%s)]: CHARTS FULL RESYNC REQUEST "
"remote_seq=%" PRIu64 " local_seq=%" PRIu64 " resets=%d ",
wc->node_id,
- wc->host ? wc->host->hostname : "N/A",
+ hostname ? hostname : "N/A",
sequence_id,
wc->chart_sequence_id,
wc->chart_reset_count);
@@ -756,7 +771,6 @@ void aclk_start_streaming(char *node_id, uint64_t sequence_id, time_t created_at
freez(chart_reset.claim_id);
wc->chart_reset_count = -1;
}
- return;
} else {
struct aclk_database_cmd cmd;
memset(&cmd, 0, sizeof(cmd));
@@ -766,8 +780,8 @@ void aclk_start_streaming(char *node_id, uint64_t sequence_id, time_t created_at
log_access(
"ACLK REQ [%s (%s)]: CHART RESET from %" PRIu64 " t=%ld batch=%" PRIu64,
wc->node_id,
- wc->host ? wc->host->hostname : "N/A",
- wc->chart_sequence_id,
+ hostname ? hostname : "N/A",
+ sequence_id + 1,
wc->chart_timestamp,
wc->batch_id);
cmd.opcode = ACLK_DATABASE_RESET_CHART;
@@ -775,20 +789,15 @@ void aclk_start_streaming(char *node_id, uint64_t sequence_id, time_t created_at
cmd.completion = NULL;
aclk_database_enq_cmd(wc, &cmd);
} else {
-// log_access(
-// "ACLK RES [%s (%s)]: CHARTS STREAM from %" PRIu64
-// " t=%ld resets=%d",
-// wc->node_id,
-// wc->host ? wc->host->hostname : "N/A",
-// wc->chart_sequence_id,
-// wc->chart_timestamp,
-// wc->chart_reset_count);
wc->chart_reset_count = 0;
wc->chart_updates = 1;
}
}
- } else
- log_access("ACLK STA [%s (N/A)]: ACLK synchronization thread is not active.", node_id);
+ } else {
+ hostname = get_hostname_by_node_id(node_id);
+ log_access("ACLK STA [%s (%s)]: ACLK synchronization thread is not active.", node_id, hostname ? hostname : "N/A");
+ }
+ freez(hostname);
return;
}
host = host->next;
@@ -838,9 +847,8 @@ failed:
"SELECT distinct h.host_id, c.update_every, c.type||'.'||c.id FROM chart c, host h " \
"WHERE c.host_id = h.host_id AND c.host_id = @host_id ORDER BY c.update_every ASC;"
-void aclk_update_retention(struct aclk_database_worker_config *wc, struct aclk_database_cmd cmd)
+void aclk_update_retention(struct aclk_database_worker_config *wc)
{
- UNUSED(cmd);
int rc;
if (!aclk_use_new_cloud_arch || !aclk_connected)
@@ -887,7 +895,10 @@ void aclk_update_retention(struct aclk_database_worker_config *wc, struct aclk_d
time_t last_entry_t;
uint32_t update_every = 0;
uint32_t dimension_update_count = 0;
- int send_status;
+ uint32_t total_checked = 0;
+ uint32_t total_deleted= 0;
+ uint32_t total_stopped= 0;
+ time_t send_status;
struct retention_updated rotate_data;
@@ -904,7 +915,9 @@ void aclk_update_retention(struct aclk_database_worker_config *wc, struct aclk_d
rotate_data.node_id = strdupz(wc->node_id);
time_t now = now_realtime_sec();
- while (sqlite3_step(res) == SQLITE_ROW) {
+ while (sqlite3_step(res) == SQLITE_ROW && dimension_update_count < ACLK_MAX_DIMENSION_CLEANUP) {
+ if (unlikely(netdata_exit))
+ break;
if (!update_every || update_every != (uint32_t)sqlite3_column_int(res, 1)) {
if (update_every) {
debug(D_ACLK_SYNC, "Update %s for %u oldest time = %ld", wc->host_guid, update_every, start_time);
@@ -942,23 +955,40 @@ void aclk_update_retention(struct aclk_database_worker_config *wc, struct aclk_d
if (likely(!rc && first_entry_t))
start_time = MIN(start_time, first_entry_t);
- if (memory_mode == RRD_MEMORY_MODE_DBENGINE && wc->chart_updates) {
+ if (memory_mode == RRD_MEMORY_MODE_DBENGINE && wc->chart_updates && (dimension_update_count < ACLK_MAX_DIMENSION_CLEANUP)) {
int live = ((now - last_entry_t) < (RRDSET_MINIMUM_DIM_LIVE_MULTIPLIER * update_every));
- if ((!live || !first_entry_t) && (dimension_update_count < ACLK_MAX_DIMENSION_CLEANUP)) {
- (void)aclk_upd_dimension_event(
- wc,
- claim_id,
- (uuid_t *)sqlite3_column_blob(res, 0),
- (const char *)(const char *)sqlite3_column_text(res, 3),
- (const char *)(const char *)sqlite3_column_text(res, 4),
- (const char *)(const char *)sqlite3_column_text(res, 2),
- first_entry_t,
- live ? 0 : last_entry_t,
- &send_status);
- if (!send_status)
+ if (rc) {
+ first_entry_t = 0;
+ last_entry_t = 0;
+ live = 0;
+ }
+ if (!wc->host || !first_entry_t) {
+ if (!first_entry_t) {
+ delete_dimension_uuid((uuid_t *)sqlite3_column_blob(res, 0));
+ total_deleted++;
dimension_update_count++;
+ }
+ else {
+ (void)aclk_upd_dimension_event(
+ wc,
+ claim_id,
+ (uuid_t *)sqlite3_column_blob(res, 0),
+ (const char *)(const char *)sqlite3_column_text(res, 3),
+ (const char *)(const char *)sqlite3_column_text(res, 4),
+ (const char *)(const char *)sqlite3_column_text(res, 2),
+ first_entry_t,
+ live ? 0 : last_entry_t,
+ &send_status);
+
+ if (!send_status) {
+ if (last_entry_t)
+ total_stopped++;
+ dimension_update_count++;
+ }
+ }
}
}
+ total_checked++;
}
if (update_every) {
debug(D_ACLK_SYNC, "Update %s for %u oldest time = %ld", wc->host_guid, update_every, start_time);
@@ -970,7 +1000,20 @@ void aclk_update_retention(struct aclk_database_worker_config *wc, struct aclk_d
rotate_data.interval_duration_count++;
}
+ char *hostname = NULL;
+ if (!wc->host)
+ hostname = get_hostname_by_node_id(wc->node_id);
+
+ if (dimension_update_count < ACLK_MAX_DIMENSION_CLEANUP && !netdata_exit)
+ log_access("ACLK STA [%s (%s)]: UPDATES %d RETENTION MESSAGE SENT. CHECKED %u DIMENSIONS. %u DELETED, %u STOPPED COLLECTING",
+ wc->node_id, wc->host ? wc->host->hostname : hostname ? hostname : "N/A", wc->chart_updates, total_checked, total_deleted, total_stopped);
+ else
+ log_access("ACLK STA [%s (%s)]: UPDATES %d RETENTION MESSAGE NOT SENT. CHECKED %u DIMENSIONS. %u DELETED, %u STOPPED COLLECTING",
+ wc->node_id, wc->host ? wc->host->hostname : hostname ? hostname : "N/A", wc->chart_updates, total_checked, total_deleted, total_stopped);
+ freez(hostname);
+
#ifdef NETDATA_INTERNAL_CHECKS
+ info("Retention update for %s (chart updates = %d)", wc->host_guid, wc->chart_updates);
for (int i = 0; i < rotate_data.interval_duration_count; ++i)
info(
"Update for host %s (node %s) for %u Retention = %u",
@@ -979,7 +1022,8 @@ void aclk_update_retention(struct aclk_database_worker_config *wc, struct aclk_d
rotate_data.interval_durations[i].update_every,
rotate_data.interval_durations[i].retention);
#endif
- aclk_retention_updated(&rotate_data);
+ if (dimension_update_count < ACLK_MAX_DIMENSION_CLEANUP && !netdata_exit)
+ aclk_retention_updated(&rotate_data);
freez(rotate_data.node_id);
freez(rotate_data.interval_durations);
@@ -1048,11 +1092,64 @@ void sql_get_last_chart_sequence(struct aclk_database_worker_config *wc)
return;
}
-int queue_dimension_to_aclk(RRDDIM *rd)
+void queue_dimension_to_aclk(RRDDIM *rd, time_t last_updated)
{
- int rc = sql_queue_chart_payload((struct aclk_database_worker_config *) rd->rrdset->rrdhost->dbsync_worker,
- rd, ACLK_DATABASE_ADD_DIMENSION);
- return rc;
+ int live = !last_updated;
+
+ if (likely(rd->state->aclk_live_status == live))
+ return;
+
+ time_t created_at = rd->state->query_ops.oldest_time(rd);
+
+ if (unlikely(!created_at && rd->updated))
+ created_at = rd->last_collected_time.tv_sec;
+
+ rd->state->aclk_live_status = live;
+
+ struct aclk_database_worker_config *wc = rd->rrdset->rrdhost->dbsync_worker;
+ if (unlikely(!wc))
+ return;
+
+ char *claim_id = is_agent_claimed();
+ if (unlikely(!claim_id))
+ return;
+
+ struct chart_dimension_updated dim_payload;
+ memset(&dim_payload, 0, sizeof(dim_payload));
+ dim_payload.node_id = wc->node_id;
+ dim_payload.claim_id = claim_id;
+ dim_payload.name = rd->name;
+ dim_payload.id = rd->id;
+ dim_payload.chart_id = rd->rrdset->id;
+ dim_payload.created_at.tv_sec = created_at;
+ dim_payload.last_timestamp.tv_sec = last_updated;
+
+ size_t size = 0;
+ char *payload = generate_chart_dimension_updated(&size, &dim_payload);
+
+ freez(claim_id);
+ if (unlikely(!payload))
+ return;
+
+ struct aclk_chart_dimension_data *aclk_cd_data = mallocz(sizeof(*aclk_cd_data));
+ uuid_copy(aclk_cd_data->uuid, rd->state->metric_uuid);
+ aclk_cd_data->payload = payload;
+ aclk_cd_data->payload_size = size;
+ aclk_cd_data->check_payload = 1;
+
+ struct aclk_database_cmd cmd;
+ memset(&cmd, 0, sizeof(cmd));
+
+ cmd.opcode = ACLK_DATABASE_ADD_DIMENSION;
+ cmd.data = aclk_cd_data;
+ int rc = aclk_database_enq_cmd_noblock(wc, &cmd);
+
+ if (unlikely(rc)) {
+ freez(aclk_cd_data->payload);
+ freez(aclk_cd_data);
+ rd->state->aclk_live_status = !live;
+ }
+ return;
}
void aclk_send_dimension_update(RRDDIM *rd)
@@ -1203,6 +1300,12 @@ void sql_check_chart_liveness(RRDSET *st) {
return;
rrdset_rdlock(st);
+
+ if (unlikely(!rrdset_flag_check(st, RRDSET_FLAG_ACLK))) {
+ rrdset_unlock(st);
+ return;
+ }
+
if (unlikely(!rrdset_flag_check(st, RRDSET_FLAG_ACLK))) {
if (likely(st->dimensions && st->counter_done && !queue_chart_to_aclk(st))) {
debug(D_ACLK_SYNC,"Check chart liveness [%s] submit chart definition", st->name);
@@ -1215,20 +1318,8 @@ void sql_check_chart_liveness(RRDSET *st) {
debug(D_ACLK_SYNC,"Check chart liveness [%s] scanning dimensions", st->name);
rrddim_foreach_read(rd, st) {
- if (!rrddim_flag_check(rd, RRDDIM_FLAG_HIDDEN)) {
- int live = (mark - rd->last_collected_time.tv_sec) < RRDSET_MINIMUM_DIM_LIVE_MULTIPLIER * rd->update_every;
- if (unlikely(live != rd->state->aclk_live_status)) {
- if (likely(rrdset_flag_check(st, RRDSET_FLAG_ACLK))) {
- if (likely(!queue_dimension_to_aclk(rd))) {
- debug(D_ACLK_SYNC,"Dimension change [%s] on [%s] from live %d --> %d", rd->id, rd->rrdset->name, rd->state->aclk_live_status, live);
- rd->state->aclk_live_status = live;
- rrddim_flag_set(rd, RRDDIM_FLAG_ACLK);
- }
- }
- }
- else
- debug(D_ACLK_SYNC,"Dimension check [%s] on [%s] liveness matches", rd->id, st->name);
- }
+ if (!rrddim_flag_check(rd, RRDDIM_FLAG_HIDDEN))
+ queue_dimension_to_aclk(rd, calc_dimension_liveness(rd, mark));
}
rrdset_unlock(st);
}
diff --git a/database/sqlite/sqlite_aclk_chart.h b/database/sqlite/sqlite_aclk_chart.h
index 1d25de24e..84325bf6c 100644
--- a/database/sqlite/sqlite_aclk_chart.h
+++ b/database/sqlite/sqlite_aclk_chart.h
@@ -16,10 +16,21 @@ extern sqlite3 *db_meta;
#define RRDSET_MINIMUM_DIM_LIVE_MULTIPLIER (3)
#endif
+#ifndef RRDSET_MINIMUM_DIM_OFFLINE_MULTIPLIER
+#define RRDSET_MINIMUM_DIM_OFFLINE_MULTIPLIER (30)
+#endif
+
#ifndef ACLK_MAX_DIMENSION_CLEANUP
#define ACLK_MAX_DIMENSION_CLEANUP (500)
#endif
+struct aclk_chart_dimension_data {
+ uuid_t uuid;
+ char *payload;
+ size_t payload_size;
+ uint8_t check_payload;
+};
+
struct aclk_chart_sync_stats {
int updates;
uint64_t batch_id;
@@ -37,9 +48,8 @@ struct aclk_chart_sync_stats {
};
extern int queue_chart_to_aclk(RRDSET *st);
-extern int queue_dimension_to_aclk(RRDDIM *rd);
+extern void queue_dimension_to_aclk(RRDDIM *rd, time_t last_updated);
extern void sql_create_aclk_table(RRDHOST *host, uuid_t *host_uuid, uuid_t *node_id);
-extern int sql_queue_alarm_to_aclk(RRDHOST *host, ALARM_ENTRY *ae);
int aclk_add_chart_event(struct aclk_database_worker_config *wc, struct aclk_database_cmd cmd);
int aclk_add_dimension_event(struct aclk_database_worker_config *wc, struct aclk_database_cmd cmd);
int aclk_send_chart_config(struct aclk_database_worker_config *wc, struct aclk_database_cmd cmd);
@@ -57,4 +67,5 @@ uint32_t sql_get_pending_count(struct aclk_database_worker_config *wc);
void aclk_send_dimension_update(RRDDIM *rd);
struct aclk_chart_sync_stats *aclk_get_chart_sync_stats(RRDHOST *host);
void sql_check_chart_liveness(RRDSET *st);
+void aclk_update_retention(struct aclk_database_worker_config *wc);
#endif //NETDATA_SQLITE_ACLK_CHART_H
diff --git a/database/sqlite/sqlite_aclk_node.c b/database/sqlite/sqlite_aclk_node.c
index 97e6bebd1..239a24b8c 100644
--- a/database/sqlite/sqlite_aclk_node.c
+++ b/database/sqlite/sqlite_aclk_node.c
@@ -24,6 +24,15 @@ void sql_build_node_info(struct aclk_database_worker_config *wc, struct aclk_dat
node_info.child = (wc->host != localhost);
node_info.ml_info.ml_capable = ml_capable(localhost);
node_info.ml_info.ml_enabled = ml_enabled(wc->host);
+
+ struct capability instance_caps[] = {
+ { .name = "proto", .version = 1, .enabled = 1 },
+ { .name = "ml", .version = ml_capable(localhost), .enabled = ml_enabled(wc->host) },
+ { .name = "mc", .version = enable_metric_correlations ? metric_correlations_version : 0, .enabled = enable_metric_correlations },
+ { .name = NULL, .version = 0, .enabled = 0 }
+ };
+ node_info.node_instance_capabilities = instance_caps;
+
now_realtime_timeval(&node_info.updated_at);
RRDHOST *host = wc->host;
@@ -47,7 +56,7 @@ void sql_build_node_info(struct aclk_database_worker_config *wc, struct aclk_dat
node_info.data.memory = host->system_info->host_ram_total ? host->system_info->host_ram_total : "0";
node_info.data.disk_space = host->system_info->host_disk_space ? host->system_info->host_disk_space : "0";
node_info.data.version = host_version ? host_version : VERSION;
- node_info.data.release_channel = "nightly";
+ node_info.data.release_channel = (char *) get_release_channel();
node_info.data.timezone = (char *) host->abbrev_timezone;
node_info.data.virtualization_type = host->system_info->virtualization ? host->system_info->virtualization : "unknown";
node_info.data.container_type = host->system_info->container ? host->system_info->container : "unknown";
@@ -55,11 +64,19 @@ void sql_build_node_info(struct aclk_database_worker_config *wc, struct aclk_dat
node_info.data.services = NULL; // char **
node_info.data.service_count = 0;
node_info.data.machine_guid = wc->host_guid;
+
+ struct capability node_caps[] = {
+ { .name = "ml", .version = host->system_info->ml_capable, .enabled = host->system_info->ml_enabled },
+ { .name = "mc", .version = host->system_info->mc_version ? host->system_info->mc_version : 0, .enabled = host->system_info->mc_version ? 1 : 0 },
+ { .name = NULL, .version = 0, .enabled = 0 }
+ };
+ node_info.node_capabilities = node_caps;
+
node_info.data.ml_info.ml_capable = host->system_info->ml_capable;
node_info.data.ml_info.ml_enabled = host->system_info->ml_enabled;
struct label_index *labels = &host->labels;
- netdata_rwlock_wrlock(&labels->labels_rwlock);
+ netdata_rwlock_rdlock(&labels->labels_rwlock);
node_info.data.host_labels_head = labels->head;
aclk_update_node_info(&node_info);
diff --git a/database/sqlite/sqlite_aclk_node.h b/database/sqlite/sqlite_aclk_node.h
index 9cb411586..b8f8c6bbf 100644
--- a/database/sqlite/sqlite_aclk_node.h
+++ b/database/sqlite/sqlite_aclk_node.h
@@ -4,5 +4,4 @@
#define NETDATA_SQLITE_ACLK_NODE_H
void sql_build_node_info(struct aclk_database_worker_config *wc, struct aclk_database_cmd cmd);
-void aclk_update_retention(struct aclk_database_worker_config *wc, struct aclk_database_cmd cmd);
#endif //NETDATA_SQLITE_ACLK_NODE_H
diff --git a/database/sqlite/sqlite_functions.c b/database/sqlite/sqlite_functions.c
index 1e1d2a741..502633c67 100644
--- a/database/sqlite/sqlite_functions.c
+++ b/database/sqlite/sqlite_functions.c
@@ -5,8 +5,6 @@
#define DB_METADATA_VERSION "1"
const char *database_config[] = {
- "PRAGMA auto_vacuum=incremental; PRAGMA synchronous=1 ; PRAGMA journal_mode=WAL; PRAGMA temp_store=MEMORY;",
- "PRAGMA journal_size_limit=16777216;",
"CREATE TABLE IF NOT EXISTS host(host_id blob PRIMARY KEY, hostname text, "
"registry_hostname text, update_every int, os text, timezone text, tags text);",
"CREATE TABLE IF NOT EXISTS chart(chart_id blob PRIMARY KEY, host_id blob, type text, id text, name text, "
@@ -62,6 +60,9 @@ const char *database_cleanup[] = {
"delete from chart where chart_id not in (select chart_id from dimension);",
"delete from host where host_id not in (select host_id from chart);",
"delete from chart_label where chart_id not in (select chart_id from chart);",
+ "DELETE FROM chart_hash_map WHERE chart_id NOT IN (SELECT chart_id FROM chart);",
+ "DELETE FROM chart_hash WHERE hash_id NOT IN (SELECT hash_id FROM chart_hash_map);",
+ "DELETE FROM node_instance WHERE host_id NOT IN (SELECT host_id FROM host);",
NULL
};
@@ -72,10 +73,12 @@ static uv_mutex_t sqlite_transaction_lock;
int execute_insert(sqlite3_stmt *res)
{
int rc;
-
- while ((rc = sqlite3_step(res)) != SQLITE_DONE && unlikely(netdata_exit)) {
- if (likely(rc == SQLITE_BUSY || rc == SQLITE_LOCKED))
+ int cnt = 0;
+ while ((rc = sqlite3_step(res)) != SQLITE_DONE && ++cnt < SQL_MAX_RETRY && likely(!netdata_exit)) {
+ if (likely(rc == SQLITE_BUSY || rc == SQLITE_LOCKED)) {
usleep(SQLITE_INSERT_DELAY * USEC_PER_MS);
+ error_report("Failed to insert/update, rc = %d -- attempt %d", rc, cnt);
+ }
else {
error_report("SQLite error %d", rc);
break;
@@ -93,8 +96,12 @@ static void add_stmt_to_list(sqlite3_stmt *res)
static sqlite3_stmt *statements[MAX_OPEN_STATEMENTS];
if (unlikely(!res)) {
- while (idx > 0)
- sqlite3_finalize(statements[--idx]);
+ while (idx > 0) {
+ int rc;
+ rc = sqlite3_finalize(statements[--idx]);
+ if (unlikely(rc != SQLITE_OK))
+ error_report("Failed to finalize statement during shutdown, rc = %d", rc);
+ }
return;
}
@@ -302,7 +309,7 @@ static int attempt_database_fix()
error_report("Failed to close database, rc = %d", rc);
info("Attempting to fix database");
db_meta = NULL;
- return sql_init_database(DB_CHECK_FIX_DB | DB_CHECK_CONT);
+ return sql_init_database(DB_CHECK_FIX_DB | DB_CHECK_CONT, 0);
}
static int init_database_batch(int rebuild, int init_type, const char *batch[])
@@ -333,13 +340,17 @@ static int init_database_batch(int rebuild, int init_type, const char *batch[])
* Initialize the SQLite database
* Return 0 on success
*/
-int sql_init_database(db_check_action_type_t rebuild)
+int sql_init_database(db_check_action_type_t rebuild, int memory)
{
char *err_msg = NULL;
char sqlite_database[FILENAME_MAX + 1];
int rc;
- snprintfz(sqlite_database, FILENAME_MAX, "%s/netdata-meta.db", netdata_configured_cache_dir);
+ if (likely(!memory))
+ snprintfz(sqlite_database, FILENAME_MAX, "%s/netdata-meta.db", netdata_configured_cache_dir);
+ else
+ strcpy(sqlite_database, ":memory:");
+
rc = sqlite3_open(sqlite_database, &db_meta);
if (rc != SQLITE_OK) {
error_report("Failed to initialize database at %s, due to \"%s\"", sqlite_database, sqlite3_errstr(rc));
@@ -390,6 +401,40 @@ int sql_init_database(db_check_action_type_t rebuild)
info("SQLite database %s initialization", sqlite_database);
+ char buf[1024 + 1] = "";
+ const char *list[2] = { buf, NULL };
+
+ // https://www.sqlite.org/pragma.html#pragma_auto_vacuum
+ // PRAGMA schema.auto_vacuum = 0 | NONE | 1 | FULL | 2 | INCREMENTAL;
+ snprintfz(buf, 1024, "PRAGMA auto_vacuum=%s;", config_get(CONFIG_SECTION_SQLITE, "auto vacuum", "INCREMENTAL"));
+ if(init_database_batch(rebuild, 0, list)) return 1;
+
+ // https://www.sqlite.org/pragma.html#pragma_synchronous
+ // PRAGMA schema.synchronous = 0 | OFF | 1 | NORMAL | 2 | FULL | 3 | EXTRA;
+ snprintfz(buf, 1024, "PRAGMA synchronous=%s;", config_get(CONFIG_SECTION_SQLITE, "synchronous", "NORMAL"));
+ if(init_database_batch(rebuild, 0, list)) return 1;
+
+ // https://www.sqlite.org/pragma.html#pragma_journal_mode
+ // PRAGMA schema.journal_mode = DELETE | TRUNCATE | PERSIST | MEMORY | WAL | OFF
+ snprintfz(buf, 1024, "PRAGMA journal_mode=%s;", config_get(CONFIG_SECTION_SQLITE, "journal mode", "WAL"));
+ if(init_database_batch(rebuild, 0, list)) return 1;
+
+ // https://www.sqlite.org/pragma.html#pragma_temp_store
+ // PRAGMA temp_store = 0 | DEFAULT | 1 | FILE | 2 | MEMORY;
+ snprintfz(buf, 1024, "PRAGMA temp_store=%s;", config_get(CONFIG_SECTION_SQLITE, "temp store", "MEMORY"));
+ if(init_database_batch(rebuild, 0, list)) return 1;
+
+ // https://www.sqlite.org/pragma.html#pragma_journal_size_limit
+ // PRAGMA schema.journal_size_limit = N ;
+ snprintfz(buf, 1024, "PRAGMA journal_size_limit=%lld;", config_get_number(CONFIG_SECTION_SQLITE, "journal size limit", 16777216));
+ if(init_database_batch(rebuild, 0, list)) return 1;
+
+ // https://www.sqlite.org/pragma.html#pragma_cache_size
+ // PRAGMA schema.cache_size = pages;
+ // PRAGMA schema.cache_size = -kibibytes;
+ snprintfz(buf, 1024, "PRAGMA cache_size=%lld;", config_get_number(CONFIG_SECTION_SQLITE, "cache size", -2000));
+ if(init_database_batch(rebuild, 0, list)) return 1;
+
if (init_database_batch(rebuild, 0, &database_config[0]))
return 1;
@@ -1160,8 +1205,24 @@ failed:
return;
}
+void free_temporary_host(RRDHOST *host)
+{
+ if (host) {
+ freez(host->hostname);
+ freez((char *)host->os);
+ freez((char *)host->tags);
+ freez((char *)host->timezone);
+ freez(host->program_name);
+ freez(host->program_version);
+ freez(host->registry_hostname);
+ freez(host->system_info);
+ freez(host);
+ }
+}
+
#define SELECT_HOST "select host_id, registry_hostname, update_every, os, timezone, tags from host where hostname = @hostname order by rowid desc;"
-#define SELECT_HOST_BY_UUID "select host_id, registry_hostname, update_every, os, timezone, tags from host where host_id = @host_id ;"
+#define SELECT_HOST_BY_UUID "select h.host_id, h.registry_hostname, h.update_every, h.os, h.timezone, h.tags from host h, node_instance ni " \
+ "where (ni.host_id = @host_id or ni.node_id = @host_id) AND ni.host_id = h.host_id;"
RRDHOST *sql_create_host_by_uuid(char *hostname)
{
@@ -1229,8 +1290,6 @@ failed:
return host;
}
-#define SQL_MAX_RETRY 100
-
void db_execute(const char *cmd)
{
int rc;
@@ -1430,13 +1489,13 @@ int find_dimension_first_last_t(char *machine_guid, char *chart_id, char *dim_id
}
#ifdef ENABLE_DBENGINE
-static RRDDIM *create_rrdim_entry(RRDSET *st, char *id, char *name, uuid_t *metric_uuid)
+static RRDDIM *create_rrdim_entry(ONEWAYALLOC *owa, RRDSET *st, char *id, char *name, uuid_t *metric_uuid)
{
- RRDDIM *rd = callocz(1, sizeof(*rd));
+ RRDDIM *rd = onewayalloc_callocz(owa, 1, sizeof(*rd));
rd->rrdset = st;
rd->last_stored_value = NAN;
rrddim_flag_set(rd, RRDDIM_FLAG_NONE);
- rd->state = mallocz(sizeof(*rd->state));
+ rd->state = onewayalloc_mallocz(owa, sizeof(*rd->state));
rd->rrd_memory_mode = RRD_MEMORY_MODE_DBENGINE;
rd->state->query_ops.init = rrdeng_load_metric_init;
rd->state->query_ops.next_metric = rrdeng_load_metric_next;
@@ -1444,11 +1503,11 @@ static RRDDIM *create_rrdim_entry(RRDSET *st, char *id, char *name, uuid_t *metr
rd->state->query_ops.finalize = rrdeng_load_metric_finalize;
rd->state->query_ops.latest_time = rrdeng_metric_latest_time;
rd->state->query_ops.oldest_time = rrdeng_metric_oldest_time;
- rd->state->rrdeng_uuid = mallocz(sizeof(uuid_t));
+ rd->state->rrdeng_uuid = onewayalloc_mallocz(owa, sizeof(uuid_t));
uuid_copy(*rd->state->rrdeng_uuid, *metric_uuid);
uuid_copy(rd->state->metric_uuid, *metric_uuid);
- rd->id = strdupz(id);
- rd->name = strdupz(name);
+ rd->id = onewayalloc_strdupz(owa, id);
+ rd->name = onewayalloc_strdupz(owa, name);
return rd;
}
#endif
@@ -1465,7 +1524,7 @@ static RRDDIM *create_rrdim_entry(RRDSET *st, char *id, char *name, uuid_t *metr
"where d.chart_id = c.chart_id and c.host_id = h.host_id and c.host_id = @host_id and c.type||'.'||c.id = @chart " \
"order by c.chart_id asc, c.type||'.'||c.id desc;"
-void sql_build_context_param_list(struct context_param **param_list, RRDHOST *host, char *context, char *chart)
+void sql_build_context_param_list(ONEWAYALLOC *owa, struct context_param **param_list, RRDHOST *host, char *context, char *chart)
{
#ifdef ENABLE_DBENGINE
int rc;
@@ -1474,7 +1533,7 @@ void sql_build_context_param_list(struct context_param **param_list, RRDHOST *ho
return;
if (unlikely(!(*param_list))) {
- *param_list = mallocz(sizeof(struct context_param));
+ *param_list = onewayalloc_mallocz(owa, sizeof(struct context_param));
(*param_list)->first_entry_t = LONG_MAX;
(*param_list)->last_entry_t = 0;
(*param_list)->rd = NULL;
@@ -1523,21 +1582,21 @@ void sql_build_context_param_list(struct context_param **param_list, RRDHOST *ho
if (!st || uuid_compare(*(uuid_t *)sqlite3_column_blob(res, 7), chart_id)) {
if (unlikely(st && !st->counter)) {
- freez(st->context);
- freez((char *) st->name);
- freez(st);
+ onewayalloc_freez(owa, st->context);
+ onewayalloc_freez(owa, (char *) st->name);
+ onewayalloc_freez(owa, st);
}
- st = callocz(1, sizeof(*st));
+ st = onewayalloc_callocz(owa, 1, sizeof(*st));
char n[RRD_ID_LENGTH_MAX + 1];
snprintfz(
n, RRD_ID_LENGTH_MAX, "%s.%s", (char *)sqlite3_column_text(res, 4),
(char *)sqlite3_column_text(res, 3));
- st->name = strdupz(n);
+ st->name = onewayalloc_strdupz(owa, n);
st->update_every = sqlite3_column_int(res, 6);
st->counter = 0;
if (chart) {
- st->context = strdupz((char *)sqlite3_column_text(res, 8));
+ st->context = onewayalloc_strdupz(owa, (char *)sqlite3_column_text(res, 8));
strncpyz(st->id, chart, RRD_ID_LENGTH_MAX);
}
uuid_copy(chart_id, *(uuid_t *)sqlite3_column_blob(res, 7));
@@ -1553,7 +1612,7 @@ void sql_build_context_param_list(struct context_param **param_list, RRDHOST *ho
st->counter++;
st->last_entry_t = MAX(st->last_entry_t, (*param_list)->last_entry_t);
- RRDDIM *rd = create_rrdim_entry(st, (char *)sqlite3_column_text(res, 1), (char *)sqlite3_column_text(res, 2), &rrdeng_uuid);
+ RRDDIM *rd = create_rrdim_entry(owa, st, (char *)sqlite3_column_text(res, 1), (char *)sqlite3_column_text(res, 2), &rrdeng_uuid);
if (sqlite3_column_int(res, 9) == 1)
rrddim_flag_set(rd, RRDDIM_FLAG_HIDDEN);
rd->next = (*param_list)->rd;
@@ -1561,13 +1620,13 @@ void sql_build_context_param_list(struct context_param **param_list, RRDHOST *ho
}
if (st) {
if (!st->counter) {
- freez(st->context);
- freez((char *)st->name);
- freez(st);
+ onewayalloc_freez(owa,st->context);
+ onewayalloc_freez(owa,(char *)st->name);
+ onewayalloc_freez(owa,st);
}
else
if (!st->context && context)
- st->context = strdupz(context);
+ st->context = onewayalloc_strdupz(owa,context);
}
failed:
diff --git a/database/sqlite/sqlite_functions.h b/database/sqlite/sqlite_functions.h
index 30b8dee6c..d24484774 100644
--- a/database/sqlite/sqlite_functions.h
+++ b/database/sqlite/sqlite_functions.h
@@ -24,6 +24,7 @@ typedef enum db_check_action_type {
DB_CHECK_CONT = 0x00008
} db_check_action_type_t;
+#define SQL_MAX_RETRY (100)
#define SQLITE_INSERT_DELAY (50) // Insert delay in case of lock
#define SQL_STORE_HOST "insert or replace into host (host_id,hostname,registry_hostname,update_every,os,timezone,tags) values (?1,?2,?3,?4,?5,?6,?7);"
@@ -56,7 +57,7 @@ typedef enum db_check_action_type {
return 1; \
}
-extern int sql_init_database(db_check_action_type_t rebuild);
+extern int sql_init_database(db_check_action_type_t rebuild, int memory);
extern void sql_close_database(void);
extern int sql_store_host(uuid_t *guid, const char *hostname, const char *registry_hostname, int update_every, const char *os, const char *timezone, const char *tags);
@@ -89,7 +90,7 @@ extern void db_unlock(void);
extern void db_lock(void);
extern void delete_dimension_uuid(uuid_t *dimension_uuid);
extern void sql_store_chart_label(uuid_t *chart_uuid, int source_type, char *label, char *value);
-extern void sql_build_context_param_list(struct context_param **param_list, RRDHOST *host, char *context, char *chart);
+extern void sql_build_context_param_list(ONEWAYALLOC *owa, struct context_param **param_list, RRDHOST *host, char *context, char *chart);
extern void store_claim_id(uuid_t *host_id, uuid_t *claim_id);
extern int update_node_id(uuid_t *host_id, uuid_t *node_id);
extern int get_node_id(uuid_t *host_id, uuid_t *node_id);
@@ -100,4 +101,5 @@ extern void sql_load_node_id(RRDHOST *host);
extern void compute_chart_hash(RRDSET *st);
extern int sql_set_dimension_option(uuid_t *dim_uuid, char *option);
char *get_hostname_by_node_id(char *node_id);
+void free_temporary_host(RRDHOST *host);
#endif //NETDATA_SQLITE_FUNCTIONS_H
diff --git a/database/sqlite/sqlite_health.c b/database/sqlite/sqlite_health.c
index 8ba95628f..53742a1a6 100644
--- a/database/sqlite/sqlite_health.c
+++ b/database/sqlite/sqlite_health.c
@@ -433,6 +433,168 @@ void sql_health_alarm_log_count(RRDHOST *host) {
info("HEALTH [%s]: Table health_log_%s, contains %lu entries.", host->hostname, uuid_str, host->health_log_entries_written);
}
+#define SQL_INJECT_REMOVED(guid, guid2) "insert into health_log_%s (hostname, unique_id, alarm_id, alarm_event_id, config_hash_id, updated_by_id, updates_id, when_key, duration, non_clear_duration, flags, exec_run_timestamp, " \
+"delay_up_to_timestamp, name, chart, family, exec, recipient, source, units, info, exec_code, new_status, old_status, delay, new_value, old_value, last_repeat, class, component, type) " \
+"select hostname, ?1, ?2, ?3, config_hash_id, 0, ?4, strftime('%%s'), 0, 0, flags, exec_run_timestamp, " \
+"strftime('%%s'), name, chart, family, exec, recipient, source, units, info, exec_code, -2, new_status, delay, NULL, new_value, 0, class, component, type " \
+"from health_log_%s where unique_id = ?5", guid, guid2
+#define SQL_INJECT_REMOVED_UPDATE(guid) "update health_log_%s set flags = flags | ?1, updated_by_id = ?2 where unique_id = ?3; ", guid
+/*
+ * Add a follow-up row to health_log_<uuid_str> for one alert and link the
+ * row it supersedes to it.
+ *
+ * Step 1 (SQL_INJECT_REMOVED) copies the row whose unique_id equals
+ * `unique_id` into a new row with:
+ *   unique_id      = max_unique_id
+ *   alarm_id       = alarm_id
+ *   alarm_event_id = alarm_event_id + 1
+ *   updates_id     = unique_id
+ *   new_status     = -2 (presumably RRDCALC_STATUS_REMOVED - confirm against the enum)
+ *   old_status     = the source row's new_status
+ * Step 2 (SQL_INJECT_REMOVED_UPDATE) ORs HEALTH_ENTRY_FLAG_UPDATED into the
+ * source row's flags and sets its updated_by_id to max_unique_id.
+ *
+ * Nothing is done when any of the four numeric arguments is zero. Failures
+ * are reported through error_report(); the function returns no status.
+ */
+void sql_inject_removed_status(char *uuid_str, uint32_t alarm_id, uint32_t alarm_event_id, uint32_t unique_id, uint32_t max_unique_id)
+{
+    if (!(alarm_id && alarm_event_id && unique_id && max_unique_id))
+        return;
+
+    // One bound value together with the message reported if binding it fails.
+    struct removed_bind {
+        sqlite3_int64 value;
+        const char *bind_error;
+    };
+
+    // Positional parameters ?1..?5 of SQL_INJECT_REMOVED, in order.
+    const struct removed_bind insert_params[] = {
+        { (sqlite3_int64) max_unique_id,      "Failed to bind max_unique_id parameter for SQL_INJECT_REMOVED" },
+        { (sqlite3_int64) alarm_id,           "Failed to bind alarm_id parameter for SQL_INJECT_REMOVED" },
+        { (sqlite3_int64) alarm_event_id + 1, "Failed to bind alarm_event_id parameter for SQL_INJECT_REMOVED" },
+        { (sqlite3_int64) unique_id,          "Failed to bind unique_id parameter for SQL_INJECT_REMOVED" },
+        { (sqlite3_int64) unique_id,          "Failed to bind unique_id parameter for SQL_INJECT_REMOVED" },
+    };
+
+    // Positional parameters ?1..?3 of SQL_INJECT_REMOVED_UPDATE, in order.
+    const struct removed_bind update_params[] = {
+        { (sqlite3_int64) HEALTH_ENTRY_FLAG_UPDATED, "Failed to bind flags parameter for SQL_INJECT_REMOVED (update)" },
+        { (sqlite3_int64) max_unique_id,             "Failed to bind max_unique_id parameter for SQL_INJECT_REMOVED (update)" },
+        { (sqlite3_int64) unique_id,                 "Failed to bind unique_id parameter for SQL_INJECT_REMOVED (update)" },
+    };
+
+    char sql[MAX_HEALTH_SQL_SIZE + 1];
+    sqlite3_stmt *stmt = NULL;
+    int rc;
+    int idx;
+
+    // ---- step 1: insert the new row ----
+    snprintfz(sql, MAX_HEALTH_SQL_SIZE, SQL_INJECT_REMOVED(uuid_str, uuid_str));
+    rc = sqlite3_prepare_v2(db_meta, sql, -1, &stmt, 0);
+    if (rc != SQLITE_OK) {
+        error_report("Failed to prepare statement when trying to inject removed event");
+        return;
+    }
+
+    for (idx = 0; idx < (int) (sizeof(insert_params) / sizeof(insert_params[0])); idx++) {
+        rc = sqlite3_bind_int64(stmt, idx + 1, insert_params[idx].value);
+        if (unlikely(rc != SQLITE_OK)) {
+            error_report("%s", insert_params[idx].bind_error);
+            goto failed;
+        }
+    }
+
+    rc = execute_insert(stmt);
+    if (unlikely(rc != SQLITE_DONE)) {
+        error_report("HEALTH [N/A]: Failed to execute SQL_INJECT_REMOVED, rc = %d", rc);
+        goto failed;
+    }
+
+    // A finalize failure here is reported but does not stop step 2.
+    if (unlikely(sqlite3_finalize(stmt) != SQLITE_OK))
+        error_report("HEALTH [N/A]: Failed to finalize the prepared statement for injecting removed event.");
+
+    // ---- step 2: flag the superseded row ----
+    snprintfz(sql, MAX_HEALTH_SQL_SIZE, SQL_INJECT_REMOVED_UPDATE(uuid_str));
+    rc = sqlite3_prepare_v2(db_meta, sql, -1, &stmt, 0);
+    if (rc != SQLITE_OK) {
+        error_report("Failed to prepare statement when trying to update during inject removed event");
+        return;
+    }
+
+    for (idx = 0; idx < (int) (sizeof(update_params) / sizeof(update_params[0])); idx++) {
+        rc = sqlite3_bind_int64(stmt, idx + 1, update_params[idx].value);
+        if (unlikely(rc != SQLITE_OK)) {
+            error_report("%s", update_params[idx].bind_error);
+            goto failed;
+        }
+    }
+
+    rc = execute_insert(stmt);
+    if (unlikely(rc != SQLITE_DONE))
+        error_report("HEALTH [N/A]: Failed to execute SQL_INJECT_REMOVED_UPDATE, rc = %d", rc);
+
+    // Success and every post-prepare failure both end by releasing the statement.
+failed:
+    if (unlikely(sqlite3_finalize(stmt) != SQLITE_OK))
+        error_report("HEALTH [N/A]: Failed to finalize the prepared statement for injecting removed event.");
+}
+
+#define SQL_SELECT_MAX_UNIQUE_ID(guid) "SELECT MAX(unique_id) from health_log_%s", guid
+/*
+ * Query MAX(unique_id) from health_log_<uuid_str>.
+ *
+ * Returns 0 when the statement cannot be prepared; otherwise the first
+ * column of the last row stepped, truncated to uint32_t (0 if no row was
+ * produced).
+ */
+uint32_t sql_get_max_unique_id (char *uuid_str)
+{
+    char query[MAX_HEALTH_SQL_SIZE + 1];
+    sqlite3_stmt *stmt = NULL;
+    uint32_t highest = 0;
+
+    snprintfz(query, MAX_HEALTH_SQL_SIZE, SQL_SELECT_MAX_UNIQUE_ID(uuid_str));
+
+    if (sqlite3_prepare_v2(db_meta, query, -1, &stmt, 0) != SQLITE_OK) {
+        error_report("Failed to prepare statement when trying to get max unique id");
+        return 0;
+    }
+
+    // Keep stepping until the statement stops yielding rows; the last value wins.
+    for (;;) {
+        if (sqlite3_step(stmt) != SQLITE_ROW)
+            break;
+        highest = (uint32_t) sqlite3_column_int64(stmt, 0);
+    }
+
+    if (unlikely(sqlite3_finalize(stmt) != SQLITE_OK))
+        error_report("Failed to finalize the statement");
+
+    return highest;
+}
+
+#define SQL_SELECT_LAST_STATUSES(guid) "SELECT new_status, unique_id, alarm_id, alarm_event_id from health_log_%s group by alarm_id having max(alarm_event_id)", guid
+/*
+ * Walk the per-alarm_id rows returned by SQL_SELECT_LAST_STATUSES for
+ * health_log_<uuid_str> and, for every row whose new_status is not
+ * RRDCALC_STATUS_REMOVED, call sql_inject_removed_status() with a freshly
+ * incremented unique id.
+ *
+ * The current maximum unique id is looked up lazily, the first time it is
+ * needed (and again whenever the running counter is 0).
+ */
+void sql_check_removed_alerts_state(char *uuid_str)
+{
+    char query[MAX_HEALTH_SQL_SIZE + 1];
+    sqlite3_stmt *stmt = NULL;
+    uint32_t next_unique_id = 0;
+
+    snprintfz(query, MAX_HEALTH_SQL_SIZE, SQL_SELECT_LAST_STATUSES(uuid_str));
+
+    if (sqlite3_prepare_v2(db_meta, query, -1, &stmt, 0) != SQLITE_OK) {
+        error_report("Failed to prepare statement when trying to check removed statuses");
+        return;
+    }
+
+    while (sqlite3_step(stmt) == SQLITE_ROW) {
+        RRDCALC_STATUS last_status = (RRDCALC_STATUS) sqlite3_column_int(stmt, 0);
+        uint32_t row_unique_id = (uint32_t) sqlite3_column_int64(stmt, 1);
+        uint32_t row_alarm_id = (uint32_t) sqlite3_column_int64(stmt, 2);
+        uint32_t row_event_id = (uint32_t) sqlite3_column_int64(stmt, 3);
+
+        if (unlikely(last_status != RRDCALC_STATUS_REMOVED)) {
+            if (unlikely(next_unique_id == 0))
+                next_unique_id = sql_get_max_unique_id (uuid_str);
+
+            // Reserve the next id before handing it to the injector.
+            next_unique_id++;
+            sql_inject_removed_status (uuid_str, row_alarm_id, row_event_id, row_unique_id, next_unique_id);
+        }
+    }
+
+    if (unlikely(sqlite3_finalize(stmt) != SQLITE_OK))
+        error_report("Failed to finalize the statement");
+}
+
/* Health related SQL queries
Load from the health log table
*/
@@ -454,6 +616,8 @@ void sql_health_alarm_log_load(RRDHOST *host) {
char uuid_str[GUID_LEN + 1];
uuid_unparse_lower_fix(&host->host_uuid, uuid_str);
+ sql_check_removed_alerts_state(uuid_str);
+
snprintfz(command, MAX_HEALTH_SQL_SIZE, SQL_LOAD_HEALTH_LOG(uuid_str, host->health_log.max));
rc = sqlite3_prepare_v2(db_meta, command, -1, &res, 0);
diff --git a/database/storage_engine.c b/database/storage_engine.c
new file mode 100644
index 000000000..36f01de16
--- /dev/null
+++ b/database/storage_engine.c
@@ -0,0 +1,120 @@
+// SPDX-License-Identifier: GPL-3.0-or-later
+
+#include "storage_engine.h"
+#include "ram/rrddim_mem.h"
+#ifdef ENABLE_DBENGINE
+#include "engine/rrdengineapi.h"
+#endif
+
+#define im_collect_ops { /* collect_ops initializer shared by the NONE, RAM, MAP, SAVE and ALLOC entries of engines[] */ \
+    .init = rrddim_collect_init,\
+    .store_metric = rrddim_collect_store_metric,\
+    .finalize = rrddim_collect_finalize\
+}
+
+#define im_query_ops { /* query_ops initializer shared by the NONE, RAM, MAP, SAVE and ALLOC entries of engines[] */ \
+    .init = rrddim_query_init, \
+    .next_metric = rrddim_query_next_metric, \
+    .is_finished = rrddim_query_is_finished, \
+    .finalize = rrddim_query_finalize, \
+    .latest_time = rrddim_query_latest_time, \
+    .oldest_time = rrddim_query_oldest_time \
+}
+
+static STORAGE_ENGINE engines[] = { // table of known storage engines; the last entry has a NULL name and acts as terminator
+    {
+        .id = RRD_MEMORY_MODE_NONE,
+        .name = RRD_MEMORY_MODE_NONE_NAME,
+        .api = {
+            .collect_ops = im_collect_ops, // rrddim_collect_* functions
+            .query_ops = im_query_ops // rrddim_query_* functions
+        }
+    },
+    {
+        .id = RRD_MEMORY_MODE_RAM,
+        .name = RRD_MEMORY_MODE_RAM_NAME,
+        .api = {
+            .collect_ops = im_collect_ops,
+            .query_ops = im_query_ops
+        }
+    },
+    {
+        .id = RRD_MEMORY_MODE_MAP,
+        .name = RRD_MEMORY_MODE_MAP_NAME,
+        .api = {
+            .collect_ops = im_collect_ops,
+            .query_ops = im_query_ops
+        }
+    },
+    {
+        .id = RRD_MEMORY_MODE_SAVE,
+        .name = RRD_MEMORY_MODE_SAVE_NAME,
+        .api = {
+            .collect_ops = im_collect_ops,
+            .query_ops = im_query_ops
+        }
+    },
+    {
+        .id = RRD_MEMORY_MODE_ALLOC,
+        .name = RRD_MEMORY_MODE_ALLOC_NAME,
+        .api = {
+            .collect_ops = im_collect_ops,
+            .query_ops = im_query_ops
+        }
+    },
+#ifdef ENABLE_DBENGINE // the dbengine entry exists only when ENABLE_DBENGINE is defined
+    {
+        .id = RRD_MEMORY_MODE_DBENGINE,
+        .name = RRD_MEMORY_MODE_DBENGINE_NAME,
+        .api = {
+            .collect_ops = { // rrdeng_store_metric_* functions instead of the rrddim_collect_* ones
+                .init = rrdeng_store_metric_init,
+                .store_metric = rrdeng_store_metric_next,
+                .finalize = rrdeng_store_metric_finalize
+            },
+            .query_ops = { // rrdeng_load_metric_* / rrdeng_metric_*_time functions instead of the rrddim_query_* ones
+                .init = rrdeng_load_metric_init,
+                .next_metric = rrdeng_load_metric_next,
+                .is_finished = rrdeng_load_metric_is_finished,
+                .finalize = rrdeng_load_metric_finalize,
+                .latest_time = rrdeng_metric_latest_time,
+                .oldest_time = rrdeng_metric_oldest_time
+            }
+        }
+    },
+#endif
+    { .id = RRD_MEMORY_MODE_NONE, .name = NULL } // sentinel: the lookup and iteration loops below stop at the first NULL name
+};
+
+// Look up a storage engine by its name.
+//
+// name: engine name to search for; compared with strcmp() against each
+//       table entry's name. May be NULL.
+// Returns the matching entry of engines[], or NULL when name is NULL or no
+// entry has that name.
+STORAGE_ENGINE* storage_engine_find(const char* name)
+{
+    // Passing a NULL pointer to strcmp() is undefined behaviour, so treat
+    // a missing name as "no such engine" instead of dereferencing it.
+    if (!name)
+        return NULL;
+
+    // The table ends at the first entry whose name is NULL.
+    for (STORAGE_ENGINE* it = engines; it->name; it++) {
+        if (strcmp(it->name, name) == 0)
+            return it;
+    }
+    return NULL;
+}
+
+// Look up a storage engine by memory mode.
+// Returns the first entry of engines[] whose id equals mmode, or NULL when
+// the table holds no such entry.
+STORAGE_ENGINE* storage_engine_get(RRD_MEMORY_MODE mmode)
+{
+    STORAGE_ENGINE *candidate = engines;
+
+    // A NULL name marks the end of the table.
+    while (candidate->name) {
+        if (candidate->id == mmode)
+            return candidate;
+        candidate++;
+    }
+
+    return NULL;
+}
+
+// Start an iteration over the engine table: returns its first entry, to be
+// advanced with storage_engine_foreach_next().
+// Declared with (void) so the definition is a real prototype and calls that
+// pass arguments are diagnosed by the compiler.
+STORAGE_ENGINE* storage_engine_foreach_init(void)
+{
+    // Assuming at least one engine exists
+    return &engines[0];
+}
+
+// Advance an iteration over the engine table.
+// it: the entry returned by the previous step (may be NULL).
+// Returns the entry that follows `it`, or NULL when `it` is NULL, when `it`
+// is the terminating entry (NULL name), or when the following entry is the
+// terminator.
+STORAGE_ENGINE* storage_engine_foreach_next(STORAGE_ENGINE* it)
+{
+    if (it && it->name) {
+        STORAGE_ENGINE *following = it + 1;
+        if (following->name)
+            return following;
+    }
+
+    return NULL;
+}
diff --git a/database/storage_engine.h b/database/storage_engine.h
new file mode 100644
index 000000000..0aa70d093
--- /dev/null
+++ b/database/storage_engine.h
@@ -0,0 +1,30 @@
+// SPDX-License-Identifier: GPL-3.0-or-later
+
+#ifndef NETDATA_STORAGEENGINEAPI_H
+#define NETDATA_STORAGEENGINEAPI_H
+
+#include "rrd.h"
+
+typedef struct storage_engine STORAGE_ENGINE;
+
+// ------------------------------------------------------------------------
+// function pointers for all APIs provided by a storage engine
+typedef struct storage_engine_api { // the two operation tables every storage engine supplies
+    struct rrddim_collect_ops collect_ops; // collection side: init / store_metric / finalize
+    struct rrddim_query_ops query_ops; // query side: init / next_metric / is_finished / finalize / latest_time / oldest_time
+} STORAGE_ENGINE_API;
+
+struct storage_engine { // one entry of the storage engine table
+    RRD_MEMORY_MODE id; // memory mode of this entry; the key compared by storage_engine_get()
+    const char* name; // engine name; the key compared by storage_engine_find(), NULL in the terminating entry
+    STORAGE_ENGINE_API api; // collect and query operations of this engine
+};
+
+extern STORAGE_ENGINE* storage_engine_get(RRD_MEMORY_MODE mmode);
+extern STORAGE_ENGINE* storage_engine_find(const char* name);
+
+// Iterator over existing engines
+extern STORAGE_ENGINE* storage_engine_foreach_init();
+extern STORAGE_ENGINE* storage_engine_foreach_next(STORAGE_ENGINE* it);
+
+#endif