diff options
Diffstat (limited to 'libnetdata/statistical')
-rw-r--r-- | libnetdata/statistical/Makefile.am | 8 | ||||
-rw-r--r-- | libnetdata/statistical/README.md | 12 | ||||
-rw-r--r-- | libnetdata/statistical/statistical.c | 460 | ||||
-rw-r--r-- | libnetdata/statistical/statistical.h | 34 |
4 files changed, 0 insertions, 514 deletions
diff --git a/libnetdata/statistical/Makefile.am b/libnetdata/statistical/Makefile.am deleted file mode 100644 index 161784b8f..000000000 --- a/libnetdata/statistical/Makefile.am +++ /dev/null @@ -1,8 +0,0 @@ -# SPDX-License-Identifier: GPL-3.0-or-later - -AUTOMAKE_OPTIONS = subdir-objects -MAINTAINERCLEANFILES = $(srcdir)/Makefile.in - -dist_noinst_DATA = \ - README.md \ - $(NULL) diff --git a/libnetdata/statistical/README.md b/libnetdata/statistical/README.md deleted file mode 100644 index 937c26ac6..000000000 --- a/libnetdata/statistical/README.md +++ /dev/null @@ -1,12 +0,0 @@ -<!-- -title: "Statistical functions" -custom_edit_url: https://github.com/netdata/netdata/edit/master/libnetdata/statistical/README.md -sidebar_label: "Statistical functions" -learn_status: "Published" -learn_topic_type: "Tasks" -learn_rel_path: "Developers/libnetdata" ---> - -# Statistical functions - -A library for easy and fast calculations of statistical measurements like average, median etc. diff --git a/libnetdata/statistical/statistical.c b/libnetdata/statistical/statistical.c deleted file mode 100644 index ef9fe4e56..000000000 --- a/libnetdata/statistical/statistical.c +++ /dev/null @@ -1,460 +0,0 @@ -// SPDX-License-Identifier: GPL-3.0-or-later - -#include "../libnetdata.h" - -NETDATA_DOUBLE default_single_exponential_smoothing_alpha = 0.1; - -void log_series_to_stderr(NETDATA_DOUBLE *series, size_t entries, NETDATA_DOUBLE result, const char *msg) { - const NETDATA_DOUBLE *value, *end = &series[entries]; - - fprintf(stderr, "%s of %zu entries [ ", msg, entries); - for(value = series; value < end ;value++) { - if(value != series) fprintf(stderr, ", "); - fprintf(stderr, "%" NETDATA_DOUBLE_MODIFIER, *value); - } - fprintf(stderr, " ] results in " NETDATA_DOUBLE_FORMAT "\n", result); -} - -// -------------------------------------------------------------------------------------------------------------------- - -inline NETDATA_DOUBLE sum_and_count(const NETDATA_DOUBLE *series, size_t entries, size_t *count) { - const NETDATA_DOUBLE *value, *end = &series[entries]; - NETDATA_DOUBLE sum = 0; - size_t c = 0; - - for(value = series; value < end ; value++) { - if(netdata_double_isnumber(*value)) { - sum += *value; - c++; - } - } - - if(unlikely(!c)) sum = NAN; - if(likely(count)) *count = c; - - return sum; -} - -inline NETDATA_DOUBLE sum(const NETDATA_DOUBLE *series, size_t entries) { - return sum_and_count(series, entries, NULL); -} - -inline NETDATA_DOUBLE average(const NETDATA_DOUBLE *series, size_t entries) { - size_t count = 0; - NETDATA_DOUBLE sum = sum_and_count(series, entries, &count); - - if(unlikely(!count)) return NAN; - return sum / (NETDATA_DOUBLE)count; -} - -// -------------------------------------------------------------------------------------------------------------------- - -NETDATA_DOUBLE moving_average(const NETDATA_DOUBLE *series, size_t entries, size_t period) { - if(unlikely(period <= 0)) - return 0.0; - - size_t i, count; - NETDATA_DOUBLE sum = 0, avg = 0; - NETDATA_DOUBLE p[period]; - - for(count = 0; count < period ; count++) - p[count] = 0.0; - - for(i = 0, count = 0; i < entries; i++) { - NETDATA_DOUBLE value = series[i]; - if(unlikely(!netdata_double_isnumber(value))) continue; - - if(unlikely(count < period)) { - sum += value; - avg = (count == period - 1) ? sum / (NETDATA_DOUBLE)period : 0; - } - else { - sum = sum - p[count % period] + value; - avg = sum / (NETDATA_DOUBLE)period; - } - - p[count % period] = value; - count++; - } - - return avg; -} - -// -------------------------------------------------------------------------------------------------------------------- - -static int qsort_compare(const void *a, const void *b) { - NETDATA_DOUBLE *p1 = (NETDATA_DOUBLE *)a, *p2 = (NETDATA_DOUBLE *)b; - NETDATA_DOUBLE n1 = *p1, n2 = *p2; - - if(unlikely(isnan(n1) || isnan(n2))) { - if(isnan(n1) && !isnan(n2)) return -1; - if(!isnan(n1) && isnan(n2)) return 1; - return 0; - } - if(unlikely(isinf(n1) || isinf(n2))) { - if(!isinf(n1) && isinf(n2)) return -1; - if(isinf(n1) && !isinf(n2)) return 1; - return 0; - } - - if(unlikely(n1 < n2)) return -1; - if(unlikely(n1 > n2)) return 1; - return 0; -} - -inline void sort_series(NETDATA_DOUBLE *series, size_t entries) { - qsort(series, entries, sizeof(NETDATA_DOUBLE), qsort_compare); -} - -inline NETDATA_DOUBLE *copy_series(const NETDATA_DOUBLE *series, size_t entries) { - NETDATA_DOUBLE *copy = mallocz(sizeof(NETDATA_DOUBLE) * entries); - memcpy(copy, series, sizeof(NETDATA_DOUBLE) * entries); - return copy; -} - -NETDATA_DOUBLE median_on_sorted_series(const NETDATA_DOUBLE *series, size_t entries) { - if(unlikely(entries == 0)) return NAN; - if(unlikely(entries == 1)) return series[0]; - if(unlikely(entries == 2)) return (series[0] + series[1]) / 2; - - NETDATA_DOUBLE average; - if(entries % 2 == 0) { - size_t m = entries / 2; - average = (series[m] + series[m + 1]) / 2; - } - else { - average = series[entries / 2]; - } - - return average; -} - -NETDATA_DOUBLE median(const NETDATA_DOUBLE *series, size_t entries) { - if(unlikely(entries == 0)) return NAN; - if(unlikely(entries == 1)) return series[0]; - - if(unlikely(entries == 2)) - return (series[0] + series[1]) / 2; - - NETDATA_DOUBLE *copy = copy_series(series, entries); - sort_series(copy, entries); - - NETDATA_DOUBLE avg = median_on_sorted_series(copy, entries); - - freez(copy); - return avg; -} - -// -------------------------------------------------------------------------------------------------------------------- - -NETDATA_DOUBLE moving_median(const NETDATA_DOUBLE *series, size_t entries, size_t period) { - if(entries <= period) - return median(series, entries); - - NETDATA_DOUBLE *data = copy_series(series, entries); - - size_t i; - for(i = period; i < entries; i++) { - data[i - period] = median(&series[i - period], period); - } - - NETDATA_DOUBLE avg = median(data, entries - period); - freez(data); - return avg; -} - -// -------------------------------------------------------------------------------------------------------------------- - -// http://stackoverflow.com/a/15150143/4525767 -NETDATA_DOUBLE running_median_estimate(const NETDATA_DOUBLE *series, size_t entries) { - NETDATA_DOUBLE median = 0.0f; - NETDATA_DOUBLE average = 0.0f; - size_t i; - - for(i = 0; i < entries ; i++) { - NETDATA_DOUBLE value = series[i]; - if(unlikely(!netdata_double_isnumber(value))) continue; - - average += ( value - average ) * 0.1f; // rough running average. - median += copysignndd( average * 0.01, value - median ); - } - - return median; -} - -// -------------------------------------------------------------------------------------------------------------------- - -NETDATA_DOUBLE standard_deviation(const NETDATA_DOUBLE *series, size_t entries) { - if(unlikely(entries == 0)) return NAN; - if(unlikely(entries == 1)) return series[0]; - - const NETDATA_DOUBLE *value, *end = &series[entries]; - size_t count; - NETDATA_DOUBLE sum; - - for(count = 0, sum = 0, value = series ; value < end ;value++) { - if(likely(netdata_double_isnumber(*value))) { - count++; - sum += *value; - } - } - - if(unlikely(count == 0)) return NAN; - if(unlikely(count == 1)) return sum; - - NETDATA_DOUBLE average = sum / (NETDATA_DOUBLE)count; - - for(count = 0, sum = 0, value = series ; value < end ;value++) { - if(netdata_double_isnumber(*value)) { - count++; - sum += powndd(*value - average, 2); - } - } - - if(unlikely(count == 0)) return NAN; - if(unlikely(count == 1)) return average; - - NETDATA_DOUBLE variance = sum / (NETDATA_DOUBLE)(count); // remove -1 from count to have a population stddev - NETDATA_DOUBLE stddev = sqrtndd(variance); - return stddev; -} - -// -------------------------------------------------------------------------------------------------------------------- - -NETDATA_DOUBLE single_exponential_smoothing(const NETDATA_DOUBLE *series, size_t entries, NETDATA_DOUBLE alpha) { - if(unlikely(entries == 0)) - return NAN; - - if(unlikely(isnan(alpha))) - alpha = default_single_exponential_smoothing_alpha; - - const NETDATA_DOUBLE *value = series, *end = &series[entries]; - NETDATA_DOUBLE level = (1.0 - alpha) * (*value); - - for(value++ ; value < end; value++) { - if(likely(netdata_double_isnumber(*value))) - level = alpha * (*value) + (1.0 - alpha) * level; - } - - return level; -} - -NETDATA_DOUBLE single_exponential_smoothing_reverse(const NETDATA_DOUBLE *series, size_t entries, NETDATA_DOUBLE alpha) { - if(unlikely(entries == 0)) - return NAN; - - if(unlikely(isnan(alpha))) - alpha = default_single_exponential_smoothing_alpha; - - const NETDATA_DOUBLE *value = &series[entries -1]; - NETDATA_DOUBLE level = (1.0 - alpha) * (*value); - - for(value++ ; value >= series; value--) { - if(likely(netdata_double_isnumber(*value))) - level = alpha * (*value) + (1.0 - alpha) * level; - } - - return level; -} - -// -------------------------------------------------------------------------------------------------------------------- - -// http://grisha.org/blog/2016/02/16/triple-exponential-smoothing-forecasting-part-ii/ -NETDATA_DOUBLE double_exponential_smoothing(const NETDATA_DOUBLE *series, size_t entries, - NETDATA_DOUBLE alpha, - NETDATA_DOUBLE beta, - NETDATA_DOUBLE *forecast) { - if(unlikely(entries == 0)) - return NAN; - - NETDATA_DOUBLE level, trend; - - if(unlikely(isnan(alpha))) - alpha = 0.3; - - if(unlikely(isnan(beta))) - beta = 0.05; - - level = series[0]; - - if(likely(entries > 1)) - trend = series[1] - series[0]; - else - trend = 0; - - const NETDATA_DOUBLE *value = series; - for(value++ ; value >= series; value--) { - if(likely(netdata_double_isnumber(*value))) { - NETDATA_DOUBLE last_level = level; - level = alpha * *value + (1.0 - alpha) * (level + trend); - trend = beta * (level - last_level) + (1.0 - beta) * trend; - - } - } - - if(forecast) - *forecast = level + trend; - - return level; -} - -// -------------------------------------------------------------------------------------------------------------------- - -/* - * Based on th R implementation - * - * a: level component - * b: trend component - * s: seasonal component - * - * Additive: - * - * Yhat[t+h] = a[t] + h * b[t] + s[t + 1 + (h - 1) mod p], - * a[t] = α (Y[t] - s[t-p]) + (1-α) (a[t-1] + b[t-1]) - * b[t] = β (a[t] - a[t-1]) + (1-β) b[t-1] - * s[t] = γ (Y[t] - a[t]) + (1-γ) s[t-p] - * - * Multiplicative: - * - * Yhat[t+h] = (a[t] + h * b[t]) * s[t + 1 + (h - 1) mod p], - * a[t] = α (Y[t] / s[t-p]) + (1-α) (a[t-1] + b[t-1]) - * b[t] = β (a[t] - a[t-1]) + (1-β) b[t-1] - * s[t] = γ (Y[t] / a[t]) + (1-γ) s[t-p] - */ -static int __HoltWinters( - const NETDATA_DOUBLE *series, - int entries, // start_time + h - - NETDATA_DOUBLE alpha, // alpha parameter of Holt-Winters Filter. - NETDATA_DOUBLE - beta, // beta parameter of Holt-Winters Filter. If set to 0, the function will do exponential smoothing. - NETDATA_DOUBLE - gamma, // gamma parameter used for the seasonal component. If set to 0, an non-seasonal model is fitted. - - const int *seasonal, - const int *period, - const NETDATA_DOUBLE *a, // Start value for level (a[0]). - const NETDATA_DOUBLE *b, // Start value for trend (b[0]). - NETDATA_DOUBLE *s, // Vector of start values for the seasonal component (s_1[0] ... s_p[0]) - - /* return values */ - NETDATA_DOUBLE *SSE, // The final sum of squared errors achieved in optimizing - NETDATA_DOUBLE *level, // Estimated values for the level component (size entries - t + 2) - NETDATA_DOUBLE *trend, // Estimated values for the trend component (size entries - t + 2) - NETDATA_DOUBLE *season // Estimated values for the seasonal component (size entries - t + 2) -) -{ - if(unlikely(entries < 4)) - return 0; - - int start_time = 2; - - NETDATA_DOUBLE res = 0, xhat = 0, stmp = 0; - int i, i0, s0; - - /* copy start values to the beginning of the vectors */ - level[0] = *a; - if(beta > 0) trend[0] = *b; - if(gamma > 0) memcpy(season, s, *period * sizeof(NETDATA_DOUBLE)); - - for(i = start_time - 1; i < entries; i++) { - /* indices for period i */ - i0 = i - start_time + 2; - s0 = i0 + *period - 1; - - /* forecast *for* period i */ - xhat = level[i0 - 1] + (beta > 0 ? trend[i0 - 1] : 0); - stmp = gamma > 0 ? season[s0 - *period] : (*seasonal != 1); - if (*seasonal == 1) - xhat += stmp; - else - xhat *= stmp; - - /* Sum of Squared Errors */ - res = series[i] - xhat; - *SSE += res * res; - - /* estimate of level *in* period i */ - if (*seasonal == 1) - level[i0] = alpha * (series[i] - stmp) - + (1 - alpha) * (level[i0 - 1] + trend[i0 - 1]); - else - level[i0] = alpha * (series[i] / stmp) - + (1 - alpha) * (level[i0 - 1] + trend[i0 - 1]); - - /* estimate of trend *in* period i */ - if (beta > 0) - trend[i0] = beta * (level[i0] - level[i0 - 1]) - + (1 - beta) * trend[i0 - 1]; - - /* estimate of seasonal component *in* period i */ - if (gamma > 0) { - if (*seasonal == 1) - season[s0] = gamma * (series[i] - level[i0]) - + (1 - gamma) * stmp; - else - season[s0] = gamma * (series[i] / level[i0]) - + (1 - gamma) * stmp; - } - } - - return 1; -} - -NETDATA_DOUBLE holtwinters(const NETDATA_DOUBLE *series, size_t entries, - NETDATA_DOUBLE alpha, - NETDATA_DOUBLE beta, - NETDATA_DOUBLE gamma, - NETDATA_DOUBLE *forecast) { - if(unlikely(isnan(alpha))) - alpha = 0.3; - - if(unlikely(isnan(beta))) - beta = 0.05; - - if(unlikely(isnan(gamma))) - gamma = 0; - - int seasonal = 0; - int period = 0; - NETDATA_DOUBLE a0 = series[0]; - NETDATA_DOUBLE b0 = 0; - NETDATA_DOUBLE s[] = {}; - - NETDATA_DOUBLE errors = 0.0; - size_t nb_computations = entries; - NETDATA_DOUBLE *estimated_level = callocz(nb_computations, sizeof(NETDATA_DOUBLE)); - NETDATA_DOUBLE *estimated_trend = callocz(nb_computations, sizeof(NETDATA_DOUBLE)); - NETDATA_DOUBLE *estimated_season = callocz(nb_computations, sizeof(NETDATA_DOUBLE)); - - int ret = __HoltWinters( - series, - (int)entries, - alpha, - beta, - gamma, - &seasonal, - &period, - &a0, - &b0, - s, - &errors, - estimated_level, - estimated_trend, - estimated_season - ); - - NETDATA_DOUBLE value = estimated_level[nb_computations - 1]; - - if(forecast) - *forecast = 0.0; - - freez(estimated_level); - freez(estimated_trend); - freez(estimated_season); - - if(!ret) - return 0.0; - - return value; -} diff --git a/libnetdata/statistical/statistical.h b/libnetdata/statistical/statistical.h deleted file mode 100644 index f3ecfadb4..000000000 --- a/libnetdata/statistical/statistical.h +++ /dev/null @@ -1,34 +0,0 @@ -// SPDX-License-Identifier: GPL-3.0-or-later - -#ifndef NETDATA_STATISTICAL_H -#define NETDATA_STATISTICAL_H 1 - -#include "../libnetdata.h" - -void log_series_to_stderr(NETDATA_DOUBLE *series, size_t entries, NETDATA_DOUBLE result, const char *msg); - -NETDATA_DOUBLE average(const NETDATA_DOUBLE *series, size_t entries); -NETDATA_DOUBLE moving_average(const NETDATA_DOUBLE *series, size_t entries, size_t period); -NETDATA_DOUBLE median(const NETDATA_DOUBLE *series, size_t entries); -NETDATA_DOUBLE moving_median(const NETDATA_DOUBLE *series, size_t entries, size_t period); -NETDATA_DOUBLE running_median_estimate(const NETDATA_DOUBLE *series, size_t entries); -NETDATA_DOUBLE standard_deviation(const NETDATA_DOUBLE *series, size_t entries); -NETDATA_DOUBLE single_exponential_smoothing(const NETDATA_DOUBLE *series, size_t entries, NETDATA_DOUBLE alpha); -extern NETDATA_DOUBLE -single_exponential_smoothing_reverse(const NETDATA_DOUBLE *series, size_t entries, NETDATA_DOUBLE alpha); -NETDATA_DOUBLE double_exponential_smoothing(const NETDATA_DOUBLE *series, size_t entries, - NETDATA_DOUBLE alpha, - NETDATA_DOUBLE beta, - NETDATA_DOUBLE *forecast); -NETDATA_DOUBLE holtwinters(const NETDATA_DOUBLE *series, size_t entries, - NETDATA_DOUBLE alpha, - NETDATA_DOUBLE beta, - NETDATA_DOUBLE gamma, - NETDATA_DOUBLE *forecast); -NETDATA_DOUBLE sum_and_count(const NETDATA_DOUBLE *series, size_t entries, size_t *count); -NETDATA_DOUBLE sum(const NETDATA_DOUBLE *series, size_t entries); -NETDATA_DOUBLE median_on_sorted_series(const NETDATA_DOUBLE *series, size_t entries); -NETDATA_DOUBLE *copy_series(const NETDATA_DOUBLE *series, size_t entries); -void sort_series(NETDATA_DOUBLE *series, size_t entries); - -#endif //NETDATA_STATISTICAL_H |