summaryrefslogtreecommitdiffstats
path: root/quantiles.c
diff options
context:
space:
mode:
Diffstat (limited to 'quantiles.c')
-rw-r--r--quantiles.c209
1 files changed, 209 insertions, 0 deletions
diff --git a/quantiles.c b/quantiles.c
new file mode 100644
index 0000000..52953db
--- /dev/null
+++ b/quantiles.c
@@ -0,0 +1,209 @@
+/*
+ chronyd/chronyc - Programs for keeping computer clocks accurate.
+
+ **********************************************************************
+ * Copyright (C) Miroslav Lichvar 2022
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ **********************************************************************
+
+ =======================================================================
+
+ Estimation of quantiles using the Frugal-2U streaming algorithm
+ (https://arxiv.org/pdf/1407.1121v1.pdf)
+ */
+
+#include "config.h"
+
+#include "logging.h"
+#include "memory.h"
+#include "quantiles.h"
+#include "regress.h"
+#include "util.h"
+
+/* Maximum number of repeated estimates for stabilisation */
+#define MAX_REPEAT 64
+
+struct Quantile {
+ double est;
+ double step;
+ int sign;
+};
+
+struct QNT_Instance_Record {
+ struct Quantile *quants;
+ int n_quants;
+ int repeat;
+ int q;
+ int min_k;
+ double min_step;
+ int n_set;
+};
+
+/* ================================================== */
+
+QNT_Instance
+QNT_CreateInstance(int min_k, int max_k, int q, int repeat, double min_step)
+{
+ QNT_Instance inst;
+ long seed;
+
+ if (q < 2 || min_k > max_k || min_k < 1 || max_k >= q ||
+ repeat < 1 || repeat > MAX_REPEAT || min_step <= 0.0)
+ assert(0);
+
+ inst = MallocNew(struct QNT_Instance_Record);
+ inst->n_quants = (max_k - min_k + 1) * repeat;
+ inst->quants = MallocArray(struct Quantile, inst->n_quants);
+ inst->repeat = repeat;
+ inst->q = q;
+ inst->min_k = min_k;
+ inst->min_step = min_step;
+
+ QNT_Reset(inst);
+
+ /* Seed the random number generator, which will not be isolated from
+ other instances and other random() users */
+ UTI_GetRandomBytes(&seed, sizeof (seed));
+ srandom(seed);
+
+ return inst;
+}
+
+/* ================================================== */
+
+void
+QNT_DestroyInstance(QNT_Instance inst)
+{
+ Free(inst->quants);
+ Free(inst);
+}
+
+/* ================================================== */
+
+void
+QNT_Reset(QNT_Instance inst)
+{
+ int i;
+
+ inst->n_set = 0;
+
+ for (i = 0; i < inst->n_quants; i++) {
+ inst->quants[i].est = 0.0;
+ inst->quants[i].step = inst->min_step;
+ inst->quants[i].sign = 1;
+ }
+}
+
+/* ================================================== */
+
+static void
+insert_initial_value(QNT_Instance inst, double value)
+{
+ int i, j, r = inst->repeat;
+
+ if (inst->n_set * r >= inst->n_quants)
+ assert(0);
+
+ /* Keep the initial estimates repeated and ordered */
+ for (i = inst->n_set; i > 0 && inst->quants[(i - 1) * r].est > value; i--) {
+ for (j = 0; j < r; j++)
+ inst->quants[i * r + j].est = inst->quants[(i - 1) * r].est;
+ }
+
+ for (j = 0; j < r; j++)
+ inst->quants[i * r + j].est = value;
+ inst->n_set++;
+
+ /* Duplicate the largest value in unset quantiles */
+ for (i = inst->n_set * r; i < inst->n_quants; i++)
+ inst->quants[i].est = inst->quants[i - 1].est;
+}
+
+/* ================================================== */
+
+static void
+update_estimate(struct Quantile *quantile, double value, double p, double rand,
+ double min_step)
+{
+ if (value > quantile->est && rand > (1.0 - p)) {
+ quantile->step += quantile->sign > 0 ? min_step : -min_step;
+ quantile->est += quantile->step > 0.0 ? fabs(quantile->step) : min_step;
+ if (quantile->est > value) {
+ quantile->step += value - quantile->est;
+ quantile->est = value;
+ }
+ if (quantile->sign < 0 && quantile->step > min_step)
+ quantile->step = min_step;
+ quantile->sign = 1;
+ } else if (value < quantile->est && rand > p) {
+ quantile->step += quantile->sign < 0 ? min_step : -min_step;
+ quantile->est -= quantile->step > 0.0 ? fabs(quantile->step) : min_step;
+ if (quantile->est < value) {
+ quantile->step += quantile->est - value;
+ quantile->est = value;
+ }
+ if (quantile->sign > 0 && quantile->step > min_step)
+ quantile->step = min_step;
+ quantile->sign = -1;
+ }
+}
+
+/* ================================================== */
+
+void
+QNT_Accumulate(QNT_Instance inst, double value)
+{
+ double p, rand;
+ int i;
+
+ /* Initialise the estimates with first received values */
+ if (inst->n_set * inst->repeat < inst->n_quants) {
+ insert_initial_value(inst, value);
+ return;
+ }
+
+ for (i = 0; i < inst->n_quants; i++) {
+ p = (double)(i / inst->repeat + inst->min_k) / inst->q;
+ rand = (double)random() / ((1U << 31) - 1);
+
+ update_estimate(&inst->quants[i], value, p, rand, inst->min_step);
+ }
+}
+
+/* ================================================== */
+
+int
+QNT_GetMinK(QNT_Instance inst)
+{
+ return inst->min_k;
+}
+
+/* ================================================== */
+
+double
+QNT_GetQuantile(QNT_Instance inst, int k)
+{
+ double estimates[MAX_REPEAT];
+ int i;
+
+ if (k < inst->min_k || k - inst->min_k >= inst->n_quants)
+ assert(0);
+
+ for (i = 0; i < inst->repeat; i++)
+ estimates[i] = inst->quants[(k - inst->min_k) * inst->repeat + i].est;
+
+ return RGR_FindMedian(estimates, inst->repeat);
+}