summaryrefslogtreecommitdiffstats
path: root/ml/dlib/dlib/clustering/bottom_up_cluster_abstract.h
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-03-09 13:19:48 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-03-09 13:20:02 +0000
commit58daab21cd043e1dc37024a7f99b396788372918 (patch)
tree96771e43bb69f7c1c2b0b4f7374cb74d7866d0cb /ml/dlib/dlib/clustering/bottom_up_cluster_abstract.h
parentReleasing debian version 1.43.2-1. (diff)
downloadnetdata-58daab21cd043e1dc37024a7f99b396788372918.tar.xz
netdata-58daab21cd043e1dc37024a7f99b396788372918.zip
Merging upstream version 1.44.3.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'ml/dlib/dlib/clustering/bottom_up_cluster_abstract.h')
-rw-r--r--ml/dlib/dlib/clustering/bottom_up_cluster_abstract.h136
1 files changed, 136 insertions, 0 deletions
diff --git a/ml/dlib/dlib/clustering/bottom_up_cluster_abstract.h b/ml/dlib/dlib/clustering/bottom_up_cluster_abstract.h
new file mode 100644
index 000000000..72d362c12
--- /dev/null
+++ b/ml/dlib/dlib/clustering/bottom_up_cluster_abstract.h
@@ -0,0 +1,136 @@
+// Copyright (C) 2015 Davis E. King (davis@dlib.net)
+// License: Boost Software License See LICENSE.txt for the full license.
+#undef DLIB_BOTTOM_uP_CLUSTER_ABSTRACT_Hh_
+#ifdef DLIB_BOTTOM_uP_CLUSTER_ABSTRACT_Hh_
+
+#include "../matrix.h"
+
+namespace dlib
+{
+
+// ----------------------------------------------------------------------------------------
+
+ template <
+ typename EXP
+ >
+ unsigned long bottom_up_cluster (
+ const matrix_exp<EXP>& dists,
+ std::vector<unsigned long>& labels,
+ unsigned long min_num_clusters,
+ double max_dist = std::numeric_limits<double>::infinity()
+ );
+ /*!
+ requires
+ - dists.nr() == dists.nc()
+ - min_num_clusters > 0
+ - dists == trans(dists)
+ (l.e. dists should be symmetric)
+ ensures
+ - Runs a bottom up agglomerative clustering algorithm.
+ - Interprets dists as a matrix that gives the distances between dists.nr()
+ items. In particular, we take dists(i,j) to be the distance between the ith
+ and jth element of some set. This function clusters the elements of this set
+ into at least min_num_clusters (or dists.nr() if there aren't enough
+ elements). Additionally, within each cluster, the maximum pairwise distance
+ between any two cluster elements is <= max_dist.
+ - returns the number of clusters found.
+ - #labels.size() == dists.nr()
+ - for all valid i:
+ - #labels[i] == the cluster ID of the node with index i (i.e. the node
+ corresponding to the distances dists(i,*)).
+ - 0 <= #labels[i] < the number of clusters found
+ (i.e. cluster IDs are assigned contiguously and start at 0)
+ !*/
+
+// ----------------------------------------------------------------------------------------
+// ----------------------------------------------------------------------------------------
+
+ struct snl_range
+ {
+ /*!
+ WHAT THIS OBJECT REPRESENTS
+ This object represents an interval on the real number line. It is used
+ to store the outputs of the segment_number_line() routine defined below.
+ !*/
+
+ snl_range(
+ );
+ /*!
+ ensures
+ - #lower == 0
+ - #upper == 0
+ !*/
+
+ snl_range(
+ double val
+ );
+ /*!
+ ensures
+ - #lower == val
+ - #upper == val
+ !*/
+
+ snl_range(
+ double l,
+ double u
+ );
+ /*!
+ requires
+ - l <= u
+ ensures
+ - #lower == l
+ - #upper == u
+ !*/
+
+ double lower;
+ double upper;
+
+ double width(
+ ) const { return upper-lower; }
+ /*!
+ ensures
+ - returns the width of this interval on the number line.
+ !*/
+
+ bool operator<(const snl_range& item) const { return lower < item.lower; }
+ /*!
+ ensures
+ - provides a total ordering of snl_range objects assuming they are
+ non-overlapping.
+ !*/
+ };
+
+ std::ostream& operator<< (std::ostream& out, const snl_range& item );
+ /*!
+ ensures
+ - prints item to out in the form [lower,upper].
+ !*/
+
+// ----------------------------------------------------------------------------------------
+
+ std::vector<snl_range> segment_number_line (
+ const std::vector<double>& x,
+ const double max_range_width
+ );
+ /*!
+ requires
+ - max_range_width >= 0
+ ensures
+ - Finds a clustering of the values in x and returns the ranges that define the
+ clustering. This routine uses a combination of bottom up clustering and a
+ simple greedy scan to try and find the most compact set of ranges that
+ contain all the values in x.
+ - This routine has approximately linear runtime.
+ - Every value in x will be contained inside one of the returned snl_range
+ objects;
+ - All returned snl_range object's will have a width() <= max_range_width and
+ will also be non-overlapping.
+ !*/
+
+// ----------------------------------------------------------------------------------------
+
+}
+
+#endif // DLIB_BOTTOM_uP_CLUSTER_ABSTRACT_Hh_
+
+