diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-03-09 13:19:48 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-03-09 13:20:02 +0000 |
commit | 58daab21cd043e1dc37024a7f99b396788372918 (patch) | |
tree | 96771e43bb69f7c1c2b0b4f7374cb74d7866d0cb /ml/dlib/dlib/clustering/bottom_up_cluster_abstract.h | |
parent | Releasing debian version 1.43.2-1. (diff) | |
download | netdata-58daab21cd043e1dc37024a7f99b396788372918.tar.xz netdata-58daab21cd043e1dc37024a7f99b396788372918.zip |
Merging upstream version 1.44.3.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'ml/dlib/dlib/clustering/bottom_up_cluster_abstract.h')
-rw-r--r-- | ml/dlib/dlib/clustering/bottom_up_cluster_abstract.h | 136 |
1 files changed, 136 insertions, 0 deletions
diff --git a/ml/dlib/dlib/clustering/bottom_up_cluster_abstract.h b/ml/dlib/dlib/clustering/bottom_up_cluster_abstract.h new file mode 100644 index 000000000..72d362c12 --- /dev/null +++ b/ml/dlib/dlib/clustering/bottom_up_cluster_abstract.h @@ -0,0 +1,136 @@ +// Copyright (C) 2015 Davis E. King (davis@dlib.net) +// License: Boost Software License See LICENSE.txt for the full license. +#undef DLIB_BOTTOM_uP_CLUSTER_ABSTRACT_Hh_ +#ifdef DLIB_BOTTOM_uP_CLUSTER_ABSTRACT_Hh_ + +#include "../matrix.h" + +namespace dlib +{ + +// ---------------------------------------------------------------------------------------- + + template < + typename EXP + > + unsigned long bottom_up_cluster ( + const matrix_exp<EXP>& dists, + std::vector<unsigned long>& labels, + unsigned long min_num_clusters, + double max_dist = std::numeric_limits<double>::infinity() + ); + /*! + requires + - dists.nr() == dists.nc() + - min_num_clusters > 0 + - dists == trans(dists) + (l.e. dists should be symmetric) + ensures + - Runs a bottom up agglomerative clustering algorithm. + - Interprets dists as a matrix that gives the distances between dists.nr() + items. In particular, we take dists(i,j) to be the distance between the ith + and jth element of some set. This function clusters the elements of this set + into at least min_num_clusters (or dists.nr() if there aren't enough + elements). Additionally, within each cluster, the maximum pairwise distance + between any two cluster elements is <= max_dist. + - returns the number of clusters found. + - #labels.size() == dists.nr() + - for all valid i: + - #labels[i] == the cluster ID of the node with index i (i.e. the node + corresponding to the distances dists(i,*)). + - 0 <= #labels[i] < the number of clusters found + (i.e. cluster IDs are assigned contiguously and start at 0) + !*/ + +// ---------------------------------------------------------------------------------------- +// ---------------------------------------------------------------------------------------- + + struct snl_range + { + /*! + WHAT THIS OBJECT REPRESENTS + This object represents an interval on the real number line. It is used + to store the outputs of the segment_number_line() routine defined below. + !*/ + + snl_range( + ); + /*! + ensures + - #lower == 0 + - #upper == 0 + !*/ + + snl_range( + double val + ); + /*! + ensures + - #lower == val + - #upper == val + !*/ + + snl_range( + double l, + double u + ); + /*! + requires + - l <= u + ensures + - #lower == l + - #upper == u + !*/ + + double lower; + double upper; + + double width( + ) const { return upper-lower; } + /*! + ensures + - returns the width of this interval on the number line. + !*/ + + bool operator<(const snl_range& item) const { return lower < item.lower; } + /*! + ensures + - provides a total ordering of snl_range objects assuming they are + non-overlapping. + !*/ + }; + + std::ostream& operator<< (std::ostream& out, const snl_range& item ); + /*! + ensures + - prints item to out in the form [lower,upper]. + !*/ + +// ---------------------------------------------------------------------------------------- + + std::vector<snl_range> segment_number_line ( + const std::vector<double>& x, + const double max_range_width + ); + /*! + requires + - max_range_width >= 0 + ensures + - Finds a clustering of the values in x and returns the ranges that define the + clustering. This routine uses a combination of bottom up clustering and a + simple greedy scan to try and find the most compact set of ranges that + contain all the values in x. + - This routine has approximately linear runtime. + - Every value in x will be contained inside one of the returned snl_range + objects; + - All returned snl_range object's will have a width() <= max_range_width and + will also be non-overlapping. + !*/ + +// ---------------------------------------------------------------------------------------- + +} + +#endif // DLIB_BOTTOM_uP_CLUSTER_ABSTRACT_Hh_ + + |