Diffstat (limited to 'ml/dlib/dlib/clustering')
-rw-r--r--  ml/dlib/dlib/clustering/bottom_up_cluster.h               | 253
-rw-r--r--  ml/dlib/dlib/clustering/bottom_up_cluster_abstract.h      | 136
-rw-r--r--  ml/dlib/dlib/clustering/chinese_whispers.h                | 135
-rw-r--r--  ml/dlib/dlib/clustering/chinese_whispers_abstract.h       |  97
-rw-r--r--  ml/dlib/dlib/clustering/modularity_clustering.h           | 515
-rw-r--r--  ml/dlib/dlib/clustering/modularity_clustering_abstract.h  | 125
-rw-r--r--  ml/dlib/dlib/clustering/spectral_cluster.h                |  80
-rw-r--r--  ml/dlib/dlib/clustering/spectral_cluster_abstract.h       |  43
8 files changed, 1384 insertions, 0 deletions
diff --git a/ml/dlib/dlib/clustering/bottom_up_cluster.h b/ml/dlib/dlib/clustering/bottom_up_cluster.h
new file mode 100644
index 000000000..f80b65108
--- /dev/null
+++ b/ml/dlib/dlib/clustering/bottom_up_cluster.h
@@ -0,0 +1,253 @@
+// Copyright (C) 2015 Davis E. King (davis@dlib.net)
+// License: Boost Software License See LICENSE.txt for the full license.
+#ifndef DLIB_BOTTOM_uP_CLUSTER_Hh_
+#define DLIB_BOTTOM_uP_CLUSTER_Hh_
+
+#include <queue>
+#include <map>
+
+#include "bottom_up_cluster_abstract.h"
+#include "../algs.h"
+#include "../matrix.h"
+#include "../disjoint_subsets.h"
+#include "../graph_utils.h"
+
+
+namespace dlib
+{
+
+// ----------------------------------------------------------------------------------------
+
+ namespace buc_impl
+ {
+ inline void merge_sets (
+ matrix<double>& dists,
+ unsigned long dest,
+ unsigned long src
+ )
+ {
+ for (long r = 0; r < dists.nr(); ++r)
+ dists(dest,r) = dists(r,dest) = std::max(dists(r,dest), dists(r,src));
+ }
+
+ struct compare_dist
+ {
+ bool operator() (
+ const sample_pair& a,
+ const sample_pair& b
+ ) const
+ {
+ return a.distance() > b.distance();
+ }
+ };
+ }
+
+// ----------------------------------------------------------------------------------------
+
+ template <
+ typename EXP
+ >
+ unsigned long bottom_up_cluster (
+ const matrix_exp<EXP>& dists_,
+ std::vector<unsigned long>& labels,
+ unsigned long min_num_clusters,
+ double max_dist = std::numeric_limits<double>::infinity()
+ )
+ {
+ matrix<double> dists = matrix_cast<double>(dists_);
+ // make sure requires clause is not broken
+ DLIB_CASSERT(dists.nr() == dists.nc() && min_num_clusters > 0,
+ "\t unsigned long bottom_up_cluster()"
+ << "\n\t Invalid inputs were given to this function."
+ << "\n\t dists.nr(): " << dists.nr()
+ << "\n\t dists.nc(): " << dists.nc()
+ << "\n\t min_num_clusters: " << min_num_clusters
+ );
+
+ using namespace buc_impl;
+
+ labels.resize(dists.nr());
+ disjoint_subsets sets;
+ sets.set_size(dists.nr());
+ if (labels.size() == 0)
+ return 0;
+
+ // push all the edges in the graph into a priority queue so the best edges to merge
+ // come first.
+ std::priority_queue<sample_pair, std::vector<sample_pair>, compare_dist> que;
+ for (long r = 0; r < dists.nr(); ++r)
+ for (long c = r+1; c < dists.nc(); ++c)
+ que.push(sample_pair(r,c,dists(r,c)));
+
+ // Now start merging nodes.
+ for (unsigned long iter = min_num_clusters; iter < sets.size(); ++iter)
+ {
+ // find the next best thing to merge.
+ double best_dist = que.top().distance();
+ unsigned long a = sets.find_set(que.top().index1());
+ unsigned long b = sets.find_set(que.top().index2());
+ que.pop();
+ // we have been merging and modifying the distances, so make sure this distance
+ // is still valid and these guys haven't been merged already.
+ while(a == b || best_dist < dists(a,b))
+ {
+ // Haven't merged it yet, so put it back in with updated distance for
+ // reconsideration later.
+ if (a != b)
+ que.push(sample_pair(a, b, dists(a, b)));
+
+ best_dist = que.top().distance();
+ a = sets.find_set(que.top().index1());
+ b = sets.find_set(que.top().index2());
+ que.pop();
+ }
+
+
+ // now merge these sets if the best distance is small enough
+ if (best_dist > max_dist)
+ break;
+ unsigned long news = sets.merge_sets(a,b);
+ unsigned long olds = (news==a)?b:a;
+ merge_sets(dists, news, olds);
+ }
+
+ // figure out which cluster each element is in. Also make sure the labels are
+ // contiguous.
+ std::map<unsigned long, unsigned long> relabel;
+ for (unsigned long r = 0; r < labels.size(); ++r)
+ {
+ unsigned long l = sets.find_set(r);
+ // relabel to make contiguous
+ if (relabel.count(l) == 0)
+ {
+ unsigned long next = relabel.size();
+ relabel[l] = next;
+ }
+ labels[r] = relabel[l];
+ }
+
+
+ return relabel.size();
+ }
+
+// ----------------------------------------------------------------------------------------
+// ----------------------------------------------------------------------------------------
+
+ struct snl_range
+ {
+ snl_range() = default;
+ snl_range(double val) : lower(val), upper(val) {}
+ snl_range(double l, double u) : lower(l), upper(u) { DLIB_ASSERT(lower <= upper)}
+
+ double lower = 0;
+ double upper = 0;
+
+ double width() const { return upper-lower; }
+ bool operator<(const snl_range& item) const { return lower < item.lower; }
+ };
+
+ inline snl_range merge(const snl_range& a, const snl_range& b)
+ {
+ return snl_range(std::min(a.lower, b.lower), std::max(a.upper, b.upper));
+ }
+
+ inline double distance (const snl_range& a, const snl_range& b)
+ {
+ return std::max(a.lower,b.lower) - std::min(a.upper,b.upper);
+ }
+
+ inline std::ostream& operator<< (std::ostream& out, const snl_range& item )
+ {
+ out << "["<<item.lower<<","<<item.upper<<"]";
+ return out;
+ }
+
+// ----------------------------------------------------------------------------------------
+
+ inline std::vector<snl_range> segment_number_line (
+ const std::vector<double>& x,
+ const double max_range_width
+ )
+ {
+ DLIB_CASSERT(max_range_width >= 0);
+
+ // create initial ranges, one for each value in x. So initially, all the ranges have
+ // width of 0.
+ std::vector<snl_range> ranges;
+ for (auto v : x)
+ ranges.push_back(v);
+ std::sort(ranges.begin(), ranges.end());
+
+ std::vector<snl_range> greedy_final_ranges;
+ if (ranges.size() == 0)
+ return greedy_final_ranges;
+ // We will try two different clustering strategies. One that does a simple greedy left
+ // to right sweep and another that does a bottom up agglomerative clustering. This
+ // first loop runs the greedy left to right sweep. Then at the end of this routine we
+ // will return the results that produced the tightest clustering.
+ greedy_final_ranges.push_back(ranges[0]);
+ for (size_t i = 1; i < ranges.size(); ++i)
+ {
+ auto m = merge(greedy_final_ranges.back(), ranges[i]);
+ if (m.width() <= max_range_width)
+ greedy_final_ranges.back() = m;
+ else
+ greedy_final_ranges.push_back(ranges[i]);
+ }
+
+
+ // Here we do the bottom up clustering. So compute the edges connecting our ranges.
+ // We will simply say there are edges between ranges if and only if they are
+ // immediately adjacent on the number line.
+ std::vector<sample_pair> edges;
+ for (size_t i = 1; i < ranges.size(); ++i)
+ edges.push_back(sample_pair(i-1,i, distance(ranges[i-1],ranges[i])));
+ std::sort(edges.begin(), edges.end(), order_by_distance<sample_pair>);
+
+ disjoint_subsets sets;
+ sets.set_size(ranges.size());
+
+ // Now start merging nodes.
+ for (auto edge : edges)
+ {
+ // find the next best thing to merge.
+ unsigned long a = sets.find_set(edge.index1());
+ unsigned long b = sets.find_set(edge.index2());
+
+ // merge it if it doesn't result in an interval that's too big.
+ auto m = merge(ranges[a], ranges[b]);
+ if (m.width() <= max_range_width)
+ {
+ unsigned long news = sets.merge_sets(a,b);
+ ranges[news] = m;
+ }
+ }
+
+ // Now create a list of the final ranges. We will do this by keeping track of which
+ // range we already added to final_ranges.
+ std::vector<snl_range> final_ranges;
+ std::vector<bool> already_output(ranges.size(), false);
+ for (unsigned long i = 0; i < sets.size(); ++i)
+ {
+ auto s = sets.find_set(i);
+ if (!already_output[s])
+ {
+ final_ranges.push_back(ranges[s]);
+ already_output[s] = true;
+ }
+ }
+
+ // only use the greedy clusters if they found a clustering with fewer clusters.
+ // Otherwise, the bottom up clustering probably produced a more sensible clustering.
+ if (final_ranges.size() <= greedy_final_ranges.size())
+ return final_ranges;
+ else
+ return greedy_final_ranges;
+ }
+
+// ----------------------------------------------------------------------------------------
+
+}
+
+#endif // DLIB_BOTTOM_uP_CLUSTER_Hh_
+
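For reference, a minimal usage sketch of the bottom_up_cluster() routine added above. The data is made up, and the sketch assumes the umbrella header dlib/clustering.h (which pulls this file in) is on the include path:

    #include <dlib/clustering.h>
    #include <iostream>
    #include <vector>

    int main()
    {
        using namespace dlib;

        // A symmetric 4x4 distance matrix describing two tight pairs of points.
        matrix<double> dists(4,4);
        dists = 10;                   // everything far apart by default
        dists(0,1) = dists(1,0) = 1;  // points 0 and 1 are close
        dists(2,3) = dists(3,2) = 1;  // points 2 and 3 are close
        for (long r = 0; r < 4; ++r)
            dists(r,r) = 0;

        std::vector<unsigned long> labels;
        // Merge greedily (complete linkage, via the max() in merge_sets above)
        // until only 2 clusters remain.
        const unsigned long num_clusters = bottom_up_cluster(dists, labels, 2);

        std::cout << "found " << num_clusters << " clusters\n";
        for (auto l : labels)
            std::cout << l << " ";    // expected output: 0 0 1 1
        std::cout << "\n";
    }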
diff --git a/ml/dlib/dlib/clustering/bottom_up_cluster_abstract.h b/ml/dlib/dlib/clustering/bottom_up_cluster_abstract.h
new file mode 100644
index 000000000..72d362c12
--- /dev/null
+++ b/ml/dlib/dlib/clustering/bottom_up_cluster_abstract.h
@@ -0,0 +1,136 @@
+// Copyright (C) 2015 Davis E. King (davis@dlib.net)
+// License: Boost Software License See LICENSE.txt for the full license.
+#undef DLIB_BOTTOM_uP_CLUSTER_ABSTRACT_Hh_
+#ifdef DLIB_BOTTOM_uP_CLUSTER_ABSTRACT_Hh_
+
+#include "../matrix.h"
+
+namespace dlib
+{
+
+// ----------------------------------------------------------------------------------------
+
+ template <
+ typename EXP
+ >
+ unsigned long bottom_up_cluster (
+ const matrix_exp<EXP>& dists,
+ std::vector<unsigned long>& labels,
+ unsigned long min_num_clusters,
+ double max_dist = std::numeric_limits<double>::infinity()
+ );
+ /*!
+ requires
+ - dists.nr() == dists.nc()
+ - min_num_clusters > 0
+ - dists == trans(dists)
+              (i.e. dists should be symmetric)
+ ensures
+ - Runs a bottom up agglomerative clustering algorithm.
+ - Interprets dists as a matrix that gives the distances between dists.nr()
+ items. In particular, we take dists(i,j) to be the distance between the ith
+ and jth element of some set. This function clusters the elements of this set
+ into at least min_num_clusters (or dists.nr() if there aren't enough
+ elements). Additionally, within each cluster, the maximum pairwise distance
+ between any two cluster elements is <= max_dist.
+ - returns the number of clusters found.
+ - #labels.size() == dists.nr()
+ - for all valid i:
+ - #labels[i] == the cluster ID of the node with index i (i.e. the node
+ corresponding to the distances dists(i,*)).
+ - 0 <= #labels[i] < the number of clusters found
+ (i.e. cluster IDs are assigned contiguously and start at 0)
+ !*/
+
+// ----------------------------------------------------------------------------------------
+// ----------------------------------------------------------------------------------------
+
+ struct snl_range
+ {
+ /*!
+ WHAT THIS OBJECT REPRESENTS
+ This object represents an interval on the real number line. It is used
+ to store the outputs of the segment_number_line() routine defined below.
+ !*/
+
+ snl_range(
+ );
+ /*!
+ ensures
+ - #lower == 0
+ - #upper == 0
+ !*/
+
+ snl_range(
+ double val
+ );
+ /*!
+ ensures
+ - #lower == val
+ - #upper == val
+ !*/
+
+ snl_range(
+ double l,
+ double u
+ );
+ /*!
+ requires
+ - l <= u
+ ensures
+ - #lower == l
+ - #upper == u
+ !*/
+
+ double lower;
+ double upper;
+
+ double width(
+ ) const { return upper-lower; }
+ /*!
+ ensures
+ - returns the width of this interval on the number line.
+ !*/
+
+ bool operator<(const snl_range& item) const { return lower < item.lower; }
+ /*!
+ ensures
+ - provides a total ordering of snl_range objects assuming they are
+ non-overlapping.
+ !*/
+ };
+
+ std::ostream& operator<< (std::ostream& out, const snl_range& item );
+ /*!
+ ensures
+ - prints item to out in the form [lower,upper].
+ !*/
+
+// ----------------------------------------------------------------------------------------
+
+ std::vector<snl_range> segment_number_line (
+ const std::vector<double>& x,
+ const double max_range_width
+ );
+ /*!
+ requires
+ - max_range_width >= 0
+ ensures
+ - Finds a clustering of the values in x and returns the ranges that define the
+ clustering. This routine uses a combination of bottom up clustering and a
+ simple greedy scan to try and find the most compact set of ranges that
+ contain all the values in x.
+ - This routine has approximately linear runtime.
+            - Every value in x will be contained inside one of the returned snl_range
+              objects.
+            - All returned snl_range objects will have a width() <= max_range_width and
+              will also be non-overlapping.
+ !*/
+
+// ----------------------------------------------------------------------------------------
+
+}
+
+#endif // DLIB_BOTTOM_uP_CLUSTER_ABSTRACT_Hh_
+
+
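segment_number_line() is easiest to see on a toy example. A minimal sketch, again assuming dlib/clustering.h is on the include path:

    #include <dlib/clustering.h>
    #include <iostream>
    #include <vector>

    int main()
    {
        using namespace dlib;

        // Three obvious groups of values on the number line.
        std::vector<double> x = {0.0, 0.1, 0.2, 5.0, 5.05, 9.0};

        // No returned interval may be wider than 1.0.
        const std::vector<snl_range> ranges = segment_number_line(x, 1.0);

        for (auto& r : ranges)
            std::cout << r << " ";    // expected output: [0,0.2] [5,5.05] [9,9]
        std::cout << "\n";
    }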
diff --git a/ml/dlib/dlib/clustering/chinese_whispers.h b/ml/dlib/dlib/clustering/chinese_whispers.h
new file mode 100644
index 000000000..332cce1a0
--- /dev/null
+++ b/ml/dlib/dlib/clustering/chinese_whispers.h
@@ -0,0 +1,135 @@
+// Copyright (C) 2012 Davis E. King (davis@dlib.net)
+// License: Boost Software License See LICENSE.txt for the full license.
+#ifndef DLIB_CHINESE_WHISPErS_Hh_
+#define DLIB_CHINESE_WHISPErS_Hh_
+
+#include "chinese_whispers_abstract.h"
+#include <vector>
+#include "../rand.h"
+#include "../graph_utils/edge_list_graphs.h"
+
+namespace dlib
+{
+
+// ----------------------------------------------------------------------------------------
+
+ inline unsigned long chinese_whispers (
+ const std::vector<ordered_sample_pair>& edges,
+ std::vector<unsigned long>& labels,
+ const unsigned long num_iterations,
+ dlib::rand& rnd
+ )
+ {
+ // make sure requires clause is not broken
+ DLIB_ASSERT(is_ordered_by_index(edges),
+ "\t unsigned long chinese_whispers()"
+ << "\n\t Invalid inputs were given to this function"
+ );
+
+ labels.clear();
+ if (edges.size() == 0)
+ return 0;
+
+ std::vector<std::pair<unsigned long, unsigned long> > neighbors;
+ find_neighbor_ranges(edges, neighbors);
+
+ // Initialize the labels, each node gets a different label.
+ labels.resize(neighbors.size());
+ for (unsigned long i = 0; i < labels.size(); ++i)
+ labels[i] = i;
+
+
+ for (unsigned long iter = 0; iter < neighbors.size()*num_iterations; ++iter)
+ {
+ // Pick a random node.
+ const unsigned long idx = rnd.get_random_64bit_number()%neighbors.size();
+
+ // Count how many times each label happens amongst our neighbors.
+ std::map<unsigned long, double> labels_to_counts;
+ const unsigned long end = neighbors[idx].second;
+ for (unsigned long i = neighbors[idx].first; i != end; ++i)
+ {
+ labels_to_counts[labels[edges[i].index2()]] += edges[i].distance();
+ }
+
+ // find the most common label
+ std::map<unsigned long, double>::iterator i;
+ double best_score = -std::numeric_limits<double>::infinity();
+ unsigned long best_label = labels[idx];
+ for (i = labels_to_counts.begin(); i != labels_to_counts.end(); ++i)
+ {
+ if (i->second > best_score)
+ {
+ best_score = i->second;
+ best_label = i->first;
+ }
+ }
+
+ labels[idx] = best_label;
+ }
+
+
+ // Remap the labels into a contiguous range. First we find the
+ // mapping.
+ std::map<unsigned long,unsigned long> label_remap;
+ for (unsigned long i = 0; i < labels.size(); ++i)
+ {
+ const unsigned long next_id = label_remap.size();
+ if (label_remap.count(labels[i]) == 0)
+ label_remap[labels[i]] = next_id;
+ }
+ // now apply the mapping to all the labels.
+ for (unsigned long i = 0; i < labels.size(); ++i)
+ {
+ labels[i] = label_remap[labels[i]];
+ }
+
+ return label_remap.size();
+ }
+
+// ----------------------------------------------------------------------------------------
+
+ inline unsigned long chinese_whispers (
+ const std::vector<sample_pair>& edges,
+ std::vector<unsigned long>& labels,
+ const unsigned long num_iterations,
+ dlib::rand& rnd
+ )
+ {
+ std::vector<ordered_sample_pair> oedges;
+ convert_unordered_to_ordered(edges, oedges);
+ std::sort(oedges.begin(), oedges.end(), &order_by_index<ordered_sample_pair>);
+
+ return chinese_whispers(oedges, labels, num_iterations, rnd);
+ }
+
+// ----------------------------------------------------------------------------------------
+
+ inline unsigned long chinese_whispers (
+ const std::vector<sample_pair>& edges,
+ std::vector<unsigned long>& labels,
+ const unsigned long num_iterations = 100
+ )
+ {
+ dlib::rand rnd;
+ return chinese_whispers(edges, labels, num_iterations, rnd);
+ }
+
+// ----------------------------------------------------------------------------------------
+
+ inline unsigned long chinese_whispers (
+ const std::vector<ordered_sample_pair>& edges,
+ std::vector<unsigned long>& labels,
+ const unsigned long num_iterations = 100
+ )
+ {
+ dlib::rand rnd;
+ return chinese_whispers(edges, labels, num_iterations, rnd);
+ }
+
+// ----------------------------------------------------------------------------------------
+
+}
+
+#endif // DLIB_CHINESE_WHISPErS_Hh_
+
diff --git a/ml/dlib/dlib/clustering/chinese_whispers_abstract.h b/ml/dlib/dlib/clustering/chinese_whispers_abstract.h
new file mode 100644
index 000000000..7a184c6f9
--- /dev/null
+++ b/ml/dlib/dlib/clustering/chinese_whispers_abstract.h
@@ -0,0 +1,97 @@
+// Copyright (C) 2012 Davis E. King (davis@dlib.net)
+// License: Boost Software License See LICENSE.txt for the full license.
+#undef DLIB_CHINESE_WHISPErS_ABSTRACT_Hh_
+#ifdef DLIB_CHINESE_WHISPErS_ABSTRACT_Hh_
+
+#include <vector>
+#include "../rand.h"
+#include "../graph_utils/ordered_sample_pair_abstract.h"
+#include "../graph_utils/sample_pair_abstract.h"
+
+namespace dlib
+{
+
+// ----------------------------------------------------------------------------------------
+
+ unsigned long chinese_whispers (
+ const std::vector<ordered_sample_pair>& edges,
+ std::vector<unsigned long>& labels,
+ const unsigned long num_iterations,
+ dlib::rand& rnd
+ );
+ /*!
+ requires
+ - is_ordered_by_index(edges) == true
+ ensures
+ - This function implements the graph clustering algorithm described in the
+ paper: Chinese Whispers - an Efficient Graph Clustering Algorithm and its
+ Application to Natural Language Processing Problems by Chris Biemann.
+ - Interprets edges as a directed graph. That is, it contains the edges on the
+ said graph and the ordered_sample_pair::distance() values define the edge
+ weights (larger values indicating a stronger edge connection between the
+ nodes). If an edge has a distance() value of infinity then it is considered
+ a "must link" edge.
+ - returns the number of clusters found.
+ - #labels.size() == max_index_plus_one(edges)
+ - for all valid i:
+ - #labels[i] == the cluster ID of the node with index i in the graph.
+ - 0 <= #labels[i] < the number of clusters found
+ (i.e. cluster IDs are assigned contiguously and start at 0)
+            - Duplicate edges are interpreted as if there had been just one edge with a
+              distance value equal to the sum of all the duplicate edges' distance values.
+ - The algorithm performs exactly num_iterations passes over the graph before
+ terminating.
+ !*/
+
+// ----------------------------------------------------------------------------------------
+
+ unsigned long chinese_whispers (
+ const std::vector<sample_pair>& edges,
+ std::vector<unsigned long>& labels,
+ const unsigned long num_iterations,
+ dlib::rand& rnd
+ );
+ /*!
+ ensures
+ - This function is identical to the above chinese_whispers() routine except
+ that it operates on a vector of sample_pair objects instead of
+ ordered_sample_pairs. Therefore, this is simply a convenience routine. In
+ particular, it is implemented by transforming the given edges into
+ ordered_sample_pairs and then calling the chinese_whispers() routine defined
+ above.
+ !*/
+
+// ----------------------------------------------------------------------------------------
+
+ unsigned long chinese_whispers (
+ const std::vector<ordered_sample_pair>& edges,
+ std::vector<unsigned long>& labels,
+ const unsigned long num_iterations = 100
+ );
+ /*!
+ requires
+ - is_ordered_by_index(edges) == true
+ ensures
+ - performs: return chinese_whispers(edges, labels, num_iterations, rnd)
+ where rnd is a default initialized dlib::rand object.
+ !*/
+
+// ----------------------------------------------------------------------------------------
+
+ unsigned long chinese_whispers (
+ const std::vector<sample_pair>& edges,
+ std::vector<unsigned long>& labels,
+ const unsigned long num_iterations = 100
+ );
+ /*!
+ ensures
+ - performs: return chinese_whispers(edges, labels, num_iterations, rnd)
+ where rnd is a default initialized dlib::rand object.
+ !*/
+
+// ----------------------------------------------------------------------------------------
+
+}
+
+#endif // DLIB_CHINESE_WHISPErS_ABSTRACT_Hh_
+
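To make the contract above concrete, here is a minimal sketch (toy graph, assuming dlib/clustering.h) that clusters an undirected edge list with the convenience overload:

    #include <dlib/clustering.h>
    #include <iostream>
    #include <vector>

    int main()
    {
        using namespace dlib;

        // Nodes 0-2 form a triangle, nodes 3-4 a separate pair; no edges
        // connect the two groups, so two clusters should come back.
        std::vector<sample_pair> edges;
        edges.push_back(sample_pair(0,1, 1.0));
        edges.push_back(sample_pair(1,2, 1.0));
        edges.push_back(sample_pair(0,2, 1.0));
        edges.push_back(sample_pair(3,4, 1.0));

        std::vector<unsigned long> labels;
        const unsigned long num_clusters = chinese_whispers(edges, labels);

        std::cout << "found " << num_clusters << " clusters\n";
        for (size_t i = 0; i < labels.size(); ++i)
            std::cout << "node " << i << " -> cluster " << labels[i] << "\n";
    }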
diff --git a/ml/dlib/dlib/clustering/modularity_clustering.h b/ml/dlib/dlib/clustering/modularity_clustering.h
new file mode 100644
index 000000000..8b8a0b0a5
--- /dev/null
+++ b/ml/dlib/dlib/clustering/modularity_clustering.h
@@ -0,0 +1,515 @@
+// Copyright (C) 2012 Davis E. King (davis@dlib.net)
+// License: Boost Software License See LICENSE.txt for the full license.
+#ifndef DLIB_MODULARITY_ClUSTERING__H__
+#define DLIB_MODULARITY_ClUSTERING__H__
+
+#include "modularity_clustering_abstract.h"
+#include "../sparse_vector.h"
+#include "../graph_utils/edge_list_graphs.h"
+#include "../matrix.h"
+#include "../rand.h"
+
+namespace dlib
+{
+
+// -----------------------------------------------------------------------------------------
+
+ namespace impl
+ {
+ inline double newman_cluster_split (
+ dlib::rand& rnd,
+ const std::vector<ordered_sample_pair>& edges,
+ const matrix<double,0,1>& node_degrees, // k from the Newman paper
+ const matrix<double,0,1>& Bdiag, // diag(B) from the Newman paper
+ const double& edge_sum, // m from the Newman paper
+ matrix<double,0,1>& labels,
+ const double eps,
+ const unsigned long max_iterations
+ )
+ /*!
+ requires
+ - node_degrees.size() == max_index_plus_one(edges)
+ - Bdiag.size() == max_index_plus_one(edges)
+ - edges must be sorted according to order_by_index()
+ ensures
+ - This routine splits a graph into two subgraphs using the Newman
+ clustering method.
+ - returns the modularity obtained when the graph is split according
+ to the contents of #labels.
+ - #labels.size() == node_degrees.size()
+ - for all valid i: #labels(i) == -1 or +1
+ - if (this function returns 0) then
+ - all the labels are equal, i.e. the graph is not split.
+ !*/
+ {
+ // Scale epsilon so that it is relative to the expected value of an element of a
+ // unit vector of length node_degrees.size().
+ const double power_iter_eps = eps * std::sqrt(1.0/node_degrees.size());
+
+ // Make a random unit vector and put in labels.
+ labels.set_size(node_degrees.size());
+ for (long i = 0; i < labels.size(); ++i)
+ labels(i) = rnd.get_random_gaussian();
+ labels /= length(labels);
+
+ matrix<double,0,1> Bv, Bv_unit;
+
+ // Do the power iteration for a while.
+ double eig = -1;
+ double offset = 0;
+ while (eig < 0)
+ {
+
+ // any number larger than power_iter_eps
+ double iteration_change = power_iter_eps*2+1;
+ for (unsigned long i = 0; i < max_iterations && iteration_change > power_iter_eps; ++i)
+ {
+ sparse_matrix_vector_multiply(edges, labels, Bv);
+ Bv -= dot(node_degrees, labels)/(2*edge_sum) * node_degrees;
+
+ if (offset != 0)
+ {
+ Bv -= offset*labels;
+ }
+
+
+ const double len = length(Bv);
+ if (len != 0)
+ {
+ Bv_unit = Bv/len;
+ iteration_change = max(abs(labels-Bv_unit));
+ labels.swap(Bv_unit);
+ }
+ else
+ {
+ // Had a bad time, pick another random vector and try it with the
+ // power iteration.
+ for (long i = 0; i < labels.size(); ++i)
+ labels(i) = rnd.get_random_gaussian();
+ }
+ }
+
+ eig = dot(Bv,labels);
+ // we will repeat this loop if the largest eigenvalue is negative
+ offset = eig;
+ }
+
+
+ for (long i = 0; i < labels.size(); ++i)
+ {
+ if (labels(i) > 0)
+ labels(i) = 1;
+ else
+ labels(i) = -1;
+ }
+
+
+ // compute B*labels, store result in Bv.
+ sparse_matrix_vector_multiply(edges, labels, Bv);
+ Bv -= dot(node_degrees, labels)/(2*edge_sum) * node_degrees;
+
+ // Do some label refinement. In this step we swap labels if it
+ // improves the modularity score.
+ bool flipped_label = true;
+ while(flipped_label)
+ {
+ flipped_label = false;
+ unsigned long idx = 0;
+ for (long i = 0; i < labels.size(); ++i)
+ {
+ const double val = -2*labels(i);
+ const double increase = 4*Bdiag(i) + 2*val*Bv(i);
+
+ // if there is an increase in modularity for swapping this label
+ if (increase > 0)
+ {
+ labels(i) *= -1;
+ while (idx < edges.size() && edges[idx].index1() == (unsigned long)i)
+ {
+ const long j = edges[idx].index2();
+ Bv(j) += val*edges[idx].distance();
+ ++idx;
+ }
+
+ Bv -= (val*node_degrees(i)/(2*edge_sum))*node_degrees;
+
+ flipped_label = true;
+ }
+ else
+ {
+ while (idx < edges.size() && edges[idx].index1() == (unsigned long)i)
+ {
+ ++idx;
+ }
+ }
+ }
+ }
+
+
+ const double modularity = dot(Bv, labels)/(4*edge_sum);
+
+ return modularity;
+ }
+
+ // -------------------------------------------------------------------------------------
+
+ inline unsigned long newman_cluster_helper (
+ dlib::rand& rnd,
+ const std::vector<ordered_sample_pair>& edges,
+ const matrix<double,0,1>& node_degrees, // k from the Newman paper
+ const matrix<double,0,1>& Bdiag, // diag(B) from the Newman paper
+ const double& edge_sum, // m from the Newman paper
+ std::vector<unsigned long>& labels,
+ double modularity_threshold,
+ const double eps,
+ const unsigned long max_iterations
+ )
+ /*!
+ ensures
+ - returns the number of clusters the data was split into
+ !*/
+ {
+ matrix<double,0,1> l;
+ const double modularity = newman_cluster_split(rnd,edges,node_degrees,Bdiag,edge_sum,l,eps,max_iterations);
+
+
+ // We need to collapse the node index values down to contiguous values. So
+ // we use the following two vectors to contain the mappings from input index
+ // values to their corresponding index values in each split.
+ std::vector<unsigned long> left_idx_map(node_degrees.size());
+ std::vector<unsigned long> right_idx_map(node_degrees.size());
+
+ // figure out how many nodes went into each side of the split.
+ unsigned long num_left_split = 0;
+ unsigned long num_right_split = 0;
+ for (long i = 0; i < l.size(); ++i)
+ {
+ if (l(i) > 0)
+ {
+ left_idx_map[i] = num_left_split;
+ ++num_left_split;
+ }
+ else
+ {
+ right_idx_map[i] = num_right_split;
+ ++num_right_split;
+ }
+ }
+
+ // do a recursive split if it will improve the modularity.
+ if (modularity > modularity_threshold && num_left_split > 0 && num_right_split > 0)
+ {
+
+ // split the node_degrees and Bdiag matrices into left and right split parts
+ matrix<double,0,1> left_node_degrees(num_left_split);
+ matrix<double,0,1> right_node_degrees(num_right_split);
+ matrix<double,0,1> left_Bdiag(num_left_split);
+ matrix<double,0,1> right_Bdiag(num_right_split);
+ for (long i = 0; i < l.size(); ++i)
+ {
+ if (l(i) > 0)
+ {
+ left_node_degrees(left_idx_map[i]) = node_degrees(i);
+ left_Bdiag(left_idx_map[i]) = Bdiag(i);
+ }
+ else
+ {
+ right_node_degrees(right_idx_map[i]) = node_degrees(i);
+ right_Bdiag(right_idx_map[i]) = Bdiag(i);
+ }
+ }
+
+
+ // put the edges from one side of the split into split_edges
+ std::vector<ordered_sample_pair> split_edges;
+ modularity_threshold = 0;
+ for (unsigned long k = 0; k < edges.size(); ++k)
+ {
+ const unsigned long i = edges[k].index1();
+ const unsigned long j = edges[k].index2();
+ const double d = edges[k].distance();
+ if (l(i) > 0 && l(j) > 0)
+ {
+ split_edges.push_back(ordered_sample_pair(left_idx_map[i], left_idx_map[j], d));
+ modularity_threshold += d;
+ }
+ }
+ modularity_threshold -= sum(left_node_degrees*sum(left_node_degrees))/(2*edge_sum);
+ modularity_threshold /= 4*edge_sum;
+
+ unsigned long num_left_clusters;
+ std::vector<unsigned long> left_labels;
+ num_left_clusters = newman_cluster_helper(rnd,split_edges,left_node_degrees,left_Bdiag,
+ edge_sum,left_labels,modularity_threshold,
+ eps, max_iterations);
+
+ // now load the other side into split_edges and cluster it as well
+ split_edges.clear();
+ modularity_threshold = 0;
+ for (unsigned long k = 0; k < edges.size(); ++k)
+ {
+ const unsigned long i = edges[k].index1();
+ const unsigned long j = edges[k].index2();
+ const double d = edges[k].distance();
+ if (l(i) < 0 && l(j) < 0)
+ {
+ split_edges.push_back(ordered_sample_pair(right_idx_map[i], right_idx_map[j], d));
+ modularity_threshold += d;
+ }
+ }
+ modularity_threshold -= sum(right_node_degrees*sum(right_node_degrees))/(2*edge_sum);
+ modularity_threshold /= 4*edge_sum;
+
+ unsigned long num_right_clusters;
+ std::vector<unsigned long> right_labels;
+ num_right_clusters = newman_cluster_helper(rnd,split_edges,right_node_degrees,right_Bdiag,
+ edge_sum,right_labels,modularity_threshold,
+ eps, max_iterations);
+
+ // Now merge the labels from the two splits.
+ labels.resize(node_degrees.size());
+ for (unsigned long i = 0; i < labels.size(); ++i)
+ {
+ // if this node was in the left split
+ if (l(i) > 0)
+ {
+ labels[i] = left_labels[left_idx_map[i]];
+ }
+ else // if this node was in the right split
+ {
+ labels[i] = right_labels[right_idx_map[i]] + num_left_clusters;
+ }
+ }
+
+
+ return num_left_clusters + num_right_clusters;
+ }
+ else
+ {
+ labels.assign(node_degrees.size(),0);
+ return 1;
+ }
+
+ }
+ }
+
+// ----------------------------------------------------------------------------------------
+
+ inline unsigned long newman_cluster (
+ const std::vector<ordered_sample_pair>& edges,
+ std::vector<unsigned long>& labels,
+ const double eps = 1e-4,
+ const unsigned long max_iterations = 2000
+ )
+ {
+ // make sure requires clause is not broken
+ DLIB_ASSERT(is_ordered_by_index(edges),
+ "\t unsigned long newman_cluster()"
+ << "\n\t Invalid inputs were given to this function"
+ );
+
+ labels.clear();
+ if (edges.size() == 0)
+ return 0;
+
+ const unsigned long num_nodes = max_index_plus_one(edges);
+
+ // compute the node_degrees vector, edge_sum value, and diag(B).
+ matrix<double,0,1> node_degrees(num_nodes);
+ matrix<double,0,1> Bdiag(num_nodes);
+ Bdiag = 0;
+ double edge_sum = 0;
+ node_degrees = 0;
+ for (unsigned long i = 0; i < edges.size(); ++i)
+ {
+ node_degrees(edges[i].index1()) += edges[i].distance();
+ edge_sum += edges[i].distance();
+ if (edges[i].index1() == edges[i].index2())
+ Bdiag(edges[i].index1()) += edges[i].distance();
+ }
+ edge_sum /= 2;
+ Bdiag -= squared(node_degrees)/(2*edge_sum);
+
+
+ dlib::rand rnd;
+ return impl::newman_cluster_helper(rnd,edges,node_degrees,Bdiag,edge_sum,labels,0,eps,max_iterations);
+ }
+
+// ----------------------------------------------------------------------------------------
+
+ inline unsigned long newman_cluster (
+ const std::vector<sample_pair>& edges,
+ std::vector<unsigned long>& labels,
+ const double eps = 1e-4,
+ const unsigned long max_iterations = 2000
+ )
+ {
+ std::vector<ordered_sample_pair> oedges;
+ convert_unordered_to_ordered(edges, oedges);
+ std::sort(oedges.begin(), oedges.end(), &order_by_index<ordered_sample_pair>);
+
+ return newman_cluster(oedges, labels, eps, max_iterations);
+ }
+
+// ----------------------------------------------------------------------------------------
+
+ namespace impl
+ {
+ inline std::vector<unsigned long> remap_labels (
+ const std::vector<unsigned long>& labels,
+ unsigned long& num_labels
+ )
+ /*!
+ ensures
+            - This function takes labels and produces a mapping which maps the elements
+              of labels into the most compact range [0, max] possible.  In particular,
+              there won't be any unused integers in the mapped range.
+ - #num_labels == the number of distinct values in labels.
+ - returns a vector V such that:
+ - V.size() == labels.size()
+ - max(mat(V))+1 == num_labels.
+ - for all valid i,j:
+ - if (labels[i] == labels[j]) then
+ - V[i] == V[j]
+ - else
+ - V[i] != V[j]
+ !*/
+ {
+ std::map<unsigned long, unsigned long> temp;
+ for (unsigned long i = 0; i < labels.size(); ++i)
+ {
+ if (temp.count(labels[i]) == 0)
+ {
+ const unsigned long next = temp.size();
+ temp[labels[i]] = next;
+ }
+ }
+
+ num_labels = temp.size();
+
+ std::vector<unsigned long> result(labels.size());
+ for (unsigned long i = 0; i < labels.size(); ++i)
+ {
+ result[i] = temp[labels[i]];
+ }
+ return result;
+ }
+ }
+
+// ----------------------------------------------------------------------------------------
+
+ inline double modularity (
+ const std::vector<sample_pair>& edges,
+ const std::vector<unsigned long>& labels
+ )
+ {
+ const unsigned long num_nodes = max_index_plus_one(edges);
+ // make sure requires clause is not broken
+ DLIB_ASSERT(labels.size() == num_nodes,
+ "\t double modularity()"
+ << "\n\t Invalid inputs were given to this function"
+ );
+
+ unsigned long num_labels;
+ const std::vector<unsigned long>& labels_ = dlib::impl::remap_labels(labels,num_labels);
+
+ std::vector<double> cluster_sums(num_labels,0);
+ std::vector<double> k(num_nodes,0);
+
+ double Q = 0;
+ double m = 0;
+ for (unsigned long i = 0; i < edges.size(); ++i)
+ {
+ const unsigned long n1 = edges[i].index1();
+ const unsigned long n2 = edges[i].index2();
+ k[n1] += edges[i].distance();
+ if (n1 != n2)
+ k[n2] += edges[i].distance();
+
+ if (n1 != n2)
+ m += edges[i].distance();
+ else
+ m += edges[i].distance()/2;
+
+ if (labels_[n1] == labels_[n2])
+ {
+ if (n1 != n2)
+ Q += 2*edges[i].distance();
+ else
+ Q += edges[i].distance();
+ }
+ }
+
+ if (m == 0)
+ return 0;
+
+ for (unsigned long i = 0; i < labels_.size(); ++i)
+ {
+ cluster_sums[labels_[i]] += k[i];
+ }
+
+ for (unsigned long i = 0; i < labels_.size(); ++i)
+ {
+ Q -= k[i]*cluster_sums[labels_[i]]/(2*m);
+ }
+
+ return 1.0/(2*m)*Q;
+ }
+
+// ----------------------------------------------------------------------------------------
+
+ inline double modularity (
+ const std::vector<ordered_sample_pair>& edges,
+ const std::vector<unsigned long>& labels
+ )
+ {
+ const unsigned long num_nodes = max_index_plus_one(edges);
+ // make sure requires clause is not broken
+ DLIB_ASSERT(labels.size() == num_nodes,
+ "\t double modularity()"
+ << "\n\t Invalid inputs were given to this function"
+ );
+
+
+ unsigned long num_labels;
+ const std::vector<unsigned long>& labels_ = dlib::impl::remap_labels(labels,num_labels);
+
+ std::vector<double> cluster_sums(num_labels,0);
+ std::vector<double> k(num_nodes,0);
+
+ double Q = 0;
+ double m = 0;
+ for (unsigned long i = 0; i < edges.size(); ++i)
+ {
+ const unsigned long n1 = edges[i].index1();
+ const unsigned long n2 = edges[i].index2();
+ k[n1] += edges[i].distance();
+ m += edges[i].distance();
+ if (labels_[n1] == labels_[n2])
+ {
+ Q += edges[i].distance();
+ }
+ }
+
+ if (m == 0)
+ return 0;
+
+ for (unsigned long i = 0; i < labels_.size(); ++i)
+ {
+ cluster_sums[labels_[i]] += k[i];
+ }
+
+ for (unsigned long i = 0; i < labels_.size(); ++i)
+ {
+ Q -= k[i]*cluster_sums[labels_[i]]/m;
+ }
+
+ return 1.0/m*Q;
+ }
+
+// ----------------------------------------------------------------------------------------
+
+}
+
+#endif // DLIB_MODULARITY_ClUSTERING__H__
+
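The numerical core of newman_cluster_split() above is a power iteration that finds the leading eigenvector of the modularity matrix B without ever forming B explicitly (the sparse_matrix_vector_multiply() call plus the rank-one node_degrees correction computes B*v implicitly). As a point of comparison, a stripped-down power iteration on an explicit symmetric dlib matrix would look roughly like this; it is a sketch for illustration only, not part of dlib:

    #include <dlib/matrix.h>
    #include <dlib/rand.h>

    // Approximate the dominant eigenvector of a symmetric matrix A.  This
    // mirrors the loop structure used in newman_cluster_split(), minus the
    // implicit B*v product and the shift used to escape negative eigenvalues.
    dlib::matrix<double,0,1> dominant_eigenvector (
        const dlib::matrix<double>& A,
        const double eps = 1e-4,
        const unsigned long max_iterations = 2000
    )
    {
        dlib::rand rnd;
        dlib::matrix<double,0,1> v(A.nr());
        for (long i = 0; i < v.size(); ++i)
            v(i) = rnd.get_random_gaussian();
        v /= length(v);

        for (unsigned long iter = 0; iter < max_iterations; ++iter)
        {
            dlib::matrix<double,0,1> Av = A*v;
            const double len = length(Av);
            if (len == 0)
                break;                   // degenerate start vector; give up
            Av /= len;
            if (max(abs(v - Av)) < eps)  // converged
                return Av;
            v.swap(Av);
        }
        return v;
    }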
diff --git a/ml/dlib/dlib/clustering/modularity_clustering_abstract.h b/ml/dlib/dlib/clustering/modularity_clustering_abstract.h
new file mode 100644
index 000000000..c1e7c20c4
--- /dev/null
+++ b/ml/dlib/dlib/clustering/modularity_clustering_abstract.h
@@ -0,0 +1,125 @@
+// Copyright (C) 2012 Davis E. King (davis@dlib.net)
+// License: Boost Software License See LICENSE.txt for the full license.
+#undef DLIB_MODULARITY_ClUSTERING_ABSTRACT_Hh_
+#ifdef DLIB_MODULARITY_ClUSTERING_ABSTRACT_Hh_
+
+#include <vector>
+#include "../graph_utils/ordered_sample_pair_abstract.h"
+#include "../graph_utils/sample_pair_abstract.h"
+
+namespace dlib
+{
+
+// -----------------------------------------------------------------------------------------
+
+ double modularity (
+ const std::vector<sample_pair>& edges,
+ const std::vector<unsigned long>& labels
+ );
+ /*!
+ requires
+ - labels.size() == max_index_plus_one(edges)
+ - for all valid i:
+ - 0 <= edges[i].distance() < std::numeric_limits<double>::infinity()
+ ensures
+ - Interprets edges as an undirected graph. That is, it contains the edges on
+ the said graph and the sample_pair::distance() values define the edge weights
+ (larger values indicating a stronger edge connection between the nodes).
+ - This function returns the modularity value obtained when the given input
+ graph is broken into subgraphs according to the contents of labels. In
+ particular, we say that two nodes with indices i and j are in the same
+ subgraph or community if and only if labels[i] == labels[j].
+            - Duplicate edges are interpreted as if there had been just one edge with a
+              distance value equal to the sum of all the duplicate edges' distance values.
+ - See the paper Modularity and community structure in networks by M. E. J. Newman
+ for a detailed definition.
+ !*/
+
+// ----------------------------------------------------------------------------------------
+
+ double modularity (
+ const std::vector<ordered_sample_pair>& edges,
+ const std::vector<unsigned long>& labels
+ );
+ /*!
+ requires
+ - labels.size() == max_index_plus_one(edges)
+ - for all valid i:
+ - 0 <= edges[i].distance() < std::numeric_limits<double>::infinity()
+ ensures
+ - Interprets edges as a directed graph. That is, it contains the edges on the
+ said graph and the ordered_sample_pair::distance() values define the edge
+ weights (larger values indicating a stronger edge connection between the
+ nodes). Note that, generally, modularity is only really defined for
+ undirected graphs. Therefore, the "directed graph" given to this function
+ should have symmetric edges between all nodes. The reason this function is
+ provided at all is because sometimes a vector of ordered_sample_pair objects
+ is a useful representation of an undirected graph.
+ - This function returns the modularity value obtained when the given input
+ graph is broken into subgraphs according to the contents of labels. In
+ particular, we say that two nodes with indices i and j are in the same
+ subgraph or community if and only if labels[i] == labels[j].
+            - Duplicate edges are interpreted as if there had been just one edge with a
+              distance value equal to the sum of all the duplicate edges' distance values.
+ - See the paper Modularity and community structure in networks by M. E. J. Newman
+ for a detailed definition.
+ !*/
+
+// ----------------------------------------------------------------------------------------
+
+ unsigned long newman_cluster (
+ const std::vector<ordered_sample_pair>& edges,
+ std::vector<unsigned long>& labels,
+ const double eps = 1e-4,
+ const unsigned long max_iterations = 2000
+ );
+ /*!
+ requires
+ - is_ordered_by_index(edges) == true
+ - for all valid i:
+ - 0 <= edges[i].distance() < std::numeric_limits<double>::infinity()
+ ensures
+ - This function performs the clustering algorithm described in the paper
+ Modularity and community structure in networks by M. E. J. Newman.
+ - This function interprets edges as a graph and attempts to find the labeling
+ that maximizes modularity(edges, #labels).
+ - returns the number of clusters found.
+ - #labels.size() == max_index_plus_one(edges)
+ - for all valid i:
+ - #labels[i] == the cluster ID of the node with index i in the graph.
+ - 0 <= #labels[i] < the number of clusters found
+ (i.e. cluster IDs are assigned contiguously and start at 0)
+ - The main computation of the algorithm is involved in finding an eigenvector
+ of a certain matrix. To do this, we use the power iteration. In particular,
+ each time we try to find an eigenvector we will let the power iteration loop
+              at most max_iterations times or until it reaches an accuracy of eps,
+              whichever comes first.
+ !*/
+
+// ----------------------------------------------------------------------------------------
+
+ unsigned long newman_cluster (
+ const std::vector<sample_pair>& edges,
+ std::vector<unsigned long>& labels,
+ const double eps = 1e-4,
+ const unsigned long max_iterations = 2000
+ );
+ /*!
+ requires
+ - for all valid i:
+ - 0 <= edges[i].distance() < std::numeric_limits<double>::infinity()
+ ensures
+ - This function is identical to the above newman_cluster() routine except that
+ it operates on a vector of sample_pair objects instead of
+ ordered_sample_pairs. Therefore, this is simply a convenience routine. In
+ particular, it is implemented by transforming the given edges into
+ ordered_sample_pairs and then calling the newman_cluster() routine defined
+ above.
+ !*/
+
+// ----------------------------------------------------------------------------------------
+
+}
+
+#endif // DLIB_MODULARITY_ClUSTERING_ABSTRACT_Hh_
+
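A short end-to-end sketch of the two routines documented above, on a hypothetical toy graph (assuming dlib/clustering.h):

    #include <dlib/clustering.h>
    #include <iostream>
    #include <vector>

    int main()
    {
        using namespace dlib;

        // Two triangles joined by a single weak bridge edge.
        std::vector<sample_pair> edges;
        edges.push_back(sample_pair(0,1, 1.0));
        edges.push_back(sample_pair(1,2, 1.0));
        edges.push_back(sample_pair(0,2, 1.0));
        edges.push_back(sample_pair(3,4, 1.0));
        edges.push_back(sample_pair(4,5, 1.0));
        edges.push_back(sample_pair(3,5, 1.0));
        edges.push_back(sample_pair(2,3, 0.1));   // the bridge

        std::vector<unsigned long> labels;
        const unsigned long num_clusters = newman_cluster(edges, labels);

        // modularity() scores how well the discovered labeling matches the
        // graph's community structure (higher is better).
        std::cout << "clusters:   " << num_clusters << "\n";
        std::cout << "modularity: " << modularity(edges, labels) << "\n";
    }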
diff --git a/ml/dlib/dlib/clustering/spectral_cluster.h b/ml/dlib/dlib/clustering/spectral_cluster.h
new file mode 100644
index 000000000..2cac9870f
--- /dev/null
+++ b/ml/dlib/dlib/clustering/spectral_cluster.h
@@ -0,0 +1,80 @@
+// Copyright (C) 2015 Davis E. King (davis@dlib.net)
+// License: Boost Software License See LICENSE.txt for the full license.
+#ifndef DLIB_SPECTRAL_CLUSTEr_H_
+#define DLIB_SPECTRAL_CLUSTEr_H_
+
+#include "spectral_cluster_abstract.h"
+#include <vector>
+#include "../matrix.h"
+#include "../svm/kkmeans.h"
+
+namespace dlib
+{
+ template <
+ typename kernel_type,
+ typename vector_type
+ >
+ std::vector<unsigned long> spectral_cluster (
+ const kernel_type& k,
+ const vector_type& samples,
+ const unsigned long num_clusters
+ )
+ {
+ DLIB_CASSERT(num_clusters > 0,
+ "\t std::vector<unsigned long> spectral_cluster(k,samples,num_clusters)"
+ << "\n\t num_clusters can't be 0."
+ );
+
+ if (num_clusters == 1)
+ {
+ // nothing to do, just assign everything to the 0 cluster.
+ return std::vector<unsigned long>(samples.size(), 0);
+ }
+
+ // compute the similarity matrix.
+ matrix<double> K(samples.size(), samples.size());
+ for (long r = 0; r < K.nr(); ++r)
+ for (long c = r+1; c < K.nc(); ++c)
+ K(r,c) = K(c,r) = (double)k(samples[r], samples[c]);
+ for (long r = 0; r < K.nr(); ++r)
+ K(r,r) = 0;
+
+ matrix<double,0,1> D(K.nr());
+ for (long r = 0; r < K.nr(); ++r)
+ D(r) = sum(rowm(K,r));
+ D = sqrt(reciprocal(D));
+ K = diagm(D)*K*diagm(D);
+ matrix<double> u,w,v;
+ // Use the normal SVD routine unless the matrix is really big, then use the fast
+ // approximate version.
+ if (K.nr() < 1000)
+ svd3(K,u,w,v);
+ else
+ svd_fast(K,u,w,v, num_clusters+100, 5);
+ // Pick out the eigenvectors associated with the largest eigenvalues.
+ rsort_columns(v,w);
+ v = colm(v, range(0,num_clusters-1));
+ // Now build the normalized spectral vectors, one for each input vector.
+ std::vector<matrix<double,0,1> > spec_samps, centers;
+ for (long r = 0; r < v.nr(); ++r)
+ {
+ spec_samps.push_back(trans(rowm(v,r)));
+ const double len = length(spec_samps.back());
+ if (len != 0)
+ spec_samps.back() /= len;
+ }
+ // Finally do the K-means clustering
+ pick_initial_centers(num_clusters, centers, spec_samps);
+ find_clusters_using_kmeans(spec_samps, centers);
+ // And then compute the cluster assignments based on the output of K-means.
+ std::vector<unsigned long> assignments;
+ for (unsigned long i = 0; i < spec_samps.size(); ++i)
+ assignments.push_back(nearest_center(centers, spec_samps[i]));
+
+ return assignments;
+ }
+
+}
+
+#endif // DLIB_SPECTRAL_CLUSTEr_H_
+
diff --git a/ml/dlib/dlib/clustering/spectral_cluster_abstract.h b/ml/dlib/dlib/clustering/spectral_cluster_abstract.h
new file mode 100644
index 000000000..880ad80af
--- /dev/null
+++ b/ml/dlib/dlib/clustering/spectral_cluster_abstract.h
@@ -0,0 +1,43 @@
+// Copyright (C) 2015 Davis E. King (davis@dlib.net)
+// License: Boost Software License See LICENSE.txt for the full license.
+#undef DLIB_SPECTRAL_CLUSTEr_ABSTRACT_H_
+#ifdef DLIB_SPECTRAL_CLUSTEr_ABSTRACT_H_
+
+#include <vector>
+
+namespace dlib
+{
+ template <
+ typename kernel_type,
+ typename vector_type
+ >
+ std::vector<unsigned long> spectral_cluster (
+ const kernel_type& k,
+ const vector_type& samples,
+ const unsigned long num_clusters
+ );
+ /*!
+ requires
+ - samples must be something with an interface compatible with std::vector.
+ - The following expression must evaluate to a double or float:
+ k(samples[i], samples[j])
+ - num_clusters > 0
+ ensures
+ - Performs the spectral clustering algorithm described in the paper:
+ On spectral clustering: Analysis and an algorithm by Ng, Jordan, and Weiss.
+ and returns the results.
+ - This function clusters the input data samples into num_clusters clusters and
+ returns a vector that indicates which cluster each sample falls into. In
+ particular, we return an array A such that:
+ - A.size() == samples.size()
+ - A[i] == the cluster assignment of samples[i].
+ - for all valid i: 0 <= A[i] < num_clusters
+ - The "similarity" of samples[i] with samples[j] is given by
+ k(samples[i],samples[j]). This means that k() should output a number >= 0
+ and the number should be larger for samples that are more similar.
+ !*/
+}
+
+#endif // DLIB_SPECTRAL_CLUSTEr_ABSTRACT_H_
+
+
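Finally, a minimal sketch of spectral_cluster() on made-up 2D points. The radial_basis_kernel and its gamma value are illustrative choices (the kernel lives in dlib/svm.h); any kernel meeting the requirements above works:

    #include <dlib/clustering.h>
    #include <dlib/svm.h>
    #include <iostream>
    #include <vector>

    int main()
    {
        using namespace dlib;

        typedef matrix<double,2,1> sample_type;
        typedef radial_basis_kernel<sample_type> kernel_type;

        // Two well-separated blobs of 2D points.
        std::vector<sample_type> samples;
        sample_type m;
        m = 0.0, 0.0;  samples.push_back(m);
        m = 0.1, 0.0;  samples.push_back(m);
        m = 0.0, 0.1;  samples.push_back(m);
        m = 5.0, 5.0;  samples.push_back(m);
        m = 5.1, 5.0;  samples.push_back(m);

        // gamma = 0.5 is a guess; pick it so within-blob similarities are
        // clearly larger than between-blob ones.
        const std::vector<unsigned long> assignments =
            spectral_cluster(kernel_type(0.5), samples, 2);

        for (size_t i = 0; i < assignments.size(); ++i)
            std::cout << "sample " << i << " -> cluster " << assignments[i] << "\n";
    }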