summaryrefslogtreecommitdiffstats
path: root/ml/dlib/dlib/clustering/spectral_cluster.h
diff options
context:
space:
mode:
Diffstat (limited to 'ml/dlib/dlib/clustering/spectral_cluster.h')
-rw-r--r--ml/dlib/dlib/clustering/spectral_cluster.h80
1 files changed, 80 insertions, 0 deletions
diff --git a/ml/dlib/dlib/clustering/spectral_cluster.h b/ml/dlib/dlib/clustering/spectral_cluster.h
new file mode 100644
index 000000000..2cac9870f
--- /dev/null
+++ b/ml/dlib/dlib/clustering/spectral_cluster.h
@@ -0,0 +1,80 @@
+// Copyright (C) 2015 Davis E. King (davis@dlib.net)
+// License: Boost Software License See LICENSE.txt for the full license.
+#ifndef DLIB_SPECTRAL_CLUSTEr_H_
+#define DLIB_SPECTRAL_CLUSTEr_H_
+
+#include "spectral_cluster_abstract.h"
+#include <vector>
+#include "../matrix.h"
+#include "../svm/kkmeans.h"
+
+namespace dlib
+{
+ template <
+ typename kernel_type,
+ typename vector_type
+ >
+ std::vector<unsigned long> spectral_cluster (
+ const kernel_type& k,
+ const vector_type& samples,
+ const unsigned long num_clusters
+ )
+ {
+ DLIB_CASSERT(num_clusters > 0,
+ "\t std::vector<unsigned long> spectral_cluster(k,samples,num_clusters)"
+ << "\n\t num_clusters can't be 0."
+ );
+
+ if (num_clusters == 1)
+ {
+ // nothing to do, just assign everything to the 0 cluster.
+ return std::vector<unsigned long>(samples.size(), 0);
+ }
+
+ // compute the similarity matrix.
+ matrix<double> K(samples.size(), samples.size());
+ for (long r = 0; r < K.nr(); ++r)
+ for (long c = r+1; c < K.nc(); ++c)
+ K(r,c) = K(c,r) = (double)k(samples[r], samples[c]);
+ for (long r = 0; r < K.nr(); ++r)
+ K(r,r) = 0;
+
+ matrix<double,0,1> D(K.nr());
+ for (long r = 0; r < K.nr(); ++r)
+ D(r) = sum(rowm(K,r));
+ D = sqrt(reciprocal(D));
+ K = diagm(D)*K*diagm(D);
+ matrix<double> u,w,v;
+ // Use the normal SVD routine unless the matrix is really big, then use the fast
+ // approximate version.
+ if (K.nr() < 1000)
+ svd3(K,u,w,v);
+ else
+ svd_fast(K,u,w,v, num_clusters+100, 5);
+ // Pick out the eigenvectors associated with the largest eigenvalues.
+ rsort_columns(v,w);
+ v = colm(v, range(0,num_clusters-1));
+ // Now build the normalized spectral vectors, one for each input vector.
+ std::vector<matrix<double,0,1> > spec_samps, centers;
+ for (long r = 0; r < v.nr(); ++r)
+ {
+ spec_samps.push_back(trans(rowm(v,r)));
+ const double len = length(spec_samps.back());
+ if (len != 0)
+ spec_samps.back() /= len;
+ }
+ // Finally do the K-means clustering
+ pick_initial_centers(num_clusters, centers, spec_samps);
+ find_clusters_using_kmeans(spec_samps, centers);
+ // And then compute the cluster assignments based on the output of K-means.
+ std::vector<unsigned long> assignments;
+ for (unsigned long i = 0; i < spec_samps.size(); ++i)
+ assignments.push_back(nearest_center(centers, spec_samps[i]));
+
+ return assignments;
+ }
+
+}
+
+#endif // DLIB_SPECTRAL_CLUSTEr_H_
+