diff options
Diffstat (limited to 'ml/dlib/dlib/svm/kkmeans_abstract.h')
-rw-r--r-- | ml/dlib/dlib/svm/kkmeans_abstract.h | 365 |
1 files changed, 0 insertions, 365 deletions
diff --git a/ml/dlib/dlib/svm/kkmeans_abstract.h b/ml/dlib/dlib/svm/kkmeans_abstract.h deleted file mode 100644 index 9f9d7ccce..000000000 --- a/ml/dlib/dlib/svm/kkmeans_abstract.h +++ /dev/null @@ -1,365 +0,0 @@ -// Copyright (C) 2008 Davis E. King (davis@dlib.net) -// License: Boost Software License See LICENSE.txt for the full license. -#undef DLIB_KKMEANs_ABSTRACT_ -#ifdef DLIB_KKMEANs_ABSTRACT_ - -#include <cmath> -#include "../matrix/matrix_abstract.h" -#include "../algs.h" -#include "../serialize.h" -#include "kernel_abstract.h" -#include "kcentroid_abstract.h" -#include "../noncopyable.h" - -namespace dlib -{ - - template < - typename kernel_type - > - class kkmeans : public noncopyable - { - /*! - REQUIREMENTS ON kernel_type - is a kernel function object as defined in dlib/svm/kernel_abstract.h - - INITIAL VALUE - - number_of_centers() == 1 - - get_min_change() == 0.01 - - WHAT THIS OBJECT REPRESENTS - This is an implementation of a kernelized k-means clustering algorithm. - It performs k-means clustering by using the kcentroid object. - !*/ - - public: - typedef typename kernel_type::scalar_type scalar_type; - typedef typename kernel_type::sample_type sample_type; - typedef typename kernel_type::mem_manager_type mem_manager_type; - - kkmeans ( - const kcentroid<kernel_type>& kc_ - ); - /*! - ensures - - #number_of_centers() == 1 - - #get_min_change() == 0.01 - - #get_kcentroid(0) == a copy of kc_ - !*/ - - ~kkmeans( - ); - /*! - ensures - - all resources associated with *this have been released - !*/ - - void set_kcentroid ( - const kcentroid<kernel_type>& kc_ - ); - /*! - ensures - - for all idx: - - #get_kcentroid(idx) == a copy of kc_ - !*/ - - const kcentroid<kernel_type>& get_kcentroid ( - unsigned long i - ) const; - /*! - requires - - i < number_of_centers() - ensures - - returns a const reference to the ith kcentroid object contained in - this object. Each kcentroid represents one of the centers found - by the k-means clustering algorithm. - !*/ - - const kernel_type& get_kernel ( - ) const; - /*! - ensures - - returns a const reference to the kernel used by this object - !*/ - - void set_number_of_centers ( - unsigned long num - ); - /*! - requires - - num > 0 - ensures - - #number_of_centers() == num - !*/ - - unsigned long number_of_centers ( - ) const; - /*! - ensures - - returns the number of centers used in this instance of the k-means clustering - algorithm. - !*/ - - template < - typename matrix_type, - typename matrix_type2 - > - void train ( - const matrix_type& samples, - const matrix_type2& initial_centers, - long max_iter = 1000 - ); - /*! - requires - - matrix_type and matrix_type2 must either be dlib::matrix objects or convertible to dlib::matrix - via mat() - - matrix_type::type == sample_type (i.e. matrix_type should contain sample_type objects) - - matrix_type2::type == sample_type (i.e. matrix_type2 should contain sample_type objects) - - initial_centers.nc() == 1 (i.e. must be a column vector) - - samples.nc() == 1 (i.e. must be a column vector) - - initial_centers.nr() == number_of_centers() - ensures - - performs k-means clustering of the given set of samples. The initial center points - are taken from the initial_centers argument. - - loops over the data and continues to refine the clustering until either less than - get_min_change() fraction of the data points change clusters or we have done max_iter - iterations over the data. - - After this function finishes you can call the operator() function below - to determine which centroid a given sample is closest to. - !*/ - - unsigned long operator() ( - const sample_type& sample - ) const; - /*! - ensures - - returns a number idx such that: - - idx < number_of_centers() - - get_kcentroid(idx) == the centroid that is closest to the given - sample. - !*/ - - void set_min_change ( - scalar_type min_change - ); - /*! - requires - - 0 <= min_change < 1 - ensures - - #get_min_change() == min_change - !*/ - - const scalar_type get_min_change ( - ) const; - /*! - ensures - - returns the minimum fraction of data points that need to change - centers in an iteration of kmeans for the algorithm to keep going. - !*/ - - void swap ( - kkmeans& item - ); - /*! - ensures - - swaps *this and item - !*/ - }; - -// ---------------------------------------------------------------------------------------- - - template < - typename kernel_type - > - void swap( - kkmeans<kernel_type>& a, - kkmeans<kernel_type>& b - ) { a.swap(b); } - /*! - provides a global swap function - !*/ - - template < - typename kernel_type - > - void serialize ( - const kkmeans<kernel_type>& item, - std::ostream& out - ); - /*! - provides serialization support for kkmeans objects - !*/ - - template < - typename kernel_type - > - void deserialize ( - kkmeans<kernel_type>& item, - std::istream& in - ); - /*! - provides serialization support for kkmeans objects - !*/ - -// ---------------------------------------------------------------------------------------- - - template < - typename vector_type1, - typename vector_type2, - typename kernel_type - > - void pick_initial_centers( - long num_centers, - vector_type1& centers, - const vector_type2& samples, - const kernel_type& k, - double percentile = 0.01 - ); - /*! - requires - - num_centers > 1 - - 0 <= percentile < 1 - - samples.size() > 1 - - vector_type1 == something with an interface compatible with std::vector - - vector_type2 == something with an interface compatible with std::vector - - k(samples[0],samples[0]) must be a valid expression that returns a double - - both centers and samples must be able to contain kernel_type::sample_type - objects - ensures - - finds num_centers candidate cluster centers in the data in the samples - vector. Assumes that k is the kernel that will be used during clustering - to define the space in which clustering occurs. - - The centers are found by looking for points that are far away from other - candidate centers. However, if the data is noisy you probably want to - ignore the farthest way points since they will be outliers. To do this - set percentile to the fraction of outliers you expect the data to contain. - - #centers.size() == num_centers - - #centers == a vector containing the candidate centers found - !*/ - -// ---------------------------------------------------------------------------------------- - - template < - typename vector_type1, - typename vector_type2 - > - void pick_initial_centers( - long num_centers, - vector_type1& centers, - const vector_type2& samples, - double percentile = 0.01 - ); - /*! - requires - - num_centers > 1 - - 0 <= percentile < 1 - - samples.size() > 1 - - vector_type1 == something with an interface compatible with std::vector - - vector_type2 == something with an interface compatible with std::vector - - Both centers and samples must be able to contain dlib::matrix based row or - column vectors. - ensures - - performs: pick_initial_centers(num_centers, centers, samples, linear_kernel<sample_type>(), percentile) - (i.e. this function is simply an overload that uses the linear kernel. - !*/ - -// ---------------------------------------------------------------------------------------- - - template < - typename array_type, - typename sample_type, - typename alloc - > - void find_clusters_using_kmeans ( - const array_type& samples, - std::vector<sample_type, alloc>& centers, - unsigned long max_iter = 1000 - ); - /*! - requires - - samples.size() > 0 - - samples == a bunch of row or column vectors and they all must be of the - same length. - - centers.size() > 0 - - array_type == something with an interface compatible with std::vector - and it must contain row or column vectors capable of being stored in - sample_type objects. - - sample_type == a dlib::matrix capable of representing vectors - ensures - - performs regular old linear kmeans clustering on the samples. The clustering - begins with the initial set of centers given as an argument to this function. - When it finishes #centers will contain the resulting centers. - - no more than max_iter iterations will be performed before this function - terminates. - !*/ - -// ---------------------------------------------------------------------------------------- - - template < - typename array_type, - typename sample_type, - typename alloc - > - void find_clusters_using_angular_kmeans ( - const array_type& samples, - std::vector<sample_type, alloc>& centers, - unsigned long max_iter = 1000 - ); - /*! - requires - - samples.size() > 0 - - samples == a bunch of row or column vectors and they all must be of the - same length. - - centers.size() > 0 - - array_type == something with an interface compatible with std::vector - and it must contain row or column vectors capable of being stored in - sample_type objects. - - sample_type == a dlib::matrix capable of representing vectors - ensures - - performs linear kmeans clustering on the samples, except instead of using - Euclidean distance to compare samples to the centers it uses the angle - between a sample and a center (with respect to the origin). So we try to - cluster samples together if they have small angles with respect to each - other. The clustering begins with the initial set of centers given as an - argument to this function. When it finishes #centers will contain the - resulting centers. - - for all valid i: - - length(#centers[i]) == 1 - (i.e. the output centers are scaled to be unit vectors since their - magnitude is irrelevant. Moreover, this makes it so you can use - functions like nearest_center() with #centers to find the cluster - assignments.) - - No more than max_iter iterations will be performed before this function - terminates. - !*/ - -// ---------------------------------------------------------------------------------------- - - template < - typename array_type, - typename EXP - > - unsigned long nearest_center ( - const array_type& centers, - const matrix_exp<EXP>& sample - ); - /*! - requires - - centers.size() > 0 - - sample.size() > 0 - - is_vector(sample) == true - - centers must be an array of vectors such that the following expression is - valid: length_squared(sample - centers[0]). (e.g. centers could be a - std::vector of matrix objects holding column vectors). - ensures - - returns the index that identifies the element of centers that is nearest to - sample. That is, returns a number IDX such that centers[IDX] is the element - of centers that minimizes length(centers[IDX]-sample). - !*/ - -// ---------------------------------------------------------------------------------------- - -} - -#endif // DLIB_KKMEANs_ABSTRACT_ - |