summaryrefslogtreecommitdiffstats
path: root/ml/dlib/dlib/svm/feature_ranking_abstract.h
diff options
context:
space:
mode:
Diffstat (limited to 'ml/dlib/dlib/svm/feature_ranking_abstract.h')
-rw-r--r--ml/dlib/dlib/svm/feature_ranking_abstract.h136
1 files changed, 136 insertions, 0 deletions
diff --git a/ml/dlib/dlib/svm/feature_ranking_abstract.h b/ml/dlib/dlib/svm/feature_ranking_abstract.h
new file mode 100644
index 000000000..5a6fd3bb9
--- /dev/null
+++ b/ml/dlib/dlib/svm/feature_ranking_abstract.h
@@ -0,0 +1,136 @@
+// Copyright (C) 2008 Davis E. King (davis@dlib.net)
+// License: Boost Software License See LICENSE.txt for the full license.
+#undef DLIB_KERNEL_FEATURE_RANKINg_ABSTRACT_H_
+#ifdef DLIB_KERNEL_FEATURE_RANKINg_ABSTRACT_H_
+
+#include <vector>
+#include <limits>
+
+#include "svm_abstract.h"
+#include "kcentroid_abstract.h"
+#include "../is_kind.h"
+
+namespace dlib
+{
+
+// ----------------------------------------------------------------------------------------
+
+ template <
+ typename kernel_type,
+ typename sample_matrix_type,
+ typename label_matrix_type
+ >
+ matrix<typename kernel_type::scalar_type> rank_features (
+ const kcentroid<kernel_type>& kc,
+ const sample_matrix_type& samples,
+ const label_matrix_type& labels,
+ const long num_features = samples(0).nr()
+ );
+ /*!
+ requires
+ - sample_matrix_type == a matrix or something convertible to a matrix via mat()
+ - label_matrix_type == a matrix or something convertible to a matrix via mat()
+ - is_binary_classification_problem(samples, labels) == true
+ - kc.train(samples(0)) must be a valid expression. This means that
+ kc must use a kernel type that is capable of operating on the
+ contents of the samples matrix.
+ - 0 < num_features <= samples(0).nr()
+ ensures
+ - Let Class1 denote the centroid of all the samples with labels that are < 0
+ - Let Class2 denote the centroid of all the samples with labels that are > 0
+ - Finds a ranking of the features where the best features come first. This
+ function does this by computing the distance between the Class1 and Class2
+ centroids in kernel defined feature space.
+ Good features are then ones that result in the biggest separation between
+ the two centroids Class1 and Class2.
+ - Uses the kc object to compute the centroids of the two classes.
+ - returns a ranking matrix R where:
+ - R.nr() == num_features
+ - R.nc() == 2
+ - R(i,0) == the index of the ith best feature according to our ranking.
+ (e.g. samples(n)(R(0,0)) is the best feature from samples(n) and
+ samples(n)(R(1,0)) is the second best, samples(n)(R(2,0)) the
+ third best and so on)
+ - R(i,1) == a number that indicates how much separation exists between
+ the two centroids when features 0 through i are used.
+ !*/
+
+// ----------------------------------------------------------------------------------------
+
+ template <
+ typename sample_matrix_type,
+ typename label_matrix_type
+ >
+ double find_gamma_with_big_centroid_gap (
+ const sample_matrix_type& samples,
+ const label_matrix_type& labels,
+ double initial_gamma = 0.1,
+ unsigned long num_sv = 40
+ );
+ /*!
+ requires
+ - initial_gamma > 0
+ - num_sv > 0
+ - is_binary_classification_problem(samples, labels) == true
+ ensures
+ - This function tries to pick a reasonable default value for the gamma
+ parameter of the radial_basis_kernel. It picks the parameter that gives the
+ largest separation between the centroids, in kernel feature space, of two classes
+ of data. It does this using the kcentroid object and it sets the kcentroid up
+ to use num_sv dictionary vectors.
+ - This function does a search for the best gamma and the search starts with
+ the value given by initial_gamma. Better initial guesses will give
+ better results since the routine may get stuck in a local optimum.
+ - returns the value of gamma that results in the largest separation.
+ !*/
+
+// ----------------------------------------------------------------------------------------
+
+ template <
+ typename sample_matrix_type,
+ typename label_matrix_type
+ >
+ double verbose_find_gamma_with_big_centroid_gap (
+ const sample_matrix_type& samples,
+ const label_matrix_type& labels,
+ double initial_gamma = 0.1,
+ unsigned long num_sv = 40
+ );
+ /*!
+ requires
+ - initial_gamma > 0
+ - num_sv > 0
+ - is_binary_classification_problem(samples, labels) == true
+ ensures
+ - This function does exactly the same thing as the above find_gamma_with_big_centroid_gap()
+ except that it is also verbose in the sense that it will print status messages to
+ standard output during its processing.
+ !*/
+
+// ----------------------------------------------------------------------------------------
+
+ template <
+ typename vector_type
+ >
+ double compute_mean_squared_distance (
+ const vector_type& samples
+ );
+ /*!
+ requires
+ - vector_type is something with an interface compatible with std::vector.
+ Additionally, its elements must be dlib::matrix objects which contain
+ scalars such as float or double values.
+ - for all valid i: is_vector(samples[i]) == true
+ ensures
+ - computes the average of the squared pairwise distances between
+ all the elements of samples.
+ !*/
+
+// ----------------------------------------------------------------------------------------
+
+}
+
+#endif // DLIB_KERNEL_FEATURE_RANKINg_ABSTRACT_H_
+
+
+