summaryrefslogtreecommitdiffstats
path: root/ml/dlib/dlib/svm/feature_ranking.h
diff options
context:
space:
mode:
Diffstat (limited to 'ml/dlib/dlib/svm/feature_ranking.h')
-rw-r--r--ml/dlib/dlib/svm/feature_ranking.h477
1 files changed, 0 insertions, 477 deletions
diff --git a/ml/dlib/dlib/svm/feature_ranking.h b/ml/dlib/dlib/svm/feature_ranking.h
deleted file mode 100644
index f6324fe3d..000000000
--- a/ml/dlib/dlib/svm/feature_ranking.h
+++ /dev/null
@@ -1,477 +0,0 @@
-// Copyright (C) 2008 Davis E. King (davis@dlib.net)
-// License: Boost Software License See LICENSE.txt for the full license.
-#ifndef DLIB_KERNEL_FEATURE_RANKINg_H_
-#define DLIB_KERNEL_FEATURE_RANKINg_H_
-
-#include <vector>
-#include <limits>
-
-#include "feature_ranking_abstract.h"
-#include "kcentroid.h"
-#include "../optimization.h"
-#include "../statistics.h"
-#include <iostream>
-
-namespace dlib
-{
-
-// ----------------------------------------------------------------------------------------
-
    template <
        typename kernel_type,
        typename sample_matrix_type,
        typename label_matrix_type
        >
    matrix<typename kernel_type::scalar_type,0,2,typename kernel_type::mem_manager_type> rank_features_impl (
        const kcentroid<kernel_type>& kc,
        const sample_matrix_type& samples,
        const label_matrix_type& labels
    )
    {
        /*
            This function ranks features by doing recursive feature elimination:
            starting with every feature enabled, each round finds the single
            feature whose removal leaves the two class centroids as far apart
            as possible, removes it, and records it.  The returned matrix has
            samples(0).nr() rows; column 0 holds a feature index and column 1
            holds a separation score normalized so the largest value is 1.
            Rows are filled from the bottom up, so the least useful feature
            ends up in row 0 and the most useful in the last row.

            kc is used as a prototype: every centroid built below is a copy of
            it, so its kernel and tolerance settings control the measurement.
        */
        typedef typename kernel_type::scalar_type scalar_type;
        typedef typename kernel_type::mem_manager_type mm;


        // make sure requires clause is not broken
        DLIB_ASSERT(is_binary_classification_problem(samples, labels) == true,
            "\tmatrix rank_features()"
            << "\n\t you have given invalid arguments to this function"
            );

        // results: one row per feature.  mask(j) == 1 means feature j is still
        // in the working set.
        matrix<scalar_type,0,2,mm> results(samples(0).nr(), 2);
        matrix<scalar_type,sample_matrix_type::type::NR,1,mm> mask(samples(0).nr());
        set_all_elements(mask,1);

        // figure out what the separation is between the two centroids when all the features are
        // present.
        scalar_type first_separation;
        {
            kcentroid<kernel_type> c1(kc);
            kcentroid<kernel_type> c2(kc);
            // find the centers of each class
            for (long s = 0; s < samples.size(); ++s)
            {
                if (labels(s) < 0)
                {
                    c1.train(samples(s));
                }
                else
                {
                    c2.train(samples(s));
                }

            }
            // c1(c2) is the distance between the two centroids
            first_separation = c1(c2);
        }


        using namespace std;

        // fill rows from the last row backwards: each iteration eliminates
        // one more feature, so earlier rows correspond to features removed
        // later (i.e. less useful features land in lower-numbered rows).
        for (long i = results.nr()-1; i >= 0; --i)
        {
            long worst_feature_idx = 0;
            scalar_type worst_feature_score = -std::numeric_limits<scalar_type>::infinity();

            // figure out which feature to remove next
            for (long j = 0; j < mask.size(); ++j)
            {
                // skip features we have already removed
                if (mask(j) == 0)
                    continue;

                kcentroid<kernel_type> c1(kc);
                kcentroid<kernel_type> c2(kc);

                // temporarily remove this feature from the working set of features
                mask(j) = 0;

                // find the centers of each class
                for (long s = 0; s < samples.size(); ++s)
                {
                    if (labels(s) < 0)
                    {
                        c1.train(pointwise_multiply(samples(s),mask));
                    }
                    else
                    {
                        c2.train(pointwise_multiply(samples(s),mask));
                    }

                }

                // find the distance between the two centroids and use that
                // as the score
                const double score = c1(c2);

                // the "worst" feature is the one whose removal leaves the
                // classes MOST separated, i.e. the one we miss the least
                if (score > worst_feature_score)
                {
                    worst_feature_score = score;
                    worst_feature_idx = j;
                }

                // add this feature back to the working set of features
                mask(j) = 1;

            }

            // now that we know what the next worst feature is record it
            mask(worst_feature_idx) = 0;
            results(i,0) = worst_feature_idx;
            results(i,1) = worst_feature_score;
        }

        // now normalize the results
        const scalar_type max_separation = std::max(max(colm(results,1)), first_separation);
        set_colm(results,1) = colm(results,1)/max_separation;
        // shift the score column up one row so each feature is paired with the
        // separation measured while it was still present, then give the last
        // (most important) feature the all-features separation.
        for (long r = 0; r < results.nr()-1; ++r)
        {
            results(r,1) = results(r+1,1);
        }
        results(results.nr()-1,1) = first_separation/max_separation;

        return results;
    }
-
-// ----------------------------------------------------------------------------------------
-
- template <
- typename kernel_type,
- typename sample_matrix_type,
- typename label_matrix_type
- >
- matrix<typename kernel_type::scalar_type,0,2,typename kernel_type::mem_manager_type> rank_features (
- const kcentroid<kernel_type>& kc,
- const sample_matrix_type& samples,
- const label_matrix_type& labels
- )
- {
- return rank_features_impl(kc, mat(samples), mat(labels));
- }
-
-// ----------------------------------------------------------------------------------------
-
- template <
- typename kernel_type,
- typename sample_matrix_type,
- typename label_matrix_type
- >
- matrix<typename kernel_type::scalar_type,0,2,typename kernel_type::mem_manager_type> rank_features_impl (
- const kcentroid<kernel_type>& kc,
- const sample_matrix_type& samples,
- const label_matrix_type& labels,
- const long num_features
- )
- {
- /*
- This function ranks features by doing recursive feature addition
-
- */
- typedef typename kernel_type::scalar_type scalar_type;
- typedef typename kernel_type::mem_manager_type mm;
-
- // make sure requires clause is not broken
- DLIB_ASSERT(is_binary_classification_problem(samples, labels) == true,
- "\tmatrix rank_features()"
- << "\n\t you have given invalid arguments to this function"
- );
- DLIB_ASSERT(0 < num_features && num_features <= samples(0).nr(),
- "\tmatrix rank_features()"
- << "\n\t you have given invalid arguments to this function"
- << "\n\t num_features: " << num_features
- << "\n\t samples(0).nr(): " << samples(0).nr()
- );
-
- matrix<scalar_type,0,2,mm> results(num_features, 2);
- matrix<scalar_type,sample_matrix_type::type::NR,1,mm> mask(samples(0).nr());
- set_all_elements(mask,0);
-
- using namespace std;
-
- for (long i = 0; i < results.nr(); ++i)
- {
- long best_feature_idx = 0;
- scalar_type best_feature_score = -std::numeric_limits<scalar_type>::infinity();
-
- // figure out which feature to add next
- for (long j = 0; j < mask.size(); ++j)
- {
- // skip features we have already added
- if (mask(j) == 1)
- continue;
-
- kcentroid<kernel_type> c1(kc);
- kcentroid<kernel_type> c2(kc);
-
- // temporarily add this feature to the working set of features
- mask(j) = 1;
-
- // find the centers of each class
- for (long s = 0; s < samples.size(); ++s)
- {
- if (labels(s) < 0)
- {
- c1.train(pointwise_multiply(samples(s),mask));
- }
- else
- {
- c2.train(pointwise_multiply(samples(s),mask));
- }
-
- }
-
- // find the distance between the two centroids and use that
- // as the score
- const double score = c1(c2);
-
- if (score > best_feature_score)
- {
- best_feature_score = score;
- best_feature_idx = j;
- }
-
- // take this feature back out of the working set of features
- mask(j) = 0;
-
- }
-
- // now that we know what the next best feature is record it
- mask(best_feature_idx) = 1;
- results(i,0) = best_feature_idx;
- results(i,1) = best_feature_score;
- }
-
- // now normalize the results
- set_colm(results,1) = colm(results,1)/max(colm(results,1));
-
- return results;
- }
-
-// ----------------------------------------------------------------------------------------
-
- template <
- typename kernel_type,
- typename sample_matrix_type,
- typename label_matrix_type
- >
- matrix<typename kernel_type::scalar_type,0,2,typename kernel_type::mem_manager_type> rank_features (
- const kcentroid<kernel_type>& kc,
- const sample_matrix_type& samples,
- const label_matrix_type& labels,
- const long num_features
- )
- {
- if (mat(samples).nr() > 0 && num_features == mat(samples)(0).nr())
- {
- // if we are going to rank them all then might as well do the recursive feature elimination version
- return rank_features_impl(kc, mat(samples), mat(labels));
- }
- else
- {
- return rank_features_impl(kc, mat(samples), mat(labels), num_features);
- }
- }
-
-// ----------------------------------------------------------------------------------------
-// ----------------------------------------------------------------------------------------
-// ----------------------------------------------------------------------------------------
-
- namespace rank_features_helpers
- {
- template <
- typename K,
- typename sample_matrix_type,
- typename label_matrix_type
- >
- typename K::scalar_type centroid_gap (
- const kcentroid<K>& kc,
- const sample_matrix_type& samples,
- const label_matrix_type& labels
- )
- {
- kcentroid<K> kc1(kc);
- kcentroid<K> kc2(kc);
-
- // toss all the samples into our kcentroids
- for (long i = 0; i < samples.size(); ++i)
- {
- if (labels(i) > 0)
- kc1.train(samples(i));
- else
- kc2.train(samples(i));
- }
-
- // now return the separation between the mean of these two centroids
- return kc1(kc2);
- }
-
- template <
- typename sample_matrix_type,
- typename label_matrix_type
- >
- class test
- {
- typedef typename sample_matrix_type::type sample_type;
- typedef typename sample_type::type scalar_type;
- typedef typename sample_type::mem_manager_type mem_manager_type;
-
- public:
- test (
- const sample_matrix_type& samples_,
- const label_matrix_type& labels_,
- unsigned long num_sv_,
- bool verbose_
- ) : samples(samples_), labels(labels_), num_sv(num_sv_), verbose(verbose_)
- {
- }
-
- double operator() (
- double gamma
- ) const
- {
- using namespace std;
-
- // we are doing the optimization in log space so don't forget to convert back to normal space
- gamma = std::exp(gamma);
-
- typedef radial_basis_kernel<sample_type> kernel_type;
- // Make a kcentroid and find out what the gap is at the current gamma. Try to pick a reasonable
- // tolerance.
- const double tolerance = std::min(gamma*0.01, 0.01);
- const kernel_type kern(gamma);
- kcentroid<kernel_type> kc(kern, tolerance, num_sv);
- scalar_type temp = centroid_gap(kc, samples, labels);
-
- if (verbose)
- {
- cout << "\rChecking goodness of gamma = " << gamma << ". Goodness = "
- << temp << " " << flush;
- }
- return temp;
- }
-
- const sample_matrix_type& samples;
- const label_matrix_type& labels;
- unsigned long num_sv;
- bool verbose;
-
- };
-
- template <
- typename sample_matrix_type,
- typename label_matrix_type
- >
- double find_gamma_with_big_centroid_gap_impl (
- const sample_matrix_type& samples,
- const label_matrix_type& labels,
- double initial_gamma,
- unsigned long num_sv,
- bool verbose
- )
- {
- using namespace std;
-
- if (verbose)
- {
- cout << endl;
- }
-
- test<sample_matrix_type, label_matrix_type> funct(samples, labels, num_sv, verbose);
- double best_gamma = std::log(initial_gamma);
- double goodness = find_max_single_variable(funct, best_gamma, -15, 15, 1e-3, 100);
-
- if (verbose)
- {
- cout << "\rBest gamma = " << std::exp(best_gamma) << ". Goodness = "
- << goodness << " " << endl;
- }
-
- return std::exp(best_gamma);
- }
- }
-
-// ----------------------------------------------------------------------------------------
-
- template <
- typename sample_matrix_type,
- typename label_matrix_type
- >
- double find_gamma_with_big_centroid_gap (
- const sample_matrix_type& samples,
- const label_matrix_type& labels,
- double initial_gamma = 0.1,
- unsigned long num_sv = 40
- )
- {
- DLIB_ASSERT(initial_gamma > 0 && num_sv > 0 && is_binary_classification_problem(samples, labels),
- "\t double find_gamma_with_big_centroid_gap()"
- << "\n\t initial_gamma: " << initial_gamma
- << "\n\t num_sv: " << num_sv
- << "\n\t is_binary_classification_problem(): " << is_binary_classification_problem(samples, labels)
- );
-
- return rank_features_helpers::find_gamma_with_big_centroid_gap_impl(mat(samples),
- mat(labels),
- initial_gamma,
- num_sv,
- false);
- }
-
-// ----------------------------------------------------------------------------------------
-
- template <
- typename sample_matrix_type,
- typename label_matrix_type
- >
- double verbose_find_gamma_with_big_centroid_gap (
- const sample_matrix_type& samples,
- const label_matrix_type& labels,
- double initial_gamma = 0.1,
- unsigned long num_sv = 40
- )
- {
- DLIB_ASSERT(initial_gamma > 0 && num_sv > 0 && is_binary_classification_problem(samples, labels),
- "\t double verbose_find_gamma_with_big_centroid_gap()"
- << "\n\t initial_gamma: " << initial_gamma
- << "\n\t num_sv: " << num_sv
- << "\n\t is_binary_classification_problem(): " << is_binary_classification_problem(samples, labels)
- );
-
- return rank_features_helpers::find_gamma_with_big_centroid_gap_impl(mat(samples),
- mat(labels),
- initial_gamma,
- num_sv,
- true);
- }
-
-// ----------------------------------------------------------------------------------------
-
- template <
- typename vector_type
- >
- double compute_mean_squared_distance (
- const vector_type& samples
- )
- {
- running_stats<double> rs;
- for (unsigned long i = 0; i < samples.size(); ++i)
- {
- for (unsigned long j = i+1; j < samples.size(); ++j)
- {
- rs.add(length_squared(samples[i] - samples[j]));
- }
- }
-
- return rs.mean();
- }
-
-// ----------------------------------------------------------------------------------------
-
-}
-
-#endif // DLIB_KERNEL_FEATURE_RANKINg_H_
-
-